diff --git a/README.md b/README.md index fdf8339..5fddf3f 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ sphinxsearch * The web root dir is `/public` * Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder -* Install [Sphinx Search Server](https://sphinxsearch.com), [MEGAcmd](https://mega.nz/cmd) (on remote snaps enabled) +* Install [Sphinx Search Server](https://sphinxsearch.com) * Configuration examples presented at `/config` folder * Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable * Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt) @@ -155,10 +155,8 @@ GET m=SphinxQL * [x] Flexible settings compatible with IPv4/IPv6 networks * [x] Extended search syntax support * [x] Compressed page history snaps with multi-provider storage sync - + [x] Local - + [x] Remote - + [x] MEGAcmd/FTP - + [ ] Yggdrasil over NAT + + [x] Local (unlimited locations) + + [x] Remote FTP (unlimited mirrors) + [x] Privacy-oriented downloads counting, traffic controls ##### UI @@ -213,7 +211,7 @@ GET m=SphinxQL * [x] Deprecated DB items auto deletion / host settings update + [x] Pages + [x] Snaps - + [x] Snap downloads + + [ ] Snap downloads + [ ] Missed snap file relations + [x] Manifests + [x] Logs @@ -232,7 +230,7 @@ GET m=SphinxQL + [x] generate + [x] truncate * [x] hostPageSnap - + [x] truncate + + [ ] truncate * [ ] hostPage + [ ] add diff --git a/cli/yggo.php b/cli/yggo.php index c32cee9..f817673 100644 --- a/cli/yggo.php +++ b/cli/yggo.php @@ -176,56 +176,6 @@ switch ($argv[1]) { } break; - case 'hostPageSnap': - - if (empty($argv[2])) { - echo PHP_EOL . _('hostPageSnap method requires action argument') . PHP_EOL; - } - - switch ($argv[2]) { - - case 'truncate': - - foreach ($db->getHosts() as $host) { - - foreach ($db->getHostPages($host->hostId) as $hostPage) { - - $snapFilePath = chunk_split($hostPage->hostPageId, 1, '/'); - - foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) { - - if ($hostPageSnap->storageLocal) { - - unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - - if ($hostPageSnap->storageMega) { - - $ftp = new Ftp(); - - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { - $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - } - - $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); - $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); - - // @TODO reset primary key indexes - } - } - } - - echo _('hostPageSnap, hostPageSnapDownload tables successfully truncated') . PHP_EOL; - exit; - - break; - default: - - echo PHP_EOL . _('undefined action argument') . PHP_EOL; - } - - break; } // Default message @@ -242,7 +192,6 @@ echo _(' crawl - execute crawler step in the crontab echo _(' clean - execute cleaner step in the crontab queue') . PHP_EOL; echo _(' hostPage rank reindex - generate rank indexes in hostPage table') . PHP_EOL; echo _(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field') . PHP_EOL; -echo _(' hostPageDom truncate - flush hostPageDom table') . PHP_EOL; -echo _(' hostPageSnap truncate - flush hostPageSnap, hostPageSnapDownload tables') . PHP_EOL . PHP_EOL; +echo _(' hostPageDom truncate - flush hostPageDom table') . PHP_EOL . PHP_EOL; echo _('get support: https://github.com/YGGverse/YGGo/issues') . PHP_EOL . PHP_EOL; diff --git a/config/app.php.txt b/config/app.php.txt index 252f44e..7376e52 100644 --- a/config/app.php.txt +++ b/config/app.php.txt @@ -63,18 +63,6 @@ define('WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT', 100); */ define('WEBSITE_IDENTICON_IMAGE_CACHE', true); -/* - * Total snap files size allowed to download in bytes in WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET period - * - */ -define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE', 10485760); - -/* - * Time offset quota when WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE reached - * - */ -define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET', 60*60); - // Database define('DB_HOST', '127.0.0.1'); define('DB_PORT', 3306); @@ -90,19 +78,60 @@ define('SPHINX_PORT', 9306); define('MEMCACHED_HOST', '127.0.0.1'); define('MEMCACHED_PORT', 11211); -// Third-party connections (optional) - -/* - * Mega.nz remote storage - * - * FTP storage integration through MEGAcmd (https://mega.io/cmd) - * - * Connect mega-ftp instance on CRAWL_PAGE_MIME_SNAP_MEGA enabled - * - */ -define('MEGA_FTP_HOST', '127.0.0.1'); -define('MEGA_FTP_PORT', 4990); -define('MEGA_FTP_DIRECTORY', ''); +// Snaps + +/* + * Storage nodes configuration + * + * Supports optional single 'localhost' and multiple 'FTP' servers + * + * Comment specified node to disable specified connection + * + * Make empty array to disable snaps or set quote.mime = false or quote.size = 0 to disable specified instance + * + */ +define('SNAP_STORAGE', json_encode((object) + [ + 'localhost' => [ + [ + 'directory' => __DIR__ . '/../storage/snap/hp/', + 'quota' => [ + 'mime' => false, + 'size' => 10000000024, + 'request' => [ + 'download' => [ + 'size' => 10000024, + 'seconds' => 60*60 + ] + ] + ], + // ... + ] + ], + 'ftp' => [ + [ + 'port' => 21, + 'host' => '', + 'username' => '', + 'password' => '', + 'directory' => '/snap', + 'timeout' => 30, + 'passive' => true, + 'quota' => [ + 'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico', + 'size' => 10000000024, + 'request' => [ + 'download' => [ + 'size' => 10000024, + 'seconds' => 60*60 + ] + ] + ], + ], + // ... + ] + ] +)); // Proxy settings @@ -217,28 +246,6 @@ define('CRAWL_PAGE_HOME_SECONDS_OFFSET', 60*60*24*7); */ define('CRAWL_PAGE_MIME_INDEX', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/x-mpegurl,audio/flac'); -/* - * Snap pages locally match MIME types - * - * comma separated | false to disable - * - */ -define('CRAWL_PAGE_MIME_SNAP_LOCAL', 'text/html'); - -/* - * Snap pages to mega.nz match MIME types - * - * comma separated | false to disable - * - * Requires connection: - * - * MEGA_FTP_HOST - * MEGA_FTP_PORT - * MEGA_FTP_DIRECTORY - * - */ -define('CRAWL_PAGE_MIME_SNAP_MEGA', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico'); - /* * Renew manifests index by timing offset provided * @@ -289,9 +296,6 @@ define('CRAWL_HOST_DEFAULT_STATUS', true); * this option disabled requires huge disk storage, * it's experimental feature, oriented for index operations * - * see CRAWL_PAGE_MIME_SNAP_LOCAL - * to create compressed data snaps - * */ define('CRAWL_HOST_DEFAULT_META_ONLY', true); diff --git a/crontab/cleaner.php b/crontab/cleaner.php index fc35fc6..ae2b0d0 100644 --- a/crontab/cleaner.php +++ b/crontab/cleaner.php @@ -93,23 +93,43 @@ try { foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) { - if ($hostPageSnap->storageLocal) { + // Delete snap files + foreach (json_decode(SNAP_STORAGE) as $name => $storages) { - unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } + foreach ($storages as $storage) { - if ($hostPageSnap->storageMega) { + // Generate storage id + $crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex)); - $ftp = new Ftp(); + switch ($name) { - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { - $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - } + case 'localhost': + + @unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); + + break; + case 'ftp': + + $ftp = new Ftp(); - $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { + $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); + } - $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + break; + } + + // Clean up DB registry + foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) { + + $db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId); + } + + $db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId); + + $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + } + } } // Delete host page @@ -139,23 +159,43 @@ try { foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) { - if ($hostPageSnap->storageLocal) { + // Delete snap files + foreach (json_decode(SNAP_STORAGE) as $name => $storages) { - unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } + foreach ($storages as $storage) { - if ($hostPageSnap->storageMega) { + // Generate storage id + $crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex)); - $ftp = new Ftp(); + switch ($name) { - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { - $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - } + case 'localhost': - $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); + @unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + break; + case 'ftp': + + $ftp = new Ftp(); + + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { + $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); + } + + break; + } + + // Clean up DB registry + foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) { + + $db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId); + } + + $db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId); + + $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + } + } } // Delete host page @@ -225,39 +265,59 @@ try { $hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET); // Clean up banned pages extra data - foreach ($db->getHostPagesBanned() as $hostPageBanned) { + foreach ($db->getHostPagesBanned() as $hostPage) { // Delete host page descriptions - $hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPageBanned->hostPageId); + $hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPage->hostPageId); // Delete host page DOMs - $hostPagesDomsDeleted += $db->deleteHostPageDoms($hostPageBanned->hostPageId); + $hostPagesDomsDeleted += $db->deleteHostPageDoms($hostPage->hostPageId); // Delete host page refs data - $hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPageBanned->hostPageId); + $hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId); // Delete host page snaps - $snapFilePath = chunk_split($hostPageBanned->hostPageId, 1, '/'); + $snapFilePath = chunk_split($hostPage->hostPageId, 1, '/'); - foreach ($db->getHostPageSnaps($hostPageBanned->hostPageId) as $hostPageSnap) { + foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) { - if ($hostPageSnap->storageLocal) { + // Delete snap files + foreach (json_decode(SNAP_STORAGE) as $name => $storages) { - unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } + foreach ($storages as $storage) { - if ($hostPageSnap->storageMega) { + // Generate storage id + $crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex)); - $ftp = new Ftp(); + switch ($name) { - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { - $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - } + case 'localhost': - $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); + @unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + break; + case 'ftp': + + $ftp = new Ftp(); + + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { + $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); + } + + break; + } + + // Clean up DB registry + foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) { + + $db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId); + } + + $db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId); + + $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); + } + } } } @@ -271,35 +331,8 @@ try { $logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET); $logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET); - // Delete failed snaps - foreach ($db->getHosts() as $host) { - - foreach ($db->getHostPages($host->hostId) as $hostPage) { - - $snapFilePath = chunk_split($hostPage->hostPageId, 1, '/'); - - foreach ($db->getHostPageSnaps($hostPage->hostPageId, false, false, 'AND') as $hostPageSnap) { - - if ($hostPageSnap->storageLocal) { - - unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - - if ($hostPageSnap->storageMega) { - - $ftp = new Ftp(); - - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { - $ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip'); - } - } - - $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); - - $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); - } - } - } + // Delete failed snap files + // @TODO // Commit results $db->commit(); diff --git a/crontab/crawler.php b/crontab/crawler.php index 2c7c1b4..b2810aa 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -665,50 +665,18 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND } // Begin snaps - $snapLocal = false; - $snapMega = false; - - // Snap local enabled and MIME in white list - if (false !== CRAWL_PAGE_MIME_SNAP_LOCAL) { - - foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_LOCAL) as $mime) { - - // MIME type allowed in settings - if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) { - - $snapLocal = true; - break; - } - } - } - - // Snap MEGA enabled and MIME in white list - if (false !== CRAWL_PAGE_MIME_SNAP_MEGA) { - - foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_MEGA) as $mime) { - - // MIME type allowed in settings - if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) { - - $snapMega = true; - break; - } - } - } - - // At least one snap storage match settings condition - if ($snapLocal || $snapMega) { + if (SNAP_STORAGE) { $crc32data = crc32($content); - // Create not duplicated data snaps only, even new time + // Create not duplicated data snaps only, even newer by time added if (!$db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) { $snapTime = time(); $snapPath = chunk_split($queueHostPage->hostPageId, 1, '/'); $snapTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $snapPath . $snapTime . '.zip'; - @mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $snapPath, 0755, true); + @mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $snapPath, 0755, true); // Create new ZIP container $zip = new ZipArchive(); @@ -718,10 +686,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND // Insert compressed snap data into the tmp storage if (true === $zip->addFromString('DATA', $content) && true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $snapTime) . PHP_EOL . - sprintf('CRC32: %s', $crc32data . PHP_EOL . - sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL . - sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL . - sprintf('TARGET: %s', Filter::url($queueHostPageURL))))) { + sprintf('CRC32: %s', $crc32data . PHP_EOL . + sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL . + sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL . + sprintf('TARGET: %s', Filter::url($queueHostPageURL))))) { // Done $zip->close(); @@ -730,48 +698,103 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND if (file_exists($snapTmp)) { // Register snap in DB - if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, $snapTime)) { + if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, filesize($snapTmp), $snapTime)) { $hostPagesSnapAdded++; + } + } + } + } + } + + // Copy files to each storage + + $snapStorageIndex = 0; + + foreach (json_decode(SNAP_STORAGE) as $name => $storages) { + + foreach ($storages as $storage) { - // Copy tmp snap to the permanent local storage - if ($snapLocal) { + $snapStorageIndex++; - @mkdir(__DIR__ . '/../storage/snap/hp/' . $snapPath, 0755, true); + // Generate storage id + $crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex)); - if (copy($snapTmp, __DIR__ . '/../storage/snap/hp/' . $snapPath . $snapTime . '.zip')) { + switch ($name) { - // Update snap location info - $db->updateHostPageSnapStorageLocal($hostPageSnapId, true); - } + case 'localhost': + + // Validate size quota + if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2; + + // Validate mime + if (!$storage->quota->mime) continue 2; + + $snapMimeValid = false; + foreach ((array) explode(',', $storage->quota->mime) as $mime) { + + if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) { + + $snapMimeValid = true; + break; } + } - // Copy tmp snap to the permanent MEGA storage - if ($snapMega) { + if (!$snapMimeValid) continue 2; - $ftp = new Ftp(); + // Copy tmp snap file to the permanent storage + @mkdir($storage->directory . $snapPath, 0755, true); - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { + if (copy($snapTmp, $storage->directory . $snapPath . $snapTime . '.zip')) { - $ftp->mkdir('hp/' . $snapPath, true); + // Register storage name + $db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time()); + } + + break; + case 'ftp': + + // Validate size quota + if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2; - if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) { + // Validate mime + if (!$storage->quota->mime) continue 2; - // Update snap location info - $db->updateHostPageSnapStorageMega($hostPageSnapId, true); - } + $snapMimeValid = false; + foreach ((array) explode(',', $storage->quota->mime) as $mime) { - $ftp->close(); - } + if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) { + + $snapMimeValid = true; + break; } } - } + + if (!$snapMimeValid) continue 2; + + // Copy tmp snap file to the permanent storage + $ftp = new Ftp(); + + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { + + $ftp->mkdir('hp/' . $snapPath, true); + + if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) { + + // Register storage name + $db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time()); + } + + $ftp->close(); + } + + break; } } - - // Remove tmp - @unlink($snapTmp); } + + // Delete tmp snap + unlink($snapTmp); } // Skip page links following with meta robots:nofollow attribute diff --git a/database/yggo.mwb b/database/yggo.mwb index f06cc50..60b72b1 100644 Binary files a/database/yggo.mwb and b/database/yggo.mwb differ diff --git a/library/ftp.php b/library/ftp.php index 6e8af3c..56e4c2a 100644 --- a/library/ftp.php +++ b/library/ftp.php @@ -15,18 +15,14 @@ class Ftp { mixed $login = null, mixed $password = null, string $directory = '/', - int $timeout = 90) { + int $timeout = 90, + bool $passive = false) { if (!$this->_connection = ftp_connect($host, $port, $timeout)) { return false; } - if (!ftp_pasv($this->_connection, $this->_passive)) { - - return false; - } - if (!empty($login) && !empty($password)) { if (!ftp_login($this->_connection, $login, $password)) { @@ -35,6 +31,11 @@ class Ftp { } } + if ($passive && !ftp_pasv($this->_connection, $this->_passive)) { + + return false; + } + return ftp_chdir($this->_connection, $directory); } diff --git a/library/mysql.php b/library/mysql.php index 63724e2..59a6dbf 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -462,60 +462,56 @@ class MySQL { return $query->fetchAll(); } - public function addHostPageSnap(int $hostPageId, string $crc32data, int $timeAdded) { + public function addHostPageSnap(int $hostPageId, string $crc32data, int $size, int $timeAdded) { $query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`, `crc32data`, - `timeAdded`) VALUES (?, ?, ?)'); + `size`, + `timeAdded`) VALUES (?, ?, ?, ?)'); - $query->execute([$hostPageId, $crc32data, $timeAdded]); + $query->execute([$hostPageId, $crc32data, $size, $timeAdded]); return $this->_db->lastInsertId(); } - public function updateHostPageSnapStorageLocal(int $hostPageSnapId, mixed $value) { + public function deleteHostPageSnap(int $hostPageSnapId) { - $query = $this->_db->prepare('UPDATE `hostPageSnap` SET `storageLocal` = ? WHERE `hostPageSnapId` = ? LIMIT 1'); + $query = $this->_db->prepare('DELETE FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1'); - $query->execute([$value, $hostPageSnapId]); + $query->execute([$hostPageSnapId]); return $query->rowCount(); } - public function updateHostPageSnapStorageMega(int $hostPageSnapId, mixed $value) { + public function getTotalHostPageSnaps(int $hostPageId) { - $query = $this->_db->prepare('UPDATE `hostPageSnap` SET `storageMega` = ? WHERE `hostPageSnapId` = ? LIMIT 1'); + $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnap` WHERE `hostPageId` = ?'); - $query->execute([$value, $hostPageSnapId]); + $query->execute([$hostPageId]); - return $query->rowCount(); + return $query->fetch()->total; } - public function deleteHostPageSnap(int $hostPageSnapId) { + public function getHostPageSnaps(int $hostPageId) { - $query = $this->_db->prepare('DELETE FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1'); + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC'); - $query->execute([$hostPageSnapId]); + $query->execute([$hostPageId]); - return $query->rowCount(); + return $query->fetchAll(); } - public function getTotalHostPageSnaps(int $hostPageId, bool $storageLocal = true, bool $storageMega = true) { + public function getTotalHostPageSnapSizeByStorage(int $hostPageId, int $crc32name) { - $query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnap` WHERE `hostPageId` = ? AND (`storageLocal` = ? OR `storageMega` = ?)'); + $query = $this->_db->prepare('SELECT SUM(`hostPageSnap`.`size`) AS `total` FROM `hostPageSnap` + JOIN `hostPageSnapStorage` ON (`hostPageSnapStorage`.`hostPageSnapId` = `hostPageSnap`.`hostPageSnapId`) - $query->execute([$hostPageId, $storageLocal, $storageMega]); + WHERE `hostPageSnap`.`hostPageSnapId` = ? + AND `hostPageSnapStorage`.`crc32name` = ?'); - return $query->fetch()->total; - } + $query->execute([$hostPageId, $crc32name]); - public function getHostPageSnaps(int $hostPageId, bool $storageLocal = true, bool $storageMega = true, string $condition = 'OR') { - - $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND (`storageLocal` = ? ' . ($condition == 'OR' ? 'OR' : 'AND') . ' `storageMega` = ?) ORDER BY `timeAdded` DESC'); - - $query->execute([$hostPageId, $storageLocal, $storageMega]); - - return $query->fetchAll(); + return $query->fetch()->total; } public function getHostPageSnap(int $hostPageSnapId) { @@ -536,44 +532,62 @@ class MySQL { return $query->fetch(); } - public function addHostPageSnapDownload(int $hostPageSnapId, string $crc32ip, int $timeAdded) { + public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) { - $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapId`, + $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`, `crc32ip`, `timeAdded`) VALUES (?, ?, ?)'); - $query->execute([$hostPageSnapId, $crc32ip, $timeAdded]); + $query->execute([$hostPageSnapStorageId, $crc32ip, $timeAdded]); return $this->_db->lastInsertId(); } - public function updateHostPageSnapDownload(int $hostPageSnapDownloadId, string $storage, int $size, mixed $httpCode = NULL) { + public function addHostPageSnapStorage(int $hostPageSnapId, int $crc32name, int $timeAdded) { - $query = $this->_db->prepare('UPDATE `hostPageSnapDownload` SET `storage` = ?, `size` = ?, `httpCode` = ? WHERE `hostPageSnapDownloadId` = ? LIMIT 1'); + $query = $this->_db->prepare('INSERT INTO `hostPageSnapStorage` (`hostPageSnapId`, + `crc32name`, + `timeAdded`) VALUES (?, ?, ?)'); - $query->execute([$storage, $size, $httpCode, $hostPageSnapDownloadId]); + $query->execute([$hostPageSnapId, $crc32name, $timeAdded]); - return $query->rowCount(); + return $this->_db->lastInsertId(); + } + + public function getHostPageSnapStorageByCRC32Name(int $hostPageSnapId, int $crc32name) { + + $query = $this->_db->prepare('SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ? AND `crc32name` = ?'); + + $query->execute([$hostPageSnapId, $crc32name]); + + return $query->fetch(); } - public function deleteHostPageSnapDownloads(int $hostPageSnapId) { + public function getHostPageSnapStorages(int $hostPageSnapId) { - $query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ? LIMIT 1'); + $query = $this->_db->prepare('SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?'); $query->execute([$hostPageSnapId]); - return $query->rowCount(); + return $query->fetchAll(); } - public function findHostPageSnapDownloadsTotalSize(int $crc32ip, int $timeOffset) { + public function deleteHostPageSnapStorages(int $hostPageSnapId) { - $query = $this->_db->prepare('SELECT SUM(`size`) AS `size` FROM `hostPageSnapDownload` + $query = $this->_db->prepare('DELETE FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?'); - WHERE `crc32ip` = ? AND `timeAdded` < ?'); + $query->execute([$hostPageSnapId]); - $query->execute([$crc32ip, $timeOffset]); + return $query->rowCount(); + } - return $query->fetch()->size; + public function deleteHostPageSnapDownloads(int $hostPageSnapStorageId) { + + $query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapStorageId` = ?'); + + $query->execute([$hostPageSnapStorageId]); + + return $query->rowCount(); } public function addHostPageDom(int $hostPageId, int $timeAdded, string $selector, string $value) { diff --git a/media/db-prototype.png b/media/db-prototype.png index 4699644..7de3f9f 100644 Binary files a/media/db-prototype.png and b/media/db-prototype.png differ diff --git a/public/api.php b/public/api.php index 0031905..7ad232a 100644 --- a/public/api.php +++ b/public/api.php @@ -111,7 +111,6 @@ if (API_ENABLED) { 'crawlHostPageSecondsOffset' => CRAWL_PAGE_SECONDS_OFFSET, 'crawlHostPageHomeSecondsOffset' => CRAWL_PAGE_HOME_SECONDS_OFFSET, 'crawlHostPageMimeIndex' => CRAWL_PAGE_MIME_INDEX, - 'crawlHostPageMimeSnapLocal' => CRAWL_PAGE_MIME_SNAP_LOCAL, 'cleanHostSecondsOffset' => CLEAN_HOST_SECONDS_OFFSET, 'crawlRobotsDefaultRules' => CRAWL_ROBOTS_DEFAULT_RULES, 'crawlRobotsPostfixRules' => CRAWL_ROBOTS_POSTFIX_RULES, diff --git a/public/file.php b/public/file.php index efb209d..b86ccb6 100644 --- a/public/file.php +++ b/public/file.php @@ -46,102 +46,93 @@ switch ($type) { // Connect database $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD); + // Init request + $crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : ''); + // Get snap details from DB if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) { - // Init variables - $crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : ''); - $time = time(); - - $hostPageDownloadsTotalSize = $db->findHostPageSnapDownloadsTotalSize($crc32ip, $time - WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET); + // Get snap file - // Check for downloading quotas - if ($hostPageDownloadsTotalSize >= WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE) { + $snapStorageIndex = 0; - header('HTTP/1.0 403 Forbidden'); + foreach (json_decode(SNAP_STORAGE) as $name => $storages) { - echo _('403 Access forbidden by requests quota'); + foreach ($storages as $storage) { - exit; - } + $snapStorageIndex++; - // Register snap download - $hostPageSnapDownloadId = $db->addHostPageSnapDownload($hostPageSnap->hostPageSnapId, $crc32ip, $time); + // Generate storage id + $crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex)); - // Init variables - $snapSize = 0; - $snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip'; + switch ($name) { - // Download local snap in higher priority if possible - if ($hostPageSnap->storageLocal && file_exists(__DIR__ . '/../storage/snap/' . $snapFile) && - is_readable(__DIR__ . '/../storage/snap/' . $snapFile)) { + case 'localhost': - $snapSize = (int) @filesize(__DIR__ . '/../storage/snap/' . $snapFile); + if ($hostPageSnapStorage = $db->getHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) { - $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'local', $snapSize, 200); + // Check request quota + //if () - header('Content-Type: application/zip'); - header(sprintf('Content-Length: %s', $snapSize)); - header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId, - $hostPageSnap->hostPageId, - $hostPageSnap->timeAdded)); - readfile(__DIR__ . '/../storage/snap/' . $snapFile); + // Get file + $snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip'; - // Then try to download from MEGA storage if exists - } else if ($hostPageSnap->storageMega) { + // Download local snap in higher priority if possible + if (file_exists($storage->directory . $snapFile) && + is_readable($storage->directory . $snapFile)) { - $ftp = new Ftp(); + // Register snap download + $db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time()); - if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) { + // Return snap file + header('Content-Type: application/zip'); + header(sprintf('Content-Length: %s', $snapSize)); + header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId, + $hostPageSnap->hostPageId, + $hostPageSnap->timeAdded)); + readfile($storage->directory . $snapFile); - if ($snapSize = $ftp->size($snapFile)) { + exit; + } + } - $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 200); + break; + case 'ftp': - header('Content-Type: application/zip'); - header(sprintf('Content-Length: %s', $snapSize)); - header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId, - $hostPageSnap->hostPageId, - $hostPageSnap->timeAdded)); + if ($hostPageSnapStorage = $db->getHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) { - $ftp->get($snapFile, 'php://output'); + $ftp = new Ftp(); - } else { + if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) { - $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 404); + // Register snap download + $db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time()); - header('HTTP/1.0 404 Not Found'); - - echo _('404 File not found'); - } + // Return snap file + header('Content-Type: application/zip'); + header(sprintf('Content-Length: %s', $snapSize)); + header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId, + $hostPageSnap->hostPageId, + $hostPageSnap->timeAdded)); - } else { + $ftp->get($snapFile, 'php://output'); - $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 404); + exit; + } + } - header('HTTP/1.0 404 Not Found'); - - echo _('404 File not found'); + break; + } } - - // Return 404 when file not found - } else { - - $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'other', $snapSize, 404); - - header('HTTP/1.0 404 Not Found'); - - echo _('404 File not found'); } + } - } else { - - header('HTTP/1.0 404 Not Found'); + header('HTTP/1.0 404 Not Found'); - echo _('404 Snap not found'); - } + echo _('404 Snap not found'); break; + default: header('HTTP/1.0 404 Not Found');