diff --git a/README.md b/README.md index 82e995d..73e3634 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ sphinxsearch * Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder * Install [Sphinx Search Server](https://sphinxsearch.com) * Configuration examples are placed at `/config` folder -* Make sure `/storage/cache`, `/storage/tmp`, `/public/snap` folders writable +* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders writable * Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt) #### JSON API @@ -142,7 +142,8 @@ GET m=SphinxQL ##### Basic features -* [x] Web pages full text ranking search based on Sphinx engine +* [x] Web pages full text ranking search + + [x] Sphinx * [x] Unlimited content MIME crawling * [x] Flexible settings compatible with IPv4/IPv6 networks * [x] Extended search syntax support @@ -151,6 +152,7 @@ GET m=SphinxQL + [x] Remote + [x] MEGAcmd/FTP + [ ] Yggdrasil over NAT + + [x] Privacy-oriented downloads counting, traffic controls ##### UI @@ -196,6 +198,9 @@ GET m=SphinxQL ##### Cleaner * [x] Deprecated DB items auto deletion / host settings update + [x] Pages + + [x] Snaps + + [x] Snap downloads + + [ ] Not related snap files + [x] Manifests + [x] Logs + [x] Crawler diff --git a/config/app.php.txt b/config/app.php.txt index 4f54080..1decdb9 100644 --- a/config/app.php.txt +++ b/config/app.php.txt @@ -63,6 +63,18 @@ define('WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT', 100); */ define('WEBSITE_IDENTICON_IMAGE_CACHE', true); +/* + * Total snap files size allowed to download in bytes in WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET period + * + */ +define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE', 10485760); + +/* + * Time offset quota when WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE reached + * + */ +define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET', 60*60); + 
// Database define('DB_HOST', '127.0.0.1'); define('DB_PORT', 3306); diff --git a/crontab/cleaner.php b/crontab/cleaner.php index 91716b4..e2ac895 100644 --- a/crontab/cleaner.php +++ b/crontab/cleaner.php @@ -93,7 +93,7 @@ try { if ($snapFileLocalExists) { - if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { + if (unlink('../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { $snapFileLocalExists = false; } @@ -113,6 +113,9 @@ try { } if (!$snapFileLocalExists && !$snapFileMegaExists) { + + $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); + $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); } } @@ -146,7 +149,7 @@ try { if ($snapFileLocalExists) { - if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { + if (unlink('../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { $snapFileLocalExists = false; } @@ -166,6 +169,9 @@ try { } if (!$snapFileLocalExists && !$snapFileMegaExists) { + + $db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId); + $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); } } diff --git a/crontab/crawler.php b/crontab/crawler.php index d25ba0c..6d08673 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -427,7 +427,7 @@ try { $crc32data = crc32($content); // Create not duplicated data snaps only, even new time - if (!$db->getHostPageSnap($queueHostPage->hostPageId, $crc32data)) { + if (!$db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) { $snapTime = time(); $snapPath = chunk_split($queueHostPage->hostPageId, 1, '/'); @@ -462,9 +462,9 @@ try { // Copy tmp snap to the permanent local storage if ($snapLocal) { - @mkdir('../public/snap/hp/' . $snapPath, 0755, true); + @mkdir('../storage/snap/hp/' . $snapPath, 0755, true); - if (copy($snapTmp, '../public/snap/hp/' . $snapPath . $snapTime . 
'.zip')) { + if (copy($snapTmp, '../storage/snap/hp/' . $snapPath . $snapTime . '.zip')) { // Update snap location info $db->updateHostPageSnapStorageLocal($hostPageSnapId, true); diff --git a/database/yggo.mwb b/database/yggo.mwb index 0a4d9e3..a64fd96 100644 Binary files a/database/yggo.mwb and b/database/yggo.mwb differ diff --git a/library/ftp.php b/library/ftp.php index 1b51f0f..6e8af3c 100644 --- a/library/ftp.php +++ b/library/ftp.php @@ -50,7 +50,7 @@ class Ftp { public function get(string $source, string $target) { - return ftp_get($this->_connection, $source, $target); + return ftp_get($this->_connection, $target, $source); } public function mkdir(string $name, bool $recursive = false) { diff --git a/library/mysql.php b/library/mysql.php index 4d0f159..0370c5c 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -416,15 +416,75 @@ class MySQL { return $query->fetchAll(); } - public function getHostPageSnap(int $hostPageId, int $crc32data) { + public function getHostPageSnap(int $hostPageSnapId) { - $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `hostPageId` = ? LIMIT 1'); + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1'); + + $query->execute([$hostPageSnapId]); + + return $query->fetch(); + } + + public function findHostPageSnap(int $hostPageId, int $crc32data) { + + $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `crc32data` = ? LIMIT 1'); $query->execute([$hostPageId, $crc32data]); return $query->fetch(); } + /* not in use + public function getHostPageSnapDownloads(int $hostPageSnapId) { + + $query = $this->_db->prepare('SELECT * FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ? 
LIMIT 1');
+
+    $query->execute([$hostPageSnapId]);
+
+    return $query->fetchAll();
+  }
+  */
+
+  public function addHostPageSnapDownload(int $hostPageSnapId, string $crc32ip, int $timeAdded) {
+    // Register a download attempt for the snap and return the id of the inserted row
+    $query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapId`,
+                                                                      `crc32ip`,
+                                                                      `timeAdded`) VALUES (?, ?, ?)');
+
+    $query->execute([$hostPageSnapId, $crc32ip, $timeAdded]);
+
+    return $this->_db->lastInsertId();
+  }
+
+  public function updateHostPageSnapDownload(int $hostPageSnapDownloadId, string $storage, int $size) {
+    // Store the storage label and the size in bytes on one download record; returns the affected rows count
+    $query = $this->_db->prepare('UPDATE `hostPageSnapDownload` SET `storage` = ?, `size` = ? WHERE `hostPageSnapDownloadId` = ? LIMIT 1');
+
+    $query->execute([$storage, $size, $hostPageSnapDownloadId]);
+
+    return $query->rowCount();
+  }
+
+  public function deleteHostPageSnapDownloads(int $hostPageSnapId) {
+    // Delete ALL download records of the snap (no LIMIT: a snap may have many); returns the deleted rows count
+    $query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ?');
+
+    $query->execute([$hostPageSnapId]);
+
+    return $query->rowCount();
+  }
+
+  public function findHostPageSnapDownloadsTotalSize(int $crc32ip, int $timeOffset) {
+    // Total bytes downloaded by this IP hash at or after $timeOffset (unix time of the quota window start)
+    $query = $this->_db->prepare('SELECT SUM(`size`) AS `size` FROM `hostPageSnapDownload`
+
+                                  WHERE `crc32ip` = ? AND `timeAdded` >= ?');
+
+    $query->execute([$crc32ip, $timeOffset]);
+    // SUM() is NULL when no rows match, so normalize the result to an integer
+    return (int) $query->fetch()->size;
+  }
+
   // Cleaner tools
   public function getCleanerQueue(int $limit, int $timeFrom) {
 
@@ -654,6 +714,7 @@ class MySQL {
       $this->_db->query('OPTIMIZE TABLE `hostPage`');
       $this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
       $this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
+      $this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');
       $this->_db->query('OPTIMIZE TABLE `hostPageToHostPage`');
 
       $this->_db->query('OPTIMIZE TABLE `logCleaner`');
diff --git a/media/db-prototype.png b/media/db-prototype.png index da660df..c838292 100644 Binary files a/media/db-prototype.png and b/media/db-prototype.png differ diff --git a/public/explore.php b/public/explore.php index a5a3524..dabed90 100644 --- a/public/explore.php +++ b/public/explore.php @@ -223,7 +223,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the - favicon + identicon scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . urldecode($hostPage->uri)) ?> @@ -244,7 +244,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the getHostPageSnaps($hp) as $hostPageSnap) { ?>

- + timeAdded) ?>

@@ -262,7 +262,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>

- favicon + identicon scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?> | diff --git a/public/file.php b/public/file.php new file mode 100644 index 0000000..766b744 --- /dev/null +++ b/public/file.php @@ -0,0 +1,150 @@ +generateImageResource($query, $width, $height, false, $radius)); + } + + echo file_get_contents($filename); + + } else { + + $icon = new Icon(); + + echo $icon->generateImageResource($query, $width, $height, false, $radius); + } + + break; + case 'snap': + + // Connect database + $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD); + + // Get snap details from DB + if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) { + + // Init variables + $crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : ''); + $time = time(); + + $hostPageDownloadsTotalSize = $db->findHostPageSnapDownloadsTotalSize($crc32ip, $time - WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET); + + // Check for downloading quotas + if ($hostPageDownloadsTotalSize >= WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE) { + + header('HTTP/1.0 403 Forbidden'); + + echo _('403 Access forbidden by requests quota'); + + exit; + } + + // Register snap download + $hostPageSnapDownloadId = $db->addHostPageSnapDownload($hostPageSnap->hostPageSnapId, $crc32ip, $time); + + // Init variables + $snapSize = 0; + $snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip'; + + // Download local snap in higher priority if possible + if ($hostPageSnap->storageLocal && file_exists('../storage/snap/' . $snapFile) && + is_readable('../storage/snap/' . $snapFile)) { + + $snapSize = (int) @filesize('../storage/snap/' . 
$snapFile);
+
+        $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'local', $snapSize);
+        // Content-Disposition needs a disposition type ("attachment") before the filename parameter
+        header('Content-Type: application/zip');
+        header(sprintf('Content-Length: %s', $snapSize));
+        header(sprintf('Content-Disposition: attachment; filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
+                                                                                         $hostPageSnap->hostPageId,
+                                                                                         $hostPageSnap->timeAdded));
+        readfile('../storage/snap/' . $snapFile);
+
+      // Then try to download from MEGA storage if exists
+      } else if ($hostPageSnap->storageMega) {
+
+        $ftp = new Ftp();
+
+        if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
+
+          if ($snapSize = $ftp->size($snapFile)) {
+
+            $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);
+            // Same headers as the local branch; the remote size reported over FTP becomes Content-Length
+            header('Content-Type: application/zip');
+            header(sprintf('Content-Length: %s', $snapSize));
+            header(sprintf('Content-Disposition: attachment; filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
+                                                                                             $hostPageSnap->hostPageId,
+                                                                                             $hostPageSnap->timeAdded));
+
+            $ftp->get($snapFile, 'php://output');
+
+          } else {
+
+            $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);
+
+            header('HTTP/1.0 404 Not Found');
+
+            echo _('404 File not found');
+          }
+
+        } else {
+
+          $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);
+
+          header('HTTP/1.0 404 Not Found');
+
+          echo _('404 File not found');
+        }
+
+      // Return 404 when file not found
+      } else {
+
+        $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'other', $snapSize);
+
+        header('HTTP/1.0 404 Not Found');
+
+        echo _('404 File not found');
+      }
+
+    } else {
+
+      header('HTTP/1.0 404 Not Found');
+
+      echo _('404 Snap not found');
+    }
+
+  break;
+  default:
+
+    header('HTTP/1.0 404 Not Found');
+
+    echo _('404');
+}
diff --git a/public/image.php b/public/image.php deleted file mode 100644 index 1a00ff9..0000000 --- a/public/image.php +++ /dev/null @@ -1,36 +0,0 @@ -generateImageResource($hash, $width, $height, false, $radius)); - } - - echo 
file_get_contents($filename); - - } else { - - $icon = new Icon(); - - echo $icon->generateImageResource($hash, $width, $height, false, $radius); - } -} diff --git a/public/search.php b/public/search.php index a2fef99..ec45627 100644 --- a/public/search.php +++ b/public/search.php @@ -335,7 +335,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { - favicon + identicon scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?> | @@ -355,7 +355,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {

- favicon + identicon scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?>