Browse Source

implement multi-storage snap downloads

main
ghost 2 years ago
parent
commit
81f7ea1e1e
  1. 9
      README.md
  2. 12
      config/app.php.txt
  3. 10
      crontab/cleaner.php
  4. 6
      crontab/crawler.php
  5. BIN
      database/yggo.mwb
  6. 2
      library/ftp.php
  7. 65
      library/mysql.php
  8. BIN
      media/db-prototype.png
  9. 6
      public/explore.php
  10. 150
      public/file.php
  11. 36
      public/image.php
  12. 4
      public/search.php
  13. 0
      storage/snap/index.html

9
README.md

@ -38,7 +38,7 @@ sphinxsearch
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder * Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
* Install [Sphinx Search Server](https://sphinxsearch.com) * Install [Sphinx Search Server](https://sphinxsearch.com)
* Configuration examples are placed at `/config` folder * Configuration examples are placed at `/config` folder
* Make sure `/storage/cache`, `/storage/tmp`, `/public/snap` folders writable * Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders writable
* Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt) * Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
#### JSON API #### JSON API
@ -142,7 +142,8 @@ GET m=SphinxQL
##### Basic features ##### Basic features
* [x] Web pages full text ranking search based on Sphinx engine * [x] Web pages full text ranking search
+ [x] Sphinx
* [x] Unlimited content MIME crawling * [x] Unlimited content MIME crawling
* [x] Flexible settings compatible with IPv4/IPv6 networks * [x] Flexible settings compatible with IPv4/IPv6 networks
* [x] Extended search syntax support * [x] Extended search syntax support
@ -151,6 +152,7 @@ GET m=SphinxQL
+ [x] Remote + [x] Remote
+ [x] MEGAcmd/FTP + [x] MEGAcmd/FTP
+ [ ] Yggdrasil over NAT + [ ] Yggdrasil over NAT
+ [x] Privacy-oriented download counting and traffic controls
##### UI ##### UI
@ -196,6 +198,9 @@ GET m=SphinxQL
##### Cleaner ##### Cleaner
* [x] Deprecated DB items auto deletion / host settings update * [x] Deprecated DB items auto deletion / host settings update
+ [x] Pages + [x] Pages
+ [x] Snaps
+ [x] Snap downloads
+ [ ] Unrelated snap files
+ [x] Manifests + [x] Manifests
+ [x] Logs + [x] Logs
+ [x] Crawler + [x] Crawler

12
config/app.php.txt

@ -63,6 +63,18 @@ define('WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT', 100);
*/ */
define('WEBSITE_IDENTICON_IMAGE_CACHE', true); define('WEBSITE_IDENTICON_IMAGE_CACHE', true);
/*
* Total size of snap files, in bytes, that one IP may download within the WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET period
*
*/
define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE', 10485760);
/*
* Length of the quota period, in seconds, over which WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE is counted
*
*/
define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET', 60*60);
// Database // Database
define('DB_HOST', '127.0.0.1'); define('DB_HOST', '127.0.0.1');
define('DB_PORT', 3306); define('DB_PORT', 3306);

10
crontab/cleaner.php

@ -93,7 +93,7 @@ try {
if ($snapFileLocalExists) { if ($snapFileLocalExists) {
if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { if (unlink('../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {
$snapFileLocalExists = false; $snapFileLocalExists = false;
} }
@ -113,6 +113,9 @@ try {
} }
if (!$snapFileLocalExists && !$snapFileMegaExists) { if (!$snapFileLocalExists && !$snapFileMegaExists) {
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
} }
} }
@ -146,7 +149,7 @@ try {
if ($snapFileLocalExists) { if ($snapFileLocalExists) {
if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) { if (unlink('../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {
$snapFileLocalExists = false; $snapFileLocalExists = false;
} }
@ -166,6 +169,9 @@ try {
} }
if (!$snapFileLocalExists && !$snapFileMegaExists) { if (!$snapFileLocalExists && !$snapFileMegaExists) {
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId); $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
} }
} }

6
crontab/crawler.php

@ -427,7 +427,7 @@ try {
$crc32data = crc32($content); $crc32data = crc32($content);
// Create not duplicated data snaps only, even new time // Create not duplicated data snaps only, even new time
if (!$db->getHostPageSnap($queueHostPage->hostPageId, $crc32data)) { if (!$db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) {
$snapTime = time(); $snapTime = time();
$snapPath = chunk_split($queueHostPage->hostPageId, 1, '/'); $snapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
@ -462,9 +462,9 @@ try {
// Copy tmp snap to the permanent local storage // Copy tmp snap to the permanent local storage
if ($snapLocal) { if ($snapLocal) {
@mkdir('../public/snap/hp/' . $snapPath, 0755, true); @mkdir('../storage/snap/hp/' . $snapPath, 0755, true);
if (copy($snapTmp, '../public/snap/hp/' . $snapPath . $snapTime . '.zip')) { if (copy($snapTmp, '../storage/snap/hp/' . $snapPath . $snapTime . '.zip')) {
// Update snap location info // Update snap location info
$db->updateHostPageSnapStorageLocal($hostPageSnapId, true); $db->updateHostPageSnapStorageLocal($hostPageSnapId, true);

BIN
database/yggo.mwb

Binary file not shown.

2
library/ftp.php

@ -50,7 +50,7 @@ class Ftp {
// Side-by-side diff row (removed | added).
// ftp_get() takes the local filename before the remote filename, so the added side
// passes $target first and $source second.
// NOTE(review): the caller in public/file.php passes the remote snap path as $source
// and 'php://output' as $target, which matches that reading - confirm for other callers.
public function get(string $source, string $target) { public function get(string $source, string $target) {
return ftp_get($this->_connection, $source, $target); return ftp_get($this->_connection, $target, $source);
} }
public function mkdir(string $name, bool $recursive = false) { public function mkdir(string $name, bool $recursive = false) {

65
library/mysql.php

@ -416,15 +416,75 @@ class MySQL {
return $query->fetchAll(); return $query->fetchAll();
} }
// Side-by-side diff row (removed | added).
// Removed side: two-argument lookup whose query compared `hostPageId` twice and never
// filtered by `crc32data`.
// Added side: loads a single `hostPageSnap` row by its `hostPageSnapId` and returns the
// result of fetch() for that statement.
public function getHostPageSnap(int $hostPageId, int $crc32data) { public function getHostPageSnap(int $hostPageSnapId) {
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `hostPageId` = ? LIMIT 1'); $query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
$query->execute([$hostPageSnapId]);
return $query->fetch();
}
// Looks up one `hostPageSnap` row for the given `hostPageId` whose `crc32data` column
// equals the supplied checksum, and returns the result of fetch() for that statement.
// The crawler calls this before creating a snap and skips creation when a row is found.
public function findHostPageSnap(int $hostPageId, int $crc32data) {
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `crc32data` = ? LIMIT 1');
$query->execute([$hostPageId, $crc32data]); $query->execute([$hostPageId, $crc32data]);
return $query->fetch(); return $query->fetch();
} }
/* not in use
public function getHostPageSnapDownloads(int $hostPageSnapId) {
$query = $this->_db->prepare('SELECT * FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ? LIMIT 1');
$query->execute([$hostPageSnapId]);
return $query->fetchAll();
}
*/
/**
 * Insert a `hostPageSnapDownload` row for a snap download request.
 *
 * @param int    $hostPageSnapId snap being requested
 * @param string $crc32ip        checksum of the requesting address
 * @param int    $timeAdded      registration time, as passed by the caller
 *
 * @return mixed value of lastInsertId() for the connection after the insert
 */
public function addHostPageSnapDownload(int $hostPageSnapId, string $crc32ip, int $timeAdded) {

    $sql = 'INSERT INTO `hostPageSnapDownload` (`hostPageSnapId`,
`crc32ip`,
`timeAdded`) VALUES (?, ?, ?)';

    // Placeholder order: hostPageSnapId, crc32ip, timeAdded
    $values = [$hostPageSnapId, $crc32ip, $timeAdded];

    $statement = $this->_db->prepare($sql);
    $statement->execute($values);

    return $this->_db->lastInsertId();
}
/**
 * Store the storage label and byte size on an existing `hostPageSnapDownload` row.
 *
 * @param int    $hostPageSnapDownloadId row to update
 * @param string $storage                storage label written to the `storage` column
 * @param int    $size                   size written to the `size` column
 *
 * @return int rowCount() of the UPDATE statement
 */
public function updateHostPageSnapDownload(int $hostPageSnapDownloadId, string $storage, int $size) {

    $sql = 'UPDATE `hostPageSnapDownload` SET `storage` = ?, `size` = ? WHERE `hostPageSnapDownloadId` = ? LIMIT 1';

    // Placeholder order follows the statement: storage, size, then the row id
    $values = [$storage, $size, $hostPageSnapDownloadId];

    $statement = $this->_db->prepare($sql);
    $statement->execute($values);

    return $statement->rowCount();
}
/**
 * Delete every `hostPageSnapDownload` row that belongs to the given snap.
 *
 * The cleaner calls this right before deleting the snap itself, so all download
 * records of the snap have to go. The previous `LIMIT 1` removed a single row only
 * and left the remaining records behind.
 *
 * @param int $hostPageSnapId snap whose download records are removed
 *
 * @return int rowCount() of the DELETE statement
 */
public function deleteHostPageSnapDownloads(int $hostPageSnapId) {

    $query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ?');

    $query->execute([$hostPageSnapId]);

    return $query->rowCount();
}
/**
 * Sum the `size` of downloads registered for an address checksum inside the quota window.
 *
 * The caller passes `time() - WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET` as
 * $timeOffset, so the rows that count towards the quota are the ones added AFTER that
 * moment. The previous `<` comparison summed everything older than the window instead.
 *
 * @param int $crc32ip    checksum of the requesting address
 * @param int $timeOffset lower bound (exclusive) for `timeAdded`
 *
 * @return int total size of matching rows, 0 when there are none
 */
public function findHostPageSnapDownloadsTotalSize(int $crc32ip, int $timeOffset) {

    $query = $this->_db->prepare('SELECT SUM(`size`) AS `size` FROM `hostPageSnapDownload`
                                  WHERE `crc32ip` = ? AND `timeAdded` > ?');

    $query->execute([$crc32ip, $timeOffset]);

    // SUM() yields NULL when no row matches; normalise so callers always compare integers
    return (int) $query->fetch()->size;
}
// Cleaner tools // Cleaner tools
public function getCleanerQueue(int $limit, int $timeFrom) { public function getCleanerQueue(int $limit, int $timeFrom) {
@ -654,6 +714,7 @@ class MySQL {
$this->_db->query('OPTIMIZE TABLE `hostPage`'); $this->_db->query('OPTIMIZE TABLE `hostPage`');
$this->_db->query('OPTIMIZE TABLE `hostPageDescription`'); $this->_db->query('OPTIMIZE TABLE `hostPageDescription`');
$this->_db->query('OPTIMIZE TABLE `hostPageSnap`'); $this->_db->query('OPTIMIZE TABLE `hostPageSnap`');
$this->_db->query('OPTIMIZE TABLE `hostPageSnapDownload`');
$this->_db->query('OPTIMIZE TABLE `hostPageToHostPage`'); $this->_db->query('OPTIMIZE TABLE `hostPageToHostPage`');
$this->_db->query('OPTIMIZE TABLE `logCleaner`'); $this->_db->query('OPTIMIZE TABLE `logCleaner`');

BIN
media/db-prototype.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 161 KiB

After

Width:  |  Height:  |  Size: 185 KiB

6
public/explore.php

@ -223,7 +223,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the
<?php } ?> <?php } ?>
<?php } ?> <?php } ?>
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>"> <a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" /> <img src="<?php echo WEBSITE_DOMAIN; ?>/file.php?type=identicon&query=<?php echo urlencode($hostPage->name) ?>" alt="identicon" width="16" height="16" class="icon" />
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . urldecode($hostPage->uri)) ?> <?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . urldecode($hostPage->uri)) ?>
</a> </a>
</div> </div>
@ -244,7 +244,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the
<?php if ($totalHostPageSnaps) { ?> <?php if ($totalHostPageSnaps) { ?>
<?php foreach ($db->getHostPageSnaps($hp) as $hostPageSnap) { ?> <?php foreach ($db->getHostPageSnaps($hp) as $hostPageSnap) { ?>
<p> <p>
<a href="<?php echo WEBSITE_DOMAIN . '/snap/hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip' ?>"> <a href="<?php echo WEBSITE_DOMAIN . '/file.php?type=snap&hps=' . $hostPageSnap->hostPageSnapId ?>">
<?php echo date('c', $hostPageSnap->timeAdded) ?> <?php echo date('c', $hostPageSnap->timeAdded) ?>
</a> </a>
</p> </p>
@ -262,7 +262,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the
<?php if ($hostPage = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?> <?php if ($hostPage = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>
<p> <p>
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>"> <a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" /> <img src="<?php echo WEBSITE_DOMAIN; ?>/file.php?type=identicon&query=<?php echo urlencode($hostPage->name) ?>" alt="identicon" width="16" height="16" class="icon" />
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?> <?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?>
</a> </a>
| |

150
public/file.php

@ -0,0 +1,150 @@
<?php

/*
 * Public file gateway: serves identicon images and page snap archives
 *
 * GET parameters:
 *   type                  - `identicon` or `snap`, anything else answers with 404
 *   query                 - identicon source string (type=identicon)
 *   width, height, radius - optional identicon geometry (type=identicon)
 *   hps                   - hostPageSnapId of the archive to download (type=snap)
 */

require_once('../config/app.php');
require_once('../library/icon.php');
require_once('../library/mysql.php');
require_once('../library/ftp.php');

$type = !empty($_GET['type']) ? $_GET['type'] : false;

switch ($type) {

  case 'identicon':

    // A missing or non-string `query` is hashed as an empty string, so the request raises
    // neither an undefined index warning nor a TypeError inside md5()
    $query  = md5(isset($_GET['query']) && is_string($_GET['query']) ? $_GET['query'] : '');

    $width  = isset($_GET['width'])  ? (int) $_GET['width']  : 16;
    $height = isset($_GET['height']) ? (int) $_GET['height'] : 16;
    $radius = isset($_GET['radius']) ? (int) $_GET['radius'] : 0;

    header('Content-Type: image/webp');

    if (WEBSITE_IDENTICON_IMAGE_CACHE) {

      // NOTE(review): the cache key is the query hash only, so a cached image is reused
      // for requests with a different width / height / radius - confirm this is intended
      $filename = dirname(__FILE__) . '/../storage/cache/' . $query . '.webp';

      if (!file_exists($filename)) {

        $icon = new Icon();

        file_put_contents($filename, $icon->generateImageResource($query, $width, $height, false, $radius));
      }

      echo file_get_contents($filename);

    } else {

      $icon = new Icon();

      echo $icon->generateImageResource($query, $width, $height, false, $radius);
    }

  break;

  case 'snap':

    // Connect database
    $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);

    // Get snap details from DB
    if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) {

      // Init variables
      $crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '');
      $time    = time();

      $hostPageDownloadsTotalSize = $db->findHostPageSnapDownloadsTotalSize($crc32ip, $time - WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET);

      // Check for downloading quotas
      if ($hostPageDownloadsTotalSize >= WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE) {

        header('HTTP/1.0 403 Forbidden');

        echo _('403 Access forbidden by requests quota');

        exit;
      }

      // Register snap download
      $hostPageSnapDownloadId = $db->addHostPageSnapDownload($hostPageSnap->hostPageSnapId, $crc32ip, $time);

      // Init variables
      $snapSize = 0;
      $snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip';

      // Resolve the local storage from the script location, same as the identicon cache path,
      // instead of depending on the current working directory
      $snapPath = dirname(__FILE__) . '/../storage/snap/' . $snapFile;

      // Content-Disposition requires a disposition type in front of its parameters
      $snapDisposition = sprintf('Content-Disposition: attachment; filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
                                                                                                  $hostPageSnap->hostPageId,
                                                                                                  $hostPageSnap->timeAdded);

      // Download local snap in higher priority if possible
      if ($hostPageSnap->storageLocal && file_exists($snapPath) && is_readable($snapPath)) {

        $snapSize = (int) @filesize($snapPath);

        $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'local', $snapSize);

        header('Content-Type: application/zip');
        header(sprintf('Content-Length: %s', $snapSize));
        header($snapDisposition);

        readfile($snapPath);

      // Then try to download from MEGA storage if exists
      } else if ($hostPageSnap->storageMega) {

        $ftp = new Ftp();

        if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {

          // NOTE(review): Ftp::size() is not visible here; if it forwards ftp_size(), a failure
          // is reported as -1, which is truthy. Only a positive size is treated as "file found"
          $snapSize = (int) $ftp->size($snapFile);

          if ($snapSize > 0) {

            $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);

            header('Content-Type: application/zip');
            header(sprintf('Content-Length: %s', $snapSize));
            header($snapDisposition);

            $ftp->get($snapFile, 'php://output');

          } else {

            // Nothing was transferred, do not charge the quota with a bogus size
            $snapSize = 0;

            $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);

            header('HTTP/1.0 404 Not Found');

            echo _('404 File not found');
          }

        } else {

          $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize);

          header('HTTP/1.0 404 Not Found');

          echo _('404 File not found');
        }

      // Return 404 when file not found
      } else {

        $db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'other', $snapSize);

        header('HTTP/1.0 404 Not Found');

        echo _('404 File not found');
      }

    } else {

      header('HTTP/1.0 404 Not Found');

      echo _('404 Snap not found');
    }

  break;

  default:

    header('HTTP/1.0 404 Not Found');

    echo _('404');
}

36
public/image.php

@ -1,36 +0,0 @@
<?php

/*
 * Identicon endpoint
 *
 * Emits a WebP identicon for the `q` GET parameter; `width`, `height` and `radius`
 * are optional. Produces no output when `q` is not set.
 */

require_once('../config/app.php');
require_once('../library/icon.php');

if (isset($_GET['q'])) {

  $hash = md5($_GET['q']);

  $width  = (int) ($_GET['width']  ?? 16);
  $height = (int) ($_GET['height'] ?? 16);
  $radius = (int) ($_GET['radius'] ?? 0);

  header('Content-Type: image/webp');

  if (!WEBSITE_IDENTICON_IMAGE_CACHE) {

    // Cache disabled: render on every request
    $generator = new Icon();

    echo $generator->generateImageResource($hash, $width, $height, false, $radius);

  } else {

    $cachePath = dirname(__FILE__) . '/../storage/cache/' . $hash . '.webp';

    // Render once, then keep serving the stored file
    if (!file_exists($cachePath)) {

      $generator = new Icon();

      file_put_contents($cachePath, $generator->generateImageResource($hash, $width, $height, false, $radius));
    }

    echo file_get_contents($cachePath);
  }
}

4
public/search.php

@ -335,7 +335,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
<?php } ?> <?php } ?>
<?php } ?> <?php } ?>
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>"> <a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" /> <img src="<?php echo WEBSITE_DOMAIN; ?>/file.php?type=identicon&query=<?php echo urlencode($hostPage->name) ?>" alt="identicon" width="16" height="16" class="icon" />
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?> <?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?>
</a> </a>
| |
@ -355,7 +355,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
<?php $i++ ?> <?php $i++ ?>
<p> <p>
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>"> <a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" /> <img src="<?php echo WEBSITE_DOMAIN; ?>/file.php?type=identicon&query=<?php echo urlencode($hostPage->name) ?>" alt="identicon" width="16" height="16" class="icon" />
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?> <?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 32 ? '...' . mb_substr(urldecode($hostPage->uri), -32) : urldecode($hostPage->uri))) ?>
</a> </a>
<!-- <!--

0
public/snap/index.html → storage/snap/index.html

Loading…
Cancel
Save