mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-08 22:07:56 +00:00
implement unlimited snap storage mirrors, delete megaCMD integration
This commit is contained in:
parent
9b52e3b7f5
commit
712d67f6bf
12
README.md
12
README.md
@ -39,7 +39,7 @@ sphinxsearch
|
|||||||
|
|
||||||
* The web root dir is `/public`
|
* The web root dir is `/public`
|
||||||
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
|
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
|
||||||
* Install [Sphinx Search Server](https://sphinxsearch.com), [MEGAcmd](https://mega.nz/cmd) (on remote snaps enabled)
|
* Install [Sphinx Search Server](https://sphinxsearch.com)
|
||||||
* Configuration examples presented at `/config` folder
|
* Configuration examples presented at `/config` folder
|
||||||
* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable
|
* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable
|
||||||
* Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
|
* Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
|
||||||
@ -155,10 +155,8 @@ GET m=SphinxQL
|
|||||||
* [x] Flexible settings compatible with IPv4/IPv6 networks
|
* [x] Flexible settings compatible with IPv4/IPv6 networks
|
||||||
* [x] Extended search syntax support
|
* [x] Extended search syntax support
|
||||||
* [x] Compressed page history snaps with multi-provider storage sync
|
* [x] Compressed page history snaps with multi-provider storage sync
|
||||||
+ [x] Local
|
+ [x] Local (unlimited locations)
|
||||||
+ [x] Remote
|
+ [x] Remote FTP (unlimited mirrors)
|
||||||
+ [x] MEGAcmd/FTP
|
|
||||||
+ [ ] Yggdrasil over NAT
|
|
||||||
+ [x] Privacy-oriented downloads counting, traffic controls
|
+ [x] Privacy-oriented downloads counting, traffic controls
|
||||||
|
|
||||||
##### UI
|
##### UI
|
||||||
@ -213,7 +211,7 @@ GET m=SphinxQL
|
|||||||
* [x] Deprecated DB items auto deletion / host settings update
|
* [x] Deprecated DB items auto deletion / host settings update
|
||||||
+ [x] Pages
|
+ [x] Pages
|
||||||
+ [x] Snaps
|
+ [x] Snaps
|
||||||
+ [x] Snap downloads
|
+ [ ] Snap downloads
|
||||||
+ [ ] Missed snap file relations
|
+ [ ] Missed snap file relations
|
||||||
+ [x] Manifests
|
+ [x] Manifests
|
||||||
+ [x] Logs
|
+ [x] Logs
|
||||||
@ -232,7 +230,7 @@ GET m=SphinxQL
|
|||||||
+ [x] generate
|
+ [x] generate
|
||||||
+ [x] truncate
|
+ [x] truncate
|
||||||
* [x] hostPageSnap
|
* [x] hostPageSnap
|
||||||
+ [x] truncate
|
+ [ ] truncate
|
||||||
* [ ] hostPage
|
* [ ] hostPage
|
||||||
+ [ ] add
|
+ [ ] add
|
||||||
|
|
||||||
|
53
cli/yggo.php
53
cli/yggo.php
@ -176,56 +176,6 @@ switch ($argv[1]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'hostPageSnap':
|
|
||||||
|
|
||||||
if (empty($argv[2])) {
|
|
||||||
echo PHP_EOL . _('hostPageSnap method requires action argument') . PHP_EOL;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch ($argv[2]) {
|
|
||||||
|
|
||||||
case 'truncate':
|
|
||||||
|
|
||||||
foreach ($db->getHosts() as $host) {
|
|
||||||
|
|
||||||
foreach ($db->getHostPages($host->hostId) as $hostPage) {
|
|
||||||
|
|
||||||
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageLocal) {
|
|
||||||
|
|
||||||
unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageMega) {
|
|
||||||
|
|
||||||
$ftp = new Ftp();
|
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
|
|
||||||
$db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
|
||||||
|
|
||||||
// @TODO reset primary key indexes
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
echo _('hostPageSnap, hostPageSnapDownload tables successfully truncated') . PHP_EOL;
|
|
||||||
exit;
|
|
||||||
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
|
|
||||||
echo PHP_EOL . _('undefined action argument') . PHP_EOL;
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default message
|
// Default message
|
||||||
@ -242,7 +192,6 @@ echo _(' crawl - execute crawler step in the crontab
|
|||||||
echo _(' clean - execute cleaner step in the crontab queue') . PHP_EOL;
|
echo _(' clean - execute cleaner step in the crontab queue') . PHP_EOL;
|
||||||
echo _(' hostPage rank reindex - generate rank indexes in hostPage table') . PHP_EOL;
|
echo _(' hostPage rank reindex - generate rank indexes in hostPage table') . PHP_EOL;
|
||||||
echo _(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field') . PHP_EOL;
|
echo _(' hostPageDom generate [selectors] - make hostPageDom index based on related hostPage.data field') . PHP_EOL;
|
||||||
echo _(' hostPageDom truncate - flush hostPageDom table') . PHP_EOL;
|
echo _(' hostPageDom truncate - flush hostPageDom table') . PHP_EOL . PHP_EOL;
|
||||||
echo _(' hostPageSnap truncate - flush hostPageSnap, hostPageSnapDownload tables') . PHP_EOL . PHP_EOL;
|
|
||||||
|
|
||||||
echo _('get support: https://github.com/YGGverse/YGGo/issues') . PHP_EOL . PHP_EOL;
|
echo _('get support: https://github.com/YGGverse/YGGo/issues') . PHP_EOL . PHP_EOL;
|
||||||
|
@ -63,18 +63,6 @@ define('WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT', 100);
|
|||||||
*/
|
*/
|
||||||
define('WEBSITE_IDENTICON_IMAGE_CACHE', true);
|
define('WEBSITE_IDENTICON_IMAGE_CACHE', true);
|
||||||
|
|
||||||
/*
|
|
||||||
* Total snap files size allowed to download in bytes in WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET period
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE', 10485760);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Time offset quota when WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE reached
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
define('WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET', 60*60);
|
|
||||||
|
|
||||||
// Database
|
// Database
|
||||||
define('DB_HOST', '127.0.0.1');
|
define('DB_HOST', '127.0.0.1');
|
||||||
define('DB_PORT', 3306);
|
define('DB_PORT', 3306);
|
||||||
@ -90,19 +78,60 @@ define('SPHINX_PORT', 9306);
|
|||||||
define('MEMCACHED_HOST', '127.0.0.1');
|
define('MEMCACHED_HOST', '127.0.0.1');
|
||||||
define('MEMCACHED_PORT', 11211);
|
define('MEMCACHED_PORT', 11211);
|
||||||
|
|
||||||
// Third-party connections (optional)
|
// Snaps
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mega.nz remote storage
|
* Storage nodes configuration
|
||||||
*
|
*
|
||||||
* FTP storage integration through MEGAcmd (https://mega.io/cmd)
|
* Supports any number of optional 'localhost' directories and 'ftp' servers
|
||||||
*
|
*
|
||||||
* Connect mega-ftp instance on CRAWL_PAGE_MIME_SNAP_MEGA enabled
|
* Comment out a node to disable that connection
|
||||||
|
*
|
||||||
|
* Use an empty array to disable snaps, or set quota.mime = false or quota.size = 0 to disable a specific instance
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
define('MEGA_FTP_HOST', '127.0.0.1');
|
define('SNAP_STORAGE', json_encode((object)
|
||||||
define('MEGA_FTP_PORT', 4990);
|
[
|
||||||
define('MEGA_FTP_DIRECTORY', '');
|
'localhost' => [
|
||||||
|
[
|
||||||
|
'directory' => __DIR__ . '/../storage/snap/hp/',
|
||||||
|
'quota' => [
|
||||||
|
'mime' => false,
|
||||||
|
'size' => 10000000024,
|
||||||
|
'request' => [
|
||||||
|
'download' => [
|
||||||
|
'size' => 10000024,
|
||||||
|
'seconds' => 60*60
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
// ...
|
||||||
|
]
|
||||||
|
],
|
||||||
|
'ftp' => [
|
||||||
|
[
|
||||||
|
'port' => 21,
|
||||||
|
'host' => '',
|
||||||
|
'username' => '',
|
||||||
|
'password' => '',
|
||||||
|
'directory' => '/snap',
|
||||||
|
'timeout' => 30,
|
||||||
|
'passive' => true,
|
||||||
|
'quota' => [
|
||||||
|
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
|
||||||
|
'size' => 10000000024,
|
||||||
|
'request' => [
|
||||||
|
'download' => [
|
||||||
|
'size' => 10000024,
|
||||||
|
'seconds' => 60*60
|
||||||
|
]
|
||||||
|
]
|
||||||
|
],
|
||||||
|
],
|
||||||
|
// ...
|
||||||
|
]
|
||||||
|
]
|
||||||
|
));
|
||||||
|
|
||||||
// Proxy settings
|
// Proxy settings
|
||||||
|
|
||||||
@ -217,28 +246,6 @@ define('CRAWL_PAGE_HOME_SECONDS_OFFSET', 60*60*24*7);
|
|||||||
*/
|
*/
|
||||||
define('CRAWL_PAGE_MIME_INDEX', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/x-mpegurl,audio/flac');
|
define('CRAWL_PAGE_MIME_INDEX', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/x-mpegurl,audio/flac');
|
||||||
|
|
||||||
/*
|
|
||||||
* Snap pages locally match MIME types
|
|
||||||
*
|
|
||||||
* comma separated | false to disable
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
define('CRAWL_PAGE_MIME_SNAP_LOCAL', 'text/html');
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Snap pages to mega.nz match MIME types
|
|
||||||
*
|
|
||||||
* comma separated | false to disable
|
|
||||||
*
|
|
||||||
* Requires connection:
|
|
||||||
*
|
|
||||||
* MEGA_FTP_HOST
|
|
||||||
* MEGA_FTP_PORT
|
|
||||||
* MEGA_FTP_DIRECTORY
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
define('CRAWL_PAGE_MIME_SNAP_MEGA', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico');
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Renew manifests index by timing offset provided
|
* Renew manifests index by timing offset provided
|
||||||
*
|
*
|
||||||
@ -289,9 +296,6 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
|
|||||||
* this option disabled requires huge disk storage,
|
* this option disabled requires huge disk storage,
|
||||||
* it's experimental feature, oriented for index operations
|
* it's experimental feature, oriented for index operations
|
||||||
*
|
*
|
||||||
* see CRAWL_PAGE_MIME_SNAP_LOCAL
|
|
||||||
* to create compressed data snaps
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
define('CRAWL_HOST_DEFAULT_META_ONLY', true);
|
define('CRAWL_HOST_DEFAULT_META_ONLY', true);
|
||||||
|
|
||||||
|
@ -93,23 +93,43 @@ try {
|
|||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
if ($hostPageSnap->storageLocal) {
|
// Delete snap files
|
||||||
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
|
|
||||||
unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
foreach ($storages as $storage) {
|
||||||
}
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageMega) {
|
// Generate storage id
|
||||||
|
$crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex));
|
||||||
|
|
||||||
$ftp = new Ftp();
|
switch ($name) {
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
case 'localhost':
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
|
$ftp = new Ftp();
|
||||||
|
|
||||||
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up DB registry
|
||||||
|
foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) {
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId);
|
||||||
|
}
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
|
||||||
|
|
||||||
|
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
|
|
||||||
|
|
||||||
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete host page
|
// Delete host page
|
||||||
@ -139,23 +159,43 @@ try {
|
|||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
if ($hostPageSnap->storageLocal) {
|
// Delete snap files
|
||||||
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
|
|
||||||
unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
foreach ($storages as $storage) {
|
||||||
}
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageMega) {
|
// Generate storage id
|
||||||
|
$crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex));
|
||||||
|
|
||||||
$ftp = new Ftp();
|
switch ($name) {
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
case 'localhost':
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
|
$ftp = new Ftp();
|
||||||
|
|
||||||
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up DB registry
|
||||||
|
foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) {
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId);
|
||||||
|
}
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
|
||||||
|
|
||||||
|
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
|
|
||||||
|
|
||||||
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete host page
|
// Delete host page
|
||||||
@ -225,39 +265,59 @@ try {
|
|||||||
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
|
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
|
||||||
|
|
||||||
// Clean up banned pages extra data
|
// Clean up banned pages extra data
|
||||||
foreach ($db->getHostPagesBanned() as $hostPageBanned) {
|
foreach ($db->getHostPagesBanned() as $hostPage) {
|
||||||
|
|
||||||
// Delete host page descriptions
|
// Delete host page descriptions
|
||||||
$hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPageBanned->hostPageId);
|
$hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page DOMs
|
// Delete host page DOMs
|
||||||
$hostPagesDomsDeleted += $db->deleteHostPageDoms($hostPageBanned->hostPageId);
|
$hostPagesDomsDeleted += $db->deleteHostPageDoms($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page refs data
|
// Delete host page refs data
|
||||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPageBanned->hostPageId);
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page snaps
|
// Delete host page snaps
|
||||||
$snapFilePath = chunk_split($hostPageBanned->hostPageId, 1, '/');
|
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPageBanned->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
if ($hostPageSnap->storageLocal) {
|
// Delete snap files
|
||||||
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
|
|
||||||
unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
foreach ($storages as $storage) {
|
||||||
}
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageMega) {
|
// Generate storage id
|
||||||
|
$crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex));
|
||||||
|
|
||||||
$ftp = new Ftp();
|
switch ($name) {
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
case 'localhost':
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
|
$ftp = new Ftp();
|
||||||
|
|
||||||
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up DB registry
|
||||||
|
foreach ($db->getHostPageSnapStorages($hostPageSnap->hostPageSnapId) as $hostPageSnapStorage) {
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapDownloads($hostPageSnapStorage->hostPageSnapStorageId);
|
||||||
|
}
|
||||||
|
|
||||||
|
$db->deleteHostPageSnapStorages($hostPageSnap->hostPageSnapId);
|
||||||
|
|
||||||
|
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
|
|
||||||
|
|
||||||
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -271,35 +331,8 @@ try {
|
|||||||
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
||||||
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
|
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
|
||||||
|
|
||||||
// Delete failed snaps
|
// Delete failed snap files
|
||||||
foreach ($db->getHosts() as $host) {
|
// @TODO
|
||||||
|
|
||||||
foreach ($db->getHostPages($host->hostId) as $hostPage) {
|
|
||||||
|
|
||||||
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId, false, false, 'AND') as $hostPageSnap) {
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageLocal) {
|
|
||||||
|
|
||||||
unlink(__DIR__ . '/../storage/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($hostPageSnap->storageMega) {
|
|
||||||
|
|
||||||
$ftp = new Ftp();
|
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
$db->deleteHostPageSnapDownloads($hostPageSnap->hostPageSnapId);
|
|
||||||
|
|
||||||
$hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Commit results
|
// Commit results
|
||||||
$db->commit();
|
$db->commit();
|
||||||
|
@ -665,50 +665,18 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Begin snaps
|
// Begin snaps
|
||||||
$snapLocal = false;
|
if (SNAP_STORAGE) {
|
||||||
$snapMega = false;
|
|
||||||
|
|
||||||
// Snap local enabled and MIME in white list
|
|
||||||
if (false !== CRAWL_PAGE_MIME_SNAP_LOCAL) {
|
|
||||||
|
|
||||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_LOCAL) as $mime) {
|
|
||||||
|
|
||||||
// MIME type allowed in settings
|
|
||||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
|
||||||
|
|
||||||
$snapLocal = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Snap MEGA enabled and MIME in white list
|
|
||||||
if (false !== CRAWL_PAGE_MIME_SNAP_MEGA) {
|
|
||||||
|
|
||||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_MEGA) as $mime) {
|
|
||||||
|
|
||||||
// MIME type allowed in settings
|
|
||||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
|
||||||
|
|
||||||
$snapMega = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// At least one snap storage match settings condition
|
|
||||||
if ($snapLocal || $snapMega) {
|
|
||||||
|
|
||||||
$crc32data = crc32($content);
|
$crc32data = crc32($content);
|
||||||
|
|
||||||
// Create not duplicated data snaps only, even new time
|
// Create a snap only when its data does not duplicate an existing snap of this page, even if it was fetched at a later time
|
||||||
if (!$db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) {
|
if (!$db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) {
|
||||||
|
|
||||||
$snapTime = time();
|
$snapTime = time();
|
||||||
$snapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
|
$snapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
|
||||||
|
|
||||||
$snapTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $snapPath . $snapTime . '.zip';
|
$snapTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $snapPath . $snapTime . '.zip';
|
||||||
@mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $snapPath, 0755, true);
|
@mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $snapPath, 0755, true);
|
||||||
|
|
||||||
// Create new ZIP container
|
// Create new ZIP container
|
||||||
$zip = new ZipArchive();
|
$zip = new ZipArchive();
|
||||||
@ -718,10 +686,10 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
// Insert compressed snap data into the tmp storage
|
// Insert compressed snap data into the tmp storage
|
||||||
if (true === $zip->addFromString('DATA', $content) &&
|
if (true === $zip->addFromString('DATA', $content) &&
|
||||||
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $snapTime) . PHP_EOL .
|
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $snapTime) . PHP_EOL .
|
||||||
sprintf('CRC32: %s', $crc32data . PHP_EOL .
|
sprintf('CRC32: %s', $crc32data . PHP_EOL .
|
||||||
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
||||||
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL .
|
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL .
|
||||||
sprintf('TARGET: %s', Filter::url($queueHostPageURL))))) {
|
sprintf('TARGET: %s', Filter::url($queueHostPageURL))))) {
|
||||||
|
|
||||||
// Done
|
// Done
|
||||||
$zip->close();
|
$zip->close();
|
||||||
@ -730,48 +698,103 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
if (file_exists($snapTmp)) {
|
if (file_exists($snapTmp)) {
|
||||||
|
|
||||||
// Register snap in DB
|
// Register snap in DB
|
||||||
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, $snapTime)) {
|
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, filesize($snapTmp), $snapTime)) {
|
||||||
|
|
||||||
$hostPagesSnapAdded++;
|
$hostPagesSnapAdded++;
|
||||||
|
|
||||||
// Copy tmp snap to the permanent local storage
|
|
||||||
if ($snapLocal) {
|
|
||||||
|
|
||||||
@mkdir(__DIR__ . '/../storage/snap/hp/' . $snapPath, 0755, true);
|
|
||||||
|
|
||||||
if (copy($snapTmp, __DIR__ . '/../storage/snap/hp/' . $snapPath . $snapTime . '.zip')) {
|
|
||||||
|
|
||||||
// Update snap location info
|
|
||||||
$db->updateHostPageSnapStorageLocal($hostPageSnapId, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy tmp snap to the permanent MEGA storage
|
|
||||||
if ($snapMega) {
|
|
||||||
|
|
||||||
$ftp = new Ftp();
|
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
|
||||||
|
|
||||||
$ftp->mkdir('hp/' . $snapPath, true);
|
|
||||||
|
|
||||||
if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) {
|
|
||||||
|
|
||||||
// Update snap location info
|
|
||||||
$db->updateHostPageSnapStorageMega($hostPageSnapId, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
$ftp->close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove tmp
|
|
||||||
@unlink($snapTmp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copy files to each storage
|
||||||
|
|
||||||
|
$snapStorageIndex = 0;
|
||||||
|
|
||||||
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
|
|
||||||
|
foreach ($storages as $storage) {
|
||||||
|
|
||||||
|
$snapStorageIndex++;
|
||||||
|
|
||||||
|
// Generate storage id
|
||||||
|
$crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex));
|
||||||
|
|
||||||
|
switch ($name) {
|
||||||
|
|
||||||
|
case 'localhost':
|
||||||
|
|
||||||
|
// Validate size quota
|
||||||
|
if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2;
|
||||||
|
|
||||||
|
// Validate mime
|
||||||
|
if (!$storage->quota->mime) continue 2;
|
||||||
|
|
||||||
|
$snapMimeValid = false;
|
||||||
|
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
||||||
|
|
||||||
|
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||||
|
|
||||||
|
$snapMimeValid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$snapMimeValid) continue 2;
|
||||||
|
|
||||||
|
// Copy tmp snap file to the permanent storage
|
||||||
|
@mkdir($storage->directory . $snapPath, 0755, true);
|
||||||
|
|
||||||
|
if (copy($snapTmp, $storage->directory . $snapPath . $snapTime . '.zip')) {
|
||||||
|
|
||||||
|
// Register storage name
|
||||||
|
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
|
// Validate size quota
|
||||||
|
if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2;
|
||||||
|
|
||||||
|
// Validate mime
|
||||||
|
if (!$storage->quota->mime) continue 2;
|
||||||
|
|
||||||
|
$snapMimeValid = false;
|
||||||
|
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
||||||
|
|
||||||
|
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||||
|
|
||||||
|
$snapMimeValid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$snapMimeValid) continue 2;
|
||||||
|
|
||||||
|
// Copy tmp snap file to the permanent storage
|
||||||
|
$ftp = new Ftp();
|
||||||
|
|
||||||
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
|
$ftp->mkdir('hp/' . $snapPath, true);
|
||||||
|
|
||||||
|
if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) {
|
||||||
|
|
||||||
|
// Register storage name
|
||||||
|
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
||||||
|
}
|
||||||
|
|
||||||
|
$ftp->close();
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete tmp snap
|
||||||
|
unlink($snapTmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip page links following with meta robots:nofollow attribute
|
// Skip page links following with meta robots:nofollow attribute
|
||||||
|
Binary file not shown.
@ -15,18 +15,14 @@ class Ftp {
|
|||||||
mixed $login = null,
|
mixed $login = null,
|
||||||
mixed $password = null,
|
mixed $password = null,
|
||||||
string $directory = '/',
|
string $directory = '/',
|
||||||
int $timeout = 90) {
|
int $timeout = 90,
|
||||||
|
bool $passive = false) {
|
||||||
|
|
||||||
if (!$this->_connection = ftp_connect($host, $port, $timeout)) {
|
if (!$this->_connection = ftp_connect($host, $port, $timeout)) {
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ftp_pasv($this->_connection, $this->_passive)) {
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($login) && !empty($password)) {
|
if (!empty($login) && !empty($password)) {
|
||||||
|
|
||||||
if (!ftp_login($this->_connection, $login, $password)) {
|
if (!ftp_login($this->_connection, $login, $password)) {
|
||||||
@ -35,6 +31,11 @@ class Ftp {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($passive && !ftp_pasv($this->_connection, $this->_passive)) {
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return ftp_chdir($this->_connection, $directory);
|
return ftp_chdir($this->_connection, $directory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -462,35 +462,18 @@ class MySQL {
|
|||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostPageSnap(int $hostPageId, string $crc32data, int $timeAdded) {
|
public function addHostPageSnap(int $hostPageId, string $crc32data, int $size, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`,
|
||||||
`crc32data`,
|
`crc32data`,
|
||||||
`timeAdded`) VALUES (?, ?, ?)');
|
`size`,
|
||||||
|
`timeAdded`) VALUES (?, ?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $crc32data, $timeAdded]);
|
$query->execute([$hostPageId, $crc32data, $size, $timeAdded]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function updateHostPageSnapStorageLocal(int $hostPageSnapId, mixed $value) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `hostPageSnap` SET `storageLocal` = ? WHERE `hostPageSnapId` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$value, $hostPageSnapId]);
|
|
||||||
|
|
||||||
return $query->rowCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function updateHostPageSnapStorageMega(int $hostPageSnapId, mixed $value) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `hostPageSnap` SET `storageMega` = ? WHERE `hostPageSnapId` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$value, $hostPageSnapId]);
|
|
||||||
|
|
||||||
return $query->rowCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function deleteHostPageSnap(int $hostPageSnapId) {
|
public function deleteHostPageSnap(int $hostPageSnapId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('DELETE FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
$query = $this->_db->prepare('DELETE FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
||||||
@ -500,24 +483,37 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getTotalHostPageSnaps(int $hostPageId, bool $storageLocal = true, bool $storageMega = true) {
|
public function getTotalHostPageSnaps(int $hostPageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnap` WHERE `hostPageId` = ? AND (`storageLocal` = ? OR `storageMega` = ?)');
|
$query = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnap` WHERE `hostPageId` = ?');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $storageLocal, $storageMega]);
|
$query->execute([$hostPageId]);
|
||||||
|
|
||||||
return $query->fetch()->total;
|
return $query->fetch()->total;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getHostPageSnaps(int $hostPageId, bool $storageLocal = true, bool $storageMega = true, string $condition = 'OR') {
|
public function getHostPageSnaps(int $hostPageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND (`storageLocal` = ? ' . ($condition == 'OR' ? 'OR' : 'AND') . ' `storageMega` = ?) ORDER BY `timeAdded` DESC');
|
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC');
|
||||||
|
|
||||||
$query->execute([$hostPageId, $storageLocal, $storageMega]);
|
$query->execute([$hostPageId]);
|
||||||
|
|
||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Sum the `size` of all snaps of a host page that are registered
 * on the storage identified by `crc32name`.
 *
 * The filter is applied to `hostPageSnap`.`hostPageId`, matching the
 * $hostPageId argument. Previously the argument was bound against
 * `hostPageSnapId`, so the total was computed for an unrelated snap row.
 *
 * @param int $hostPageId Host page whose snaps are summed
 * @param int $crc32name  Storage identifier stored in `hostPageSnapStorage`.`crc32name`
 *
 * @return mixed The `total` column of the result row; SQL SUM() yields NULL
 *               when no rows match, so callers should treat NULL as zero
 */
public function getTotalHostPageSnapSizeByStorage(int $hostPageId, int $crc32name) {

  $query = $this->_db->prepare('SELECT SUM(`hostPageSnap`.`size`) AS `total` FROM `hostPageSnap`
                                       JOIN `hostPageSnapStorage` ON (`hostPageSnapStorage`.`hostPageSnapId` = `hostPageSnap`.`hostPageSnapId`)

                                       WHERE `hostPageSnap`.`hostPageId` = ?
                                       AND   `hostPageSnapStorage`.`crc32name` = ?');

  $query->execute([$hostPageId, $crc32name]);

  return $query->fetch()->total;
}
|
||||||
|
|
||||||
public function getHostPageSnap(int $hostPageSnapId) {
|
public function getHostPageSnap(int $hostPageSnapId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
||||||
@ -536,44 +532,62 @@ class MySQL {
|
|||||||
return $query->fetch();
|
return $query->fetch();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostPageSnapDownload(int $hostPageSnapId, string $crc32ip, int $timeAdded) {
|
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapId`,
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
||||||
`crc32ip`,
|
`crc32ip`,
|
||||||
`timeAdded`) VALUES (?, ?, ?)');
|
`timeAdded`) VALUES (?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$hostPageSnapId, $crc32ip, $timeAdded]);
|
$query->execute([$hostPageSnapStorageId, $crc32ip, $timeAdded]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function updateHostPageSnapDownload(int $hostPageSnapDownloadId, string $storage, int $size, mixed $httpCode = NULL) {
|
public function addHostPageSnapStorage(int $hostPageSnapId, int $crc32name, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `hostPageSnapDownload` SET `storage` = ?, `size` = ?, `httpCode` = ? WHERE `hostPageSnapDownloadId` = ? LIMIT 1');
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnapStorage` (`hostPageSnapId`,
|
||||||
|
`crc32name`,
|
||||||
|
`timeAdded`) VALUES (?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$storage, $size, $httpCode, $hostPageSnapDownloadId]);
|
$query->execute([$hostPageSnapId, $crc32name, $timeAdded]);
|
||||||
|
|
||||||
return $query->rowCount();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function deleteHostPageSnapDownloads(int $hostPageSnapId) {
|
public function getHostPageSnapStorageByCRC32Name(int $hostPageSnapId, int $crc32name) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapId` = ? LIMIT 1');
|
$query = $this->_db->prepare('SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ? AND `crc32name` = ?');
|
||||||
|
|
||||||
|
$query->execute([$hostPageSnapId, $crc32name]);
|
||||||
|
|
||||||
|
return $query->fetch();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fetch every `hostPageSnapStorage` row that belongs to the given snap.
 *
 * @param int $hostPageSnapId Identifier of the snap whose storage rows are requested
 *
 * @return mixed Result of fetchAll() on the executed statement
 */
public function getHostPageSnapStorages(int $hostPageSnapId) {

  $sql = 'SELECT * FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?';

  $statement = $this->_db->prepare($sql);

  $statement->execute(
    [
      $hostPageSnapId
    ]
  );

  return $statement->fetchAll();
}
|
||||||
|
|
||||||
|
public function deleteHostPageSnapStorages(int $hostPageSnapId) {
|
||||||
|
|
||||||
|
$query = $this->_db->prepare('DELETE FROM `hostPageSnapStorage` WHERE `hostPageSnapId` = ?');
|
||||||
|
|
||||||
$query->execute([$hostPageSnapId]);
|
$query->execute([$hostPageSnapId]);
|
||||||
|
|
||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function findHostPageSnapDownloadsTotalSize(int $crc32ip, int $timeOffset) {
|
public function deleteHostPageSnapDownloads(int $hostPageSnapStorageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT SUM(`size`) AS `size` FROM `hostPageSnapDownload`
|
$query = $this->_db->prepare('DELETE FROM `hostPageSnapDownload` WHERE `hostPageSnapStorageId` = ?');
|
||||||
|
|
||||||
WHERE `crc32ip` = ? AND `timeAdded` < ?');
|
$query->execute([$hostPageSnapStorageId]);
|
||||||
|
|
||||||
$query->execute([$crc32ip, $timeOffset]);
|
return $query->rowCount();
|
||||||
|
|
||||||
return $query->fetch()->size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostPageDom(int $hostPageId, int $timeAdded, string $selector, string $value) {
|
public function addHostPageDom(int $hostPageId, int $timeAdded, string $selector, string $value) {
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 211 KiB After Width: | Height: | Size: 159 KiB |
@ -111,7 +111,6 @@ if (API_ENABLED) {
|
|||||||
'crawlHostPageSecondsOffset' => CRAWL_PAGE_SECONDS_OFFSET,
|
'crawlHostPageSecondsOffset' => CRAWL_PAGE_SECONDS_OFFSET,
|
||||||
'crawlHostPageHomeSecondsOffset' => CRAWL_PAGE_HOME_SECONDS_OFFSET,
|
'crawlHostPageHomeSecondsOffset' => CRAWL_PAGE_HOME_SECONDS_OFFSET,
|
||||||
'crawlHostPageMimeIndex' => CRAWL_PAGE_MIME_INDEX,
|
'crawlHostPageMimeIndex' => CRAWL_PAGE_MIME_INDEX,
|
||||||
'crawlHostPageMimeSnapLocal' => CRAWL_PAGE_MIME_SNAP_LOCAL,
|
|
||||||
'cleanHostSecondsOffset' => CLEAN_HOST_SECONDS_OFFSET,
|
'cleanHostSecondsOffset' => CLEAN_HOST_SECONDS_OFFSET,
|
||||||
'crawlRobotsDefaultRules' => CRAWL_ROBOTS_DEFAULT_RULES,
|
'crawlRobotsDefaultRules' => CRAWL_ROBOTS_DEFAULT_RULES,
|
||||||
'crawlRobotsPostfixRules' => CRAWL_ROBOTS_POSTFIX_RULES,
|
'crawlRobotsPostfixRules' => CRAWL_ROBOTS_POSTFIX_RULES,
|
||||||
|
121
public/file.php
121
public/file.php
@ -46,102 +46,93 @@ switch ($type) {
|
|||||||
// Connect database
|
// Connect database
|
||||||
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||||
|
|
||||||
|
// Init request
|
||||||
|
$crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '');
|
||||||
|
|
||||||
// Get snap details from DB
|
// Get snap details from DB
|
||||||
if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) {
|
if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) {
|
||||||
|
|
||||||
// Init variables
|
// Get snap file
|
||||||
$crc32ip = crc32(!empty($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '');
|
|
||||||
$time = time();
|
|
||||||
|
|
||||||
$hostPageDownloadsTotalSize = $db->findHostPageSnapDownloadsTotalSize($crc32ip, $time - WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE_TIME_OFFSET);
|
$snapStorageIndex = 0;
|
||||||
|
|
||||||
// Check for downloading quotas
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
if ($hostPageDownloadsTotalSize >= WEBSITE_QUOTA_IP_SNAP_DOWNLOAD_TOTAL_SIZE) {
|
|
||||||
|
|
||||||
header('HTTP/1.0 403 Forbidden');
|
foreach ($storages as $storage) {
|
||||||
|
|
||||||
echo _('403 Access forbidden by requests quota');
|
$snapStorageIndex++;
|
||||||
|
|
||||||
exit;
|
// Generate storage id
|
||||||
}
|
$crc32name = crc32(sprintf('%s.%s', $name, $snapStorageIndex));
|
||||||
|
|
||||||
// Register snap download
|
switch ($name) {
|
||||||
$hostPageSnapDownloadId = $db->addHostPageSnapDownload($hostPageSnap->hostPageSnapId, $crc32ip, $time);
|
|
||||||
|
|
||||||
// Init variables
|
case 'localhost':
|
||||||
$snapSize = 0;
|
|
||||||
$snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip';
|
|
||||||
|
|
||||||
// Download local snap in higher priority if possible
|
if ($hostPageSnapStorage = $db->getHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) {
|
||||||
if ($hostPageSnap->storageLocal && file_exists(__DIR__ . '/../storage/snap/' . $snapFile) &&
|
|
||||||
is_readable(__DIR__ . '/../storage/snap/' . $snapFile)) {
|
|
||||||
|
|
||||||
$snapSize = (int) @filesize(__DIR__ . '/../storage/snap/' . $snapFile);
|
// Check request quota
|
||||||
|
//if ()
|
||||||
|
|
||||||
$db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'local', $snapSize, 200);
|
// Get file
|
||||||
|
$snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip';
|
||||||
|
|
||||||
header('Content-Type: application/zip');
|
// Download local snap in higher priority if possible
|
||||||
header(sprintf('Content-Length: %s', $snapSize));
|
if (file_exists($storage->directory . $snapFile) &&
|
||||||
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
is_readable($storage->directory . $snapFile)) {
|
||||||
$hostPageSnap->hostPageId,
|
|
||||||
$hostPageSnap->timeAdded));
|
|
||||||
readfile(__DIR__ . '/../storage/snap/' . $snapFile);
|
|
||||||
|
|
||||||
// Then try to download from MEGA storage if exists
|
// Register snap download
|
||||||
} else if ($hostPageSnap->storageMega) {
|
$db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time());
|
||||||
|
|
||||||
$ftp = new Ftp();
|
// Return snap file
|
||||||
|
header('Content-Type: application/zip');
|
||||||
|
header(sprintf('Content-Length: %s', $snapSize));
|
||||||
|
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
||||||
|
$hostPageSnap->hostPageId,
|
||||||
|
$hostPageSnap->timeAdded));
|
||||||
|
readfile($storage->directory . $snapFile);
|
||||||
|
|
||||||
if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {
|
exit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ($snapSize = $ftp->size($snapFile)) {
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
$db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 200);
|
if ($hostPageSnapStorage = $db->getHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) {
|
||||||
|
|
||||||
header('Content-Type: application/zip');
|
$ftp = new Ftp();
|
||||||
header(sprintf('Content-Length: %s', $snapSize));
|
|
||||||
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
|
||||||
$hostPageSnap->hostPageId,
|
|
||||||
$hostPageSnap->timeAdded));
|
|
||||||
|
|
||||||
$ftp->get($snapFile, 'php://output');
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
} else {
|
// Register snap download
|
||||||
|
$db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time());
|
||||||
|
|
||||||
$db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 404);
|
// Return snap file
|
||||||
|
header('Content-Type: application/zip');
|
||||||
|
header(sprintf('Content-Length: %s', $snapSize));
|
||||||
|
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
||||||
|
$hostPageSnap->hostPageId,
|
||||||
|
$hostPageSnap->timeAdded));
|
||||||
|
|
||||||
header('HTTP/1.0 404 Not Found');
|
$ftp->get($snapFile, 'php://output');
|
||||||
|
|
||||||
echo _('404 File not found');
|
exit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
$db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'mega', $snapSize, 404);
|
|
||||||
|
|
||||||
header('HTTP/1.0 404 Not Found');
|
|
||||||
|
|
||||||
echo _('404 File not found');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return 404 when file not found
|
|
||||||
} else {
|
|
||||||
|
|
||||||
$db->updateHostPageSnapDownload($hostPageSnapDownloadId, 'other', $snapSize, 404);
|
|
||||||
|
|
||||||
header('HTTP/1.0 404 Not Found');
|
|
||||||
|
|
||||||
echo _('404 File not found');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
header('HTTP/1.0 404 Not Found');
|
|
||||||
|
|
||||||
echo _('404 Snap not found');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
header('HTTP/1.0 404 Not Found');
|
||||||
|
|
||||||
|
echo _('404 Snap not found');
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
||||||
header('HTTP/1.0 404 Not Found');
|
header('HTTP/1.0 404 Not Found');
|
||||||
|
Loading…
Reference in New Issue
Block a user