mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-08-26 13:51:55 +00:00
implement local snaps
This commit is contained in:
parent
d98b8f5c94
commit
2f7d99079d
11
README.md
11
README.md
@ -27,6 +27,7 @@ php-pdo
|
|||||||
php-curl
|
php-curl
|
||||||
php-gd
|
php-gd
|
||||||
php-mbstring
|
php-mbstring
|
||||||
|
php-zip
|
||||||
php-mysql
|
php-mysql
|
||||||
sphinxsearch
|
sphinxsearch
|
||||||
```
|
```
|
||||||
@ -37,7 +38,7 @@ sphinxsearch
|
|||||||
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
|
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
|
||||||
* Install [Sphinx Search Server](https://sphinxsearch.com)
|
* Install [Sphinx Search Server](https://sphinxsearch.com)
|
||||||
* Configuration examples are placed at `/config` folder
|
* Configuration examples are placed at `/config` folder
|
||||||
* Make sure `/storage` folder is writable
|
* Make sure the `/storage` and `/public/storage` folders are writable
|
||||||
* Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
|
* Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
|
||||||
|
|
||||||
#### JSON API
|
#### JSON API
|
||||||
@ -147,7 +148,9 @@ GET m=SphinxQL
|
|||||||
* [x] Index explorer
|
* [x] Index explorer
|
||||||
* [x] Safe images preview
|
* [x] Safe images preview
|
||||||
* [x] Extended search syntax support
|
* [x] Extended search syntax support
|
||||||
* [ ] Page history snaps
|
* [ ] Page content snaps history
|
||||||
|
+ [x] Local
|
||||||
|
+ [ ] Remote
|
||||||
|
|
||||||
##### UI
|
##### UI
|
||||||
|
|
||||||
@ -180,7 +183,9 @@ GET m=SphinxQL
|
|||||||
* [x] MIME Content-type settings
|
* [x] MIME Content-type settings
|
||||||
* [x] Ban non-condition links to prevent extra requests
|
* [x] Ban non-condition links to prevent extra requests
|
||||||
* [x] Debug log
|
* [x] Debug log
|
||||||
* [x] History snaps
|
* [ ] Page content snaps generation
|
||||||
|
+ [x] Local
|
||||||
|
+ [ ] Remote
|
||||||
* [ ] Indexing new sites homepage in higher priority
|
* [ ] Indexing new sites homepage in higher priority
|
||||||
* [ ] Redirect codes extended processing
|
* [ ] Redirect codes extended processing
|
||||||
* [ ] Palette image index / filter
|
* [ ] Palette image index / filter
|
||||||
|
@ -168,7 +168,15 @@ define('CRAWL_PAGE_SECONDS_OFFSET', 60*60*24*30*12);
|
|||||||
* comma separated
|
* comma separated
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
define('CRAWL_PAGE_MIME', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,/video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/flac');
|
// Every entry must be a bare "type/subtype" value without a leading slash:
// the crawler looks each entry up as a substring of the response content type
define('CRAWL_PAGE_MIME_INDEX', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml,video/mp4,video/ogg,video/webm,audio/mpeg,audio/ogg,audio/wav,audio/mp4,audio/aac,audio/aacp,audio/webm,audio/x-caf,audio/flac');
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Snap pages locally match MIME types
|
||||||
|
*
|
||||||
|
* comma separated | false to disable
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
define('CRAWL_PAGE_MIME_SNAP_LOCAL', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml');
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Renew manifests index by timing offset provided
|
* Renew manifests index by timing offset provided
|
||||||
@ -266,7 +274,7 @@ define('CRAWL_MANIFEST', true);
|
|||||||
* Manifest API version compatibility
|
* Manifest API version compatibility
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
define('CRAWL_MANIFEST_API_VERSION', 0.8);
|
define('CRAWL_MANIFEST_API_VERSION', 0.9);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set default auto-crawl status for new manifest added
|
* Set default auto-crawl status for new manifest added
|
||||||
|
@ -30,7 +30,9 @@ $hostsTotal = $db->getTotalHosts();
|
|||||||
$manifestsTotal = $db->getTotalManifests();
|
$manifestsTotal = $db->getTotalManifests();
|
||||||
$hostsUpdated = 0;
|
$hostsUpdated = 0;
|
||||||
$hostPagesDeleted = 0;
|
$hostPagesDeleted = 0;
|
||||||
$hostPageDescriptionsDeleted = 0;
|
$hostPagesDescriptionsDeleted = 0;
|
||||||
|
$hostPagesSnapUrlDeleted = 0;
|
||||||
|
$hostPagesToHostPageDeleted = 0;
|
||||||
$manifestsDeleted = 0;
|
$manifestsDeleted = 0;
|
||||||
$hostPagesBansRemoved = 0;
|
$hostPagesBansRemoved = 0;
|
||||||
|
|
||||||
@ -74,8 +76,9 @@ try {
|
|||||||
foreach ((array) $db->getHostPagesByLimit($host->hostId, $totalHostPages - $host->crawlPageLimit) as $hostPage) {
|
foreach ((array) $db->getHostPagesByLimit($host->hostId, $totalHostPages - $host->crawlPageLimit) as $hostPage) {
|
||||||
|
|
||||||
// Delete host page
|
// Delete host page
|
||||||
$db->deleteHostPageDescriptions($hostPage->hostPageId);
|
$hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPage->hostPageId);
|
||||||
$db->deleteHostPageToHostPage($hostPage->hostPageId);
|
$hostPagesSnapUrlDeleted += $db->deleteHostPageSnapURL($hostPage->hostPageId); // @TODO delete file
|
||||||
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
if ($hostPage->uri != '/') {
|
if ($hostPage->uri != '/') {
|
||||||
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||||
@ -91,8 +94,9 @@ try {
|
|||||||
if (!$robots->uriAllowed($hostPage->uri)) {
|
if (!$robots->uriAllowed($hostPage->uri)) {
|
||||||
|
|
||||||
// Delete host page
|
// Delete host page
|
||||||
$db->deleteHostPageDescriptions($hostPage->hostPageId);
|
$hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptions($hostPage->hostPageId);
|
||||||
$db->deleteHostPageToHostPage($hostPage->hostPageId);
|
$hostPagesSnapUrlDeleted += $db->deleteHostPageSnapURL($hostPage->hostPageId); // @TODO delete file
|
||||||
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
if ($hostPage->uri != '/') {
|
if ($hostPage->uri != '/') {
|
||||||
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
$hostPagesDeleted += $db->deleteHostPage($hostPage->hostPageId);
|
||||||
@ -162,7 +166,7 @@ try {
|
|||||||
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
|
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
|
||||||
|
|
||||||
// Delete page description history
|
// Delete page description history
|
||||||
$hostPageDescriptionsDeleted += $db->deleteHostPageDescriptionsByTimeAdded(time() - CLEAN_PAGE_DESCRIPTION_OFFSET);
|
$hostPagesDescriptionsDeleted += $db->deleteHostPageDescriptionsByTimeAdded(time() - CLEAN_PAGE_DESCRIPTION_OFFSET);
|
||||||
|
|
||||||
// Delete deprecated logs
|
// Delete deprecated logs
|
||||||
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
||||||
@ -187,7 +191,9 @@ if (CLEAN_LOG_ENABLED) {
|
|||||||
$hostsTotal,
|
$hostsTotal,
|
||||||
$hostsUpdated,
|
$hostsUpdated,
|
||||||
$hostPagesDeleted,
|
$hostPagesDeleted,
|
||||||
$hostPageDescriptionsDeleted,
|
$hostPagesDescriptionsDeleted,
|
||||||
|
$hostPagesSnapUrlDeleted,
|
||||||
|
$hostPagesToHostPageDeleted,
|
||||||
$hostPagesBansRemoved,
|
$hostPagesBansRemoved,
|
||||||
$manifestsTotal,
|
$manifestsTotal,
|
||||||
$manifestsDeleted,
|
$manifestsDeleted,
|
||||||
@ -209,7 +215,9 @@ echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
|
|||||||
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
||||||
|
|
||||||
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
|
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
|
||||||
echo 'Host page descriptions deleted: ' . $hostPageDescriptionsDeleted . PHP_EOL;
|
echo 'Host page descriptions deleted: ' . $hostPagesDescriptionsDeleted . PHP_EOL;
|
||||||
|
echo 'Host page snaps deleted: ' . $hostPagesSnapUrlDeleted . PHP_EOL;
|
||||||
|
echo 'Host page to host page deleted: ' . $hostPagesToHostPageDeleted . PHP_EOL;
|
||||||
|
|
||||||
echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
|
echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
|
||||||
echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;
|
echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;
|
||||||
|
@ -39,6 +39,7 @@ $manifestsAdded = 0;
|
|||||||
$hostPagesAdded = 0;
|
$hostPagesAdded = 0;
|
||||||
$hostsAdded = 0;
|
$hostsAdded = 0;
|
||||||
$hostPagesBanned = 0;
|
$hostPagesBanned = 0;
|
||||||
|
$hostPagesSnapUrlAdded = 0;
|
||||||
|
|
||||||
// Connect database
|
// Connect database
|
||||||
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||||
@ -251,11 +252,11 @@ try {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse MIME
|
// Parse index MIME
|
||||||
$hostPageIsDom = false;
|
$hostPageIsDom = false;
|
||||||
$hostPageInMime = false;
|
$hostPageInMime = false;
|
||||||
|
|
||||||
foreach ((array) explode(',', CRAWL_PAGE_MIME) as $mime) {
|
foreach ((array) explode(',', CRAWL_PAGE_MIME_INDEX) as $mime) {
|
||||||
|
|
||||||
$mime = Filter::mime($mime);
|
$mime = Filter::mime($mime);
|
||||||
|
|
||||||
@ -383,6 +384,48 @@ try {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Save local snap
//
// When CRAWL_PAGE_MIME_SNAP_LOCAL is not false, the downloaded $content is packed
// into a zip archive below ../public/storage/snap/hp/{hostPageId}/ and its public
// URL is registered through $db->addHostPageSnapURL().
// The snap is skipped when a record with the same content checksum already
// exists for this page on this storage host.
if (false !== CRAWL_PAGE_MIME_SNAP_LOCAL) {

  foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_LOCAL) as $mime) {

    $mime = Filter::mime($mime);

    // MIME type not allowed in settings, try the next one
    if (false === stripos(Filter::mime($contentType), $mime)) {

      continue;
    }

    $crc32data = crc32($content);
    $crc32host = crc32(''); // WEBSITE_DOMAIN, use empty for this host

    // Create not duplicated data snaps only for each storage host
    if (!$db->getHostPageSnapURL($queueHostPage->hostPageId, $crc32data, $crc32host)) {

      $time = time();

      $snapDirectory = '../public/storage/snap/hp/' . $queueHostPage->hostPageId;
      $snapFilename  = $snapDirectory . '/' . $time . '.zip';

      // Permissions must be given as an octal literal: decimal 755 equals 01363
      // Warnings stay suppressed as before, the directory may already exist
      @mkdir($snapDirectory, 0755, true);

      $zip = new ZipArchive();

      if (true === $zip->open($snapFilename, ZipArchive::CREATE)) {

        $snapAdded = $zip->addFromString($queueHostPage->hostPageId . '.' . $time . '.' . preg_replace('|^[A-z-]+/([A-z-]+).*|ui', '$1', Filter::mime($contentType)), $content);

        // The archive is written to disk on close, so always close it
        // and check the result before the snap gets registered
        $snapSaved = $zip->close();

        if (true === $snapAdded && true === $snapSaved) {

          $hostPagesSnapUrlAdded += $db->addHostPageSnapURL($queueHostPage->hostPageId,
                                                            $crc32data, // do not create duplicated content snaps
                                                            $crc32host, // multi host storage with same timestamp / crc32data
                                                            '/storage/snap/hp/' . $queueHostPage->hostPageId . '/' . $time . '.zip', // public url
                                                            $time);
        }
      }
    }

    // Content type matched, no need to check the remaining MIME types
    break;
  }
}
|
||||||
|
|
||||||
// Begin page links collection
|
// Begin page links collection
|
||||||
$links = [];
|
$links = [];
|
||||||
|
|
||||||
@ -700,6 +743,7 @@ if (CRAWL_LOG_ENABLED) {
|
|||||||
$hostPagesProcessed,
|
$hostPagesProcessed,
|
||||||
$hostPagesIndexed,
|
$hostPagesIndexed,
|
||||||
$hostPagesAdded,
|
$hostPagesAdded,
|
||||||
|
$hostPagesSnapUrlAdded,
|
||||||
$hostPagesBanned,
|
$hostPagesBanned,
|
||||||
$manifestsProcessed,
|
$manifestsProcessed,
|
||||||
$manifestsAdded,
|
$manifestsAdded,
|
||||||
@ -716,6 +760,7 @@ echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
|||||||
echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL;
|
echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL;
|
||||||
echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL;
|
echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL;
|
||||||
echo 'Pages added: ' . $hostPagesAdded . PHP_EOL;
|
echo 'Pages added: ' . $hostPagesAdded . PHP_EOL;
|
||||||
|
echo 'Pages snaps added: ' . $hostPagesSnapUrlAdded . PHP_EOL;
|
||||||
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL;
|
echo 'Pages banned: ' . $hostPagesBanned . PHP_EOL;
|
||||||
|
|
||||||
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
||||||
|
Binary file not shown.
@ -360,6 +360,63 @@ class MySQL {
|
|||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Register a snap URL for the host page.
 *
 * Executes INSERT IGNORE on the `hostPageSnapURL` table.
 * NOTE(review): $this->_db is presumably a PDO connection - confirm.
 *
 * @param int    $hostPageId Page the snap belongs to
 * @param int    $crc32data  Checksum of the snapped content
 * @param int    $crc32host  Checksum of the storage host
 * @param string $url        Snap location
 * @param int    $timeAdded  Time of the snap creation
 *
 * @return mixed Result of rowCount() on the executed statement
 */
public function addHostPageSnapURL(int $hostPageId,
                                   int $crc32data,
                                   int $crc32host,
                                   string $url,
                                   int $timeAdded) {

  $values = [
    $hostPageId,
    $crc32data,
    $crc32host,
    $url,
    $timeAdded
  ];

  $statement = $this->_db->prepare('INSERT IGNORE INTO `hostPageSnapURL` (`hostPageId`,
                                                                       `crc32data`,
                                                                       `crc32host`,
                                                                       `url`,
                                                                       `timeAdded`) VALUES (?, ?, ?, ?, ?)');

  $statement->execute($values);

  return $statement->rowCount();
}
|
||||||
|
|
||||||
|
/**
 * Delete all `hostPageSnapURL` records of the host page.
 *
 * Only database rows are removed here, nothing in this method touches files.
 *
 * @param int $hostPageId Page to delete the snap records for
 *
 * @return mixed Result of rowCount() on the executed statement
 */
public function deleteHostPageSnapURL(int $hostPageId) {

  $statement = $this->_db->prepare('DELETE FROM `hostPageSnapURL` WHERE `hostPageId` = ?');

  $statement->execute(
    [
      $hostPageId
    ]
  );

  $deleted = $statement->rowCount();

  return $deleted;
}
|
||||||
|
|
||||||
|
/**
 * Count `hostPageSnapURL` records of the host page.
 *
 * NOTE(review): reads the `total` property of the fetched row, so the
 * connection presumably fetches rows as objects - confirm.
 *
 * @param int $hostPageId Page to count the snaps for
 *
 * @return mixed Value of the `total` column
 */
public function getTotalHostPageSnapURLs(int $hostPageId) {

  $statement = $this->_db->prepare('SELECT COUNT(*) AS `total` FROM `hostPageSnapURL` WHERE `hostPageId` = ?');

  $statement->execute(
    [
      $hostPageId
    ]
  );

  $row = $statement->fetch();

  return $row->total;
}
|
||||||
|
|
||||||
|
/**
 * Get all `hostPageSnapURL` records of the host page.
 *
 * Rows are ordered by `timeAdded` descending.
 *
 * @param int $hostPageId Page to get the snaps for
 *
 * @return mixed Result of fetchAll() on the executed statement
 */
public function getHostPageSnapURLs(int $hostPageId) {

  $statement = $this->_db->prepare('SELECT * FROM `hostPageSnapURL` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC');

  $statement->execute(
    [
      $hostPageId
    ]
  );

  $rows = $statement->fetchAll();

  return $rows;
}
|
||||||
|
|
||||||
|
/**
 * Find a single `hostPageSnapURL` record by page, content checksum and host checksum.
 *
 * The crawler calls this before saving a snap, to skip content that is
 * already stored for the page on the given storage host.
 *
 * @param int $hostPageId Page the snap belongs to
 * @param int $crc32data  Checksum of the snapped content
 * @param int $crc32host  Checksum of the storage host
 *
 * @return mixed Result of fetch() on the executed statement
 */
public function getHostPageSnapURL(int $hostPageId, int $crc32data, int $crc32host) {

  // Every bound value needs its own column: the condition used to test
  // `hostPageId` twice, so `crc32data` was never part of the lookup
  $query = $this->_db->prepare('SELECT * FROM `hostPageSnapURL` WHERE `hostPageId` = ? AND `crc32data` = ? AND `crc32host` = ? LIMIT 1');

  $query->execute([$hostPageId, $crc32data, $crc32host]);

  return $query->fetch();
}
|
||||||
|
|
||||||
// Cleaner tools
|
// Cleaner tools
|
||||||
public function getCleanerQueue(int $limit, int $timeFrom) {
|
public function getCleanerQueue(int $limit, int $timeFrom) {
|
||||||
|
|
||||||
@ -398,7 +455,9 @@ class MySQL {
|
|||||||
int $hostsTotal,
|
int $hostsTotal,
|
||||||
int $hostsUpdated,
|
int $hostsUpdated,
|
||||||
int $hostPagesDeleted,
|
int $hostPagesDeleted,
|
||||||
int $hostPageDescriptionsDeleted,
|
int $hostPagesDescriptionsDeleted,
|
||||||
|
int $hostPagesSnapUrlDeleted,
|
||||||
|
int $hostPagesToHostPageDeleted,
|
||||||
int $hostPagesBansRemoved,
|
int $hostPagesBansRemoved,
|
||||||
int $manifestsTotal,
|
int $manifestsTotal,
|
||||||
int $manifestsDeleted,
|
int $manifestsDeleted,
|
||||||
@ -414,7 +473,9 @@ class MySQL {
|
|||||||
`hostsTotal`,
|
`hostsTotal`,
|
||||||
`hostsUpdated`,
|
`hostsUpdated`,
|
||||||
`hostPagesDeleted`,
|
`hostPagesDeleted`,
|
||||||
`hostPageDescriptionsDeleted`,
|
`hostPagesDescriptionsDeleted`,
|
||||||
|
`hostPagesSnapUrlDeleted`,
|
||||||
|
`hostPagesToHostPageDeleted`,
|
||||||
`hostPagesBansRemoved`,
|
`hostPagesBansRemoved`,
|
||||||
`manifestsTotal`,
|
`manifestsTotal`,
|
||||||
`manifestsDeleted`,
|
`manifestsDeleted`,
|
||||||
@ -424,14 +485,16 @@ class MySQL {
|
|||||||
`httpRequestsSizeTotal`,
|
`httpRequestsSizeTotal`,
|
||||||
`httpDownloadSizeTotal`,
|
`httpDownloadSizeTotal`,
|
||||||
`httpRequestsTimeTotal`,
|
`httpRequestsTimeTotal`,
|
||||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([
|
$query->execute([
|
||||||
$timeAdded,
|
$timeAdded,
|
||||||
$hostsTotal,
|
$hostsTotal,
|
||||||
$hostsUpdated,
|
$hostsUpdated,
|
||||||
$hostPagesDeleted,
|
$hostPagesDeleted,
|
||||||
$hostPageDescriptionsDeleted,
|
$hostPagesDescriptionsDeleted,
|
||||||
|
$hostPagesSnapUrlDeleted,
|
||||||
|
$hostPagesToHostPageDeleted,
|
||||||
$hostPagesBansRemoved,
|
$hostPagesBansRemoved,
|
||||||
$manifestsTotal,
|
$manifestsTotal,
|
||||||
$manifestsDeleted,
|
$manifestsDeleted,
|
||||||
@ -523,6 +586,7 @@ class MySQL {
|
|||||||
int $hostPagesProcessed,
|
int $hostPagesProcessed,
|
||||||
int $hostPagesIndexed,
|
int $hostPagesIndexed,
|
||||||
int $hostPagesAdded,
|
int $hostPagesAdded,
|
||||||
|
int $hostPagesSnapUrlAdded,
|
||||||
int $hostPagesBanned,
|
int $hostPagesBanned,
|
||||||
int $manifestsProcessed,
|
int $manifestsProcessed,
|
||||||
int $manifestsAdded,
|
int $manifestsAdded,
|
||||||
@ -537,6 +601,7 @@ class MySQL {
|
|||||||
`hostPagesProcessed`,
|
`hostPagesProcessed`,
|
||||||
`hostPagesIndexed`,
|
`hostPagesIndexed`,
|
||||||
`hostPagesAdded`,
|
`hostPagesAdded`,
|
||||||
|
`hostPagesSnapUrlAdded`,
|
||||||
`hostPagesBanned`,
|
`hostPagesBanned`,
|
||||||
`manifestsProcessed`,
|
`manifestsProcessed`,
|
||||||
`manifestsAdded`,
|
`manifestsAdded`,
|
||||||
@ -544,7 +609,7 @@ class MySQL {
|
|||||||
`httpRequestsSizeTotal`,
|
`httpRequestsSizeTotal`,
|
||||||
`httpDownloadSizeTotal`,
|
`httpDownloadSizeTotal`,
|
||||||
`httpRequestsTimeTotal`,
|
`httpRequestsTimeTotal`,
|
||||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([
|
$query->execute([
|
||||||
$timeAdded,
|
$timeAdded,
|
||||||
@ -552,6 +617,7 @@ class MySQL {
|
|||||||
$hostPagesProcessed,
|
$hostPagesProcessed,
|
||||||
$hostPagesIndexed,
|
$hostPagesIndexed,
|
||||||
$hostPagesAdded,
|
$hostPagesAdded,
|
||||||
|
$hostPagesSnapUrlAdded,
|
||||||
$hostPagesBanned,
|
$hostPagesBanned,
|
||||||
$manifestsProcessed,
|
$manifestsProcessed,
|
||||||
$manifestsAdded,
|
$manifestsAdded,
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 147 KiB After Width: | Height: | Size: 176 KiB |
@ -1,7 +1,7 @@
|
|||||||
<?php
|
<?php
|
||||||
|
|
||||||
// Current version
|
// Current version
|
||||||
define('API_VERSION', 0.8);
|
define('API_VERSION', 0.9);
|
||||||
|
|
||||||
// Load system dependencies
|
// Load system dependencies
|
||||||
require_once('../config/app.php');
|
require_once('../config/app.php');
|
||||||
@ -109,7 +109,8 @@ if (API_ENABLED) {
|
|||||||
'crawlHostDefaultStatus' => CRAWL_HOST_DEFAULT_STATUS,
|
'crawlHostDefaultStatus' => CRAWL_HOST_DEFAULT_STATUS,
|
||||||
'crawlHostDefaultMetaOnly' => CRAWL_HOST_DEFAULT_META_ONLY,
|
'crawlHostDefaultMetaOnly' => CRAWL_HOST_DEFAULT_META_ONLY,
|
||||||
'crawlHostPageSecondsOffset' => CRAWL_PAGE_SECONDS_OFFSET,
|
'crawlHostPageSecondsOffset' => CRAWL_PAGE_SECONDS_OFFSET,
|
||||||
'crawlHostPageMime' => CRAWL_PAGE_MIME,
|
'crawlHostPageMimeIndex' => CRAWL_PAGE_MIME_INDEX,
|
||||||
|
'crawlHostPageMimeSnapLocal' => CRAWL_PAGE_MIME_SNAP_LOCAL,
|
||||||
'cleanHostSecondsOffset' => CLEAN_HOST_SECONDS_OFFSET,
|
'cleanHostSecondsOffset' => CLEAN_HOST_SECONDS_OFFSET,
|
||||||
'crawlRobotsDefaultRules' => CRAWL_ROBOTS_DEFAULT_RULES,
|
'crawlRobotsDefaultRules' => CRAWL_ROBOTS_DEFAULT_RULES,
|
||||||
'crawlRobotsPostfixRules' => CRAWL_ROBOTS_POSTFIX_RULES,
|
'crawlRobotsPostfixRules' => CRAWL_ROBOTS_POSTFIX_RULES,
|
||||||
|
@ -234,13 +234,30 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the
|
|||||||
<p><?php echo date('c', $hostPage->timeAdded) ?></p>
|
<p><?php echo date('c', $hostPage->timeAdded) ?></p>
|
||||||
<p><?php echo _('Time updated') ?></p>
|
<p><?php echo _('Time updated') ?></p>
|
||||||
<p><?php echo date('c', $hostPage->timeUpdated) ?></p>
|
<p><?php echo date('c', $hostPage->timeUpdated) ?></p>
|
||||||
<?php if ($totalHostPageIdSources = $db->getTotalHostPageIdSourcesByHostPageIdTarget($hp)) { ?>
|
<?php // Page snaps: total counter followed by a link for every snap record ?>
<?php $totalHostPageSnapUrls = $db->getTotalHostPageSnapURLs($hp); ?>
<p>
  <?php echo Filter::plural($totalHostPageSnapUrls, [sprintf(_('%s snap'),  $totalHostPageSnapUrls),
                                                     sprintf(_('%s snaps'), $totalHostPageSnapUrls),
                                                     sprintf(_('%s snaps'), $totalHostPageSnapUrls),
                                                    ]) ?>
</p>
<?php if ($totalHostPageSnapUrls) { ?>
  <?php foreach ($db->getHostPageSnapURLs($hp) as $hostPageSnapUrl) { ?>
    <?php // Zero crc32host gets the WEBSITE_DOMAIN prefix; cast first, the database driver may return the column as a string ?>
    <p>
      <a href="<?php echo (int) $hostPageSnapUrl->crc32host === 0 ? WEBSITE_DOMAIN . $hostPageSnapUrl->url : $hostPageSnapUrl->url ?>">
        <?php echo date('c', $hostPageSnapUrl->timeAdded) ?>
      </a>
    </p>
  <?php } ?>
<?php } ?>
|
||||||
|
<?php $totalHostPageIdSources = $db->getTotalHostPageIdSourcesByHostPageIdTarget($hp); ?>
|
||||||
<p>
|
<p>
|
||||||
<?php echo Filter::plural($totalHostPageIdSources, [sprintf(_('%s referrer'), $totalHostPageIdSources),
|
<?php echo Filter::plural($totalHostPageIdSources, [sprintf(_('%s referrer'), $totalHostPageIdSources),
|
||||||
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
||||||
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
||||||
]) ?>
|
]) ?>
|
||||||
</p>
|
</p>
|
||||||
|
<?php if ($totalHostPageIdSources) { ?>
|
||||||
<?php foreach ($db->getHostPageIdSourcesByHostPageIdTarget($hp) as $hostPageIdSource) { ?>
|
<?php foreach ($db->getHostPageIdSourcesByHostPageIdTarget($hp) as $hostPageIdSource) { ?>
|
||||||
<?php if ($hostPage = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>
|
<?php if ($hostPage = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>
|
||||||
<p>
|
<p>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user