mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-24 13:34:25 +00:00
update page / image description models, implement history snap crawling
This commit is contained in:
parent
77bd25f587
commit
23ead4e12c
@ -255,7 +255,7 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
|
||||
* Index only meta tags to prevent disk overuse
|
||||
* or false to save meta tags + overall plain text page content
|
||||
*
|
||||
* Custom rule for specified host could be provided in the DB `host`.`crawlPageMetaOnly` field
|
||||
* Custom rule for specified host could be provided in the DB `host`.`crawlMetaOnly` field
|
||||
*
|
||||
* This option can change search result relevance
|
||||
* This option enables image data caching in base64
|
||||
@ -367,6 +367,12 @@ define('CLEAN_HOST_SECONDS_OFFSET', 60*60*24*30);
|
||||
*/
|
||||
define('CLEAN_PAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
|
||||
|
||||
/*
|
||||
* Remove page description history after the following time
|
||||
*
|
||||
*/
|
||||
define('CLEAN_PAGE_DESCRIPTION_OFFSET', 60*60*24*30*12*10);
|
||||
|
||||
/*
|
||||
* Remove image ban after following time
|
||||
*
|
||||
@ -376,6 +382,12 @@ define('CLEAN_PAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
|
||||
*/
|
||||
define('CLEAN_IMAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
|
||||
|
||||
/*
|
||||
* Remove image description history after the following time
|
||||
*
|
||||
*/
|
||||
define('CLEAN_IMAGE_DESCRIPTION_OFFSET', 60*60*24*30*12*10);
|
||||
|
||||
// API settings
|
||||
|
||||
/*
|
||||
|
@ -21,7 +21,7 @@ source hostPage : common
|
||||
hostPageDescription.metaKeywords) \
|
||||
FROM hostPageDescription \
|
||||
WHERE hostPageDescription.hostPageId = hostPage.hostPageId \
|
||||
ORDER BY hostPageDescription.timeAdded DESC \
|
||||
ORDER BY hostPageDescription.timeUpdated DESC, hostPageDescription.timeAdded DESC \
|
||||
LIMIT 1) AS pageDescription \
|
||||
FROM hostPage \
|
||||
JOIN host ON (host.hostId = hostPage.hostId) \
|
||||
@ -37,8 +37,8 @@ source hostImage : common
|
||||
(SELECT CONCAT_WS(' ', hostImageDescription.alt, hostImageDescription.title) \
|
||||
FROM hostImageDescription \
|
||||
WHERE hostImageDescription.hostImageId = hostImage.hostImageId \
|
||||
ORDER BY hostImageDescription.timeAdded \
|
||||
DESC LIMIT 1) AS imageDescription \
|
||||
ORDER BY hostImageDescription.timeUpdated DESC, hostImageDescription.timeAdded DESC \
|
||||
LIMIT 1) AS imageDescription \
|
||||
FROM hostImage \
|
||||
JOIN host ON (host.hostId = hostImage.hostId) \
|
||||
WHERE host.status = '1' AND hostImage.httpCode = 200 AND hostImage.timeBanned IS NULL \
|
||||
|
@ -21,22 +21,24 @@ $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||
// Debug
|
||||
$timeStart = microtime(true);
|
||||
|
||||
$httpRequestsTotal = 0;
|
||||
$httpRequestsSizeTotal = 0;
|
||||
$httpDownloadSizeTotal = 0;
|
||||
$httpRequestsTimeTotal = 0;
|
||||
$httpRequestsTotal = 0;
|
||||
$httpRequestsSizeTotal = 0;
|
||||
$httpDownloadSizeTotal = 0;
|
||||
$httpRequestsTimeTotal = 0;
|
||||
|
||||
$hostsTotal = $db->getTotalHosts();
|
||||
$manifestsTotal = $db->getTotalManifests();
|
||||
$hostsUpdated = 0;
|
||||
$hostPagesDeleted = 0;
|
||||
$hostImagesDeleted = 0;
|
||||
$manifestsDeleted = 0;
|
||||
$hostPagesBansRemoved = 0;
|
||||
$hostImagesBansRemoved = 0;
|
||||
$hostsTotal = $db->getTotalHosts();
|
||||
$manifestsTotal = $db->getTotalManifests();
|
||||
$hostsUpdated = 0;
|
||||
$hostPagesDeleted = 0;
|
||||
$hostPageDescriptionsDeleted = 0;
|
||||
$hostImagesDeleted = 0;
|
||||
$hostImageDescriptionsDeleted = 0;
|
||||
$manifestsDeleted = 0;
|
||||
$hostPagesBansRemoved = 0;
|
||||
$hostImagesBansRemoved = 0;
|
||||
|
||||
$logsCleanerDeleted = 0;
|
||||
$logsCrawlerDeleted = 0;
|
||||
$logsCleanerDeleted = 0;
|
||||
$logsCrawlerDeleted = 0;
|
||||
|
||||
// Begin update
|
||||
$db->beginTransaction();
|
||||
@ -202,9 +204,15 @@ try {
|
||||
// Reset banned pages
|
||||
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
|
||||
|
||||
// Delete page description history
|
||||
$hostPageDescriptionsDeleted += $db->deleteHostPageDescriptionsByTimeAdded(time() - CLEAN_PAGE_DESCRIPTION_OFFSET);
|
||||
|
||||
// Reset banned images
|
||||
$hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET);
|
||||
|
||||
// Delete image description history
|
||||
$hostImageDescriptionsDeleted += $db->deleteHostImageDescriptionsByTimeAdded(time() - CLEAN_IMAGE_DESCRIPTION_OFFSET);
|
||||
|
||||
// Delete deprecated logs
|
||||
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
|
||||
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
|
||||
@ -228,8 +236,10 @@ if (CLEAN_LOG_ENABLED) {
|
||||
$hostsTotal,
|
||||
$hostsUpdated,
|
||||
$hostPagesDeleted,
|
||||
$hostPageDescriptionsDeleted,
|
||||
$hostPagesBansRemoved,
|
||||
$hostImagesDeleted,
|
||||
$hostImageDescriptionsDeleted,
|
||||
$hostImagesBansRemoved,
|
||||
$manifestsTotal,
|
||||
$manifestsDeleted,
|
||||
@ -252,7 +262,9 @@ echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
|
||||
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
||||
|
||||
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
|
||||
echo 'Host page descriptions deleted: ' . $hostPageDescriptionsDeleted . PHP_EOL;
|
||||
echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL;
|
||||
echo 'Host image descriptions deleted: ' . $hostImageDescriptionsDeleted . PHP_EOL;
|
||||
|
||||
echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
|
||||
echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;
|
||||
|
@ -301,7 +301,7 @@ try {
|
||||
}
|
||||
|
||||
// Convert remote image data to base64 string
|
||||
if (!CRAWL_HOST_DEFAULT_META_ONLY) {
|
||||
if (!$queueHostImage->crawlMetaOnly) {
|
||||
|
||||
// Skip image processing without returned content
|
||||
if (!$hostImageContent = $curl->getContent()) {
|
||||
@ -327,14 +327,22 @@ try {
|
||||
|
||||
$hostImageData = 'data:image/' . str_replace(['svg'], ['svg+xml'], $hostImageExtension) . ';base64,' . $hostImageBase64;
|
||||
|
||||
} else {
|
||||
// Set host image description
|
||||
// On link collection we knew the meta but not the data,
|
||||
// this step uses the latest description slice and inserts the data received by the curl request
|
||||
if ($lastHostImageDescription = $db->getLastHostImageDescription($queueHostImage->hostImageId)) {
|
||||
|
||||
$hostImageData = null;
|
||||
$db->setHostImageDescription($queueHostImage->hostImageId,
|
||||
crc32($hostImageData),
|
||||
$lastHostImageDescription->alt,
|
||||
$lastHostImageDescription->title,
|
||||
$hostImageData,
|
||||
time());
|
||||
}
|
||||
}
|
||||
|
||||
$hostImagesIndexed += $db->updateHostImage($queueHostImage->hostImageId,
|
||||
Filter::mime($hostImageContentType),
|
||||
$hostImageData,
|
||||
time());
|
||||
}
|
||||
|
||||
@ -465,17 +473,13 @@ try {
|
||||
$content = Filter::pageData($content);
|
||||
|
||||
// Add queued page description if not exists
|
||||
$crc32data = crc32($content);
|
||||
|
||||
if (!$db->getHostPageDescription($queueHostPage->hostPageId, $crc32data)) {
|
||||
$db->addHostPageDescription($queueHostPage->hostPageId,
|
||||
$crc32data,
|
||||
Filter::pageTitle($title->item(0)->nodeValue),
|
||||
Filter::pageDescription($metaDescription),
|
||||
Filter::pageKeywords($metaKeywords),
|
||||
CRAWL_HOST_DEFAULT_META_ONLY ? null : $content,
|
||||
time());
|
||||
}
|
||||
$db->setHostPageDescription($queueHostPage->hostPageId,
|
||||
crc32($content),
|
||||
Filter::pageTitle($title->item(0)->nodeValue),
|
||||
Filter::pageDescription($metaDescription),
|
||||
Filter::pageKeywords($metaKeywords),
|
||||
$queueHostPage->crawlMetaOnly ? null : $content,
|
||||
time());
|
||||
|
||||
// Update manifest registry
|
||||
if (CRAWL_MANIFEST && !empty($metaYggoManifest) && filter_var($metaYggoManifest, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $metaYggoManifest)) {
|
||||
@ -547,7 +551,7 @@ try {
|
||||
|
||||
// Update curl stats
|
||||
$httpRequestsTotal++;
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpRequestsSizeTotal += $curl->getSizeRequest();
|
||||
$httpDownloadSizeTotal += $curl->getSizeDownload();
|
||||
$httpRequestsTimeTotal += $curl->getTotalTime();
|
||||
|
||||
@ -610,20 +614,16 @@ try {
|
||||
}
|
||||
}
|
||||
|
||||
// Host image exists or created new one
|
||||
if ($hostImageId) {
|
||||
// Add/update host image description
|
||||
$db->setHostImageDescription($hostImageId,
|
||||
null, // no data, download it in the crawler queue
|
||||
Filter::imageAlt($imageAlt),
|
||||
Filter::imageTitle($imageTitle),
|
||||
null,
|
||||
time());
|
||||
|
||||
// Add/update host image description
|
||||
$db->setHostImageDescription($hostImageId,
|
||||
crc32(md5((string) $imageAlt . (string) $imageTitle)),
|
||||
Filter::imageAlt($imageAlt),
|
||||
Filter::imageTitle($imageTitle),
|
||||
time(),
|
||||
time());
|
||||
|
||||
// Relate host image with host page was found
|
||||
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1);
|
||||
}
|
||||
// Relate host image with host page was found
|
||||
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1);
|
||||
|
||||
// Increase image rank when link does not match the current host
|
||||
if ($hostImageURL->scheme . '://' .
|
||||
|
Binary file not shown.
@ -102,11 +102,11 @@ class MySQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlPageMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) {
|
||||
public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlPageMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
$query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
|
||||
$query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlPageMetaOnly, $status, $robots, $robotsPostfix]);
|
||||
$query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $robots, $robotsPostfix]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
@ -241,13 +241,12 @@ class MySQL {
|
||||
|
||||
public function updateHostImage(int $hostImageId,
|
||||
string $mime,
|
||||
mixed $data,
|
||||
int $timeUpdated,
|
||||
mixed $timeBanned = null) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostImage` SET `mime` = ?, `data` = ?, `timeUpdated` = ?, `timeBanned` = ? WHERE `hostImageId` = ? LIMIT 1');
|
||||
$query = $this->_db->prepare('UPDATE `hostImage` SET `mime` = ?, `timeUpdated` = ?, `timeBanned` = ? WHERE `hostImageId` = ? LIMIT 1');
|
||||
|
||||
$query->execute([$mime, $data, $timeUpdated, $timeBanned, $hostImageId]);
|
||||
$query->execute([$mime, $timeUpdated, $timeBanned, $hostImageId]);
|
||||
|
||||
return $query->rowCount();
|
||||
}
|
||||
@ -261,10 +260,15 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) {
|
||||
public function setHostImageDescription(int $hostImageId,
|
||||
mixed $crc32data,
|
||||
string $alt,
|
||||
string $title,
|
||||
mixed $data,
|
||||
int $time) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
|
||||
`crc32id`,
|
||||
`crc32data`,
|
||||
`alt`,
|
||||
`title`,
|
||||
`timeAdded`) VALUES (?, ?, ?, ?, ?)
|
||||
@ -273,7 +277,7 @@ class MySQL {
|
||||
`title` = ?,
|
||||
`timeUpdated` = ?');
|
||||
|
||||
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]);
|
||||
$query->execute([$hostImageId, $crc32data, $alt, $title, $time, $alt, $title, $time]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
@ -287,6 +291,15 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Fetch the most recent description row stored for a host image.
 *
 * Rows of `hostImageDescription` matching the given image are ordered by
 * `timeUpdated` and then `timeAdded`, both descending, and only the first
 * row is returned.
 *
 * @param int $hostImageId Value matched against `hostImageDescription`.`hostImageId`
 *
 * @return mixed Result of fetch() on the executed statement
 *               NOTE(review): presumably an object row, or false when nothing
 *               matches; this depends on the fetch mode configured on the
 *               connection, which is not visible here. Confirm.
 */
public function getLastHostImageDescription(int $hostImageId) {

    $sql = 'SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? ORDER BY `timeUpdated` DESC, `timeAdded` DESC LIMIT 1';

    $statement = $this->_db->prepare($sql);
    $statement->execute([$hostImageId]);

    return $statement->fetch();
}
|
||||
|
||||
public function getHostImageHostPages(int $hostImageId, int $limit = 5) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage`
|
||||
@ -312,7 +325,7 @@ class MySQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
|
||||
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $time, int $quantity) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
|
||||
`hostPageId`,
|
||||
@ -323,7 +336,7 @@ class MySQL {
|
||||
ON DUPLICATE KEY UPDATE `timeUpdated` = ?,
|
||||
`quantity` = `quantity` + ' . (int) $quantity);
|
||||
|
||||
$query->execute([$hostImageId, $hostPageId, $timeAdded, null, $quantity, $timeUpdated]);
|
||||
$query->execute([$hostImageId, $hostPageId, $time, null, $quantity, $time]);
|
||||
|
||||
return $query->rowCount(); // no primary key
|
||||
}
|
||||
@ -402,7 +415,7 @@ class MySQL {
|
||||
|
||||
public function getLastPageDescription(int $hostPageId) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostPageDescription` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC LIMIT 1');
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostPageDescription` WHERE `hostPageId` = ? ORDER BY `timeUpdated` DESC, `timeAdded` DESC LIMIT 1');
|
||||
|
||||
$query->execute([$hostPageId]);
|
||||
|
||||
@ -438,11 +451,7 @@ class MySQL {
|
||||
`host`.`scheme`,
|
||||
`host`.`name`,
|
||||
`host`.`port`,
|
||||
|
||||
(SELECT GROUP_CONCAT(CONCAT_WS(" ", `hostImageDescription`.`alt`, `hostImageDescription`.`title`))
|
||||
|
||||
FROM `hostImageDescription`
|
||||
WHERE `hostImageDescription`.`hostImageId` = `hostImage`.`hostImageId`) AS `description`
|
||||
`host`.`crawlMetaOnly`
|
||||
|
||||
FROM `hostImage`
|
||||
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)
|
||||
@ -560,13 +569,13 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
public function addHostPageDescription(int $hostPageId,
|
||||
public function setHostPageDescription(int $hostPageId,
|
||||
int $crc32data,
|
||||
mixed $metaTitle,
|
||||
mixed $metaDescription,
|
||||
mixed $metaKeywords,
|
||||
mixed $data,
|
||||
int $timeAdded) {
|
||||
int $time) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `hostPageDescription` ( `hostPageId`,
|
||||
`crc32data`,
|
||||
@ -575,7 +584,9 @@ class MySQL {
|
||||
`metaKeywords`,
|
||||
`data`,
|
||||
`timeAdded`
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?)');
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
|
||||
ON DUPLICATE KEY UPDATE `timeUpdated` = ?');
|
||||
|
||||
$query->execute([
|
||||
$hostPageId,
|
||||
@ -584,7 +595,8 @@ class MySQL {
|
||||
$metaDescription,
|
||||
$metaKeywords,
|
||||
$data,
|
||||
$timeAdded
|
||||
$time,
|
||||
$time
|
||||
]);
|
||||
|
||||
return $query->rowCount();
|
||||
@ -615,6 +627,15 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Delete page description rows added before the given moment.
 *
 * Removes every `hostPageDescription` row whose `timeAdded` is strictly
 * less than $timeOffset.
 *
 * @param int $timeOffset Cut-off compared against `timeAdded`
 *                        (callers pass time() minus a retention offset)
 *
 * @return int Value reported by rowCount() for the executed DELETE
 */
public function deleteHostPageDescriptionsByTimeAdded(int $timeOffset) {

    // The cut-off is cast to int before being appended to the statement text
    $sql = 'DELETE FROM `hostPageDescription` WHERE `timeAdded` < ' . (int) $timeOffset;

    $statement = $this->_db->prepare($sql);
    $statement->execute();

    return $statement->rowCount();
}
|
||||
|
||||
public function resetBannedHostImages(int $timeOffset) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
|
||||
@ -624,12 +645,23 @@ class MySQL {
|
||||
return $query->rowCount();
|
||||
}
|
||||
|
||||
/**
 * Delete image description rows added before the given moment.
 *
 * Removes every `hostImageDescription` row whose `timeAdded` is strictly
 * less than $timeOffset.
 *
 * @param int $timeOffset Cut-off compared against `timeAdded`
 *                        (callers pass time() minus a retention offset)
 *
 * @return int Value reported by rowCount() for the executed DELETE
 */
public function deleteHostImageDescriptionsByTimeAdded(int $timeOffset) {

    // The cut-off is cast to int before being appended to the statement text
    $sql = 'DELETE FROM `hostImageDescription` WHERE `timeAdded` < ' . (int) $timeOffset;

    $statement = $this->_db->prepare($sql);
    $statement->execute();

    return $statement->rowCount();
}
|
||||
|
||||
public function addCleanerLog(int $timeAdded,
|
||||
int $hostsTotal,
|
||||
int $hostsUpdated,
|
||||
int $hostPagesDeleted,
|
||||
int $hostPageDescriptionsDeleted,
|
||||
int $hostPagesBansRemoved,
|
||||
int $hostImagesDeleted,
|
||||
int $hostImageDescriptionsDeleted,
|
||||
int $hostImagesBansRemoved,
|
||||
int $manifestsTotal,
|
||||
int $manifestsDeleted,
|
||||
@ -645,8 +677,10 @@ class MySQL {
|
||||
`hostsTotal`,
|
||||
`hostsUpdated`,
|
||||
`hostPagesDeleted`,
|
||||
`hostPageDescriptionsDeleted`,
|
||||
`hostPagesBansRemoved`,
|
||||
`hostImagesDeleted`,
|
||||
`hostImageDescriptionsDeleted`,
|
||||
`hostImagesBansRemoved`,
|
||||
`manifestsTotal`,
|
||||
`manifestsDeleted`,
|
||||
@ -656,15 +690,17 @@ class MySQL {
|
||||
`httpRequestsSizeTotal`,
|
||||
`httpDownloadSizeTotal`,
|
||||
`httpRequestsTimeTotal`,
|
||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
|
||||
$query->execute([
|
||||
$timeAdded,
|
||||
$hostsTotal,
|
||||
$hostsUpdated,
|
||||
$hostPagesDeleted,
|
||||
$hostPageDescriptionsDeleted,
|
||||
$hostPagesBansRemoved,
|
||||
$hostImagesDeleted,
|
||||
$hostImageDescriptionsDeleted,
|
||||
$hostImagesBansRemoved,
|
||||
$manifestsTotal,
|
||||
$manifestsDeleted,
|
||||
@ -700,7 +736,7 @@ class MySQL {
|
||||
`host`.`port`,
|
||||
`host`.`crawlPageLimit`,
|
||||
`host`.`crawlImageLimit`,
|
||||
`host`.`crawlPageMetaOnly`,
|
||||
`host`.`crawlMetaOnly`,
|
||||
`host`.`robots`,
|
||||
`host`.`robotsPostfix`
|
||||
|
||||
@ -735,7 +771,8 @@ class MySQL {
|
||||
`hostImage`.`uri`,
|
||||
`host`.`scheme`,
|
||||
`host`.`name`,
|
||||
`host`.`port`
|
||||
`host`.`port`,
|
||||
`host`.`crawlMetaOnly`
|
||||
|
||||
FROM `hostImage`
|
||||
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 126 KiB After Width: | Height: | Size: 135 KiB |
@ -418,9 +418,21 @@ if (!empty($q)) {
|
||||
// Save image content on data settings enabled
|
||||
$db->updateHostImage($hostImage->hostImageId,
|
||||
Filter::mime($hostImageContentType),
|
||||
CRAWL_HOST_DEFAULT_META_ONLY ? null : $hostImageURLencoded,
|
||||
time());
|
||||
|
||||
// Set host image description
|
||||
// On link collection we knew the meta but not the data,
|
||||
// this step uses the latest description slice and inserts the data received by the curl request
|
||||
if ($lastHostImageDescription = $db->getLastHostImageDescription($hostImage->hostImageId)) {
|
||||
|
||||
$db->setHostImageDescription($hostImage->hostImageId,
|
||||
crc32($hostImageData),
|
||||
$lastHostImageDescription->alt,
|
||||
$lastHostImageDescription->title,
|
||||
$hostImage->crawlMetaOnly ? null : $hostImageData,
|
||||
time());
|
||||
}
|
||||
|
||||
// Local image data exists
|
||||
} else {
|
||||
|
||||
@ -439,8 +451,8 @@ if (!empty($q)) {
|
||||
<?php if ($hostPageDescription = $db->getLastPageDescription($result->id)) { ?>
|
||||
<h3><?php echo $hostPageDescription->metaTitle ?></h3>
|
||||
<?php } ?>
|
||||
<?php if (!empty($hostImage->description)) { ?>
|
||||
<span><?php echo $hostImage->description ?></span>
|
||||
<?php if ($lastHostImageDescription = $db->getLastHostImageDescription($result->id)) { ?>
|
||||
<span><?php echo $lastHostImageDescription->title ?> <?php echo $lastHostImageDescription->alt ?></span>
|
||||
<?php } ?>
|
||||
<a href="<?php echo $hostPageURL ?>">
|
||||
<img src="<?php echo WEBSITE_DOMAIN ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
|
||||
|
Loading…
x
Reference in New Issue
Block a user