Browse Source

update page / image description models, implement history snap crawling

main
ghost 2 years ago
parent
commit
23ead4e12c
  1. 14
      config/app.php.txt
  2. 6
      config/sphinx.conf.txt
  3. 12
      crontab/cleaner.php
  4. 36
      crontab/crawler.php
  5. BIN
      database/yggo.mwb
  6. 85
      library/mysql.php
  7. BIN
      media/db-prototype.png
  8. 18
      public/search.php

14
config/app.php.txt

@ -255,7 +255,7 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
* Index only meta tags to prevent disk overuse * Index only meta tags to prevent disk overuse
* or false to save meta tags + overall plain text page content * or false to save meta tags + overall plain text page content
* *
* Custom rule for specified host could be provided in the DB `host`.`crawlPageMetaOnly` field * Custom rule for specified host could be provided in the DB `host`.`crawlMetaOnly` field
* *
* This option able to change search results relevance * This option able to change search results relevance
* This option enables image data caching in base64 * This option enables image data caching in base64
@ -367,6 +367,12 @@ define('CLEAN_HOST_SECONDS_OFFSET', 60*60*24*30);
*/ */
define('CLEAN_PAGE_BAN_SECONDS_OFFSET', 60*60*24*30); define('CLEAN_PAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
/*
* Delete page description history records added more than this number of seconds ago (default: 3600 days, roughly 10 years)
*
*/
define('CLEAN_PAGE_DESCRIPTION_OFFSET', 60*60*24*30*12*10);
/* /*
* Remove image ban after following time * Remove image ban after following time
* *
@ -376,6 +382,12 @@ define('CLEAN_PAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
*/ */
define('CLEAN_IMAGE_BAN_SECONDS_OFFSET', 60*60*24*30); define('CLEAN_IMAGE_BAN_SECONDS_OFFSET', 60*60*24*30);
/*
* Delete image description history records added more than this number of seconds ago (default: 3600 days, roughly 10 years)
*
*/
define('CLEAN_IMAGE_DESCRIPTION_OFFSET', 60*60*24*30*12*10);
// API settings // API settings
/* /*

6
config/sphinx.conf.txt

@ -21,7 +21,7 @@ source hostPage : common
hostPageDescription.metaKeywords) \ hostPageDescription.metaKeywords) \
FROM hostPageDescription \ FROM hostPageDescription \
WHERE hostPageDescription.hostPageId = hostPage.hostPageId \ WHERE hostPageDescription.hostPageId = hostPage.hostPageId \
ORDER BY hostPageDescription.timeAdded DESC \ ORDER BY hostPageDescription.timeUpdated DESC, hostPageDescription.timeAdded DESC \
LIMIT 1) AS pageDescription \ LIMIT 1) AS pageDescription \
FROM hostPage \ FROM hostPage \
JOIN host ON (host.hostId = hostPage.hostId) \ JOIN host ON (host.hostId = hostPage.hostId) \
@ -37,8 +37,8 @@ source hostImage : common
(SELECT CONCAT_WS(' ', hostImageDescription.alt, hostImageDescription.title) \ (SELECT CONCAT_WS(' ', hostImageDescription.alt, hostImageDescription.title) \
FROM hostImageDescription \ FROM hostImageDescription \
WHERE hostImageDescription.hostImageId = hostImage.hostImageId \ WHERE hostImageDescription.hostImageId = hostImage.hostImageId \
ORDER BY hostImageDescription.timeAdded \ ORDER BY hostImageDescription.timeUpdated DESC, hostImageDescription.timeAdded DESC \
DESC LIMIT 1) AS imageDescription \ LIMIT 1) AS imageDescription \
FROM hostImage \ FROM hostImage \
JOIN host ON (host.hostId = hostImage.hostId) \ JOIN host ON (host.hostId = hostImage.hostId) \
WHERE host.status = '1' AND hostImage.httpCode = 200 AND hostImage.timeBanned IS NULL \ WHERE host.status = '1' AND hostImage.httpCode = 200 AND hostImage.timeBanned IS NULL \

12
crontab/cleaner.php

@ -30,7 +30,9 @@ $hostsTotal = $db->getTotalHosts();
$manifestsTotal = $db->getTotalManifests(); $manifestsTotal = $db->getTotalManifests();
$hostsUpdated = 0; $hostsUpdated = 0;
$hostPagesDeleted = 0; $hostPagesDeleted = 0;
$hostPageDescriptionsDeleted = 0;
$hostImagesDeleted = 0; $hostImagesDeleted = 0;
$hostImageDescriptionsDeleted = 0;
$manifestsDeleted = 0; $manifestsDeleted = 0;
$hostPagesBansRemoved = 0; $hostPagesBansRemoved = 0;
$hostImagesBansRemoved = 0; $hostImagesBansRemoved = 0;
@ -202,9 +204,15 @@ try {
// Reset banned pages // Reset banned pages
$hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET); $hostPagesBansRemoved += $db->resetBannedHostPages(time() - CLEAN_PAGE_BAN_SECONDS_OFFSET);
// Delete page description history
$hostPageDescriptionsDeleted += $db->deleteHostPageDescriptionsByTimeAdded(time() - CLEAN_PAGE_DESCRIPTION_OFFSET);
// Reset banned images // Reset banned images
$hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET); $hostImagesBansRemoved += $db->resetBannedHostImages(time() - CLEAN_IMAGE_BAN_SECONDS_OFFSET);
// Delete image description history
$hostImageDescriptionsDeleted += $db->deleteHostImageDescriptionsByTimeAdded(time() - CLEAN_IMAGE_DESCRIPTION_OFFSET);
// Delete deprecated logs // Delete deprecated logs
$logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET); $logsCleanerDeleted += $db->deleteLogCleaner(time() - CLEAN_LOG_SECONDS_OFFSET);
$logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET); $logsCrawlerDeleted += $db->deleteLogCrawler(time() - CRAWL_LOG_SECONDS_OFFSET);
@ -228,8 +236,10 @@ if (CLEAN_LOG_ENABLED) {
$hostsTotal, $hostsTotal,
$hostsUpdated, $hostsUpdated,
$hostPagesDeleted, $hostPagesDeleted,
$hostPageDescriptionsDeleted,
$hostPagesBansRemoved, $hostPagesBansRemoved,
$hostImagesDeleted, $hostImagesDeleted,
$hostImageDescriptionsDeleted,
$hostImagesBansRemoved, $hostImagesBansRemoved,
$manifestsTotal, $manifestsTotal,
$manifestsDeleted, $manifestsDeleted,
@ -252,7 +262,9 @@ echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL; echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL; echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
echo 'Host page descriptions deleted: ' . $hostPageDescriptionsDeleted . PHP_EOL;
echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL; echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL;
echo 'Host image descriptions deleted: ' . $hostImageDescriptionsDeleted . PHP_EOL;
echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL; echo 'Cleaner logs deleted: ' . $logsCleanerDeleted . PHP_EOL;
echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL; echo 'Crawler logs deleted: ' . $logsCrawlerDeleted . PHP_EOL;

36
crontab/crawler.php

@ -301,7 +301,7 @@ try {
} }
// Convert remote image data to base64 string // Convert remote image data to base64 string
if (!CRAWL_HOST_DEFAULT_META_ONLY) { if (!$queueHostImage->crawlMetaOnly) {
// Skip image processing without returned content // Skip image processing without returned content
if (!$hostImageContent = $curl->getContent()) { if (!$hostImageContent = $curl->getContent()) {
@ -327,14 +327,22 @@ try {
$hostImageData = 'data:image/' . str_replace(['svg'], ['svg+xml'], $hostImageExtension) . ';base64,' . $hostImageBase64; $hostImageData = 'data:image/' . str_replace(['svg'], ['svg+xml'], $hostImageExtension) . ';base64,' . $hostImageBase64;
} else { // Set host image description
// During link collection only the meta (alt / title) was known, not the image data;
// this step takes the latest description record and saves it again together with the data received by the curl request
if ($lastHostImageDescription = $db->getLastHostImageDescription($queueHostImage->hostImageId)) {
$hostImageData = null; $db->setHostImageDescription($queueHostImage->hostImageId,
crc32($hostImageData),
$lastHostImageDescription->alt,
$lastHostImageDescription->title,
$hostImageData,
time());
}
} }
$hostImagesIndexed += $db->updateHostImage($queueHostImage->hostImageId, $hostImagesIndexed += $db->updateHostImage($queueHostImage->hostImageId,
Filter::mime($hostImageContentType), Filter::mime($hostImageContentType),
$hostImageData,
time()); time());
} }
@ -465,17 +473,13 @@ try {
$content = Filter::pageData($content); $content = Filter::pageData($content);
// Add queued page description if not exists // Add queued page description if not exists
$crc32data = crc32($content); $db->setHostPageDescription($queueHostPage->hostPageId,
crc32($content),
if (!$db->getHostPageDescription($queueHostPage->hostPageId, $crc32data)) {
$db->addHostPageDescription($queueHostPage->hostPageId,
$crc32data,
Filter::pageTitle($title->item(0)->nodeValue), Filter::pageTitle($title->item(0)->nodeValue),
Filter::pageDescription($metaDescription), Filter::pageDescription($metaDescription),
Filter::pageKeywords($metaKeywords), Filter::pageKeywords($metaKeywords),
CRAWL_HOST_DEFAULT_META_ONLY ? null : $content, $queueHostPage->crawlMetaOnly ? null : $content,
time()); time());
}
// Update manifest registry // Update manifest registry
if (CRAWL_MANIFEST && !empty($metaYggoManifest) && filter_var($metaYggoManifest, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $metaYggoManifest)) { if (CRAWL_MANIFEST && !empty($metaYggoManifest) && filter_var($metaYggoManifest, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $metaYggoManifest)) {
@ -610,20 +614,16 @@ try {
} }
} }
// Host image exists or created new one
if ($hostImageId) {
// Add/update host image description // Add/update host image description
$db->setHostImageDescription($hostImageId, $db->setHostImageDescription($hostImageId,
crc32(md5((string) $imageAlt . (string) $imageTitle)), null, // no data, download it in the crawler queue
Filter::imageAlt($imageAlt), Filter::imageAlt($imageAlt),
Filter::imageTitle($imageTitle), Filter::imageTitle($imageTitle),
time(), null,
time()); time());
// Relate host image with host page was found // Relate host image with host page was found
$db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), time(), 1); $db->setHostImageToHostPage($hostImageId, $queueHostPage->hostPageId, time(), 1);
}
// Increase image rank when link does not match the current host // Increase image rank when link does not match the current host
if ($hostImageURL->scheme . '://' . if ($hostImageURL->scheme . '://' .

BIN
database/yggo.mwb

Binary file not shown.

85
library/mysql.php

@ -102,11 +102,11 @@ class MySQL {
return $query->fetch()->total; return $query->fetch()->total;
} }
public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlPageMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) { public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) {
$query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlPageMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'); $query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
$query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlPageMetaOnly, $status, $robots, $robotsPostfix]); $query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $robots, $robotsPostfix]);
return $this->_db->lastInsertId(); return $this->_db->lastInsertId();
} }
@ -241,13 +241,12 @@ class MySQL {
public function updateHostImage(int $hostImageId, public function updateHostImage(int $hostImageId,
string $mime, string $mime,
mixed $data,
int $timeUpdated, int $timeUpdated,
mixed $timeBanned = null) { mixed $timeBanned = null) {
$query = $this->_db->prepare('UPDATE `hostImage` SET `mime` = ?, `data` = ?, `timeUpdated` = ?, `timeBanned` = ? WHERE `hostImageId` = ? LIMIT 1'); $query = $this->_db->prepare('UPDATE `hostImage` SET `mime` = ?, `timeUpdated` = ?, `timeBanned` = ? WHERE `hostImageId` = ? LIMIT 1');
$query->execute([$mime, $data, $timeUpdated, $timeBanned, $hostImageId]); $query->execute([$mime, $timeUpdated, $timeBanned, $hostImageId]);
return $query->rowCount(); return $query->rowCount();
} }
@ -261,10 +260,15 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
public function setHostImageDescription(int $hostImageId, int $crc32id, string $alt, string $title, int $timeAdded, int $timeUpdated) { public function setHostImageDescription(int $hostImageId,
mixed $crc32data,
string $alt,
string $title,
mixed $data,
int $time) {
$query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`, $query = $this->_db->prepare('INSERT INTO `hostImageDescription` (`hostImageId`,
`crc32id`, `crc32data`,
`alt`, `alt`,
`title`, `title`,
`timeAdded`) VALUES (?, ?, ?, ?, ?) `timeAdded`) VALUES (?, ?, ?, ?, ?)
@ -273,7 +277,7 @@ class MySQL {
`title` = ?, `title` = ?,
`timeUpdated` = ?'); `timeUpdated` = ?');
$query->execute([$hostImageId, $crc32id, $alt, $title, $timeAdded, $alt, $title, $timeUpdated]); $query->execute([$hostImageId, $crc32data, $alt, $title, $time, $alt, $title, $time]);
return $this->_db->lastInsertId(); return $this->_db->lastInsertId();
} }
@ -287,6 +291,15 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
/**
 * Fetch the most recent description record of a host image.
 *
 * Rows are sorted by `timeUpdated` and then by `timeAdded`, both descending,
 * and only the first row is returned.
 *
 * NOTE(review): MySQL sorts NULL values last under DESC, so if `timeUpdated`
 * is nullable, a row that was never updated ranks after any row that was,
 * even when it was added later - confirm this is the intended meaning of "last".
 *
 * @param int $hostImageId Value matched against `hostImageDescription`.`hostImageId`
 *
 * @return mixed Whatever fetch() yields; callers read ->alt and ->title from it
 *               and treat a falsy result as "no description found"
 */
public function getLastHostImageDescription(int $hostImageId) {
    $sql = 'SELECT * FROM `hostImageDescription` WHERE `hostImageId` = ? ORDER BY `timeUpdated` DESC, `timeAdded` DESC LIMIT 1';

    $statement = $this->_db->prepare($sql);
    $statement->execute([$hostImageId]);

    return $statement->fetch();
}
public function getHostImageHostPages(int $hostImageId, int $limit = 5) { public function getHostImageHostPages(int $hostImageId, int $limit = 5) {
$query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` $query = $this->_db->prepare('SELECT * FROM `hostImageToHostPage`
@ -312,7 +325,7 @@ class MySQL {
return $query->fetch()->total; return $query->fetch()->total;
} }
public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) { public function setHostImageToHostPage(int $hostImageId, int $hostPageId, int $time, int $quantity) {
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`, $query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
`hostPageId`, `hostPageId`,
@ -323,7 +336,7 @@ class MySQL {
ON DUPLICATE KEY UPDATE `timeUpdated` = ?, ON DUPLICATE KEY UPDATE `timeUpdated` = ?,
`quantity` = `quantity` + ' . (int) $quantity); `quantity` = `quantity` + ' . (int) $quantity);
$query->execute([$hostImageId, $hostPageId, $timeAdded, null, $quantity, $timeUpdated]); $query->execute([$hostImageId, $hostPageId, $time, null, $quantity, $time]);
return $query->rowCount(); // no primary key return $query->rowCount(); // no primary key
} }
@ -402,7 +415,7 @@ class MySQL {
public function getLastPageDescription(int $hostPageId) { public function getLastPageDescription(int $hostPageId) {
$query = $this->_db->prepare('SELECT * FROM `hostPageDescription` WHERE `hostPageId` = ? ORDER BY `timeAdded` DESC LIMIT 1'); $query = $this->_db->prepare('SELECT * FROM `hostPageDescription` WHERE `hostPageId` = ? ORDER BY `timeUpdated` DESC, `timeAdded` DESC LIMIT 1');
$query->execute([$hostPageId]); $query->execute([$hostPageId]);
@ -438,11 +451,7 @@ class MySQL {
`host`.`scheme`, `host`.`scheme`,
`host`.`name`, `host`.`name`,
`host`.`port`, `host`.`port`,
`host`.`crawlMetaOnly`
(SELECT GROUP_CONCAT(CONCAT_WS(" ", `hostImageDescription`.`alt`, `hostImageDescription`.`title`))
FROM `hostImageDescription`
WHERE `hostImageDescription`.`hostImageId` = `hostImage`.`hostImageId`) AS `description`
FROM `hostImage` FROM `hostImage`
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`) JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)
@ -560,13 +569,13 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
public function addHostPageDescription(int $hostPageId, public function setHostPageDescription(int $hostPageId,
int $crc32data, int $crc32data,
mixed $metaTitle, mixed $metaTitle,
mixed $metaDescription, mixed $metaDescription,
mixed $metaKeywords, mixed $metaKeywords,
mixed $data, mixed $data,
int $timeAdded) { int $time) {
$query = $this->_db->prepare('INSERT INTO `hostPageDescription` ( `hostPageId`, $query = $this->_db->prepare('INSERT INTO `hostPageDescription` ( `hostPageId`,
`crc32data`, `crc32data`,
@ -575,7 +584,9 @@ class MySQL {
`metaKeywords`, `metaKeywords`,
`data`, `data`,
`timeAdded` `timeAdded`
) VALUES (?, ?, ?, ?, ?, ?, ?)'); ) VALUES (?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE `timeUpdated` = ?');
$query->execute([ $query->execute([
$hostPageId, $hostPageId,
@ -584,7 +595,8 @@ class MySQL {
$metaDescription, $metaDescription,
$metaKeywords, $metaKeywords,
$data, $data,
$timeAdded $time,
$time
]); ]);
return $query->rowCount(); return $query->rowCount();
@ -615,6 +627,15 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
/**
 * Delete page description history records older than the given threshold.
 *
 * Removes every `hostPageDescription` row whose `timeAdded` is strictly
 * lower than $timeOffset.
 *
 * @param int $timeOffset Threshold compared against `timeAdded`
 *                        (the cleaner passes time() minus CLEAN_PAGE_DESCRIPTION_OFFSET)
 *
 * @return int Number of rows reported by rowCount() for the DELETE statement
 */
public function deleteHostPageDescriptionsByTimeAdded(int $timeOffset) {
    // Bind the threshold as a statement parameter instead of concatenating it into the SQL text
    $query = $this->_db->prepare('DELETE FROM `hostPageDescription` WHERE `timeAdded` < ?');
    $query->execute([$timeOffset]);

    return $query->rowCount();
}
public function resetBannedHostImages(int $timeOffset) { public function resetBannedHostImages(int $timeOffset) {
$query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset); $query = $this->_db->prepare('UPDATE `hostImage` SET `timeBanned` = NULL WHERE `timeBanned` IS NOT NULL AND `timeBanned` < ' . (int) $timeOffset);
@ -624,12 +645,23 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
/**
 * Delete image description history records older than the given threshold.
 *
 * Removes every `hostImageDescription` row whose `timeAdded` is strictly
 * lower than $timeOffset.
 *
 * @param int $timeOffset Threshold compared against `timeAdded`
 *                        (the cleaner passes time() minus CLEAN_IMAGE_DESCRIPTION_OFFSET)
 *
 * @return int Number of rows reported by rowCount() for the DELETE statement
 */
public function deleteHostImageDescriptionsByTimeAdded(int $timeOffset) {
    // Bind the threshold as a statement parameter instead of concatenating it into the SQL text
    $query = $this->_db->prepare('DELETE FROM `hostImageDescription` WHERE `timeAdded` < ?');
    $query->execute([$timeOffset]);

    return $query->rowCount();
}
public function addCleanerLog(int $timeAdded, public function addCleanerLog(int $timeAdded,
int $hostsTotal, int $hostsTotal,
int $hostsUpdated, int $hostsUpdated,
int $hostPagesDeleted, int $hostPagesDeleted,
int $hostPageDescriptionsDeleted,
int $hostPagesBansRemoved, int $hostPagesBansRemoved,
int $hostImagesDeleted, int $hostImagesDeleted,
int $hostImageDescriptionsDeleted,
int $hostImagesBansRemoved, int $hostImagesBansRemoved,
int $manifestsTotal, int $manifestsTotal,
int $manifestsDeleted, int $manifestsDeleted,
@ -645,8 +677,10 @@ class MySQL {
`hostsTotal`, `hostsTotal`,
`hostsUpdated`, `hostsUpdated`,
`hostPagesDeleted`, `hostPagesDeleted`,
`hostPageDescriptionsDeleted`,
`hostPagesBansRemoved`, `hostPagesBansRemoved`,
`hostImagesDeleted`, `hostImagesDeleted`,
`hostImageDescriptionsDeleted`,
`hostImagesBansRemoved`, `hostImagesBansRemoved`,
`manifestsTotal`, `manifestsTotal`,
`manifestsDeleted`, `manifestsDeleted`,
@ -656,15 +690,17 @@ class MySQL {
`httpRequestsSizeTotal`, `httpRequestsSizeTotal`,
`httpDownloadSizeTotal`, `httpDownloadSizeTotal`,
`httpRequestsTimeTotal`, `httpRequestsTimeTotal`,
`executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'); `executionTimeTotal`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
$query->execute([ $query->execute([
$timeAdded, $timeAdded,
$hostsTotal, $hostsTotal,
$hostsUpdated, $hostsUpdated,
$hostPagesDeleted, $hostPagesDeleted,
$hostPageDescriptionsDeleted,
$hostPagesBansRemoved, $hostPagesBansRemoved,
$hostImagesDeleted, $hostImagesDeleted,
$hostImageDescriptionsDeleted,
$hostImagesBansRemoved, $hostImagesBansRemoved,
$manifestsTotal, $manifestsTotal,
$manifestsDeleted, $manifestsDeleted,
@ -700,7 +736,7 @@ class MySQL {
`host`.`port`, `host`.`port`,
`host`.`crawlPageLimit`, `host`.`crawlPageLimit`,
`host`.`crawlImageLimit`, `host`.`crawlImageLimit`,
`host`.`crawlPageMetaOnly`, `host`.`crawlMetaOnly`,
`host`.`robots`, `host`.`robots`,
`host`.`robotsPostfix` `host`.`robotsPostfix`
@ -735,7 +771,8 @@ class MySQL {
`hostImage`.`uri`, `hostImage`.`uri`,
`host`.`scheme`, `host`.`scheme`,
`host`.`name`, `host`.`name`,
`host`.`port` `host`.`port`,
`host`.`crawlMetaOnly`
FROM `hostImage` FROM `hostImage`
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`) JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)

BIN
media/db-prototype.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 126 KiB

After

Width:  |  Height:  |  Size: 135 KiB

18
public/search.php

@ -418,9 +418,21 @@ if (!empty($q)) {
// Save image content on data settings enabled // Save image content on data settings enabled
$db->updateHostImage($hostImage->hostImageId, $db->updateHostImage($hostImage->hostImageId,
Filter::mime($hostImageContentType), Filter::mime($hostImageContentType),
CRAWL_HOST_DEFAULT_META_ONLY ? null : $hostImageURLencoded,
time()); time());
// Set host image description
// During link collection only the meta (alt / title) was known, not the image data;
// this step takes the latest description record and saves it again together with the data received by the curl request
if ($lastHostImageDescription = $db->getLastHostImageDescription($hostImage->hostImageId)) {
$db->setHostImageDescription($hostImage->hostImageId,
crc32($hostImageData),
$lastHostImageDescription->alt,
$lastHostImageDescription->title,
$hostImage->crawlMetaOnly ? null : $hostImageData,
time());
}
// Local image data exists // Local image data exists
} else { } else {
@ -439,8 +451,8 @@ if (!empty($q)) {
<?php if ($hostPageDescription = $db->getLastPageDescription($result->id)) { ?> <?php if ($hostPageDescription = $db->getLastPageDescription($result->id)) { ?>
<h3><?php echo $hostPageDescription->metaTitle ?></h3> <h3><?php echo $hostPageDescription->metaTitle ?></h3>
<?php } ?> <?php } ?>
<?php if (!empty($hostImage->description)) { ?> <?php if ($lastHostImageDescription = $db->getLastHostImageDescription($result->id)) { ?>
<span><?php echo $hostImage->description ?></span> <span><?php echo $lastHostImageDescription->title ?> <?php echo $lastHostImageDescription->alt ?></span>
<?php } ?> <?php } ?>
<a href="<?php echo $hostPageURL ?>"> <a href="<?php echo $hostPageURL ?>">
<img src="<?php echo WEBSITE_DOMAIN ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" /> <img src="<?php echo WEBSITE_DOMAIN ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />

Loading…
Cancel
Save