update host images info on search requests

This commit is contained in:
ghost 2023-05-04 06:12:51 +03:00
parent 68581960a3
commit d905e33b4f
5 changed files with 55 additions and 7 deletions

View File

@ -141,6 +141,7 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
* Custom rule for specified host could be provided in the DB `host`.`crawlPageMetaOnly` field
*
* This option is able to change search results relevance
* This option enables image data caching in base64
*
*/
define('CRAWL_HOST_DEFAULT_META_ONLY', false);

View File

@ -231,7 +231,7 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
!$hostImageId) { // image not exists
// Add host image
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time())) {
if ($hostImageId = $db->addHostImage($hostId, crc32($hostImageURI->string), $hostImageURI->string, time(), null, 200)) {
$hostImagesAdded++;
@ -245,7 +245,7 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
$hostImageDescriptionCRC32id = crc32(md5((string) $imageAlt . (string) $imageTitle));
if (!$db->getHostImageDescription($hostImageId, $hostImageDescriptionCRC32id)) {
$db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, (string) $imageAlt, (string) $imageTitle, time());
$db->addHostImageDescription($hostImageId, $hostImageDescriptionCRC32id, (string) Filter::imageAlt($imageAlt), (string) Filter::imageTitle($imageTitle), time());
}
// Relate host image with host page was found

View File

@ -34,6 +34,20 @@ class Filter {
return $keywords;
}
/**
 * Normalize an image "alt" attribute value.
 *
 * Strips the characters removed by PHP's default trim() (whitespace and
 * NUL bytes) from both ends of the string; inner content is left untouched.
 *
 * @param string $alt Raw alt text
 *
 * @return string Alt text without leading/trailing whitespace
 */
static public function imageAlt(string $alt) {

  return trim($alt);
}
/**
 * Normalize an image "title" attribute value.
 *
 * Strips the characters removed by PHP's default trim() (whitespace and
 * NUL bytes) from both ends of the string; inner content is left untouched.
 *
 * @param string $title Raw title text
 *
 * @return string Title text without leading/trailing whitespace
 */
static public function imageTitle(string $title) {

  return trim($title);
}
static public function pageData(string $data) {
$filterDataPre = [

View File

@ -177,6 +177,28 @@ class MySQL {
return $query->rowCount();
}
/**
 * Write a new HTTP status code and update time to a `hostImage` row.
 *
 * @param int $hostImageId Value matched against `hostImage`.`hostImageId`
 * @param int $httpCode    Value written to `httpCode`
 * @param int $timeUpdated Value written to `timeUpdated`
 *
 * @return mixed Result of rowCount() on the executed statement
 *               (presumably the number of affected rows - confirm for the driver in use)
 */
public function updateHostImageHttpCode(int $hostImageId,
                                        int $httpCode,
                                        int $timeUpdated) {

  // Values are listed in placeholder order: httpCode, timeUpdated, hostImageId
  $bindings = [$httpCode, $timeUpdated, $hostImageId];

  $statement = $this->_db->prepare('UPDATE `hostImage` SET `httpCode` = ?, `timeUpdated` = ? WHERE `hostImageId` = ? LIMIT 1');
  $statement->execute($bindings);

  return $statement->rowCount();
}
/**
 * Write new image data and update time to a `hostImage` row.
 *
 * @param int    $hostImageId Value matched against `hostImage`.`hostImageId`
 * @param string $data        Value written to `data`
 * @param int    $timeUpdated Value written to `timeUpdated`
 *
 * @return mixed Result of rowCount() on the executed statement
 *               (presumably the number of affected rows - confirm for the driver in use)
 */
public function updateHostImageData(int $hostImageId,
                                    string $data,
                                    int $timeUpdated) {

  // Values are listed in placeholder order: data, timeUpdated, hostImageId
  $bindings = [$data, $timeUpdated, $hostImageId];

  $statement = $this->_db->prepare('UPDATE `hostImage` SET `data` = ?, `timeUpdated` = ? WHERE `hostImageId` = ? LIMIT 1');
  $statement->execute($bindings);

  return $statement->rowCount();
}
public function deleteHostImage(int $hostImageId) {
$query = $this->_db->prepare('DELETE FROM `hostImage` WHERE `hostImageId` = ? LIMIT 1');
@ -359,7 +381,8 @@ class MySQL {
public function getFoundHostImage(int $hostImageId) {
$query = $this->_db->prepare('SELECT `hostImage`.`uri`,
$query = $this->_db->prepare('SELECT `hostImage`.`hostImageId`,
`hostImage`.`uri`,
`hostImage`.`data`,
`hostImage`.`rank`,
`host`.`scheme`,

View File

@ -256,7 +256,7 @@ if (!empty($q)) {
a, a:visited, a:active {
color: #9ba2ac;
display: block;
display: inline-block;
font-size: 12px;
margin-top: 8px;
}
@ -320,13 +320,17 @@ if (!empty($q)) {
($hostImage->port ? ':' . $hostImage->port : false) .
$hostImage->uri;
// Get image data
// Get remote image data
if (empty($hostImage->data)) {
$hostImageCurl = new Curl($hostImageURL);
// Skip item render on timeout
if (200 != $hostImageCurl->getCode()) continue;
$hostImageHttpCode = $hostImageCurl->getCode();
$db->updateHostImageHttpCode($hostImage->hostImageId, (int) $hostImageHttpCode, time());
if (200 != $hostImageHttpCode) continue;
// Convert remote image data to base64 string to prevent direct URL call
if (!$hostImageType = @pathinfo($hostImageURL, PATHINFO_EXTENSION)) continue;
@ -334,6 +338,12 @@ if (!empty($q)) {
$hostImageURLencoded = 'data:image/' . $hostImageType . ';base64,' . $hostImageBase64;
// Save image content on data settings enabled
if (!CRAWL_HOST_DEFAULT_META_ONLY) {
$db->updateHostImageData($hostImage->hostImageId, (string) $hostImageURLencoded, time());
}
// Local image data exists
} else {
$hostImageURLencoded = $hostImage->data;
@ -342,7 +352,7 @@ if (!empty($q)) {
?>
<div>
<a href="<?php echo $hostImageURL ?>">
<img src="<?php echo $hostImageURLencoded ?>" alt="<?php echo $hostImage->description ?>" title="<?php echo $hostImageURL ?>" class="image" />
<img src="<?php echo $hostImageURLencoded ?>" alt="<?php echo htmlentities($hostImage->description) ?>" title="<?php echo htmlentities($hostImageURL) ?>" class="image" />
</a>
<?php foreach ((array) $db->getHostImageHostPages($result->id) as $hostPage) { ?>
<?php if ($hostPage = $db->getFoundHostPage($hostPage->hostPageId)) { ?>