implement proxied image search #1

This commit is contained in:
ghost 2023-05-04 03:48:57 +03:00
parent baf78e2bf5
commit 6b18202588
5 changed files with 179 additions and 13 deletions

View File

@ -62,6 +62,7 @@ Could be enabled or disabled by `API_SEARCH_ENABLED` option
``` ```
GET action=search - required GET action=search - required
GET query={string} - optional, search request, empty if not provided GET query={string} - optional, search request, empty if not provided
GET type={string} - optional, search type: image to search images; pages are searched if empty or any other value
GET page={int} - optional, search results page, 1 if not provided GET page={int} - optional, search results page, 1 if not provided
GET mode=SphinxQL - optional, enable extended SphinxQL syntax GET mode=SphinxQL - optional, enable extended SphinxQL syntax
``` ```
@ -142,7 +143,7 @@ GET m=SphinxQL
* [x] Add robots.txt support (Issue #2) * [x] Add robots.txt support (Issue #2)
* [ ] Improve yggdrasil links detection, add .ygg domain zone support * [ ] Improve yggdrasil links detection, add .ygg domain zone support
* [ ] Make page description visible - based on the cached content dump, when website description tag not available, add condition highlights * [ ] Make page description visible - based on the cached content dump, when website description tag not available, add condition highlights
* [ ] Images search (basically implemented but requires testing and some performance optimization) * [x] Images search (basically implemented but requires testing and some performance optimization)
* [x] Index cleaner * [x] Index cleaner
* [ ] Crawl queue balancer, that depends from CPU available * [ ] Crawl queue balancer, that depends from CPU available
* [ ] Implement smart queue algorithm that indexing new sites homepage in higher priority * [ ] Implement smart queue algorithm that indexing new sites homepage in higher priority

View File

@ -224,6 +224,15 @@ class MySQL {
return $query->fetch(); return $query->fetch();
} }
/**
 * Load every `hostImageToHostPage` row that references the given image.
 *
 * @param int $hostImageId Identifier of the image whose page relations are requested.
 *
 * @return mixed Whatever fetchAll() yields for the prepared statement
 *               (presumably a list of row objects - confirm against the
 *               connection's configured fetch mode).
 */
public function getHostImageHostPages(int $hostImageId) {

  $statement = $this->_db->prepare('SELECT * FROM `hostImageToHostPage` WHERE `hostImageId` = ?');

  // The image id is bound as the only positional parameter
  $statement->execute([$hostImageId]);

  $relations = $statement->fetchAll();

  return $relations;
}
public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) { public function addHostImageToHostPage(int $hostImageId, int $hostPageId, int $timeAdded, mixed $timeUpdated, int $quantity) {
$query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`, $query = $this->_db->prepare('INSERT INTO `hostImageToHostPage` (`hostImageId`,
@ -346,6 +355,31 @@ class MySQL {
return $query->fetch(); return $query->fetch();
} }
/**
 * Load the display data for a single image found by the search index.
 *
 * The row combines the image's own `uri` and `rank` with the `scheme`,
 * `name` and `port` of the host it belongs to, plus a `description`
 * column built by a sub-select: for every `hostImageDescription` row of
 * the image, `alt` and `title` are joined with " | " (CONCAT_WS) and the
 * per-row strings are then merged with GROUP_CONCAT.
 *
 * @param int $hostImageId Identifier of the image to load.
 *
 * @return mixed Result of fetch() on the prepared statement
 *               (presumably a row object, or false when nothing matches -
 *               confirm against the connection's fetch mode).
 */
public function getFoundHostImage(int $hostImageId) {
$query = $this->_db->prepare('SELECT `hostImage`.`uri`,
`hostImage`.`rank`,
`host`.`scheme`,
`host`.`name`,
`host`.`port`,
(SELECT GROUP_CONCAT(CONCAT_WS(" | ", `hostImageDescription`.`alt`, `hostImageDescription`.`title`))
FROM `hostImageDescription`
WHERE `hostImageDescription`.`hostImageId` = `hostImage`.`hostImageId`) AS `description`
FROM `hostImage`
JOIN `host` ON (`host`.`hostId` = `hostImage`.`hostId`)
WHERE `hostImage`.`hostImageId` = ?
LIMIT 1');
// The image id is the only bound parameter; LIMIT 1 keeps the result to one row
$query->execute([$hostImageId]);
return $query->fetch();
}
public function addHostPage(int $hostId, public function addHostPage(int $hostId,
int $crc32uri, int $crc32uri,
string $uri, string $uri,

View File

@ -30,6 +30,25 @@ class SphinxQL {
return $query->fetchAll(); return $query->fetchAll();
} }
/**
 * Run a full-text search against the `hostImage` index.
 *
 * Results are ordered by `rank` first and by the match weight second; the
 * weight is also exposed to the caller as the `weight` column.
 *
 * @param string $keyword    Match expression, bound as a statement parameter.
 * @param int    $start      Requested offset of the first result.
 * @param int    $limit      Maximum number of results to return.
 * @param int    $maxMatches Value for the `max_matches` option (callers pass
 *                           the total number of matches).
 *
 * @return mixed Whatever fetchAll() yields for the prepared statement.
 */
public function searchHostImages(string $keyword, int $start, int $limit, int $maxMatches) {

  // `max_matches` has to be a positive number, even when nothing was found
  $maxMatches = max(1, $maxMatches);

  // The offset must stay inside [0, max_matches - 1]: searchd rejects an
  // offset that equals max_matches (the page right after the last full one)
  // and a negative offset is not valid in LIMIT at all
  $offset = min(max(0, $start), $maxMatches - 1);

  // Offset, limit and max_matches are integers, so it is safe to inline them;
  // only the keyword goes through the placeholder
  $query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight`
                                    FROM `hostImage`
                                    WHERE MATCH(?)
                                    ORDER BY `rank` DESC, WEIGHT() DESC
                                    LIMIT ' . $offset . ',' . (int) $limit . '
                                    OPTION `max_matches`=' . $maxMatches);

  $query->execute([$keyword]);

  return $query->fetchAll();
}
public function searchHostPagesTotal(string $keyword) { public function searchHostPagesTotal(string $keyword) {
$query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE MATCH(?)'); $query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE MATCH(?)');
@ -38,4 +57,13 @@ class SphinxQL {
return $query->fetch()->total; return $query->fetch()->total;
} }
/**
 * Count the documents of the `hostImage` index that match the keyword.
 *
 * @param string $keyword Match expression, bound as a statement parameter.
 *
 * @return mixed The `total` column of the single COUNT(*) row.
 */
public function searchHostImagesTotal(string $keyword) {

  $statement = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostImage` WHERE MATCH(?)');

  $statement->execute([$keyword]);

  // Only the aggregated counter of the fetched row is of interest
  $row = $statement->fetch();

  return $row->total;
}
} }

View File

@ -30,19 +30,42 @@ if (API_ENABLED) {
// Filter request data // Filter request data
$type = !empty($_GET['type']) ? Filter::url($_GET['type']) : 'page';
$mode = !empty($_GET['mode']) ? Filter::url($_GET['mode']) : 'default'; $mode = !empty($_GET['mode']) ? Filter::url($_GET['mode']) : 'default';
$query = !empty($_GET['query']) ? Filter::url($_GET['query']) : ''; $query = !empty($_GET['query']) ? Filter::url($_GET['query']) : '';
$page = !empty($_GET['page']) ? (int) $_GET['page'] : 1; $page = !empty($_GET['page']) ? (int) $_GET['page'] : 1;
// Make search request // Make image search request
if (!empty($type) && $type == 'image') {
$sphinxResultsTotal = $sphinx->searchHostImagesTotal(Filter::searchQuery($query, $mode));
$sphinxResults = $sphinx->searchHostImages(Filter::searchQuery($query, $mode), $page * API_SEARCH_PAGINATION_RESULTS_LIMIT - API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal);
// Make default search request
} else {
$sphinxResultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($query, $mode)); $sphinxResultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($query, $mode));
$sphinxResults = $sphinx->searchHostPages(Filter::searchQuery($query, $mode), $page * API_SEARCH_PAGINATION_RESULTS_LIMIT - API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal); $sphinxResults = $sphinx->searchHostPages(Filter::searchQuery($query, $mode), $page * API_SEARCH_PAGINATION_RESULTS_LIMIT - API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT, $sphinxResultsTotal);
}
// Generate results // Generate results
$dbResults = []; $dbResults = [];
foreach ($sphinxResults as $i => $sphinxResult) { foreach ($sphinxResults as $i => $sphinxResult) {
// Image
if (!empty($type) && $type == 'image') {
if ($hostImage = $db->getFoundHostImage($sphinxResult->id)) {
$dbResults[$i] = $hostImage;
$dbResults[$i]->weight = $sphinxResult->weight;
}
// Default
} else {
if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) { if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) {
$dbResults[$i] = $hostPage; $dbResults[$i] = $hostPage;
@ -50,6 +73,7 @@ if (API_ENABLED) {
$dbResults[$i]->weight = $sphinxResult->weight; $dbResults[$i]->weight = $sphinxResult->weight;
} }
} }
}
// Make response // Make response
$response = [ $response = [

View File

@ -24,6 +24,7 @@ $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the
]); ]);
// Filter request data // Filter request data
$t = !empty($_GET['t']) ? Filter::url($_GET['t']) : 'page';
$m = !empty($_GET['m']) ? Filter::url($_GET['m']) : 'default'; $m = !empty($_GET['m']) ? Filter::url($_GET['m']) : 'default';
$q = !empty($_GET['q']) ? Filter::url($_GET['q']) : ''; $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1; $p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
@ -107,8 +108,16 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
// Search request // Search request
if (!empty($q)) { if (!empty($q)) {
if (!empty($t) && $t == 'image') {
$resultsTotal = $sphinx->searchHostImagesTotal(Filter::searchQuery($q, $m));
$results = $sphinx->searchHostImages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal);
} else {
$resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m)); $resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m));
$results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal); $results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $p * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, $resultsTotal);
}
} else { } else {
@ -177,6 +186,14 @@ if (!empty($q)) {
color: #fff; color: #fff;
} }
/* Third-level heading; the results template prints the host page title of an image result in an h3 */
h3 {
display: block;
font-size: 16px;
font-weight: normal;
margin: 8px 0;
color: #fff;
}
form { form {
display: block; display: block;
max-width: 678px; max-width: 678px;
@ -208,6 +225,19 @@ if (!empty($q)) {
color: #090808 color: #090808
} }
/* Label wrapping the "Images" checkbox of the search form; pinned to a fixed spot near the top right of the viewport */
label {
font-size: 14px;
position: fixed;
top: 30px;
right: 120px;
color: #fff
}
/* Input placed directly inside a label (the search type checkbox): natural width with a small horizontal gap */
label > input {
width: auto;
margin: 0 4px;
}
button { button {
padding: 12px 16px; padding: 12px 16px;
border-radius: 4px; border-radius: 4px;
@ -235,12 +265,17 @@ if (!empty($q)) {
color: #54a3f7; color: #54a3f7;
} }
img { img.icon {
float: left; float: left;
border-radius: 50%; border-radius: 50%;
margin-right: 8px; margin-right: 8px;
} }
/* Image search result picture: never wider than its container, slightly rounded corners */
img.image {
max-width: 100%;
border-radius: 3px;
}
div { div {
max-width: 640px; max-width: 640px;
margin: 0 auto; margin: 0 auto;
@ -262,6 +297,7 @@ if (!empty($q)) {
<form name="search" method="GET" action="<?php echo WEBSITE_DOMAIN; ?>/search.php"> <form name="search" method="GET" action="<?php echo WEBSITE_DOMAIN; ?>/search.php">
<h1><a href="<?php echo WEBSITE_DOMAIN; ?>"><?php echo _('YGGo!') ?></a></h1> <h1><a href="<?php echo WEBSITE_DOMAIN; ?>"><?php echo _('YGGo!') ?></a></h1>
<input type="text" name="q" placeholder="<?php echo $placeholder ?>" value="<?php echo htmlentities($q) ?>" /> <input type="text" name="q" placeholder="<?php echo $placeholder ?>" value="<?php echo htmlentities($q) ?>" />
<label><input type="checkbox" name="t" value="image" <?php echo (!empty($t) && $t == 'image' ? 'checked="checked"' : false) ?>/> <?php echo _('Images') ?></label>
<button type="submit"><?php echo _('Search'); ?></button> <button type="submit"><?php echo _('Search'); ?></button>
</form> </form>
</header> </header>
@ -274,15 +310,58 @@ if (!empty($q)) {
<?php } ?> <?php } ?>
</div> </div>
<?php foreach ($results as $result) { ?> <?php foreach ($results as $result) { ?>
<?php if ($hostPage = $db->getFoundHostPage($result->id)) { ?> <?php if (!empty($t) && $t == 'image' &&
$hostImage = $db->getFoundHostImage($result->id)) { ?>
<?php

  // Build the absolute image URL from the host and image records
  $hostImageURL = $hostImage->scheme . '://' .
                  $hostImage->name .
                  ($hostImage->port ? ':' . $hostImage->port : '') .
                  $hostImage->uri;

  // Take the extension from the URL path only, so a query string or fragment
  // cannot leak into it (e.g. "/logo.png?v=2" must resolve to "png")
  $hostImageExtension = strtolower(
    (string) pathinfo((string) parse_url($hostImageURL, PHP_URL_PATH), PATHINFO_EXTENSION)
  );

  // Map file extensions to valid image/* subtypes. The whitelist also makes
  // sure that nothing taken from the remote URL ends up in the data URI
  // header, which is printed into an HTML attribute later.
  $hostImageTypes = [
    'jpg'  => 'jpeg',
    'jpeg' => 'jpeg',
    'png'  => 'png',
    'gif'  => 'gif',
    'webp' => 'webp',
    'avif' => 'avif',
    'bmp'  => 'bmp',
    'ico'  => 'x-icon',
    'svg'  => 'svg+xml',
    'tif'  => 'tiff',
    'tiff' => 'tiff',
  ];

  // Skip this result when the image type is unknown
  if (!isset($hostImageTypes[$hostImageExtension])) continue;

  $hostImageType = $hostImageTypes[$hostImageExtension];

  // Convert remote image to base64 string for the privacy reasons.
  // Warnings are suppressed on purpose: an unreachable image must not break
  // the results page, the result is just skipped.
  $hostImageData = @file_get_contents($hostImageURL);

  if ($hostImageData === false || $hostImageData === '') continue;

  $hostImageBase64 = base64_encode($hostImageData);

  $hostImageURLencoded = 'data:image/' . $hostImageType . ';base64,' . $hostImageBase64;
?>
<?php
  // Image URLs, descriptions and page titles originate from crawled remote
  // content, so every dynamic value is escaped before it is printed
  $escapeHtml = static fn ($value): string => htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
?>
<div>
  <a href="<?php echo $escapeHtml($hostImageURL) ?>">
    <img src="<?php echo $escapeHtml($hostImageURLencoded) ?>" alt="<?php echo $escapeHtml($hostImage->description) ?>" title="<?php echo $escapeHtml($hostImageURL) ?>" class="image" />
  </a>
  <?php // List every page the image is related to ?>
  <?php foreach ((array) $db->getHostImageHostPages($result->id) as $hostPage) { ?>
    <?php if ($hostPage = $db->getFoundHostPage($hostPage->hostPageId)) { ?>
      <?php $hostPageURL = $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>
      <h3><?php echo $escapeHtml($hostPage->metaTitle) ?></h3>
      <?php if (!empty($hostImage->description)) { ?>
        <span><?php echo $escapeHtml($hostImage->description) ?></span>
      <?php } ?>
      <a href="<?php echo $escapeHtml($hostPageURL) ?>">
        <img src="<?php echo WEBSITE_DOMAIN ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
        <?php echo $escapeHtml($hostPageURL) ?>
      </a>
    <?php } ?>
  <?php } ?>
</div>
<?php } else if ($hostPage = $db->getFoundHostPage($result->id)) { ?>
<?php
$hostPageURL = $hostPage->scheme . '://' .
$hostPage->name .
($hostPage->port ? ':' . $hostPage->port : false) .
$hostPage->uri;
?>
<div> <div>
<h2><?php echo $hostPage->metaTitle ?></h2> <h2><?php echo $hostPage->metaTitle ?></h2>
<?php if (!empty($hostPage->metaDescription)) { ?> <?php if (!empty($hostPage->metaDescription)) { ?>
<span><?php echo $hostPage->metaDescription ?></span> <span><?php echo $hostPage->metaDescription ?></span>
<?php } ?> <?php } ?>
<a href="<?php echo $hostPageURL ?>"> <a href="<?php echo $hostPageURL ?>">
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" /> <img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
<?php echo $hostPageURL ?> <?php echo $hostPageURL ?>
</a> </a>
</div> </div>