implement page ranking

This commit is contained in:
ghost 2023-04-25 16:54:01 +03:00
parent 57f64f6b90
commit 8671fc4bde
3 changed files with 37 additions and 4 deletions

View File

@ -183,6 +183,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
$hostRobots = $host->robots;
$hostRobotsPostfix = $host->robotsPostfix;
// Increase page rank when link does not match the current host.
// Both sides are rebuilt as "scheme://name[:port]" strings (the ":port" suffix is appended
// only when the port value is truthy) and compared: $hostURL against $queueHostPage.
// NOTE(review): presumably $hostURL is the discovered link and $queueHostPage the page
// being crawled - confirm against the enclosing loop, which is not visible here.
if ($hostURL->scheme . '://' .
$hostURL->name .
($hostURL->port ? ':' . $hostURL->port : '')
!=
$queueHostPage->scheme . '://' .
$queueHostPage->name .
($queueHostPage->port ? ':' . $queueHostPage->port : '')) {
// Request a rank increment of 1 for the page addressed by $hostId and the CRC32 of its URI string
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1);
}
// Register new host
} else {

View File

@ -143,6 +143,7 @@ class MySQL {
`hostPage`.`metaDescription`,
`hostPage`.`data`,
`hostPage`.`uri`,
`hostPage`.`rank`,
`host`.`scheme`,
`host`.`name`,
`host`.`port`
@ -204,6 +205,22 @@ class MySQL {
return $query->rowCount();
}
/**
 * Add $increment to the `rank` column of a single `hostPage` row.
 *
 * The row is selected by its `hostId` and `crc32uri` values; the statement
 * carries LIMIT 1, so at most one row is modified per call.
 *
 * @param int $hostId    Value matched against `hostPage`.`hostId`
 * @param int $crc32uri  Value matched against `hostPage`.`crc32uri`
 * @param int $increment Amount added to the current `rank` value
 *
 * @return int Number of affected rows as reported by rowCount()
 */
public function updateHostPageRank(int $hostId,
                                   int $crc32uri,
                                   int $increment) {

    // All three values travel as bound parameters; nothing is concatenated into the SQL text
    $query = $this->_db->prepare('UPDATE `hostPage` SET `rank` = `rank` + ?
                                                  WHERE `hostId` = ?
                                                  AND `crc32uri` = ?
                                                  LIMIT 1');

    // Bind explicitly as integers so the arithmetic operand is never sent as a quoted string
    $query->bindValue(1, $increment, \PDO::PARAM_INT);
    $query->bindValue(2, $hostId, \PDO::PARAM_INT);
    $query->bindValue(3, $crc32uri, \PDO::PARAM_INT);

    $query->execute();

    return $query->rowCount();
}
public function deleteHostPage(int $hostPageId) {
$query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1');

View File

@ -13,13 +13,17 @@ class SphinxQL {
public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) {
$query = $this->_sphinx->prepare('SELECT * FROM `hostPage`
$query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight`
WHERE MATCH(?)
FROM `hostPage`
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . '
WHERE MATCH(?)
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1));
ORDER BY `rank` DESC, WEIGHT() DESC
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . '
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1));
$query->execute([$keyword]);