mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-08 22:07:56 +00:00
implement page ranking
This commit is contained in:
parent
57f64f6b90
commit
8671fc4bde
@ -183,6 +183,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
|
|||||||
$hostRobots = $host->robots;
|
$hostRobots = $host->robots;
|
||||||
$hostRobotsPostfix = $host->robotsPostfix;
|
$hostRobotsPostfix = $host->robotsPostfix;
|
||||||
|
|
||||||
|
// Increase page rank when link does not match the current host
|
||||||
|
if ($hostURL->scheme . '://' .
|
||||||
|
$hostURL->name .
|
||||||
|
($hostURL->port ? ':' . $hostURL->port : '')
|
||||||
|
!=
|
||||||
|
$queueHostPage->scheme . '://' .
|
||||||
|
$queueHostPage->name .
|
||||||
|
($queueHostPage->port ? ':' . $queueHostPage->port : '')) {
|
||||||
|
|
||||||
|
$db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1);
|
||||||
|
}
|
||||||
|
|
||||||
// Register new host
|
// Register new host
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
@ -143,6 +143,7 @@ class MySQL {
|
|||||||
`hostPage`.`metaDescription`,
|
`hostPage`.`metaDescription`,
|
||||||
`hostPage`.`data`,
|
`hostPage`.`data`,
|
||||||
`hostPage`.`uri`,
|
`hostPage`.`uri`,
|
||||||
|
`hostPage`.`rank`,
|
||||||
`host`.`scheme`,
|
`host`.`scheme`,
|
||||||
`host`.`name`,
|
`host`.`name`,
|
||||||
`host`.`port`
|
`host`.`port`
|
||||||
@ -204,6 +205,22 @@ class MySQL {
|
|||||||
return $query->rowCount();
|
return $query->rowCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds $increment to the `rank` column of one `hostPage` row.
 *
 * The row is selected by `hostId` together with `crc32uri`; the statement
 * carries LIMIT 1, so at most one row is touched per call.
 *
 * @param int $hostId    Value matched against `hostPage`.`hostId`
 * @param int $crc32uri  Value matched against `hostPage`.`crc32uri`
 * @param int $increment Amount added to the current `rank` value
 *
 * @return int Result of rowCount() for the UPDATE statement
 */
public function updateHostPageRank(int $hostId,
                                   int $crc32uri,
                                   int $increment) {

  // The increment is bound like the other values instead of being
  // concatenated into the SQL text, so the statement text stays constant.
  $query = $this->_db->prepare('UPDATE `hostPage` SET `rank` = `rank` + ?

                                WHERE `hostId`   = ?
                                AND   `crc32uri` = ?

                                LIMIT 1');

  // Bind explicitly as integers so the addition is done on integer operands
  // regardless of whether prepared statements are emulated.
  // NOTE(review): assumes $this->_db is a PDO connection, as the
  // prepare()/execute()/rowCount() usage suggests - confirm.
  $query->bindValue(1, $increment, \PDO::PARAM_INT);
  $query->bindValue(2, $hostId,    \PDO::PARAM_INT);
  $query->bindValue(3, $crc32uri,  \PDO::PARAM_INT);

  $query->execute();

  return $query->rowCount();
}
|
||||||
|
|
||||||
public function deleteHostPage(int $hostPageId) {
|
public function deleteHostPage(int $hostPageId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1');
|
$query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1');
|
||||||
|
@ -13,10 +13,14 @@ class SphinxQL {
|
|||||||
|
|
||||||
public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) {
|
public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) {
|
||||||
|
|
||||||
$query = $this->_sphinx->prepare('SELECT * FROM `hostPage`
|
$query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight`
|
||||||
|
|
||||||
|
FROM `hostPage`
|
||||||
|
|
||||||
WHERE MATCH(?)
|
WHERE MATCH(?)
|
||||||
|
|
||||||
|
ORDER BY `rank` DESC, WEIGHT() DESC
|
||||||
|
|
||||||
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . '
|
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . '
|
||||||
|
|
||||||
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1));
|
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1));
|
||||||
|
Loading…
Reference in New Issue
Block a user