Browse Source

implement page ranking

main
ghost 2 years ago
parent
commit
8671fc4bde
  1. 12
      crontab/crawler.php
  2. 17
      library/mysql.php
  3. 12
      library/sphinxql.php

12
crontab/crawler.php

@ -183,6 +183,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
$hostRobots = $host->robots; $hostRobots = $host->robots;
$hostRobotsPostfix = $host->robotsPostfix; $hostRobotsPostfix = $host->robotsPostfix;
// Increase page rank when link does not match the current host:
// build "scheme://name[:port]" for the linked host and for the queued page, then compare
$linkedHostOrigin = $hostURL->scheme . '://' . $hostURL->name;

if ($hostURL->port) {

    $linkedHostOrigin .= ':' . $hostURL->port;
}

$queuedPageOrigin = $queueHostPage->scheme . '://' . $queueHostPage->name;

if ($queueHostPage->port) {

    $queuedPageOrigin .= ':' . $queueHostPage->port;
}

if ($linkedHostOrigin != $queuedPageOrigin) {

    // The linked page is addressed by its host id and the CRC32 of its URI string
    $db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1);
}
// Register new host // Register new host
} else { } else {

17
library/mysql.php

@ -143,6 +143,7 @@ class MySQL {
`hostPage`.`metaDescription`, `hostPage`.`metaDescription`,
`hostPage`.`data`, `hostPage`.`data`,
`hostPage`.`uri`, `hostPage`.`uri`,
`hostPage`.`rank`,
`host`.`scheme`, `host`.`scheme`,
`host`.`name`, `host`.`name`,
`host`.`port` `host`.`port`
@ -204,6 +205,22 @@ class MySQL {
return $query->rowCount(); return $query->rowCount();
} }
/**
 * Add $increment to the `rank` column of a `hostPage` row.
 *
 * The row is selected by `hostId` and `crc32uri`; LIMIT 1 caps the update at a single row.
 *
 * @param int $hostId    value matched against `hostPage`.`hostId`
 * @param int $crc32uri  value matched against `hostPage`.`crc32uri`
 * @param int $increment amount added to the current `rank`
 *
 * @return int row count reported by the statement after execution
 */
public function updateHostPageRank(int $hostId,
                                   int $crc32uri,
                                   int $increment) {

    $query = $this->_db->prepare('UPDATE `hostPage` SET `rank` = `rank` + ?
                                  WHERE `hostId` = ?
                                  AND `crc32uri` = ?
                                  LIMIT 1');

    // Every value travels as a typed integer parameter; nothing is concatenated into the SQL text
    $query->bindValue(1, $increment, \PDO::PARAM_INT);
    $query->bindValue(2, $hostId, \PDO::PARAM_INT);
    $query->bindValue(3, $crc32uri, \PDO::PARAM_INT);

    $query->execute();

    return $query->rowCount();
}
public function deleteHostPage(int $hostPageId) { public function deleteHostPage(int $hostPageId) {
$query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1'); $query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1');

12
library/sphinxql.php

@ -13,13 +13,17 @@ class SphinxQL {
public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) { public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) {
$query = $this->_sphinx->prepare('SELECT * FROM `hostPage` $query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight`
WHERE MATCH(?) FROM `hostPage`
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . ' WHERE MATCH(?)
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1)); ORDER BY `rank` DESC, WEIGHT() DESC
LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . '
OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1));
$query->execute([$keyword]); $query->execute([$keyword]);

Loading…
Cancel
Save