# Patch summary: reads and updates a `rank` column on `hostPage` and uses it when ordering search results.
# NOTE(review): this diff does not show the `rank` column being created — verify the schema change elsewhere.
#
# crontab/crawler.php
#   In the branch just before `// Register new host`, builds scheme://name[:port] from $hostURL and
#   from $queueHostPage; when the two strings differ, calls
#   $db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1).
#
# library/mysql.php
#   - Adds `hostPage`.`rank` to an existing column list (presumably a SELECT; the enclosing query is
#     outside this hunk).
#   - Adds MySQL::updateHostPageRank(int $hostId, int $crc32uri, int $increment): runs
#     UPDATE `hostPage` SET `rank` = `rank` + <increment> for the row matching `hostId` and `crc32uri`
#     (LIMIT 1) and returns the statement's rowCount().
#     NOTE(review): $increment is concatenated into the SQL after an (int) cast, while the other two
#     values are bound through `?` placeholders.
#
# library/sphinxql.php
#   SphinxQL::searchHostPages() now selects `*, WEIGHT() AS weight` and adds
#   ORDER BY `rank` DESC, WEIGHT() DESC; the LIMIT and max_matches expressions are re-emitted unchanged.
#   NOTE(review): `rank` is the primary sort key, so WEIGHT() only orders rows that share the same
#   rank — confirm that this is the intended ordering.
diff --git a/crontab/crawler.php b/crontab/crawler.php index 7341559..ed4969c 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -183,6 +183,18 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET $hostRobots = $host->robots; $hostRobotsPostfix = $host->robotsPostfix; + // Increase page rank when link does not match the current host + if ($hostURL->scheme . '://' . + $hostURL->name . + ($hostURL->port ? ':' . $hostURL->port : '') + != + $queueHostPage->scheme . '://' . + $queueHostPage->name . + ($queueHostPage->port ? ':' . $queueHostPage->port : '')) { + + $db->updateHostPageRank($hostId, crc32($hostPageURI->string), 1); + } + // Register new host } else { diff --git a/library/mysql.php b/library/mysql.php index 7cb42b4..9383d81 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -143,6 +143,7 @@ class MySQL { `hostPage`.`metaDescription`, `hostPage`.`data`, `hostPage`.`uri`, + `hostPage`.`rank`, `host`.`scheme`, `host`.`name`, `host`.`port` @@ -204,6 +205,22 @@ class MySQL { return $query->rowCount(); } + public function updateHostPageRank(int $hostId, + int $crc32uri, + int $increment) { + + $query = $this->_db->prepare('UPDATE `hostPage` SET `rank` = `rank` + ' . (int) $increment . ' + + WHERE `hostId` = ? + AND `crc32uri` = ? + + LIMIT 1'); + + $query->execute([$hostId, $crc32uri]); + + return $query->rowCount(); + } + public function deleteHostPage(int $hostPageId) { $query = $this->_db->prepare('DELETE FROM `hostPage` WHERE `hostPageId` = ? LIMIT 1'); diff --git a/library/sphinxql.php b/library/sphinxql.php index 5ad355f..0859c84 100644 --- a/library/sphinxql.php +++ b/library/sphinxql.php @@ -13,13 +13,17 @@ class SphinxQL { public function searchHostPages(string $keyword, int $start, int $limit, int $maxMatches) { - $query = $this->_sphinx->prepare('SELECT * FROM `hostPage` + $query = $this->_sphinx->prepare('SELECT *, WEIGHT() AS `weight` - WHERE MATCH(?) + FROM `hostPage` - LIMIT ' . 
(int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . ' + WHERE MATCH(?) - OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1)); + ORDER BY `rank` DESC, WEIGHT() DESC + + LIMIT ' . (int) ($start > $maxMatches ? ($maxMatches > 0 ? $maxMatches - 1 : 0) : $start) . ',' . (int) $limit . ' + + OPTION `max_matches`=' . (int) ($maxMatches > 1 ? $maxMatches : 1)); $query->execute([$keyword]);