mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-08 22:07:56 +00:00
make url/src columns unique keys, add insert/ignore construction
This commit is contained in:
parent
aadfe7f551
commit
b218b8bbc3
@ -92,9 +92,9 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
|||||||
if (!parse_url($src, PHP_URL_HOST)) {
|
if (!parse_url($src, PHP_URL_HOST)) {
|
||||||
|
|
||||||
$src = parse_url($queue->url, PHP_URL_SCHEME) . '://' .
|
$src = parse_url($queue->url, PHP_URL_SCHEME) . '://' .
|
||||||
parse_url($queue->url, PHP_URL_HOST) .
|
parse_url($queue->url, PHP_URL_HOST) .
|
||||||
parse_url($queue->url, PHP_URL_PORT) .
|
parse_url($queue->url, PHP_URL_PORT) .
|
||||||
$src; // @TODO sometimes wrong URL prefix available
|
$src; // @TODO sometimes wrong URL prefix available
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add page images
|
// Add page images
|
||||||
@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
|||||||
// Save valid internal links to the index queue
|
// Save valid internal links to the index queue
|
||||||
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
||||||
|
|
||||||
if (!$db->getPage($href)) {
|
$db->initPage($href, time());
|
||||||
|
|
||||||
$db->initPage($href, time());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -21,7 +21,7 @@ class SQLite {
|
|||||||
"data" TEXT,
|
"data" TEXT,
|
||||||
"description" TEXT,
|
"description" TEXT,
|
||||||
"keywords" TEXT,
|
"keywords" TEXT,
|
||||||
"url" TEXT NOT NULL
|
"url" TEXT NOT NULL UNIQUE
|
||||||
)
|
)
|
||||||
');
|
');
|
||||||
|
|
||||||
@ -30,7 +30,7 @@ class SQLite {
|
|||||||
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
"pageId" INTEGER NOT NULL,
|
"pageId" INTEGER NOT NULL,
|
||||||
"alt" TEXT NOT NULL,
|
"alt" TEXT NOT NULL,
|
||||||
"src" TEXT NOT NULL
|
"src" TEXT NOT NULL UNIQUE
|
||||||
)
|
)
|
||||||
');
|
');
|
||||||
|
|
||||||
@ -85,15 +85,6 @@ class SQLite {
|
|||||||
return $query->fetch()->total;
|
return $query->fetch()->total;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getPage(string $url) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?');
|
|
||||||
|
|
||||||
$query->execute([$url]);
|
|
||||||
|
|
||||||
return $query->fetch();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
|
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
|
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
|
||||||
@ -114,7 +105,7 @@ class SQLite {
|
|||||||
|
|
||||||
public function initPage(string $url, int $timeAdded) {
|
public function initPage(string $url, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
||||||
|
|
||||||
$query->execute([$url, $timeAdded]);
|
$query->execute([$url, $timeAdded]);
|
||||||
|
|
||||||
@ -123,7 +114,7 @@ class SQLite {
|
|||||||
|
|
||||||
public function addImage(int $pageId, string $src, string $alt) {
|
public function addImage(int $pageId, string $src, string $alt) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$pageId, $src, $alt]);
|
$query->execute([$pageId, $src, $alt]);
|
||||||
|
|
||||||
|
@ -22,9 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
|
|||||||
// Crawl request
|
// Crawl request
|
||||||
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||||
|
|
||||||
if (!$db->getPage($q)) {
|
$db->initPage($q, time());
|
||||||
$db->initPage($q, time());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search request
|
// Search request
|
||||||
|
Loading…
Reference in New Issue
Block a user