Browse Source

add CRAWL_META_ONLY option

sqliteway
ghost 2 years ago
parent
commit
79663c84db
  1. 2
      config/app.php.txt
  2. 2
      crontab/crawler.php

2
config/app.php.txt

@@ -22,3 +22,5 @@ define('CRAWL_PAGE_LIMIT', 10);
define('CRAWL_PAGE_SECONDS_OFFSET', 3600); define('CRAWL_PAGE_SECONDS_OFFSET', 3600);
define('CRAWL_URL_REGEXP', '/^.*$/ui'); define('CRAWL_URL_REGEXP', '/^.*$/ui');
define('CRAWL_META_ONLY', false);

2
crontab/crawler.php

@@ -67,7 +67,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
Filter::pageTitle($title->item(0)->nodeValue), Filter::pageTitle($title->item(0)->nodeValue),
Filter::pageDescription($description), Filter::pageDescription($description),
Filter::pageKeywords($keywords), Filter::pageKeywords($keywords),
Filter::pageData($url->getContent()), CRAWL_META_ONLY ? '' : Filter::pageData($content),
time()); time());
// Update images // Update images

Loading…
Cancel
Save