diff --git a/config/app.php.txt b/config/app.php.txt index 6ba3900..87bb98e 100644 --- a/config/app.php.txt +++ b/config/app.php.txt @@ -22,3 +22,5 @@ define('CRAWL_PAGE_LIMIT', 10); define('CRAWL_PAGE_SECONDS_OFFSET', 3600); define('CRAWL_URL_REGEXP', '/^.*$/ui'); + +define('CRAWL_META_ONLY', false); diff --git a/crontab/crawler.php b/crontab/crawler.php index 07a877a..59879c7 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -67,7 +67,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET) Filter::pageTitle($title->item(0)->nodeValue), Filter::pageDescription($description), Filter::pageKeywords($keywords), - Filter::pageData($url->getContent()), + CRAWL_META_ONLY ? '' : Filter::pageData($content), time()); // Update images