Browse Source

Skip indexing of javascript: and mailto: links

main
ghost 2 years ago
parent
commit
9b9d40a97c
  1. 14
      crontab/crawler.php

14
crontab/crawler.php

@ -100,6 +100,20 @@ foreach ($db->getCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET
continue;
}
// Skip javascript links
// The scheme is matched only at the start of the (left-trimmed) href and
// case-insensitively: URL schemes are case-insensitive, and a plain
// substring search would also drop ordinary links that merely contain
// "javascript:" somewhere in their path or query string.
if (0 === stripos(ltrim($href), 'javascript:')) {
    continue;
}
// Skip mailto links
// Same prefix-only, case-insensitive match as above, so that a link such as
// "/share?to=mailto:user@example.com" is still crawled.
if (0 === stripos(ltrim($href), 'mailto:')) {
    continue;
}
// @TODO skip other apps
// Add absolute URL prefixes to the relative links found
if (!parse_url($href, PHP_URL_HOST)) {

Loading…
Cancel
Save