metaTitle ?>
description)) { ?> description ?> 0) { ?>
getTotalPages();

// Search-box placeholder, pluralised by Filter::plural() according to $totalPages
$placeholder = Filter::plural($totalPages, [
    sprintf(_('Over %s page or enter the new one...'), $totalPages),
    sprintf(_('Over %s pages or enter the new one...'), $totalPages),
    sprintf(_('Over %s pages or enter the new one...'), $totalPages),
]);

// Filter request data.
// $_GET values are either strings or arrays; array-valued parameters (e.g. ?q[]=x)
// are ignored and fall back to the defaults instead of being passed to Filter::url().
$t = !empty($_GET['t']) && is_string($_GET['t']) ? Filter::url($_GET['t']) : 'page';
$m = !empty($_GET['m']) && is_string($_GET['m']) ? Filter::url($_GET['m']) : 'default';
$q = !empty($_GET['q']) && is_string($_GET['q']) ? Filter::url($_GET['q']) : '';

// Page number is clamped to 1 so the result offset computed below is never negative
$p = !empty($_GET['p']) && is_string($_GET['p']) ? max(1, (int) $_GET['p']) : 1;

// Crawl request: when the query itself is a URL matching CRAWL_URL_REGEXP,
// try to register its host and page before running the search
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {

    $db->beginTransaction();

    try {

        // Parse host info
        if ($hostURL = Parser::hostURL($q)) {

            // Becomes true only once every $host* setting used below has been assigned
            $hostReady = false;

            // Host exists
            if ($host = $db->getHost(crc32($hostURL->string))) {

                $hostStatus        = $host->status;
                $hostPageLimit     = $host->crawlPageLimit;
                $hostId            = $host->hostId;
                $hostRobots        = $host->robots;
                $hostRobotsPostfix = $host->robotsPostfix;

                $hostReady = true;

            // Register new host, but only while the disk quota is not reached
            } elseif (CRAWL_STOP_DISK_QUOTA_MB_LEFT < disk_free_space('/') / 1000000) {

                // Get robots.txt if exists
                $curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);

                if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
                    $hostRobots = $curl->getContent();
                } else {
                    $hostRobots = null;
                }

                $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
                $hostStatus        = CRAWL_HOST_DEFAULT_STATUS;
                $hostPageLimit     = CRAWL_HOST_DEFAULT_PAGES_LIMIT;

                $hostId = $db->addHost(
                    $hostURL->scheme,
                    $hostURL->name,
                    $hostURL->port,
                    crc32($hostURL->string),
                    time(),
                    null,
                    $hostPageLimit,
                    (string) CRAWL_HOST_DEFAULT_META_ONLY,
                    (string) $hostStatus,
                    $hostRobots,
                    $hostRobotsPostfix
                );

                $hostReady = true;
            }

            // Unknown host + exhausted disk quota leaves the host settings unset: skip the page
            if ($hostReady) {

                // Parse page URI
                $hostPageURI = Parser::uri($q);

                // Init robots parser: the host's own rules when it has any,
                // otherwise the default rules, followed by the postfix rules
                $robots = new Robots(
                    ($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES)
                    . PHP_EOL
                    . (string) $hostRobotsPostfix
                );

                // Save page info
                if ($hostStatus && // host enabled
                    $robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
                    $hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
                    !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists

                    $db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time());
                }
            }
        }

        $db->commit();

    } catch (Exception $e) {

        // Registration is best-effort: undo partial writes, leave a trace, continue with the search
        $db->rollBack();

        error_log('Crawl request failed: ' . $e->getMessage());
    }
}

// Search request
if (!empty($q)) {

    // Build the filtered query once and reuse it for both the total and the page of results
    $searchQuery = Filter::searchQuery($q, $m);

    if ($t === 'image') {

        $resultsTotal = $sphinx->searchHostImagesTotal($searchQuery);
        $results      = $sphinx->searchHostImages(
            $searchQuery,
            ($p - 1) * WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT, // offset of the first result on page $p
            WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT,
            $resultsTotal
        );

    } else {

        $resultsTotal = $sphinx->searchHostPagesTotal($searchQuery);
        $results      = $sphinx->searchHostPages(
            $searchQuery,
            ($p - 1) * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, // offset of the first result on page $p
            WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT,
            $resultsTotal
        );
    }

} else {

    $resultsTotal = 0;
    $results      = [];
}

?>