getTotalPages();
// NOTE(review): the call above is the tail of a statement whose beginning lies
// outside this view; it is kept verbatim. Presumably it assigns $totalPages,
// which is read below - confirm.

// Search field placeholder, pluralized by the total pages count
$placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the new one...'), $totalPages),
                                            sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                            sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                            ]);

// Filter request data
// Non-string values (e.g. ?q[]=...) are treated as an empty request
$q = !empty($_GET['q']) && is_string($_GET['q']) ? Filter::url($_GET['q']) : '';

// Page number is clamped to 1, so a zero, negative or non-numeric value
// cannot produce a negative search offset below
$p = !empty($_GET['p']) ? max(1, (int) $_GET['p']) : 1;

// Crawl request
// The query is treated as a crawl request when it is a valid URL matching CRAWL_URL_REGEXP
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {

  $db->beginTransaction();

  try {

    // Parse host info
    if ($hostURL = Parser::hostURL($q)) {

      // Defaults for the case when the host is unknown and was not registered
      // (disk quota reached): a null status makes the save condition below fail
      // without reading undefined variables
      $hostId            = null;
      $hostStatus        = null;
      $hostPageLimit     = null;
      $hostRobots        = null;
      $hostRobotsPostfix = null;

      // Host exists
      if ($host = $db->getHost(crc32($hostURL->string))) {

        $hostStatus        = $host->status;
        $hostPageLimit     = $host->crawlPageLimit;
        $hostId            = $host->hostId;
        $hostRobots        = $host->robots;
        $hostRobotsPostfix = $host->robotsPostfix;

      // Register new host
      } else {

        // Disk quota not reached
        if (CRAWL_STOP_DISK_QUOTA_MB_LEFT < disk_free_space('/') / 1000000) {

          // Get robots.txt if exists
          // Only a 200 response containing a "user-agent:" directive (case-insensitive) is accepted
          $curl = new Curl($hostURL->string . '/robots.txt');

          if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
            $hostRobots = $curl->getContent();
          } else {
            $hostRobots = null;
          }

          $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;

          $hostStatus    = CRAWL_HOST_DEFAULT_STATUS;
          $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;

          $hostId = $db->addHost($hostURL->scheme,
                                 $hostURL->name,
                                 $hostURL->port,
                                 crc32($hostURL->string),
                                 time(),
                                 null,
                                 $hostPageLimit,
                                 (string) CRAWL_HOST_DEFAULT_META_ONLY,
                                 (string) $hostStatus,
                                 $hostRobots,
                                 $hostRobotsPostfix);
        }
      }

      // Parse page URI
      $hostPageURI = Parser::uri($q);

      // Init robots parser
      // Host rules are used when available, CRAWL_ROBOTS_DEFAULT_RULES otherwise
      // (the condition was inverted before, which discarded the host rules)
      $robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . (string) $hostRobotsPostfix);

      // Save page info
      if ($hostStatus && // host enabled
          $robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
          $hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
         !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists

          $db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time());
      }
    }

    $db->commit();

  } catch(Exception $e){

    // Crawl registration is best-effort: drop partial changes and continue with the search
    $db->rollBack();
  }
}

// Search request
if (!empty($q)) {

  // Build the search query once, it is shared by the total counter and the page request
  $sphinxQuery = Filter::sphinxSearchQuery($q);

  $resultsTotal = $sphinx->searchHostPagesTotal($sphinxQuery);
  $results      = $sphinx->searchHostPages($sphinxQuery,
                                           ($p - 1) * WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT, // offset of the requested page
                                           WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT,
                                           $resultsTotal);

} else {

  $resultsTotal = 0;
  $results      = [];
}

?> <?php echo (empty($q) ? _('Empty request - YGGo!') : ($p > 1 ? sprintf(_('%s - #%s - YGGo!'), htmlentities($q), $p) : sprintf(_('%s - YGGo!'), htmlentities($q)))) ?>

getTotalPagesByHttpCode(null)) { ?>
getFoundHostPage($result->id)) { ?> scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>

metaTitle ?>

metaDescription)) { ?> metaDescription ?> favicon
getTotalPagesByHttpCode(null)) { ?>