getHostImagesTotal();

    $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s image or enter the new one...'), $totalPages),
                                                sprintf(_('Over %s images or enter the new one...'), $totalPages),
                                                sprintf(_('Over %s images or enter the new one...'), $totalPages),
                                                ]);

  break;
  default:

    $totalPages  = $sphinx->getHostPagesTotal();

    $placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the new one...'), $totalPages),
                                                sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                                sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                                ]);
}

// Crawl request
//
// When the query itself is a valid URL matching CRAWL_URL_REGEXP, the page is
// queued for crawling (and its host registered when it is not known yet).
// Everything is done in a single transaction that is rolled back on Exception.
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {

  $db->beginTransaction();

  try {

    // Parse host info
    if ($hostURL = Parser::hostURL($q)) {

      // Stays null when the host is unknown and cannot be registered (disk
      // quota reached); the page is then not queued at all. Previously the
      // code below read $hostRobots, $hostStatus, etc. while they were undefined.
      $hostId = null;

      // Host exists
      if ($host = $db->getHost(crc32($hostURL->string))) {

        $hostStatus        = $host->status;
        $hostPageLimit     = $host->crawlPageLimit;
        $hostId            = $host->hostId;
        $hostRobots        = $host->robots;
        $hostRobotsPostfix = $host->robotsPostfix;

      // Register new host, only while disk quota is not reached
      } elseif (CRAWL_STOP_DISK_QUOTA_MB_LEFT < disk_free_space('/') / 1000000) {

        // Get robots.txt if exists
        $curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);

        if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
          $hostRobots = $curl->getContent();
        } else {
          $hostRobots = null;
        }

        $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;

        $hostStatus    = CRAWL_HOST_DEFAULT_STATUS;
        $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
        $hostId        = $db->addHost($hostURL->scheme,
                                      $hostURL->name,
                                      $hostURL->port,
                                      crc32($hostURL->string),
                                      time(),
                                      null,
                                      $hostPageLimit,
                                      (string) CRAWL_HOST_DEFAULT_META_ONLY,
                                      (string) $hostStatus,
                                      $hostRobots,
                                      $hostRobotsPostfix);
      }

      if (null !== $hostId) {

        // Parse page URI
        $hostPageURI = Parser::uri($q);

        // Init robots parser: host rules when the host has them, default rules
        // otherwise, always followed by the postfix rules.
        // (The condition used to be negated, which discarded the host rules.)
        $robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . (string) $hostRobotsPostfix);

        // Save page info
        if ($hostStatus && // host enabled
            $robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
            $hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
           !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists

            $db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time());
        }
      }
    }

    $db->commit();

  } catch(Exception $e){

    // Best effort: a failed crawl registration must not break the search page
    $db->rollBack();
  }
}

// Search request
//
// Produces $resultsTotal and $results for the template below; an empty query
// yields no results.
if (!empty($q)) {

  // Build the filtered query once, it is shared by the total and the page request
  $searchQuery = Filter::searchQuery($q, $m);

  if ($t == 'image') {

    $resultsTotal = $sphinx->searchHostImagesTotal($searchQuery);

    // Second argument is presumably the offset of 1-based page $p - confirm against the Sphinx wrapper
    $results      = $sphinx->searchHostImages($searchQuery,
                                              $p * WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT,
                                              WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT,
                                              $resultsTotal);

  } else {

    $resultsTotal = $sphinx->searchHostPagesTotal($searchQuery);

    $results      = $sphinx->searchHostPages($searchQuery,
                                             $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT,
                                             WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT,
                                             $resultsTotal);
  }

} else {

  $resultsTotal = 0;
  $results      = [];
}

?> <?php echo (empty($q) ? _('Empty request - YGGo!') : ($p > 1 ? sprintf(_('%s - #%s - YGGo!'), htmlentities($q), $p) : sprintf(_('%s - YGGo!'), htmlentities($q)))) ?>

getTotalPagesByHttpCode(null)) { ?>
getFoundHostImage($result->id)) { ?> scheme . '://' . $hostImage->name . ($hostImage->port ? ':' . $hostImage->port : false) . $hostImage->uri;

  // Resolve $hostImageURLencoded for the current result: either the data
  // already stored for the image, or a data URI built from a fresh download.
  // Every failure path bans the image (updateHostImageTimeBanned) and skips
  // rendering of this result with `continue`.

  // Get remote image data
  if (empty($hostImage->data)) {

    // Init image request
    $hostImageCurl = new Curl($hostImageURL, PROXY_CURLOPT_USERAGENT);

    // Skip item render on timeout
    $hostImageHttpCode = (int) $hostImageCurl->getCode();

    $db->updateHostImageHttpCode($hostImage->hostImageId, $hostImageHttpCode, time());

    if (200 !== $hostImageHttpCode) {

      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    // Skip image processing on MIME type not provided
    if (!$hostImageContentType = $hostImageCurl->getContentType()) {

      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    // Skip image processing on MIME type not allowed in settings
    $hostImageBanned = true;

    foreach ((array) explode(',', CRAWL_IMAGE_MIME) as $mime) {

      if (false !== strpos($hostImageContentType, trim($mime))) {

        $hostImageBanned = false;
        break;
      }
    }

    if ($hostImageBanned) {

      // This branch used to add the result to an uninitialised
      // $hostImagesBanned counter and then fall through, so a banned image
      // was still downloaded, stored and rendered. Skip it like the other
      // rejected cases.
      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    // Skip image processing without returned content
    if (!$hostImageContent = $hostImageCurl->getContent()) {

      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    // Convert remote image data to base64 string to prevent direct URL call
    if (!$hostImageExtension = pathinfo($hostImageURL, PATHINFO_EXTENSION)) {

      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    if (!$hostImageBase64 = base64_encode($hostImageContent)) {

      $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

      continue;
    }

    $hostImageURLencoded = 'data:image/' . $hostImageExtension . ';base64,' . $hostImageBase64;

    // Save image content on data settings enabled
    $db->updateHostImage($hostImage->hostImageId,
                         Filter::mime($hostImageContentType),
                         CRAWL_HOST_DEFAULT_META_ONLY ? null : $hostImageURLencoded,
                         time());

  // Local image data exists
  } else {

    $hostImageURLencoded = $hostImage->data;
  }

?>
<?php echo htmlentities($hostImage->description) ?> getHostImageHostPagesTotal($result->id) ?> getHostImageHostPages($result->id, WEBSITE_SEARCH_IMAGE_RELATED_PAGE_RESULTS_LIMIT) as $hostPage) { ?> getFoundHostPage($hostPage->hostPageId)) { ?> scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>

metaTitle ?>

description)) { ?> description ?> favicon 0) { ?>

getFoundHostPage($result->id)) { ?> scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri; ?>

metaTitle ?>

metaDescription)) { ?> metaDescription ?> favicon
getTotalPagesByHttpCode(null)) { ?>