getHostImagesTotal();
$placeholder = Filter::plural($totalPages, [sprintf(_('Over %s image or enter the new one...'), $totalPages),
sprintf(_('Over %s images or enter the new one...'), $totalPages),
sprintf(_('Over %s images or enter the new one...'), $totalPages),
]);
break;
default:
$totalPages = $sphinx->getHostPagesTotal();
$placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the new one...'), $totalPages),
sprintf(_('Over %s pages or enter the new one...'), $totalPages),
sprintf(_('Over %s pages or enter the new one...'), $totalPages),
]);
}
// Crawl request
// When the query itself is a valid URL matching CRAWL_URL_REGEXP, look up its host
// (registering it when unknown) and add the page to the crawl queue.
// Everything runs in one DB transaction; on any exception it is rolled back
// and the search request below still runs.
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {

  $db->beginTransaction();

  try {

    // Parse host info
    if ($hostURL = Parser::hostURL($q)) {

      // Host settings stay null until the host is found or registered,
      // so a skipped registration (disk quota reached) never leaves them undefined
      $hostId            = null;
      $hostStatus        = null;
      $hostPageLimit     = null;
      $hostRobots        = null;
      $hostRobotsPostfix = null;

      // Host exists
      if ($host = $db->getHost(crc32($hostURL->string))) {

        $hostStatus        = $host->status;
        $hostPageLimit     = $host->crawlPageLimit;
        $hostId            = $host->hostId;
        $hostRobots        = $host->robots;
        $hostRobotsPostfix = $host->robotsPostfix;

      // Register new host, but only while the disk quota is not reached
      } elseif (CRAWL_STOP_DISK_QUOTA_MB_LEFT < disk_free_space('/') / 1000000) {

        // Get robots.txt if exists; a response without any "user-agent:" line is ignored
        $curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);

        if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
          $hostRobots = $curl->getContent();
        } else {
          $hostRobots = null;
        }

        $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
        $hostStatus        = CRAWL_HOST_DEFAULT_STATUS;
        $hostPageLimit     = CRAWL_HOST_DEFAULT_PAGES_LIMIT;

        $hostId = $db->addHost($hostURL->scheme,
                               $hostURL->name,
                               $hostURL->port,
                               crc32($hostURL->string),
                               time(),
                               null,
                               $hostPageLimit,
                               (string) CRAWL_HOST_DEFAULT_META_ONLY,
                               (string) $hostStatus,
                               $hostRobots,
                               $hostRobotsPostfix);
      }

      // Continue only when the host was found or registered above
      if (null !== $hostId) {

        // Parse page URI
        $hostPageURI = Parser::uri($q);

        // Init robots parser: host rules when available, default rules otherwise,
        // always followed by the postfix rules
        $robots = new Robots(($hostRobots ? (string) $hostRobots : (string) CRAWL_ROBOTS_DEFAULT_RULES) . PHP_EOL . (string) $hostRobotsPostfix);

        // Save page info
        if ($hostStatus && // host enabled
            $robots->uriAllowed($hostPageURI->string) && // page allowed by robots.txt rules
            $hostPageLimit > $db->getTotalHostPages($hostId) && // pages quantity not reached host limit
           !$db->getHostPage($hostId, crc32($hostPageURI->string))) { // page not exists

          $db->addHostPage($hostId, crc32($hostPageURI->string), $hostPageURI->string, time());
        }
      }
    }

    $db->commit();

  } catch (Exception $e) {

    // Best effort: discard the partial registration and carry on with the search
    $db->rollBack();
  }
}
// Search request
// Defaults cover the empty-query case: no matches and nothing to render
$resultsTotal = 0;
$results      = [];

if (!empty($q)) {

  if ($t != 'image') {

    // Page search: the total is fetched first because it is passed on to the results query
    $resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m));
    $results      = $sphinx->searchHostPages(
      Filter::searchQuery($q, $m),
      ($p - 1) * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, // offset of page number $p
      WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT,
      $resultsTotal
    );

  } else {

    // Image search: same sequence with the image-specific limit
    $resultsTotal = $sphinx->searchHostImagesTotal(Filter::searchQuery($q, $m));
    $results      = $sphinx->searchHostImages(
      Filter::searchQuery($q, $m),
      ($p - 1) * WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT, // offset of page number $p
      WEBSITE_PAGINATION_SEARCH_IMAGE_RESULTS_LIMIT,
      $resultsTotal
    );
  }
}
?>
1 ? sprintf(_('%s - #%s - YGGo!'), htmlentities($q), $p) : sprintf(_('%s - YGGo!'), htmlentities($q)))) ?>
getTotalPagesByHttpCode(null)) { ?>
getFoundHostImage($result->id)) { ?>
scheme . '://' .
$hostImage->name .
($hostImage->port ? ':' . $hostImage->port : false) .
$hostImage->uri;
// Get local image data
// Resolve $hostImageURLencoded (a data: URI) for the current image result.
// The stored copy is used only when it actually holds data: the save step at the end of
// this block stores null data when crawlMetaOnly is set, and such rows must be fetched remotely.
if (($lastHostImageDescription = $db->getLastHostImageDescription($hostImage->hostImageId)) &&
    !empty($lastHostImageDescription->data)) {

  $hostImageURLencoded = $lastHostImageDescription->data;

// Get remote if local index not found or CRAWL_HOST_DEFAULT_META_ONLY enabled
} else {

  // Init image request
  $hostImageCurl = new Curl($hostImageURL, PROXY_CURLOPT_USERAGENT);

  // Record the response code once, then skip item render on any non-200 response
  $hostImageHttpCode = $hostImageCurl->getCode();

  $db->updateHostImageHttpCode($hostImage->hostImageId, (int) $hostImageHttpCode, time());

  if (200 != $hostImageHttpCode) {

    $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  // Skip image processing on MIME type not provided
  if (!$hostImageContentType = $hostImageCurl->getContentType()) {

    $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  // Skip image processing on MIME type not allowed in settings
  // (CRAWL_IMAGE_MIME is a comma separated list matched as substrings of the content type)
  $hostImageBanned = true;

  foreach (explode(',', CRAWL_IMAGE_MIME) as $mime) {

    if (false !== strpos($hostImageContentType, trim($mime))) {

      $hostImageBanned = false;
      break;
    }
  }

  if ($hostImageBanned) {

    $db->updateHostImageMime($hostImage->hostImageId, $hostImageContentType, time());

    // NOTE(review): the counter is not initialised anywhere in the visible code,
    // so start it at zero when missing instead of adding to an undefined variable
    if (!isset($hostImagesBanned)) {
      $hostImagesBanned = 0;
    }

    $hostImagesBanned += $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  // Skip image processing without returned content
  if (!$hostImageContent = $hostImageCurl->getContent()) {

    $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  // Convert remote image data to base64 string to prevent direct URL call
  // The data URI subtype is taken from the URL extension, images without one are skipped
  if (!$hostImageExtension = pathinfo($hostImageURL, PATHINFO_EXTENSION)) {

    $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  if (!$hostImageBase64 = base64_encode($hostImageContent)) {

    $db->updateHostImageTimeBanned($hostImage->hostImageId, time());

    continue;
  }

  $hostImageURLencoded = 'data:image/' . str_replace(['svg'], ['svg+xml'], $hostImageExtension) . ';base64,' . $hostImageBase64;

  // Save image content on data settings enabled
  $db->updateHostImage($hostImage->hostImageId,
                       Filter::mime($hostImageContentType),
                       time());

  // The checksum is always stored; the data itself only when crawlMetaOnly is off
  $db->setHostImageDescriptionData($hostImage->hostImageId,
                                   crc32($hostImageURLencoded),
                                   $hostImage->crawlMetaOnly ? null : $hostImageURLencoded,
                                   time());
}
?>
getHostImageHostPagesTotal($result->id) ?>
getHostImageHostPages($result->id, WEBSITE_SEARCH_IMAGE_RELATED_PAGE_RESULTS_LIMIT) as $hostPage) { ?>
getFoundHostPage($hostPage->hostPageId)) { ?>
scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>
getLastPageDescription($result->id)) { ?>
metaTitle ?>
getLastHostImageDescription($result->id)) { ?>
title ?> alt ?>
0) { ?>
getFoundHostPage($result->id)) { ?>
scheme . '://' .
$hostPage->name .
($hostPage->port ? ':' . $hostPage->port : false) .
$hostPage->uri;
?>
getLastPageDescription($result->id)) { ?>
metaTitle ?>
metaDescription)) { ?>
metaDescription ?>
getTotalPagesByHttpCode(null)) { ?>