mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-09 06:17:52 +00:00
add banned items counters
This commit is contained in:
parent
b6605b9132
commit
20514c455f
@ -36,6 +36,8 @@ $manifestsIndexed = 0;
|
|||||||
$hostPagesAdded = 0;
|
$hostPagesAdded = 0;
|
||||||
$hostImagesAdded = 0;
|
$hostImagesAdded = 0;
|
||||||
$hostsAdded = 0;
|
$hostsAdded = 0;
|
||||||
|
$hostPagesBanned = 0;
|
||||||
|
$hostImagesBanned = 0;
|
||||||
|
|
||||||
// Connect database
|
// Connect database
|
||||||
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||||
@ -237,6 +239,8 @@ try {
|
|||||||
// Skip image processing non 200 code
|
// Skip image processing non 200 code
|
||||||
if (200 != $curl->getCode()) {
|
if (200 != $curl->getCode()) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -245,6 +249,8 @@ try {
|
|||||||
// Skip image processing on MIME type not provided
|
// Skip image processing on MIME type not provided
|
||||||
if (!$hostImageContentType = $curl->getContentType()) {
|
if (!$hostImageContentType = $curl->getContentType()) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -253,6 +259,8 @@ try {
|
|||||||
// Skip image processing on MIME type not allowed in settings
|
// Skip image processing on MIME type not allowed in settings
|
||||||
if (false === strpos(CRAWL_IMAGE_MIME_TYPE, $hostImageContentType)) {
|
if (false === strpos(CRAWL_IMAGE_MIME_TYPE, $hostImageContentType)) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -264,6 +272,8 @@ try {
|
|||||||
// Skip image processing without returned content
|
// Skip image processing without returned content
|
||||||
if (!$hostImageContent = $curl->getContent()) {
|
if (!$hostImageContent = $curl->getContent()) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -271,6 +281,8 @@ try {
|
|||||||
|
|
||||||
if (!$hostImageExtension = @pathinfo($queueHostImageURL, PATHINFO_EXTENSION)) {
|
if (!$hostImageExtension = @pathinfo($queueHostImageURL, PATHINFO_EXTENSION)) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -278,6 +290,8 @@ try {
|
|||||||
|
|
||||||
if (!$hostImageBase64 = @base64_encode($hostImageContent)) {
|
if (!$hostImageBase64 = @base64_encode($hostImageContent)) {
|
||||||
|
|
||||||
|
$hostImagesBanned++;
|
||||||
|
|
||||||
$hostImageTimeBanned = time();
|
$hostImageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -315,6 +329,8 @@ try {
|
|||||||
// Skip page processing non 200 code
|
// Skip page processing non 200 code
|
||||||
if (200 != $curl->getCode()) {
|
if (200 != $curl->getCode()) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -323,6 +339,8 @@ try {
|
|||||||
// Skip page processing on MIME type not provided
|
// Skip page processing on MIME type not provided
|
||||||
if (!$contentType = $curl->getContentType()) {
|
if (!$contentType = $curl->getContentType()) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -331,6 +349,8 @@ try {
|
|||||||
// Skip page processing on MIME type not allowed in settings
|
// Skip page processing on MIME type not allowed in settings
|
||||||
if (false === strpos(CRAWL_PAGE_MIME_TYPE, $contentType)) {
|
if (false === strpos(CRAWL_PAGE_MIME_TYPE, $contentType)) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -339,6 +359,8 @@ try {
|
|||||||
// Skip page processing without returned data
|
// Skip page processing without returned data
|
||||||
if (!$content = $curl->getContent()) {
|
if (!$content = $curl->getContent()) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -354,6 +376,8 @@ try {
|
|||||||
|
|
||||||
if ($title->length == 0) {
|
if ($title->length == 0) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -387,6 +411,8 @@ try {
|
|||||||
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
// Append page with meta robots:noindex value to the robotsPostfix disallow list
|
||||||
if (false !== stripos($metaRobots, 'noindex')) {
|
if (false !== stripos($metaRobots, 'noindex')) {
|
||||||
|
|
||||||
|
$hostPagesBanned++;
|
||||||
|
|
||||||
$hostPageTimeBanned = time();
|
$hostPageTimeBanned = time();
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
@ -713,4 +739,6 @@ echo 'Images added: ' . $hostImagesAdded . PHP_EOL;
|
|||||||
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
||||||
echo 'Manifests indexed: ' . $manifestsIndexed . PHP_EOL;
|
echo 'Manifests indexed: ' . $manifestsIndexed . PHP_EOL;
|
||||||
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
||||||
|
echo 'Hosts pages banned: ' . $hostPagesBanned . PHP_EOL;
|
||||||
|
echo 'Hosts images banned: ' . $hostImagesBanned . PHP_EOL;
|
||||||
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL;
|
echo 'Total time: ' . microtime(true) - $timeStart . PHP_EOL . PHP_EOL;
|
||||||
|
Loading…
Reference in New Issue
Block a user