Mirror of https://github.com/YGGverse/YGGo.git (synced 2025-01-08 22:07:56 +00:00)

Commit: 28bf526d53
Parent: 8ce0324e94

add host nsfw settings
@@ -270,14 +270,24 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
 define('CRAWL_HOST_DEFAULT_META_ONLY', false);
 
 /*
  * Images limit per new host by default
  *
  * Crawler stops indexing on this limit reach to prevent disk overuse
  *
  * Custom rule for specified host could be provided in the DB `host`.`crawlImageLimit` field
  *
  */
 define('CRAWL_HOST_DEFAULT_IMAGES_LIMIT', 1000);
 
+/*
+ * Not suitable/safe for work status for new host by default
+ *
+ * Could be filtered in crawl conditions or search results
+ *
+ * Custom rule for specified host could be provided in the DB `host`.`nsfw` field
+ *
+ */
+define('CRAWL_HOST_DEFAULT_NSFW', false);
+
 /*
  * Default robots.txt rules on remote file not exists
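
The new constant follows the same default-plus-override pattern as the other CRAWL_HOST_DEFAULT_* settings: the config value seeds newly discovered hosts, while the DB `host`.`nsfw` field carries the per-host rule. A minimal sketch of resolving the effective flag, assuming a $host row object as returned by the MySQL library's getHost(); the helper name hostIsNsfw is hypothetical, not part of this commit:

<?php

// Hypothetical helper: resolve the effective NSFW flag for a host,
// mirroring the fallback logic repeated in the crawler hunks below.
function hostIsNsfw(?object $host): bool
{
  // Per-host rule stored in the DB `host`.`nsfw` field wins when a row exists
  if ($host !== null) {

    return (bool) $host->nsfw;
  }

  // No row yet: fall back to the application-wide default
  return (bool) CRAWL_HOST_DEFAULT_NSFW;
}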
@@ -314,7 +324,7 @@ define('CRAWL_MANIFEST', true);
  * Manifest API version compatibility
  *
  */
-define('CRAWL_MANIFEST_API_VERSION', 0.6);
+define('CRAWL_MANIFEST_API_VERSION', 0.7);
 
 /*
  * Set default auto-crawl status for new manifest added
@@ -438,6 +448,7 @@ define('API_HOSTS_FIELDS',
         `host`.`crawlImageLimit`,
         `host`.`robots`,
         `host`.`robotsPostfix`,
+        `host`.`nsfw`,
         `host`.`timeAdded`,
         `host`.`timeUpdated`,
         (SELECT COUNT(*) FROM `hostPage` WHERE `hostPage`.`hostId` = `host`.`hostId`) AS `hostPagesTotal`,
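
Since API_HOSTS_FIELDS is a field list for queries against the `host` table, consumers of the hosts endpoint now receive the flag as well. A sketch under that assumption, using a plain PDO handle; $pdo and the surrounding loop are illustrative, not taken from this commit:

<?php

// Illustrative only: interpolate the field list into a hosts query
// and read the new per-host flag from each row.
$query = $pdo->prepare('SELECT ' . API_HOSTS_FIELDS . ' FROM `host`');
$query->execute();

foreach ($query->fetchAll(PDO::FETCH_OBJ) as $host) {

  echo $host->name . ' nsfw=' . ($host->nsfw ? 'true' : 'false') . PHP_EOL;
}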
@@ -171,6 +171,7 @@ try {
   if ($host = $db->getHost(crc32($hostURL))) {
 
     $hostStatus = $host->status;
+    $hostNsfw = $host->nsfw;
     $hostPageLimit = $host->crawlPageLimit;
     $hostImageLimit = $host->crawlImageLimit;
     $hostId = $host->hostId;
@@ -198,6 +199,7 @@ try {
     $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
 
     $hostStatus = CRAWL_HOST_DEFAULT_STATUS;
+    $hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
     $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
     $hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
 
@@ -211,6 +213,7 @@ try {
                            $hostImageLimit,
                            (string) CRAWL_HOST_DEFAULT_META_ONLY,
                            (string) $hostStatus,
+                           (string) $hostNsfw,
                            $hostRobots,
                            $hostRobotsPostfix);
 
@@ -534,6 +537,7 @@ try {
   if ($host = $db->getHost(crc32($hostImageURL->string))) {
 
     $hostStatus = $host->status;
+    $hostNsfw = $host->nsfw;
     $hostPageLimit = $host->crawlPageLimit;
     $hostImageLimit = $host->crawlImageLimit;
     $hostId = $host->hostId;
@@ -561,6 +565,7 @@ try {
     $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
 
     $hostStatus = CRAWL_HOST_DEFAULT_STATUS;
+    $hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
     $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
     $hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
     $hostId = $db->addHost($hostImageURL->scheme,
@@ -573,6 +578,7 @@ try {
                            $hostImageLimit,
                            (string) CRAWL_HOST_DEFAULT_META_ONLY,
                            (string) $hostStatus,
+                           (string) $hostNsfw,
                            $hostRobots,
                            $hostRobotsPostfix);
 
@@ -692,6 +698,7 @@ try {
   if ($host = $db->getHost(crc32($hostURL->string))) {
 
     $hostStatus = $host->status;
+    $hostNsfw = $host->nsfw;
     $hostPageLimit = $host->crawlPageLimit;
     $hostImageLimit = $host->crawlImageLimit;
     $hostId = $host->hostId;
@@ -719,6 +726,7 @@ try {
     $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
 
     $hostStatus = CRAWL_HOST_DEFAULT_STATUS;
+    $hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
     $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
     $hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
     $hostId = $db->addHost($hostURL->scheme,
@@ -731,6 +739,7 @@ try {
                            $hostImageLimit,
                            (string) CRAWL_HOST_DEFAULT_META_ONLY,
                            (string) $hostStatus,
+                           (string) $hostNsfw,
                            $hostRobots,
                            $hostRobotsPostfix);
 
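
All six crawler hunks repeat one pattern: read `nsfw` from an existing host row, or seed it from CRAWL_HOST_DEFAULT_NSFW before the addHost() call. The commit threads the value through without acting on it yet; the "filtered in crawl conditions" case mentioned in the config comment could look roughly like the sketch below. CRAWL_SKIP_NSFW and the $queue loop are made up for illustration and are not part of this commit:

<?php

// Hypothetical crawl condition, for illustration only:
// CRAWL_SKIP_NSFW is a made-up switch, not a YGGo config constant.
define('CRAWL_SKIP_NSFW', true);

foreach ($queue as $queueHost) { // $queue stands in for the crawler's host queue

  $hostNsfw = $queueHost->nsfw ?? CRAWL_HOST_DEFAULT_NSFW;

  if (CRAWL_SKIP_NSFW && $hostNsfw) {

    continue; // leave NSFW hosts unindexed
  }

  // ... normal indexing path ...
}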
Binary file not shown.
@@ -102,11 +102,11 @@ class MySQL {
     return $query->fetch()->total;
   }
 
-  public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) {
+  public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, string $nsfw, mixed $robots, mixed $robotsPostfix) {
 
-    $query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
+    $query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `nsfw`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
 
-    $query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $robots, $robotsPostfix]);
+    $query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $nsfw, $robots, $robotsPostfix]);
 
     return $this->_db->lastInsertId();
   }
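
Note the argument order: the new $nsfw parameter sits between $status and $robots, matching the column order in the INSERT, and the prepared statement grows from 12 to 13 placeholders. A call sketch of the extended signature, with every host value made up for illustration:

<?php

// Illustrative call of the extended addHost() signature; values are made up.
$hostId = $db->addHost('https',                               // scheme
                       'example.com',                         // name
                       null,                                  // port
                       crc32('https://example.com'),          // crc32url
                       time(),                                // timeAdded
                       null,                                  // timeUpdated
                       CRAWL_HOST_DEFAULT_PAGES_LIMIT,        // crawlPageLimit
                       CRAWL_HOST_DEFAULT_IMAGES_LIMIT,       // crawlImageLimit
                       (string) CRAWL_HOST_DEFAULT_META_ONLY, // crawlMetaOnly
                       (string) CRAWL_HOST_DEFAULT_STATUS,    // status
                       (string) CRAWL_HOST_DEFAULT_NSFW,      // nsfw (new)
                       null,                                  // robots
                       null);                                 // robotsPostfix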
@@ -1,7 +1,7 @@
 <?php
 
 // Current version
-define('API_VERSION', 0.6);
+define('API_VERSION', 0.7);
 
 // Load system dependencies
 require_once('../config/app.php');
@@ -127,6 +127,7 @@ if (API_ENABLED) {
       'config' => [
         'websiteDomain' => WEBSITE_DOMAIN,
         'crawlUrlRegexp' => CRAWL_URL_REGEXP,
+        'crawlHostDefaultNsfw' => CRAWL_HOST_DEFAULT_NSFW,
         'crawlHostDefaultPagesLimit' => CRAWL_HOST_DEFAULT_PAGES_LIMIT,
         'crawlHostDefaultImagesLimit' => CRAWL_HOST_DEFAULT_IMAGES_LIMIT,
         'crawlHostDefaultStatus' => CRAWL_HOST_DEFAULT_STATUS,
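
Exposing CRAWL_HOST_DEFAULT_NSFW through the API config block lets remote instances learn each other's defaults, which is presumably why API_VERSION and CRAWL_MANIFEST_API_VERSION both move to 0.7. How a client might read it; the endpoint URL and the exact JSON nesting around 'config' are assumptions, not confirmed by this diff:

<?php

// Sketch of a client read; URL and response nesting are assumed.
$response = json_decode(file_get_contents('https://example.com/api.php'), true);

$remoteNsfwDefault = $response['config']['crawlHostDefaultNsfw'] ?? false;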
@@ -59,6 +59,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
   if ($host = $db->getHost(crc32($hostURL->string))) {
 
     $hostStatus = $host->status;
+    $hostNsfw = $host->nsfw;
     $hostPageLimit = $host->crawlPageLimit;
     $hostId = $host->hostId;
     $hostRobots = $host->robots;
@@ -82,6 +83,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
     $hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
 
     $hostStatus = CRAWL_HOST_DEFAULT_STATUS;
+    $hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
     $hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
     $hostId = $db->addHost($hostURL->scheme,
                            $hostURL->name,
@@ -92,6 +94,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
                            $hostPageLimit,
                            (string) CRAWL_HOST_DEFAULT_META_ONLY,
                            (string) $hostStatus,
+                           (string) $hostNsfw,
                            $hostRobots,
                            $hostRobotsPostfix);
   }
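
In the search entry point the commit only seeds the flag for hosts discovered via the query box; the "filtered in search results" case from the config comment is left for later. A sketch of what such filtering could look like, where $results, the ->hostNsfw property, and the opt-in flag are all assumed shapes, not part of this commit:

<?php

// Hypothetical result-side filter, for illustration only.
$nsfwAllowed = !empty($_GET['nsfw']); // made-up opt-in query flag

$results = array_filter($results, function ($result) use ($nsfwAllowed) {

  return $nsfwAllowed || empty($result->hostNsfw);
});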