mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-24 21:44:59 +00:00
add host nsfw settings
This commit is contained in:
parent
8ce0324e94
commit
28bf526d53
@ -270,14 +270,24 @@ define('CRAWL_HOST_DEFAULT_STATUS', true);
|
||||
define('CRAWL_HOST_DEFAULT_META_ONLY', false);
|
||||
|
||||
/*
|
||||
* Images limit per new host by default
|
||||
* Not suitable/safe for work status for new host by default
|
||||
*
|
||||
* Crawler stops indexing once this limit is reached to prevent disk overuse
|
||||
* Could be filtered in search results
|
||||
*
|
||||
* Custom rule for specified host could be provided in the DB `host`.`crawlImageLimit` field
|
||||
* Custom rule for specified host could be provided in the DB `host`.`nsfw` field
|
||||
*
|
||||
*/
|
||||
define('CRAWL_HOST_DEFAULT_IMAGES_LIMIT', 1000);
|
||||
define('CRAWL_HOST_DEFAULT_NSFW', false);
|
||||
|
||||
/*
|
||||
* Not suitable/safe for work status for new host by default
|
||||
*
|
||||
* Could be filtered in crawl conditions or search results
|
||||
*
|
||||
* Custom rule for specified host could be provided in the DB `host`.`nsfw` field
|
||||
*
|
||||
*/
|
||||
define('CRAWL_HOST_DEFAULT_NSFW', false);
|
||||
|
||||
/*
|
||||
* Default robots.txt rules used when the remote file does not exist
|
||||
@ -314,7 +324,7 @@ define('CRAWL_MANIFEST', true);
|
||||
* Manifest API version compatibility
|
||||
*
|
||||
*/
|
||||
define('CRAWL_MANIFEST_API_VERSION', 0.6);
|
||||
define('CRAWL_MANIFEST_API_VERSION', 0.7);
|
||||
|
||||
/*
|
||||
* Set default auto-crawl status for newly added manifests
|
||||
@ -438,6 +448,7 @@ define('API_HOSTS_FIELDS',
|
||||
`host`.`crawlImageLimit`,
|
||||
`host`.`robots`,
|
||||
`host`.`robotsPostfix`,
|
||||
`host`.`nsfw`,
|
||||
`host`.`timeAdded`,
|
||||
`host`.`timeUpdated`,
|
||||
(SELECT COUNT(*) FROM `hostPage` WHERE `hostPage`.`hostId` = `host`.`hostId`) AS `hostPagesTotal`,
|
||||
|
@ -171,6 +171,7 @@ try {
|
||||
if ($host = $db->getHost(crc32($hostURL))) {
|
||||
|
||||
$hostStatus = $host->status;
|
||||
$hostNsfw = $host->nsfw;
|
||||
$hostPageLimit = $host->crawlPageLimit;
|
||||
$hostImageLimit = $host->crawlImageLimit;
|
||||
$hostId = $host->hostId;
|
||||
@ -198,6 +199,7 @@ try {
|
||||
$hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
|
||||
|
||||
$hostStatus = CRAWL_HOST_DEFAULT_STATUS;
|
||||
$hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
|
||||
$hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
|
||||
$hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
|
||||
|
||||
@ -211,6 +213,7 @@ try {
|
||||
$hostImageLimit,
|
||||
(string) CRAWL_HOST_DEFAULT_META_ONLY,
|
||||
(string) $hostStatus,
|
||||
(string) $hostNsfw,
|
||||
$hostRobots,
|
||||
$hostRobotsPostfix);
|
||||
|
||||
@ -534,6 +537,7 @@ try {
|
||||
if ($host = $db->getHost(crc32($hostImageURL->string))) {
|
||||
|
||||
$hostStatus = $host->status;
|
||||
$hostNsfw = $host->nsfw;
|
||||
$hostPageLimit = $host->crawlPageLimit;
|
||||
$hostImageLimit = $host->crawlImageLimit;
|
||||
$hostId = $host->hostId;
|
||||
@ -561,6 +565,7 @@ try {
|
||||
$hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
|
||||
|
||||
$hostStatus = CRAWL_HOST_DEFAULT_STATUS;
|
||||
$hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
|
||||
$hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
|
||||
$hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
|
||||
$hostId = $db->addHost($hostImageURL->scheme,
|
||||
@ -573,6 +578,7 @@ try {
|
||||
$hostImageLimit,
|
||||
(string) CRAWL_HOST_DEFAULT_META_ONLY,
|
||||
(string) $hostStatus,
|
||||
(string) $hostNsfw,
|
||||
$hostRobots,
|
||||
$hostRobotsPostfix);
|
||||
|
||||
@ -692,6 +698,7 @@ try {
|
||||
if ($host = $db->getHost(crc32($hostURL->string))) {
|
||||
|
||||
$hostStatus = $host->status;
|
||||
$hostNsfw = $host->nsfw;
|
||||
$hostPageLimit = $host->crawlPageLimit;
|
||||
$hostImageLimit = $host->crawlImageLimit;
|
||||
$hostId = $host->hostId;
|
||||
@ -719,6 +726,7 @@ try {
|
||||
$hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
|
||||
|
||||
$hostStatus = CRAWL_HOST_DEFAULT_STATUS;
|
||||
$hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
|
||||
$hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
|
||||
$hostImageLimit= CRAWL_HOST_DEFAULT_IMAGES_LIMIT;
|
||||
$hostId = $db->addHost($hostURL->scheme,
|
||||
@ -731,6 +739,7 @@ try {
|
||||
$hostImageLimit,
|
||||
(string) CRAWL_HOST_DEFAULT_META_ONLY,
|
||||
(string) $hostStatus,
|
||||
(string) $hostNsfw,
|
||||
$hostRobots,
|
||||
$hostRobotsPostfix);
|
||||
|
||||
|
Binary file not shown.
@ -102,11 +102,11 @@ class MySQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, mixed $robots, mixed $robotsPostfix) {
|
||||
public function addHost(string $scheme, string $name, mixed $port, int $crc32url, int $timeAdded, mixed $timeUpdated, int $crawlPageLimit, int $crawlImageLimit, string $crawlMetaOnly, string $status, string $nsfw, mixed $robots, mixed $robotsPostfix) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
$query = $this->_db->prepare('INSERT INTO `host` (`scheme`, `name`, `port`, `crc32url`, `timeAdded`, `timeUpdated`, `crawlPageLimit`, `crawlImageLimit`, `crawlMetaOnly`, `status`, `nsfw`, `robots`, `robotsPostfix`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
||||
|
||||
$query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $robots, $robotsPostfix]);
|
||||
$query->execute([$scheme, $name, $port, $crc32url, $timeAdded, $timeUpdated, $crawlPageLimit, $crawlImageLimit, $crawlMetaOnly, $status, $nsfw, $robots, $robotsPostfix]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
<?php
|
||||
|
||||
// Current version
|
||||
define('API_VERSION', 0.6);
|
||||
define('API_VERSION', 0.7);
|
||||
|
||||
// Load system dependencies
|
||||
require_once('../config/app.php');
|
||||
@ -127,6 +127,7 @@ if (API_ENABLED) {
|
||||
'config' => [
|
||||
'websiteDomain' => WEBSITE_DOMAIN,
|
||||
'crawlUrlRegexp' => CRAWL_URL_REGEXP,
|
||||
'crawlHostDefaultNsfw' => CRAWL_HOST_DEFAULT_NSFW,
|
||||
'crawlHostDefaultPagesLimit' => CRAWL_HOST_DEFAULT_PAGES_LIMIT,
|
||||
'crawlHostDefaultImagesLimit' => CRAWL_HOST_DEFAULT_IMAGES_LIMIT,
|
||||
'crawlHostDefaultStatus' => CRAWL_HOST_DEFAULT_STATUS,
|
||||
|
@ -59,6 +59,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
if ($host = $db->getHost(crc32($hostURL->string))) {
|
||||
|
||||
$hostStatus = $host->status;
|
||||
$hostNsfw = $host->nsfw;
|
||||
$hostPageLimit = $host->crawlPageLimit;
|
||||
$hostId = $host->hostId;
|
||||
$hostRobots = $host->robots;
|
||||
@ -82,6 +83,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
$hostRobotsPostfix = CRAWL_ROBOTS_POSTFIX_RULES;
|
||||
|
||||
$hostStatus = CRAWL_HOST_DEFAULT_STATUS;
|
||||
$hostNsfw = CRAWL_HOST_DEFAULT_NSFW;
|
||||
$hostPageLimit = CRAWL_HOST_DEFAULT_PAGES_LIMIT;
|
||||
$hostId = $db->addHost($hostURL->scheme,
|
||||
$hostURL->name,
|
||||
@ -92,6 +94,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
$hostPageLimit,
|
||||
(string) CRAWL_HOST_DEFAULT_META_ONLY,
|
||||
(string) $hostStatus,
|
||||
(string) $hostNsfw,
|
||||
$hostRobots,
|
||||
$hostRobotsPostfix);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user