<?php

// Debug

ini_set('display_errors', '1');
ini_set('display_startup_errors', '1');

error_reporting(E_ALL);

// Website

/*
 * Project domain, without trailing slash
 *
 */
define('WEBSITE_DOMAIN', (isset($_SERVER['HTTP_HOST']) ? 'http://' . $_SERVER['HTTP_HOST'] : ''));

/*
 * Search results per page before the read more link is shown
 *
 */
define('WEBSITE_PAGINATION_SEARCH_RESULTS_LIMIT', 100);

/*
 * Save identicons to the static webp cache (placed in storage/cache) to prevent CPU overload,
 * or false to generate them on every request
 *
 */
define('WEBSITE_IDENTICON_IMAGE_CACHE', true);

// Database

define('DB_HOST', 'localhost');
define('DB_PORT', 3306);
define('DB_NAME', '');
define('DB_USERNAME', '');
define('DB_PASSWORD', '');

// Sphinx

define('SPHINX_HOST', '127.0.0.1');
define('SPHINX_PORT', 9306);

// Crawler settings

/*
 * Pages (URI) processing limit in the crawler.php queue
 *
 * This option relates to the CRAWL_PAGE_SECONDS_OFFSET value
 * and the crontab task frequency (https://github.com/YGGverse/YGGo#crontab)
 *
 * Usually up to 20 pages per minute,
 * to prevent overloading websites with GET crawling requests
 *
 */
define('CRAWL_PAGE_LIMIT', 10);

/*
 * Renew the page index by the timing offset provided
 *
 * This option works with the CRAWL_PAGE_LIMIT step queue
 *
 * Pay attention: the CRAWL_PAGE_LIMIT + CRAWL_PAGE_SECONDS_OFFSET pair
 * must be large enough to crawl all pages collected in the DB index,
 * or the crawler can get stuck in the queue
 *
 */
define('CRAWL_PAGE_SECONDS_OFFSET', 3600);

/*
 * Only URL addresses matching this rule will be auto-crawled
 *
 */
define('CRAWL_URL_REGEXP', '/^.*$/ui'); // ipv6 only '/^http:\/\/\[[\w:]+\].*$/ui'

/*
 * Pages limit per new host by default
 *
 * The crawler stops indexing when this limit is reached, to prevent disk overuse
 *
 * A custom rule for a specific host can be provided in the DB `host`.`crawlPageLimit` field
 *
 */
define('CRAWL_HOST_DEFAULT_PAGES_LIMIT', 1000);

/*
 * Default auto-crawl status for a newly added host
 *
 * true  - the crawler auto-starts the pages indexer, limited by CRAWL_HOST_DEFAULT_PAGES_LIMIT
 * false - requires manual validation by the moderator in the DB `host`.`status` field
 *
 * This option also disables the host in the search results
 *
 */
define('CRAWL_HOST_DEFAULT_STATUS', true);

/*
 * Index meta tags only, to prevent disk overuse,
 * or false to save meta tags + the overall plain text page content
 *
 * A custom rule for a specific host can be provided in the DB `host`.`crawlPageMetaOnly` field
 *
 * This option can affect search results relevance
 *
 */
define('CRAWL_HOST_DEFAULT_META_ONLY', false);

/*
 * Default robots.txt rules applied when the remote file does not exist
 * The crawler is able to overwrite these rules
 *
 * Presets
 * yggdrasil: /database/yggdrasil/host.robots.md
 *
 */
define('CRAWL_ROBOTS_DEFAULT_RULES', null); // string|null

/*
 * Permanent rules appended to robots.txt if it exists, else to CRAWL_ROBOTS_DEFAULT_RULES
 * The crawler does not overwrite these rules
 *
 * Presets
 * yggdrasil: /database/yggdrasil/host.robotsPostfix.md
 *
 */
define('CRAWL_ROBOTS_POSTFIX_RULES', null); // string|null
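
/*
 * Capacity sketch for the crawler queue settings above - an illustration only,
 * not used by the application. Assuming crawler.php is launched by crontab
 * once per minute (see https://github.com/YGGverse/YGGo#crontab), the queue
 * renews at most CRAWL_PAGE_LIMIT pages per run:
 *
 * $crontabIntervalSeconds = 60; // assumption: crontab task frequency
 * $pagesPerWindow = CRAWL_PAGE_LIMIT * (CRAWL_PAGE_SECONDS_OFFSET / $crontabIntervalSeconds);
 *
 * // with the defaults above: 10 * (3600 / 60) = 600 pages per hour,
 * // so the DB index should stay below ~600 pages, or CRAWL_PAGE_LIMIT /
 * // CRAWL_PAGE_SECONDS_OFFSET must be raised to avoid a stuck queue
 */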
// Cleaner settings

/*
 * Hosts limit per crontab execution step (https://github.com/YGGverse/YGGo#crontab)
 *
 * This option works with CLEAN_HOST_SECONDS_OFFSET
 *
 * The value depends on the CPU resources available
 *
 */
define('CLEAN_HOST_LIMIT', 20);

/*
 * Apply cleaning rules to pages older than the value provided
 *
 * This option works with the CLEAN_HOST_LIMIT step queue
 *
 * Pay attention: the CLEAN_HOST_LIMIT + CLEAN_HOST_SECONDS_OFFSET pair
 * must be large enough to process all pages in the DB index,
 * or the cleaner can get stuck in the queue
 *
 */
define('CLEAN_HOST_SECONDS_OFFSET', 3600);

// API settings

/*
 * JSON API features
 *
 * When false - all the action settings below will be ignored
 *
 */
define('API_ENABLED', true);

/*
 * Search API
 *
 * When false - API_SEARCH_PAGINATION_RESULTS_LIMIT will be ignored
 *
 */
define('API_SEARCH_ENABLED', true);

/*
 * Search results per page
 *
 */
define('API_SEARCH_PAGINATION_RESULTS_LIMIT', 20);

/*
 * Hosts distribution API
 *
 * When false - API_HOSTS_FIELDS will be ignored
 *
 */
define('API_HOSTS_ENABLED', true);

/*
 * Database host fields, comma separated, or * to share all the fields
 *
 */
define('API_HOSTS_FIELDS', '`scheme`,`name`,`port`,`crawlPageLimit`,`robots`,`robotsPostfix`,`timeAdded`,`timeUpdated`'); // string: *|field names comma separated
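
/*
 * For reference, a minimal sketch of how API_HOSTS_FIELDS could be consumed -
 * a hypothetical helper, not part of the application code:
 *
 * function hostFieldsToSql(string $fields): string
 * {
 *     // '*' shares every column; otherwise the value is already a
 *     // comma-separated list of backtick-quoted column names
 *     return trim($fields) === '*' ? '*' : $fields;
 * }
 *
 * // e.g. 'SELECT ' . hostFieldsToSql(API_HOSTS_FIELDS) . ' FROM `host`'
 */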