diff --git a/README.md b/README.md index 7912100..e1b94b7 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,41 @@ sphinxsearch * * * * * cd /YGGo/crontab && php crawler.php > /dev/null 2>&1 ``` +#### API + +JSON interface for building third-party applications and for distributed index sharing. +Can be enabled or disabled with the API_ENABLED option. + +Address + +``` +/api.php +``` + +##### Search API + +Returns search results. +Can be enabled or disabled with the API_SEARCH_ENABLED option. + +###### Request attributes + +``` +GET action=search +GET query={string} - search query, empty if not provided +GET page={int} - search results page number, 1 if not provided +``` + +##### Hosts distribution API + +Returns the hosts collected by the node, with the fields listed in the API_HOSTS_FIELDS setting. +Can be enabled or disabled with the API_HOSTS_ENABLED option. + +###### Request attributes + +``` +GET action=hosts +``` + #### Roadmap / ideas * [x] Web pages full text ranking search @@ -66,6 +101,7 @@ sphinxsearch * [ ] Implement smart queue algorithm that indexing new sites homepage in higher priority * [ ] Implement database auto backup on crawl process completing * [x] Add transactions to prevent data loss on DB crashes +* [x] JSON API * [ ] Distributed index data sharing between the nodes trough service API * [x] An idea to make unique gravatars for sites without favicons, because simpler to ident, comparing to ipv6 * [ ] An idea to make some visitors counters, like in good old times? 
diff --git a/config/app.php.txt b/config/app.php.txt
index f75600f..cc4ca92 100644
--- a/config/app.php.txt
+++ b/config/app.php.txt
@@ -146,4 +146,13 @@ define('CLEAN_HOST_LIMIT', 20);
  * or the cleaner can stuck in queue
  *
  */
-define('CLEAN_HOST_SECONDS_OFFSET', 3600);
\ No newline at end of file
+define('CLEAN_HOST_SECONDS_OFFSET', 3600);
+
+// API settings
+define('API_ENABLED', true);
+
+define('API_SEARCH_ENABLED', true);
+define('API_SEARCH_PAGINATION_RESULTS_LIMIT', 20);
+
+define('API_HOSTS_ENABLED', true);
+define('API_HOSTS_FIELDS', '`scheme`,`name`,`port`,`crawlPageLimit`,`robots`,`robotsPostfix`,`timeAdded`,`timeUpdated`'); // string: *|field names comma separated
\ No newline at end of file
diff --git a/library/mysql.php b/library/mysql.php
index d517d80..7cb42b4 100644
--- a/library/mysql.php
+++ b/library/mysql.php
@@ -29,6 +29,31 @@ class MySQL {
   }
 
   // Host
+  /**
+   * Fetch every row of the `host` table for the hosts distribution API.
+   *
+   * The column list is interpolated into the SQL text (identifiers cannot be
+   * bound as statement parameters), so it is validated first: either `*` or a
+   * comma separated list of plain, optionally backtick-quoted, column names.
+   *
+   * @param string $apiHostFields column list, e.g. the API_HOSTS_FIELDS setting
+   * @return array rows as returned by the statement's fetchAll()
+   * @throws InvalidArgumentException when the column list is malformed
+   */
+  public function getAPIHosts(string $apiHostFields) {
+
+    if (!preg_match('/^\s*(?:\*|`?\w+`?(?:\s*,\s*`?\w+`?)*)\s*$/', $apiHostFields)) {
+
+      throw new InvalidArgumentException('Invalid host fields list');
+    }
+
+    $query = $this->_db->prepare('SELECT ' . $apiHostFields . ' FROM `host`');
+
+    $query->execute();
+
+    return $query->fetchAll();
+  }
+
   public function getHost(int $crc32url) {
 
     $query = $this->_db->prepare('SELECT * FROM `host` WHERE `crc32url` = ? LIMIT 1');
diff --git a/public/api.php b/public/api.php
new file mode 100644
index 0000000..500187d
--- /dev/null
+++ b/public/api.php
@@ -0,0 +1,115 @@
+ false,
+    'message' => _('API requests disabled by the node owner.'),
+  ];
+}
+
+// Action
+// Dispatch only while the API is enabled: every branch below assigns $response,
+// so running the switch unconditionally would discard the "disabled" reply above.
+if (API_ENABLED) {
+
+  switch (!empty($_GET['action']) ? $_GET['action'] : false) {
+
+    // Search API
+    case 'search':
+
+      // Honour the per-endpoint API_SEARCH_ENABLED option
+      if (!API_SEARCH_ENABLED) {
+
+        $response = [
+          'status'  => false,
+          'message' => _('Search API requests disabled by the node owner.'),
+        ];
+
+        break;
+      }
+
+      // Connect database
+      $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
+
+      // Connect Sphinx search server
+      $sphinx = new SphinxQL(SPHINX_HOST, SPHINX_PORT);
+
+      // Filter request data (array-valued input such as query[]=x is treated as empty)
+      $query = !empty($_GET['query']) && is_string($_GET['query']) ? Filter::url($_GET['query']) : '';
+
+      // Page is only used for offset arithmetic: force a positive integer so that
+      // non-numeric, zero or negative input falls back to the first page
+      $page = isset($_GET['page']) && is_scalar($_GET['page']) ? max(1, (int) $_GET['page']) : 1;
+
+      // Make search request
+      $sphinxResults      = $sphinx->searchHostPages('"' . $query . '"', ($page - 1) * API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT);
+      $sphinxResultsTotal = $sphinx->searchHostPagesTotal('"' . $query . '"');
+
+      // Generate results
+      $dbResults = [];
+
+      foreach ($sphinxResults as $sphinxResult) {
+
+        if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) {
+
+          $dbResults[] = $hostPage;
+        }
+      }
+
+      // Make response
+      $response = [
+        'status' => true,
+        'totals' => $sphinxResultsTotal,
+        'result' => $dbResults,
+      ];
+
+    break;
+
+    // Host API
+    case 'hosts':
+
+      // Honour the per-endpoint API_HOSTS_ENABLED option
+      if (!API_HOSTS_ENABLED) {
+
+        $response = [
+          'status'  => false,
+          'message' => _('Hosts API requests disabled by the node owner.'),
+        ];
+
+        break;
+      }
+
+      // Connect database
+      $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
+
+      $response = [
+        'status' => true,
+        'totals' => $db->getTotalHosts(),
+        'result' => $db->getAPIHosts(API_HOSTS_FIELDS),
+      ];
+
+    break;
+
+    default:
+
+      $response = [
+        'status'  => false,
+        'message' => _('Undefined API action request.'),
+      ];
+  }
+}
+
+// Output
+header('Content-Type: application/json; charset=utf-8');
+
+echo json_encode($response);
\ No newline at end of file