Browse Source

implement basic api

main
ghost 2 years ago
parent
commit
9916fb701f
  1. 36
      README.md
  2. 9
      config/app.php.txt
  3. 9
      library/mysql.php
  4. 86
      public/api.php

36
README.md

@ -53,6 +53,41 @@ sphinxsearch @@ -53,6 +53,41 @@ sphinxsearch
* * * * * cd /YGGo/crontab && php crawler.php > /dev/null 2>&1
```
#### API
JSON interface for building third-party applications and for distributed index sharing.
Can be enabled or disabled with the `API_ENABLED` option.
Address
```
/api.php
```
##### Search API
Returns search results.
Can be enabled or disabled with the `API_SEARCH_ENABLED` option.
###### Request attributes
```
GET action=search
GET query={string} - search request, empty if not provided
GET page={int} - search results page, 1 if not provided
```
##### Hosts distribution API
Returns the hosts collected by the node, limited to the fields listed in the `API_HOSTS_FIELDS` setting.
Can be enabled or disabled with the `API_HOSTS_ENABLED` option.
###### Request attributes
```
GET action=hosts
```
#### Roadmap / ideas
* [x] Web pages full text ranking search
@ -66,6 +101,7 @@ sphinxsearch @@ -66,6 +101,7 @@ sphinxsearch
* [ ] Implement a smart queue algorithm that indexes the homepages of new sites with higher priority
* [ ] Implement database auto backup on crawl process completing
* [x] Add transactions to prevent data loss on DB crashes
* [x] JSON API
* [ ] Distributed index data sharing between the nodes through the service API
* [x] An idea to make unique gravatars for sites without favicons, because they are easier to identify than IPv6 addresses
* [ ] An idea to make some visitors counters, like in good old times?

9
config/app.php.txt

@ -147,3 +147,12 @@ define('CLEAN_HOST_LIMIT', 20); @@ -147,3 +147,12 @@ define('CLEAN_HOST_LIMIT', 20);
*
*/
define('CLEAN_HOST_SECONDS_OFFSET', 3600);
// API settings

// Master switch for the JSON API; public/api.php checks it before dispatching an action
define('API_ENABLED', true);

// README documents this as the on/off switch for the search endpoint (action=search)
define('API_SEARCH_ENABLED', true);

// Search results per API page: public/api.php passes it as the result limit
// and multiplies it by the page number to derive the offset
define('API_SEARCH_PAGINATION_RESULTS_LIMIT', 20);

// README documents this as the on/off switch for the hosts endpoint (action=hosts)
define('API_HOSTS_ENABLED', true);

// Columns of the `host` table exposed by the hosts endpoint.
// The value is inserted as-is into the SELECT column list by MySQL::getAPIHosts(),
// so it must contain trusted column names only
define('API_HOSTS_FIELDS', '`scheme`,`name`,`port`,`crawlPageLimit`,`robots`,`robotsPostfix`,`timeAdded`,`timeUpdated`'); // string: *|field names comma separated

9
library/mysql.php

@ -29,6 +29,15 @@ class MySQL { @@ -29,6 +29,15 @@ class MySQL {
}
// Host
/**
 * Select the requested columns from every row of the `host` table.
 *
 * Column names cannot be bound as prepared-statement parameters, so the list
 * ends up inside the SQL text itself. It is therefore validated before use:
 * anything other than `*` or a comma separated list of plain / backtick-quoted
 * column names is rejected instead of being sent to the database.
 *
 * @param string $apiHostFields `*` or comma separated column names (e.g. "`scheme`,`name`")
 * @return mixed whatever fetchAll() of the underlying statement returns
 * @throws \InvalidArgumentException when $apiHostFields is not a valid column list
 */
public function getAPIHosts(string $apiHostFields) {

  // A single column: either `quoted` or a bare identifier
  $column  = '(?:`[A-Za-z0-9_]+`|[A-Za-z_][A-Za-z0-9_]*)';
  $pattern = '/^\s*(?:\*|' . $column . '(?:\s*,\s*' . $column . ')*)\s*$/';

  if (preg_match($pattern, $apiHostFields) !== 1) {

    throw new \InvalidArgumentException('Invalid API host fields list: expected "*" or comma separated column names.');
  }

  $query = $this->_db->prepare('SELECT ' . $apiHostFields . ' FROM `host`');

  $query->execute();

  return $query->fetchAll();
}
public function getHost(int $crc32url) {
$query = $this->_db->prepare('SELECT * FROM `host` WHERE `crc32url` = ? LIMIT 1');

86
public/api.php

@ -0,0 +1,86 @@ @@ -0,0 +1,86 @@
<?php

/*
 * JSON API entry point.
 *
 * Dispatches on the `action` GET parameter and always answers with a JSON
 * document that has a boolean `status` key:
 *
 *   action=search  - full text search; optional `query` (string) and `page` (int, 1-based)
 *   action=hosts   - hosts list restricted to the API_HOSTS_FIELDS columns
 *
 * The API as a whole is gated by API_ENABLED, the individual actions by
 * API_SEARCH_ENABLED / API_HOSTS_ENABLED. A disabled API or action returns
 * `status: false` with a message and does not touch the database or Sphinx.
 */

// Load system dependencies
require_once('../config/app.php');
require_once('../library/curl.php');
require_once('../library/robots.php');
require_once('../library/filter.php');
require_once('../library/parser.php');
require_once('../library/mysql.php');
require_once('../library/sphinxql.php');

// Requested action; anything that is not a non-empty string falls through to the default branch
$action = (!empty($_GET['action']) && is_string($_GET['action'])) ? $_GET['action'] : false;

if (!API_ENABLED) {

  // Global switch is off: answer immediately, never reach the action handlers
  $response = [
    'status'  => false,
    'message' => _('API requests disabled by the node owner.'),
  ];

} else {

  switch ($action) {

    // Search API
    case 'search':

      if (!API_SEARCH_ENABLED) {

        $response = [
          'status'  => false,
          'message' => _('Search API requests disabled by the node owner.'),
        ];

        break;
      }

      // Connect database
      $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);

      // Connect Sphinx search server
      $sphinx = new SphinxQL(SPHINX_HOST, SPHINX_PORT);

      // Filter request data
      $query = (!empty($_GET['query']) && is_string($_GET['query'])) ? Filter::url($_GET['query']) : '';

      // Page is 1-based; cast to int and clamp so the offset below can never be negative
      $page = !empty($_GET['page']) ? max(1, (int) $_GET['page']) : 1;

      // Make search request
      $sphinxResults      = $sphinx->searchHostPages('"' . $query . '"', ($page - 1) * API_SEARCH_PAGINATION_RESULTS_LIMIT, API_SEARCH_PAGINATION_RESULTS_LIMIT);
      $sphinxResultsTotal = $sphinx->searchHostPagesTotal('"' . $query . '"');

      // Generate results: keep only the index hits that still resolve to a database record
      $dbResults = [];

      foreach ($sphinxResults as $sphinxResult) {

        if ($hostPage = $db->getFoundHostPage($sphinxResult->id)) {

          $dbResults[] = $hostPage;
        }
      }

      // Make response
      $response = [
        'status' => true,
        'totals' => $sphinxResultsTotal,
        'result' => $dbResults,
      ];

    break;

    // Host API
    case 'hosts':

      if (!API_HOSTS_ENABLED) {

        $response = [
          'status'  => false,
          'message' => _('Hosts API requests disabled by the node owner.'),
        ];

        break;
      }

      // Connect database
      $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);

      $response = [
        'status' => true,
        'totals' => $db->getTotalHosts(),
        'result' => $db->getAPIHosts(API_HOSTS_FIELDS),
      ];

    break;

    default:

      $response = [
        'status'  => false,
        'message' => _('Undefined API action request.'),
      ];
  }
}

// Output
header('Content-Type: application/json; charset=utf-8');

echo json_encode($response);
Loading…
Cancel
Save