mirror of https://github.com/YGGverse/YGGo.git
phpyggdrasilmysqlcrawlerjs-lessalt-websphinxspiderdistributedwebsearch-engineopen-sourcesphinxsearchfederativeweb-archivepdocurlparserfts5privacy-oriented
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
40 lines
1.4 KiB
40 lines
1.4 KiB
# Base MySQL source holding only the connection settings.
# Other sources reuse it through the "source <name> : common" inheritance syntax.
source common
{
    type     = mysql

    # Server location
    sql_host = localhost
    # optional, default is 3306
    sql_port = 3306

    # Account and database name (left empty in this template)
    sql_user =
    sql_pass =
    sql_db   =
}
|
|
|
# Document source for the hostPage index; connection settings come from "common".
#
# Selected rows: pages with httpCode = 200 and timeBanned IS NULL whose host
# has status = '1'.
#
# Columns returned by sql_query:
#   hostPageId      - document ID (must be the first column)
#   uri             - full-text field
#   name            - full-text field (host name)
#   mime            - string attribute: subtype part of hostPage.mime as cut
#                     out by REGEXP_REPLACE
#   rank            - uint attribute: number of hostPageToHostPage rows that
#                     target this page from a different source page
#   pageDescription - full-text field: title, description and keywords of all
#                     hostPageDescription rows of the page, concatenated
#
# NOTE(review): the character range A-z in the mime pattern also spans the six
# ASCII punctuation characters between "Z" and "a", and the pattern stops at
# the first digit, "+" or "." of the subtype. It is left untouched because the
# resulting attribute values may be relied upon by the search frontend.
source hostPage : common
{
    # Sphinx expects UTF-8 input; force the connection charset so that
    # non-ASCII text is not converted to the server's default client charset.
    sql_query_pre = SET NAMES utf8mb4

    # GROUP_CONCAT() output is cut at group_concat_max_len (1024 bytes by
    # default in MySQL), which would truncate pageDescription before indexing.
    # The effective limit is still bounded by max_allowed_packet.
    sql_query_pre = SET SESSION group_concat_max_len = 1048576

    sql_query = \
        SELECT `hostPage`.`hostPageId`, \
               `hostPage`.`uri`, \
               `host`.`name`, \
               REGEXP_REPLACE(`hostPage`.`mime`, '^[A-z-]+/([A-z-]+).*', '$1') AS `mime`, \
               (SELECT COUNT(*) FROM `hostPageToHostPage` \
                 WHERE `hostPageToHostPage`.`hostPageIdTarget` = `hostPage`.`hostPageId` \
                   AND `hostPageToHostPage`.`hostPageIdSource` <> `hostPage`.`hostPageId`) AS `rank`, \
               (SELECT GROUP_CONCAT(CONCAT_WS(' ', `hostPageDescription`.`title`, \
                                                   `hostPageDescription`.`description`, \
                                                   `hostPageDescription`.`keywords`)) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `pageDescription` \
          FROM `hostPage` \
          JOIN `host` ON (`host`.`hostId` = `hostPage`.`hostId`) \
         WHERE `host`.`status` = '1' AND `hostPage`.`httpCode` = 200 AND `hostPage`.`timeBanned` IS NULL

    # Columns stored as attributes instead of being indexed as text
    sql_attr_uint   = rank
    sql_attr_string = mime
}
|
|
|
# Full-text index fed by the "hostPage" source.
index hostPage
{
    # Base path of the index files on disk
    path       = /var/lib/sphinxsearch/data/hostPage

    # Source block that supplies the documents
    source     = hostPage

    # Stemmers applied to indexed words: English/Russian, Czech, Arabic
    morphology = stem_enru, stem_cz, stem_ar
}