mirror of https://github.com/YGGverse/YGGo.git
php yggdrasil crawler mysql js-less spider alt-web sphinx web search-engine open-source distributed curl parser fts5 privacy-oriented sphinxsearch federative web-archive pdo
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.7 KiB
72 lines
2.7 KiB
# Base MySQL connection settings. Other sources in this file inherit from
# this section (`source <name> : common`) instead of repeating them.
source common
{
    type     = mysql

    # Server endpoint
    sql_host = localhost
    sql_port = 3306 # optional, default is 3306

    # Schema and credentials are intentionally left blank in this template
    sql_db   =
    sql_user =
    sql_pass =
}
|
|
|
# Document source for crawled pages: one document per `hostPage` row, joined
# with its `host`; connection settings are inherited from `common`.
#
# Selected columns, in order:
#   - `hostPageId`                    document id
#   - `uri`, `hostPageURIKeywords`    page URI, plus the same URI with
#                                     '/', '_', '-' and '.' replaced by spaces
#   - `rank`                          declared below as an unsigned int attribute
#   - `host`.`name`                   host name
#   - host URL and CRC32 of it        scheme://name[:port]
#   - page URL and CRC32 of it        scheme://name[:port] + uri
#   - `mime`                          top-level media type (text before the first
#                                     '/'), declared below as a string attribute
#   - `title`, `description`, `keywords`
#                                     all `hostPageDescription` rows of the page,
#                                     merged with GROUP_CONCAT
#
# Columns that are not declared as attributes are indexed as full-text fields.
# NOTE(review): the host URL / page URL expressions and their CRC32 values have
# no aliases and no attribute declarations - confirm whether they were meant
# to be named attributes (e.g. for grouping by host).
#
# `mime` is reduced with SUBSTRING_INDEX rather than a regular expression so
# the result does not depend on the server's regex dialect and subtypes that
# start with a digit (e.g. `video/3gpp`) are handled too.
source hostPage : common
{
    # GROUP_CONCAT() output is truncated at group_concat_max_len, which is only
    # 1024 bytes by default; raise it for this session so the merged
    # title / description / keywords text is not cut silently.
    sql_query_pre = SET SESSION group_concat_max_len = 1048576

    # The last line of the query deliberately has no trailing backslash, so the
    # directives that follow can never be swallowed into the SQL text.
    sql_query = \
        SELECT `hostPage`.`hostPageId`, \
               `hostPage`.`uri`, \
               REPLACE(REPLACE(REPLACE(REPLACE(`hostPage`.`uri`, '/', ' '), '_', ' '), '-', ' '), '.', ' ') AS `hostPageURIKeywords`, \
               `hostPage`.`rank`, \
               `host`.`name`, \
               IF (`host`.`port` IS NOT NULL, \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`)), \
               CRC32 (IF (`host`.`port` IS NOT NULL, \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`))), \
               IF (`host`.`port` IS NOT NULL, \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \
               CRC32 (IF (`host`.`port` IS NOT NULL, \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \
               SUBSTRING_INDEX(`hostPage`.`mime`, '/', 1) AS `mime`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`title`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `title`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`description`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `description`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`keywords`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `keywords` \
          FROM `hostPage` \
          JOIN `host` ON (`host`.`hostId` = `hostPage`.`hostId`) \
         WHERE `hostPage`.`httpCode` != 0 AND `hostPage`.`httpCode` IS NOT NULL AND `hostPage`.`mime` IS NOT NULL

    sql_attr_uint   = rank
    sql_attr_string = mime
}
|
|
|
# Full-text index built from the `hostPage` source.
index hostPage
{
    # Input and on-disk location
    source            = hostPage
    path              = /var/lib/sphinxsearch/data/hostPage

    # Text preparation: strip HTML markup from the indexed text
    html_strip        = 1

    # Word forms: Czech and Arabic stemmers plus German, Russian and English
    # lemmatizers (the *_all variants); exact word forms are indexed as well.
    # NOTE(review): no charset_table is set in this section - confirm that the
    # default table covers the Czech, German and Arabic characters these
    # processors expect.
    morphology        = stem_cz, stem_ar, lemmatize_de_all, lemmatize_ru_all, lemmatize_en_all # stem_enru
    index_exact_words = 1

    # Minimum lengths for indexed words and for indexed prefixes
    min_word_len      = 2
    min_prefix_len    = 2
}
|
|
|
# Settings for the `indexer` program.
indexer
{
    # Cache size for the lemmatizers
    lemmatizer_cache = 256M

    # Memory limit for an indexing run
    mem_limit        = 256M
}
|
|
|
# Settings shared by the Sphinx programs.
common
{
    # Directory holding the lemmatizer dictionaries;
    # see http://sphinxsearch.com/downloads/dicts
    lemmatizer_base = /var/lib/sphinxsearch/dicts
}