1
0
mirror of https://github.com/YGGverse/YGGo.git synced 2025-01-12 15:58:00 +00:00
YGGo/example/environment/sphinx.conf

72 lines
2.7 KiB
Plaintext

source common
{
type = mysql
sql_host = localhost
sql_user =
sql_pass =
sql_db =
sql_port = 3306 # optional, default is 3306
}
source hostPage : common
{
sql_query = \
SELECT `hostPage`.`hostPageId`, \
`hostPage`.`uri`, \
REPLACE(REPLACE(REPLACE(REPLACE(`hostPage`.`uri`, '/', ' '), '_', ' '), '-', ' '), '.', ' ') AS `hostPageURIKeywords`, \
`hostPage`.`rank`, \
`host`.`name`, \
IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`)), \
CRC32 (IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`))), \
IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \
CRC32 (IF (`host`.`port` IS NOT NULL, \
CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \
REGEXP_REPLACE(`hostPage`.`mime`, '^([A-z-]+)/[A-z-]+.*', '$1') AS `mime`, \
(SELECT GROUP_CONCAT(`hostPageDescription`.`title`) \
FROM `hostPageDescription` \
WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `title`, \
(SELECT GROUP_CONCAT(`hostPageDescription`.`description`) \
FROM `hostPageDescription` \
WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `description`, \
(SELECT GROUP_CONCAT(`hostPageDescription`.`keywords`) \
FROM `hostPageDescription` \
WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `keywords` \
FROM `hostPage` \
JOIN `host` ON (`host`.`hostId` = `hostPage`.`hostId`) \
WHERE `hostPage`.`httpCode` = 200 AND `hostPage`.`timeBanned` IS NULL AND `hostPage`.`mime` IS NOT NULL \
sql_attr_uint = rank
sql_attr_string = mime
}
index hostPage
{
source = hostPage
morphology = stem_cz, stem_ar, lemmatize_de_all, lemmatize_ru_all, lemmatize_en_all # stem_enru
path = /var/lib/sphinxsearch/data/hostPage
min_word_len = 2
min_prefix_len = 2
html_strip = 1
index_exact_words = 1
}
indexer
{
mem_limit = 256M
lemmatizer_cache = 256M
}
common {
lemmatizer_base = /var/lib/sphinxsearch/dicts # http://sphinxsearch.com/downloads/dicts
}