#
# Sphinx search configuration: full-text index over crawled host pages
# stored in MySQL.
#

# Shared MySQL connection settings, inherited by the sources below.
# sql_user, sql_pass and sql_db are intentionally left blank here and must
# be filled in for the target installation.
source common
{
  type     = mysql

  sql_host = localhost
  sql_user =
  sql_pass =
  sql_db   =
  sql_port = 3306 # optional, default is 3306
}

# Page documents: one row per `hostPage` joined with its `host`.
#
# Columns returned by sql_query, in order:
#   hostPageId           document id (first column of a Sphinx SQL source)
#   uri                  page URI
#   hostPageURIKeywords  URI with '/', '_', '-' and '.' replaced by spaces
#   rank                 declared below as an unsigned integer attribute
#   name                 host name
#   (unaliased)          host URL:  scheme://name[:port]
#   (unaliased)          CRC32 of the host URL
#   (unaliased)          page URL:  host URL followed by the page URI
#   (unaliased)          CRC32 of the page URL
#   mime                 top-level media type (the part before '/'),
#                        declared below as a string attribute
#   title, description, keywords
#                        GROUP_CONCAT over all `hostPageDescription` rows
#                        of the page
#
# NOTE(review): the four URL / CRC32 expressions carry no alias, so their
# field names are whatever MySQL derives from the expression text. They are
# left unaliased here because renaming fields could affect existing
# field-qualified queries - confirm before adding aliases.
#
# Only pages with httpCode = 200, no ban timestamp and a known MIME type
# are indexed.
source hostPage : common
{
  # GROUP_CONCAT() output is truncated at group_concat_max_len (MySQL default:
  # 1024 bytes). Raise it for the indexing session so that aggregated
  # titles / descriptions / keywords are not silently cut off.
  sql_query_pre = SET SESSION group_concat_max_len = 1048576

  # The MIME pattern uses the explicit class [A-Za-z-]: the range A-z would
  # also match the punctuation between 'Z' and 'a'. The subtype is matched
  # with '.*' so that subtypes starting with a digit (e.g. video/3gpp) are
  # reduced to their top-level type as well.
  sql_query = \
      SELECT `hostPage`.`hostPageId`, \
             `hostPage`.`uri`, \
             REPLACE(REPLACE(REPLACE(REPLACE(`hostPage`.`uri`, '/', ' '), '_', ' '), '-', ' '), '.', ' ') AS `hostPageURIKeywords`, \
             `hostPage`.`rank`, \
             `host`.`name`, \
             IF (`host`.`port` IS NOT NULL, \
                 CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                 CONCAT(`host`.`scheme`, '://', `host`.`name`)), \
             CRC32 (IF (`host`.`port` IS NOT NULL, \
                        CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                        CONCAT(`host`.`scheme`, '://', `host`.`name`))), \
             IF (`host`.`port` IS NOT NULL, \
                 CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                 CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \
             CRC32 (IF (`host`.`port` IS NOT NULL, \
                        CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                        CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \
             REGEXP_REPLACE(`hostPage`.`mime`, '^([A-Za-z-]+)/.*', '$1') AS `mime`, \
             (SELECT GROUP_CONCAT(`hostPageDescription`.`title`) \
                FROM `hostPageDescription` \
               WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `title`, \
             (SELECT GROUP_CONCAT(`hostPageDescription`.`description`) \
                FROM `hostPageDescription` \
               WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `description`, \
             (SELECT GROUP_CONCAT(`hostPageDescription`.`keywords`) \
                FROM `hostPageDescription` \
               WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `keywords` \
        FROM `hostPage` \
        JOIN `host` ON (`host`.`hostId` = `hostPage`.`hostId`) \
       WHERE `hostPage`.`httpCode` = 200 AND `hostPage`.`timeBanned` IS NULL AND `hostPage`.`mime` IS NOT NULL

  sql_attr_uint   = rank
  sql_attr_string = mime
}

# Index built from the hostPage source.
index hostPage
{
  source            = hostPage

  # Morphology processors are applied to indexed words; the lemmatizers rely
  # on the dictionaries configured through lemmatizer_base below.
  morphology        = stem_cz, stem_ar, lemmatize_de_all, lemmatize_ru_all, lemmatize_en_all # stem_enru

  path              = /var/lib/sphinxsearch/data/hostPage

  min_word_len      = 2
  min_prefix_len    = 2

  # Source text may contain HTML markup; strip it before indexing.
  html_strip        = 1

  # Keep the original word forms next to the morphology-processed ones.
  index_exact_words = 1
}

# Indexer resource limits.
indexer
{
  mem_limit        = 256M
  lemmatizer_cache = 256M
}

# Settings shared by all Sphinx tools.
common
{
  lemmatizer_base = /var/lib/sphinxsearch/dicts # http://sphinxsearch.com/downloads/dicts
}