Micro Web Crawler in PHP & Manticore
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

168 lines
4.4 KiB

{
"manticore":
{
"server":
{
"host":"127.0.0.1",
"port":9308
},
"index":
{
"document":{
"name":"yo_document",
"settings":
{
"morphology":"stem_cz,stem_enru",
"index_exact_words":1,
"html_strip":1,
"min_word_len":3,
"min_prefix_len":3
}
}
}
},
"webui":
{
"url":
{
"base":"http://127.0.0.1"
},
"pagination":
{
"limit":20
},
"search":
{
"index":
{
"request":
{
"url":{
"enabled":false,
"regex":"/.*/ui"
}
}
}
}
},
"cli":
{
"document":
{
"crawl":
{
"queue":
{
"limit":1,
"delay":1
},
"selector":
{
"a:not([rel=nofollow])":
{
"attribute":"href",
"external":false,
"regex":"/.*/ui"
},
"image":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"audio":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"video":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"script":
{
"attribute":"href",
"external":false,
"regex":"/.*/ui"
}
},
"snap":
{
"enabled":true
}
}
}
},
"snap":
{
"storage":
{
"tmp":{
"directory":"storage/tmp/snap"
},
"local":{
"enabled":true,
"directory":"storage/snap",
"size":
{
"max":10000024
},
"mime":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"mirror":
{
"ftp":
[
{
"enabled":false,
"connection":
{
"port":21,
"host":"",
"username":"",
"password":"",
"directory":"/snap/yo",
"timeout":30,
"passive":true,
"attempts":
{
"limit":0,
"delay":60
}
},
"size":
{
"max":10000024
},
"mime":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
}
]
}
}
}
}