Micro Web Crawler in PHP & Manticore
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
{
  "manticore": {
    "server": {
      "host": "127.0.0.1",
      "port": 9308
    },
    "index": {
      "document": {
        "name": "yo_document",
        "settings": {
          "morphology": "stem_cz,stem_enru",
          "index_exact_words": 1,
          "html_strip": 1,
          "min_word_len": 3,
          "min_prefix_len": 3
        }
      }
    }
  },
  "webui": {
    "url": {
      "base": "http://127.0.0.1"
    },
    "pagination": {
      "limit": 20
    },
    "search": {
      "index": {
        "request": {
          "url": {
            "enabled": false,
            "regex": "/.*/ui"
          }
        }
      }
    }
  },
  "cli": {
    "document": {
      "crawl": {
        "curl": {
          "connection": {
            "timeout": 3
          },
          "download": {
            "size": {
              "max": 10000024
            }
          }
        },
        "queue": {
          "limit": 1,
          "delay": 1
        },
        "selector": {
          "a:not([rel=nofollow])": {
            "attribute": "href",
            "external": false,
            "regex": "/.*/ui"
          },
          "image": {
            "attribute": "src",
            "external": false,
            "regex": "/.*/ui"
          },
          "audio": {
            "attribute": "src",
            "external": false,
            "regex": "/.*/ui"
          },
          "video": {
            "attribute": "src",
            "external": false,
            "regex": "/.*/ui"
          },
          "script": {
            "attribute": "href",
            "external": false,
            "regex": "/.*/ui"
          }
        },
        "snap": {
          "enabled": true
        }
      }
    }
  },
  "snap": {
    "storage": {
      "tmp": {
        "directory": "storage/tmp/snap"
      },
      "local": {
        "enabled": true,
        "directory": "storage/snap",
        "size": {
          "max": 10000024
        },
        "mime": [
          "application/xhtml+xml",
          "application/javascript",
          "text/html",
          "text/plain",
          "text/css",
          "image/webp",
          "image/png",
          "image/gif",
          "image/ico"
        ]
      },
      "remote": {
        "ftp": [
          {
            "enabled": false,
            "connection": {
              "port": 21,
              "host": "",
              "username": "",
              "password": "",
              "directory": "/snap/yo",
              "timeout": 30,
              "passive": true,
              "attempts": {
                "limit": 0,
                "delay": 60
              }
            },
            "size": {
              "max": 10000024
            },
            "mime": [
              "application/xhtml+xml",
              "application/javascript",
              "text/html",
              "text/plain",
              "text/css",
              "image/webp",
              "image/png",
              "image/gif",
              "image/ico"
            ]
          }
        ]
      }
    }
  }
}