Merge pull request #260 from dryabov/search

add search method
This commit is contained in:
miguelfreitas 2014-09-21 09:47:26 -03:00
commit 00b67caea4
7 changed files with 451 additions and 3 deletions

View File

@ -311,6 +311,7 @@ namespace libtorrent
entry const &p, std::string const &sig_p, std::string const &sig_user, bool local);
void dht_getData(std::string const &username, std::string const &resource, bool multi);
entry dht_getLocalData() const;
#ifndef TORRENT_NO_DEPRECATE

View File

@ -442,6 +442,7 @@ namespace libtorrent
entry const &p, std::string const &sig_p, std::string const &sig_user, bool local);
void dht_getData(std::string const &username, std::string const &resource, bool multi);
entry dht_getLocalData() const;
#ifndef TORRENT_NO_DEPRECATE
// deprecated in 0.15

View File

@ -869,6 +869,15 @@ namespace libtorrent
#endif
}
entry session::dht_getLocalData() const
{
#ifndef TORRENT_DISABLE_DHT
return m_impl->dht_getLocalData();
#else
return entry();
#endif
}
bool session::is_dht_running() const
{
#ifndef TORRENT_DISABLE_DHT

View File

@ -5821,6 +5821,16 @@ retry:
boost::bind( getDataDone_fun, this, username, resource, multi, _1, _2));
}
entry session_impl::dht_getLocalData() const
{
if( m_dht ) {
entry state = m_dht->state();
return state["storage_table"];
} else {
return entry();
}
}
void session_impl::on_dht_router_name_lookup(error_code const& e
, tcp::resolver::iterator host)
{

View File

@ -264,6 +264,7 @@ static const CRPCCommand vRPCCommands[] =
{ "gettrendinghashtags", &gettrendinghashtags, false, true, true },
{ "getspamposts", &getspamposts, false, true, false },
{ "torrentstatus", &torrentstatus, false, true, false },
{ "search", &search, false, true, false },
};
CRPCTable::CRPCTable()
@ -1314,6 +1315,7 @@ Array RPCConvertValues(const std::string &strMethod, const std::vector<std::stri
if (strMethod == "getspamposts" && n > 0) ConvertTo<boost::int64_t>(params[0]);
if (strMethod == "getspamposts" && n > 1) ConvertTo<boost::int64_t>(params[1]);
if (strMethod == "getspamposts" && n > 2) ConvertTo<boost::int64_t>(params[2]);
if (strMethod == "search" && n > 2) ConvertTo<boost::int64_t>(params[2]);
return params;
}

View File

@ -217,5 +217,6 @@ extern json_spirit::Value recheckusertorrent(const json_spirit::Array& params, b
extern json_spirit::Value gettrendinghashtags(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value getspamposts(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value torrentstatus(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value search(const json_spirit::Array& params, bool fHelp);
#endif

View File

@ -1077,7 +1077,7 @@ bool processReceivedDM(lazy_entry const* post)
StoredDirectMsg stoDM;
stoDM.m_fromMe = false;
stoDM.m_text = textOut;
stoDM.m_utcTime = post->dict_find_int_value("time");;
stoDM.m_utcTime = post->dict_find_int_value("time");
LOCK(cs_twister);
// store this dm in memory list, but prevent duplicates
@ -1671,7 +1671,7 @@ int findLastPublicPostLocalUser( std::string strUsername )
std::vector<std::string> pieces;
int max_id = std::numeric_limits<int>::max();
int since_id = -1;
h.get_pieces(pieces, 1, max_id, since_id, USERPOST_FLAG_RT);
h.get_pieces(pieces, 1, max_id, since_id, ~USERPOST_FLAG_DM);
if( pieces.size() ) {
string const& piece = pieces.front();
@ -1898,7 +1898,7 @@ Value getposts(const Array& params, bool fHelp)
int count = params[0].get_int();
Array users = params[1].get_array();
int flags = (params.size() > 2) ? params[2].get_int() : USERPOST_FLAG_RT;
int flags = (params.size() > 2) ? params[2].get_int() : ~USERPOST_FLAG_DM;
std::multimap<int64,entry> postsByTime;
@ -2378,3 +2378,427 @@ Value torrentstatus(const Array& params, bool fHelp)
return result;
}
class TextSearch
{
public:
enum search_mode {
TEXTSEARCH_EXACT,
TEXTSEARCH_ALL,
TEXTSEARCH_ANY
};
TextSearch(std::string const &keyword, libtorrent::entry const &params);
bool matchText(std::string msg);
bool matchTime(int64_t time);
libtorrent::lazy_entry const* matchRawMessage(std::string const &rawMessage, libtorrent::lazy_entry &v);
private:
std::vector<std::string> keywords;
search_mode mode;
bool caseInsensitive;
int64_t timeMin, timeMax;
std::string username;
};
TextSearch::TextSearch(string const &keyword, entry const &params) :
mode(TEXTSEARCH_EXACT),
caseInsensitive(false),
timeMin(numeric_limits<int64_t>::min()),
timeMax(numeric_limits<int64_t>::max())
{
entry const *pMode = params.find_key("mode");
if( pMode && pMode->type() == entry::string_t ) {
string strMode = pMode->string();
if( strMode == "all" ) {
mode = TEXTSEARCH_ALL;
} else if( strMode == "any" ) {
mode = TEXTSEARCH_ANY;
}
}
entry const *pCase = params.find_key("case");
if( pCase && pCase->type() == entry::string_t && pCase->string() == "insensitive" ) {
caseInsensitive = true;
}
int64_t now = GetAdjustedTime();
entry const *pAgeMin = params.find_key("agemin");
if( pAgeMin && pAgeMin->type() == entry::int_t ) {
timeMax = now - pAgeMin->integer() * 24*60*60;
}
entry const *pAgeMax = params.find_key("agemax");
if( pAgeMax && pAgeMax->type() == entry::int_t ) {
timeMin = now - pAgeMax->integer() * 24*60*60;
}
entry const *pUsername = params.find_key("username");
if( pUsername && pUsername->type() == entry::string_t ) {
username = pUsername->string();
}
if( mode == TEXTSEARCH_EXACT ) {
keywords.push_back( keyword );
} else {
stringstream stream( keyword );
string word;
while( getline(stream, word, ' ') ) {
if( !word.empty() ) {
keywords.push_back( word );
}
}
}
if( caseInsensitive ) {
for( vector<string>::iterator it=keywords.begin(); it != keywords.end(); ++it ) {
#ifdef HAVE_BOOST_LOCALE
*it = boost::locale::to_lower(*it);
#else
boost::algorithm::to_lower(*it);
#endif
}
}
}
bool TextSearch::matchText(string msg)
{
if( keywords.size() == 0 ) {
return false;
}
if( caseInsensitive ) { // that is why msg is passed by value
#ifdef HAVE_BOOST_LOCALE
msg = boost::locale::to_lower(msg);
#else
boost::algorithm::to_lower(msg);
#endif
}
switch( mode ) {
case TEXTSEARCH_EXACT:
return msg.find(keywords[0]) != string::npos;
case TEXTSEARCH_ALL:
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
if( msg.find(*it) == string::npos ) {
return false;
}
}
return true;
case TEXTSEARCH_ANY:
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
if( msg.find(*it) != string::npos ) {
return true;
}
}
return false;
}
return false;
}
inline bool TextSearch::matchTime(int64_t time)
{
return time >= timeMin && time <= timeMax;
}
lazy_entry const* TextSearch::matchRawMessage(string const &rawMessage, lazy_entry &v)
{
if( keywords.size() == 0 ) {
return 0;
}
// fast check
if( !caseInsensitive && mode != TEXTSEARCH_ANY && rawMessage.find(keywords[0]) == string::npos ) {
return 0;
}
int pos;
libtorrent::error_code ec;
if (lazy_bdecode(rawMessage.data(), rawMessage.data()+rawMessage.size(), v, ec, &pos) == 0) {
lazy_entry const* vv = v.dict_find_dict("v");
lazy_entry const* post = vv ? vv->dict_find_dict("userpost") : v.dict_find_dict("userpost");
if( post ) {
lazy_entry const* rt = post->dict_find_dict("rt");
lazy_entry const* p = rt ? rt : post;
if( username.length() ) {
string user = p->dict_find_string_value("n");
if( user != username ) {
return 0;
}
}
int64_t time = p->dict_find_int_value("time");
if( !matchTime(time) ) {
return 0;
}
string msg = p->dict_find_string_value("msg");
return matchText( msg ) ? p : 0;
}
}
return 0;
}
Value search(const Array& params, bool fHelp)
{
if (fHelp || params.size() < 3 || params.size() > 4)
throw runtime_error(
"search <scope> <text> <count> ['{\"username\":username,\"mode\":\"exact\"|\"all\"|\"any\",\"case\":\"sensitive\"|\"insensitive\",\"agemin\":agemin,\"agemax\":agemin}']\n"
"search text in available data\n"
"<scope> is data area: messages, directmsgs, profiles, users, hashtags\n"
"<text> is a phrase to search\n"
"up to <count> entries are returned\n"
"<username> in messages scope is optional and allows to search in username's messages only\n"
"<username> in directmsgs scope is required and sets whose conversation to search\n"
"\"mode\" and \"case\" are search mode options\n"
"\"agemin\" and \"agemax\" (days) are message date filter\n"
"\"mode\", \"case\", \"agemin\", and \"agemax\" are optional");
string scope = params[0].get_str();
string keyword = params[1].get_str();
int count = params[2].get_int();
entry options = params.size()==4 ? jsonToEntry(params[3].get_obj()) : entry();
string username;
if( keyword.size() == 0 ) {
throw runtime_error("Empty <text> parameter");
}
entry const *pUsername = options.find_key("username");
if( pUsername && pUsername->type() == entry::string_t ) {
username = pUsername->string();
}
Array ret;
if( scope == "messages" ) {
// search public messages
std::map< pair<std::string,int>, pair<int64,entry> > posts;
lazy_entry v;
TextSearch searcher(keyword, options);
// search public messages in torrents
{
LOCK(cs_twister);
std::map<std::string,torrent_handle> users;
if( username.size() == 0 ) {
users = m_userTorrent;
} else {
if( m_userTorrent.count(username) )
users[username] = m_userTorrent[username];
}
BOOST_FOREACH(const PAIRTYPE(std::string,torrent_handle)& item, users) {
std::vector<std::string> pieces;
item.second.get_pieces(pieces, std::numeric_limits<int>::max(), std::numeric_limits<int>::max(), -1, ~USERPOST_FLAG_DM);
BOOST_FOREACH(string const& piece, pieces) {
lazy_entry const* p = searcher.matchRawMessage(piece, v);
if( p ) {
string n = p->dict_find_string_value("n");
int k = p->dict_find_int_value("k");
int64 time = p->dict_find_int_value("time",-1);
entry vEntry;
vEntry = *p;
hexcapePost(vEntry);
posts[pair<std::string,int>(n,k)] = pair<int64,entry>(time,vEntry);
}
}
}
}
// search messages in dht
boost::shared_ptr<session> ses(m_ses);
if( ses )
{
entry data = ses->dht_getLocalData();
if( data.type() == entry::dictionary_t ) {
for (entry::dictionary_type::const_iterator i = data.dict().begin(); i != data.dict().end(); ++i) {
if ( i->second.type() != entry::list_t )
continue;
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
entry const* key_p = j->find_key("p");
if( key_p ) {
string str_p = key_p->string();
lazy_entry const* p = searcher.matchRawMessage(str_p, v);
if( p ) {
string n = p->dict_find_string_value("n");
int k = p->dict_find_int_value("k");
pair<std::string,int> post_id(n,k);
if( posts.count(post_id) == 0 ) {
int64 time = p->dict_find_int_value("time",-1);
entry vEntry;
vEntry = *p;
hexcapePost(vEntry);
posts[post_id] = pair<int64,entry>(time,vEntry);
}
}
}
}
}
}
}
std::multimap<int64,entry> postsByTime;
BOOST_FOREACH(const PAIRTYPE(PAIRTYPE(std::string,int),PAIRTYPE(int64,entry))& item, posts) {
postsByTime.insert(item.second);
}
std::multimap<int64,entry>::reverse_iterator rit;
for (rit=postsByTime.rbegin(); rit!=postsByTime.rend() && (int)ret.size() < count; ++rit) {
ret.push_back( entryToJson(rit->second) );
}
} else if( scope == "directmsgs" ) {
// search direct messages
if( m_users.count(username) ) {
std::multimap<int64,entry> postsByTime;
TextSearch searcher(keyword, options);
{
LOCK(cs_twister);
BOOST_FOREACH(const PAIRTYPE(std::string,std::vector<StoredDirectMsg>)& list, m_users[username].m_directmsg) {
string remoteUser = list.first;
BOOST_FOREACH(const StoredDirectMsg& item, list.second) {
if( searcher.matchText(item.m_text) ) {
int64_t time = item.m_utcTime;
if( searcher.matchTime(time) ) {
entry vEntry;
vEntry["remoteUser"] = remoteUser;
vEntry["text"] = item.m_text;
vEntry["time"] = time;
vEntry["fromMe"] = item.m_fromMe;
hexcapePost(vEntry);
postsByTime.insert( pair<int64,entry>(time, vEntry) );
}
}
}
}
}
std::multimap<int64,entry>::reverse_iterator rit;
for (rit=postsByTime.rbegin(); rit!=postsByTime.rend() && (int)ret.size() < count; ++rit) {
ret.push_back( entryToJson(rit->second) );
}
}
} else if( scope == "profiles" ) {
// search dht profiles
boost::shared_ptr<session> ses(m_ses);
if( ses )
{
entry data = ses->dht_getLocalData();
std::map<string,entry> users;
TextSearch searcher(keyword, options);
for (entry::dictionary_type::const_iterator i = data.dict().begin(); i != data.dict().end(); ++i) {
if ( i->second.type() != entry::list_t )
continue;
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
string str_p = j->find_key("p")->string();
lazy_entry p;
int pos;
libtorrent::error_code err;
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
lazy_entry const* target = p.dict_find_dict("target");
if( target ) {
string resource = target->dict_find_string_value("r");
if( resource == "profile" ) {
lazy_entry const* v = p.dict_find_dict("v");
if( v ) {
if( searcher.matchText(v->dict_find_string_value("bio")) ||
searcher.matchText(v->dict_find_string_value("fullname")) ||
searcher.matchText(v->dict_find_string_value("location")) ||
searcher.matchText(v->dict_find_string_value("url")) ) {
string n = target->dict_find_string_value("n");
entry vEntry;
vEntry = *v;
users.insert(pair<string,entry>(n,vEntry));
}
}
}
}
}
}
std::map<string,entry>::iterator it;
for (it=users.begin(); it!=users.end() && (int)ret.size() < count; ++it) {
entry user;
user["username"] = it->first;
user["profile"] = it->second;
ret.push_back( entryToJson(user) );
}
}
} else if( scope == "users" ) {
// search users (blockchain)
// @todo: there should be a faster way
std::multimap<string::size_type,std::string> usernamesByLength;
boost::algorithm::to_lower(keyword);
string allowed = "abcdefghijklmnopqrstuvwxyz0123456789_";
for( int i = 0; i < allowed.size(); ++i ) {
set<string> usernames;
string prefix(1, allowed[i]);
pblocktree->GetNamesFromPartial(prefix, usernames, std::numeric_limits<int>::max());
BOOST_FOREACH(string username, usernames) {
if( username.find(keyword) != string::npos ) {
usernamesByLength.insert( pair<string::size_type,std::string>(username.size(), username) );
}
}
}
std::multimap<string::size_type,std::string>::iterator it;
for (it=usernamesByLength.begin(); it!=usernamesByLength.end() && (int)ret.size() < count; ++it) {
ret.push_back( entryToJson(it->second) );
}
} else if( scope == "hashtags" ) {
// search hashtags
std::multimap<string::size_type,std::string> hashtagsByLength;
#ifdef HAVE_BOOST_LOCALE
keyword = boost::locale::to_lower(keyword);
#else
boost::algorithm::to_lower(keyword);
#endif
{
LOCK(cs_seenHashtags);
BOOST_FOREACH(const PAIRTYPE(std::string,double)& item, m_seenHashtags) {
if (item.first.find(keyword) != std::string::npos) {
hashtagsByLength.insert( pair<string::size_type,std::string>(item.first.size(), item.first) );
}
}
}
std::multimap<string::size_type,std::string>::iterator it;
for (it=hashtagsByLength.begin(); it!=hashtagsByLength.end() && (int)ret.size() < count; ++it) {
ret.push_back( entryToJson(it->second) );
}
} else {
throw runtime_error("Unknown <scope> value");
}
return ret;
}