experimental top trending hashtags

This commit is contained in:
Miguel Freitas 2014-02-15 20:41:54 -02:00
parent d1a17dc21f
commit 672b56a865
6 changed files with 99 additions and 2 deletions

View File

@ -489,6 +489,22 @@ void node_impl::tick()
}
}
// Feeds one bdecoded storage entry to the trending-hashtag tracker.
//
// An entry contributes only when it is not flagged as "multi"
// (target["t"] == "m") and carries a "v" dictionary holding a "userpost"
// dictionary. In that case the post's "msg" text and the entry's top-level
// "time" value are handed to updateSeenHashtags().
//
// p: decoded entry. Anything that is not a dictionary is ignored.
static void processEntryForHashtags(lazy_entry &p)
{
    // The entry may come straight from a bdecode whose result was not
    // checked; the dict_find_* accessors are only meaningful on dictionaries,
    // so bail out early on anything else.
    if( p.type() != lazy_entry::dict_t )
        return;

    const lazy_entry *target = p.dict_find_dict("target");
    bool multi = (target && target->dict_find_string_value("t") == "m");

    const lazy_entry *v = p.dict_find_dict("v");
    if( !v || multi )
        return;

    const lazy_entry *userpost = v->dict_find_dict("userpost");
    if( !userpost )
        return;

    // dict_find_int_value yields its default when "time" is absent; the
    // tracker then simply treats the post as very old.
    int64_t time = p.dict_find_int_value("time");
    std::string msg = userpost->dict_find_string_value("msg");
    updateSeenHashtags(msg,time);
}
bool node_impl::refresh_storage() {
bool did_something = false;
bool refresh_next_item = false;
@ -524,7 +540,7 @@ bool node_impl::refresh_storage() {
if( height > getBestHeight() ) {
continue; // how?
}
const lazy_entry *target = p.dict_find_dict("target");
std::string username = target->dict_find_string_value("n");
std::string resource = target->dict_find_string_value("r");
@ -544,6 +560,8 @@ bool node_impl::refresh_storage() {
target->dict_find_string_value("t").c_str());
#endif
processEntryForHashtags(p);
entry entryP;
entryP = p; // lazy to non-lazy
@ -689,6 +707,13 @@ void node_impl::load_storage(entry const* e) {
#ifdef ENABLE_DHT_ITEM_EXPIRE
if( !expired ) {
#endif
lazy_entry p;
int pos;
error_code err;
// FIXME: optimize to avoid bdecode (store seq separated, etc)
int ret = lazy_bdecode(item.p.data(), item.p.data() + item.p.size(), p, err, &pos, 10, 500);
processEntryForHashtags(p);
to_add.push_back(item);
#ifdef ENABLE_DHT_ITEM_EXPIRE
}

View File

@ -260,6 +260,7 @@ static const CRPCCommand vRPCCommands[] =
{ "listusernamespartial", &listusernamespartial, false, true },
{ "rescandirectmsgs", &rescandirectmsgs, false, true },
{ "recheckusertorrent", &recheckusertorrent, false, true },
{ "gettrendinghashtags", &gettrendinghashtags, false, true },
};
CRPCTable::CRPCTable()
@ -1281,6 +1282,7 @@ Array RPCConvertValues(const std::string &strMethod, const std::vector<std::stri
if (strMethod == "unfollow" && n > 1) ConvertTo<Array>(params[1]);
if (strMethod == "listusernamespartial" && n > 1) ConvertTo<boost::int64_t>(params[1]);
if (strMethod == "listusernamespartial" && n > 2) ConvertTo<bool>(params[2]);
if (strMethod == "gettrendinghashtags" && n > 0) ConvertTo<boost::int64_t>(params[0]);
return params;
}

View File

@ -212,5 +212,6 @@ extern json_spirit::Value getnumpieces(const json_spirit::Array& params, bool fH
extern json_spirit::Value listusernamespartial(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value rescandirectmsgs(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value recheckusertorrent(const json_spirit::Array& params, bool fHelp);
extern json_spirit::Value gettrendinghashtags(const json_spirit::Array& params, bool fHelp);
#endif

View File

@ -8,7 +8,7 @@
// These need to be macros, as version.cpp's and bitcoin-qt.rc's voodoo requires it
#define CLIENT_VERSION_MAJOR 0
#define CLIENT_VERSION_MINOR 9
#define CLIENT_VERSION_REVISION 13
#define CLIENT_VERSION_REVISION 14
#define CLIENT_VERSION_BUILD 0
// Set to true for release, false for prerelease or test build

View File

@ -58,6 +58,9 @@ static std::string m_receivedSpamUserStr;
static int64 m_lastSpamTime = 0;
static std::map<std::string,UserData> m_users;
static CCriticalSection cs_seenHashtags;
static std::map<std::string,double> m_seenHashtags;
#define USER_DATA_FILE "user_data"
#define GLOBAL_DATA_FILE "global_data"
@ -1174,6 +1177,44 @@ void receivedSpamMessage(std::string const &message, std::string const &user)
}
}
// Accumulates "votes" for every distinct hashtag found in a message.
//
// message: post text; it is only read, never modified.
// msgTime: timestamp of the post, in the same units as GetAdjustedTime().
//          Values in the future are clamped to the current time.
//
// A message no older than two hours casts a full vote (1.0) for each of its
// hashtags. Older messages cast 1 / (age / 2h), so the weight shrinks as the
// message ages. A hashtag is a token of at least two characters whose first
// character is '#'; it is stored lower-cased and without the '#'. Each
// hashtag is counted once per message, however often it is repeated.
void updateSeenHashtags(std::string &message, int64_t msgTime)
{
    boost::int64_t curTime = GetAdjustedTime();
    if( msgTime > curTime ) msgTime = curTime;

    // msgTime originates from stored/network data and may be arbitrarily
    // negative. Computing the age in floating point avoids the signed
    // integer overflow that (curTime - msgTime) could otherwise trigger.
    const double fullVoteWindow = 2*3600;
    double age = static_cast<double>(curTime) - static_cast<double>(msgTime);

    double vote = 1.0;
    if( age > fullVoteWindow ) {
        vote /= (age / fullVoteWindow);
    }

    // split and look for hashtags
    vector<string> tokens;
    set<string> hashtags;
    boost::algorithm::split(tokens,message,boost::algorithm::is_any_of(" \n\t.,:/?!"),
                            boost::algorithm::token_compress_on);
    BOOST_FOREACH(string const& token, tokens) {
        // Test for the '#' prefix first so ordinary words are neither
        // copied nor lower-cased.
        if( token.length() >= 2 && token.at(0) == '#' ) {
            string word = token.substr(1);
            boost::algorithm::to_lower(word);
            hashtags.insert(word);
        }
    }

    if( hashtags.size() ) {
        LOCK(cs_seenHashtags);
        BOOST_FOREACH(string const& word, hashtags) {
            // operator[] value-initializes a missing entry to 0.0, so a
            // single lookup covers both the new and the existing case.
            m_seenHashtags[word] += vote;
        }
    }
}
Value dhtput(const Array& params, bool fHelp)
{
@ -1902,4 +1943,30 @@ Value recheckusertorrent(const Array& params, bool fHelp)
return Value();
}
// RPC: gettrendinghashtags <count>
//
// Returns a JSON array with up to <count> hashtags, ordered from the highest
// accumulated score in m_seenHashtags to the lowest. The relative order of
// hashtags that share exactly the same score is unspecified.
//
// params: exactly one element, the maximum number of hashtags to return.
//         A negative value yields an empty list.
// fHelp:  when true (or when the parameter count is wrong) the usage text is
//         thrown as a runtime_error.
Value gettrendinghashtags(const Array& params, bool fHelp)
{
    if (fHelp || (params.size() != 1))
        throw runtime_error(
            "gettrendinghashtags <count>\n"
            "obtain list of trending hashtags");

    // Clamp before converting: a negative int would otherwise wrap around
    // to a huge size_t and disable the limit altogether.
    int requested = params[0].get_int();
    size_t count = requested > 0 ? static_cast<size_t>(requested) : 0;

    // Score -> hashtag. A multimap is required because several hashtags can
    // have the same score; a plain map would keep only one of them.
    std::multimap<double,std::string> sortedHashtags;
    {
        // Hold the lock only while copying; the reply is built afterwards.
        LOCK(cs_seenHashtags);
        BOOST_FOREACH(const PAIRTYPE(std::string,double)& item, m_seenHashtags) {
            sortedHashtags.insert(std::make_pair(item.second, item.first));
        }
    }

    Array ret;
    // Walk from the highest score downwards.
    BOOST_REVERSE_FOREACH(const PAIRTYPE(double, std::string)& item, sortedHashtags) {
        if( ret.size() >= count )
            break;
        ret.push_back(item.second);
    }

    return ret;
}

View File

@ -39,4 +39,6 @@ bool shouldDhtResourceExpire(std::string resource, bool multi, int height);
int getDhtNodes(boost::int64_t *dht_global_nodes = NULL);
void updateSeenHashtags(std::string &message, int64_t msgTime);
#endif // TWISTER_H