Browse Source

Improve performance by scanning list once

master
Lyndsay Roger 9 years ago
parent
commit
1fd3563702
  1. 35
      main.go
  2. 16
      node.go
  3. 81
      seeder.go

35
main.go

@ -144,14 +144,41 @@ func main() {
// updateNodeCounts runs in a goroutine and updates the global stats with the latest // updateNodeCounts runs in a goroutine and updates the global stats with the latest
// counts from a startCrawlers run // counts from a startCrawlers run
func updateNodeCounts(s *dnsseeder, status, total, started uint32) { func updateNodeCounts(s *dnsseeder, tcount uint32, started, totals []uint32) {
// update the stats counters
s.counts.mtx.Lock() s.counts.mtx.Lock()
s.counts.NdStatus[status] = total
s.counts.NdStarts[status] = started for st := range []int{statusRG, statusCG, statusWG, statusNG} {
if config.stats {
log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, status2str(uint32(st)), totals[st], started[st])
}
// update the stats counters
s.counts.NdStatus[st] = totals[st]
s.counts.NdStarts[st] = started[st]
}
if config.stats {
log.Printf("%s: crawlers started. total nodes: %d\n", s.name, tcount)
}
s.counts.mtx.Unlock() s.counts.mtx.Unlock()
} }
// status2str returns the printable name for a node status code.
// Any value other than statusRG, statusCG, statusWG or statusNG
// yields "Unknown".
func status2str(status uint32) string {
	// Start from the fallback and overwrite it only for a recognised code.
	name := "Unknown"
	switch status {
	case statusRG:
		name = "statusRG"
	case statusCG:
		name = "statusCG"
	case statusWG:
		name = "statusWG"
	case statusNG:
		name = "statusNG"
	}
	return name
}
// updateDNSCounts runs in a goroutine and updates the global stats for the number of DNS requests // updateDNSCounts runs in a goroutine and updates the global stats for the number of DNS requests
func updateDNSCounts(name, qtype string) { func updateDNSCounts(name, qtype string) {
var ndType uint32 var ndType uint32

16
node.go

@ -27,22 +27,6 @@ type node struct {
crawlActive bool // are we currently crawling this client crawlActive bool // are we currently crawling this client
} }
// status2str reports the name of the status currently stored on the node.
// A status outside the four known values is reported as "Unknown".
func (nd node) status2str() string {
	// Default label; replaced below when nd.status matches a known constant.
	label := "Unknown"
	switch nd.status {
	case statusRG:
		label = "statusRG"
	case statusCG:
		label = "statusCG"
	case statusWG:
		label = "statusWG"
	case statusNG:
		label = "statusNG"
	}
	return label
}
// dns2str will return the string description of the dns type // dns2str will return the string description of the dns type
func (nd node) dns2str() string { func (nd node) dns2str() string {
switch nd.dnsType { switch nd.dnsType {

81
seeder.go

@ -62,7 +62,6 @@ type dnsseeder struct {
maxStart []uint32 // max number of goroutines to start each run for each status type maxStart []uint32 // max number of goroutines to start each run for each status type
delay []int64 // number of seconds to wait before we connect to a known client for each status delay []int64 // number of seconds to wait before we connect to a known client for each status
counts NodeCounts // structure to hold stats for this seeder counts NodeCounts // structure to hold stats for this seeder
shutdown bool // seeder is shutting down
} }
type result struct { type result struct {
@ -167,7 +166,10 @@ func (s *dnsseeder) runSeeder(done <-chan struct{}, wg *sync.WaitGroup) {
// goroutines if there are spare goroutine slots available // goroutines if there are spare goroutine slots available
func (s *dnsseeder) startCrawlers(resultsChan chan *result) { func (s *dnsseeder) startCrawlers(resultsChan chan *result) {
tcount := len(s.theList) s.mtx.RLock()
defer s.mtx.RUnlock()
tcount := uint32(len(s.theList))
if tcount == 0 { if tcount == 0 {
if config.debug { if config.debug {
log.Printf("%s - debug - startCrawlers fail: no node ailable\n", s.name) log.Printf("%s - debug - startCrawlers fail: no node ailable\n", s.name)
@ -175,69 +177,40 @@ func (s *dnsseeder) startCrawlers(resultsChan chan *result) {
return return
} }
// struct to hold config options for each status started := make([]uint32, maxStatusTypes)
var crawlers = []struct { totals := make([]uint32, maxStatusTypes)
desc string
status uint32
maxCount uint32 // max goroutines to start for this status type
totalCount uint32 // stats count of this type
started uint32 // count of goroutines started for this type
delay int64 // number of second since last try
}{
{"statusRG", statusRG, s.maxStart[statusRG], 0, 0, s.delay[statusRG]},
{"statusCG", statusCG, s.maxStart[statusCG], 0, 0, s.delay[statusCG]},
{"statusWG", statusWG, s.maxStart[statusWG], 0, 0, s.delay[statusWG]},
{"statusNG", statusNG, s.maxStart[statusNG], 0, 0, s.delay[statusNG]},
}
s.mtx.RLock()
defer s.mtx.RUnlock()
// step through each of the status types RG, CG, WG, NG
for _, c := range crawlers {
// range on a map will not return items in the same order each time
// so this is a random'ish selection
for _, nd := range s.theList {
if nd.status != c.status {
continue
}
// stats count // range on a map will not return items in the same order each time
c.totalCount++ // so this is a random'ish selection
for _, nd := range s.theList {
if nd.crawlActive == true { totals[nd.status]++
continue
}
if c.started >= c.maxCount { if nd.crawlActive == true {
continue continue
} }
if (time.Now().Unix() - c.delay) <= nd.lastTry.Unix() { // capture the node status
continue ns := nd.status
}
nd.crawlActive = true // do we already have enough started at this status
nd.crawlStart = time.Now() if started[ns] >= s.maxStart[ns] {
// all looks good so start a go routine to crawl the remote node continue
go crawlNode(resultsChan, s, nd)
c.started++
} }
if config.stats { // don't crawl a node too quickly
log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, c.desc, c.totalCount, c.started) if (time.Now().Unix() - s.delay[ns]) <= nd.lastTry.Unix() {
continue
} }
// update the global stats in another goroutine to free the main goroutine // all looks good so start a go routine to crawl the remote node
// for other work go crawlNode(resultsChan, s, nd)
go updateNodeCounts(s, c.status, c.totalCount, c.started) started[ns]++
} }
if config.stats { // update the global stats in another goroutine to free the main goroutine
log.Printf("%s: crawlers started. total clients: %d\n", s.name, tcount) // for other work
} go updateNodeCounts(s, tcount, started, totals)
// returns and read lock released // returns and read lock released
} }

Loading…
Cancel
Save