Browse Source

Improve performance by scanning list once

master
Lyndsay Roger 9 years ago
parent
commit
1fd3563702
  1. 35
      main.go
  2. 16
      node.go
  3. 81
      seeder.go

35
main.go

@@ -144,14 +144,41 @@ func main() {
// updateNodeCounts runs in a goroutine and updates the global stats with the latest
// counts from a startCrawlers run
func updateNodeCounts(s *dnsseeder, status, total, started uint32) {
// update the stats counters
func updateNodeCounts(s *dnsseeder, tcount uint32, started, totals []uint32) {
s.counts.mtx.Lock()
s.counts.NdStatus[status] = total
s.counts.NdStarts[status] = started
for st := range []int{statusRG, statusCG, statusWG, statusNG} {
if config.stats {
log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, status2str(uint32(st)), totals[st], started[st])
}
// update the stats counters
s.counts.NdStatus[st] = totals[st]
s.counts.NdStarts[st] = started[st]
}
if config.stats {
log.Printf("%s: crawlers started. total nodes: %d\n", s.name, tcount)
}
s.counts.mtx.Unlock()
}
// status2str will return the string description of the status
func status2str(status uint32) string {
	// Table-driven lookup keeps the mapping in one place; unknown
	// status values fall through to the default label.
	names := map[uint32]string{
		statusRG: "statusRG",
		statusCG: "statusCG",
		statusWG: "statusWG",
		statusNG: "statusNG",
	}
	if name, ok := names[status]; ok {
		return name
	}
	return "Unknown"
}
// updateDNSCounts runs in a goroutine and updates the global stats for the number of DNS requests
func updateDNSCounts(name, qtype string) {
var ndType uint32

16
node.go

@@ -27,22 +27,6 @@ type node struct {
crawlActive bool // are we currently crawling this client
}
// status2str will return the string description of the status
func (nd node) status2str() string {
	// Map the node's current status code to its printable name;
	// anything outside the four known states reports as "Unknown".
	switch {
	case nd.status == statusRG:
		return "statusRG"
	case nd.status == statusCG:
		return "statusCG"
	case nd.status == statusWG:
		return "statusWG"
	case nd.status == statusNG:
		return "statusNG"
	}
	return "Unknown"
}
// dns2str will return the string description of the dns type
func (nd node) dns2str() string {
switch nd.dnsType {

81
seeder.go

@@ -62,7 +62,6 @@ type dnsseeder struct {
maxStart []uint32 // max number of goroutines to start each run for each status type
delay []int64 // number of seconds to wait before we connect to a known client for each status
counts NodeCounts // structure to hold stats for this seeder
shutdown bool // seeder is shutting down
}
type result struct {
@@ -167,7 +166,10 @@ func (s *dnsseeder) runSeeder(done <-chan struct{}, wg *sync.WaitGroup) {
// goroutines if there are spare goroutine slots available
func (s *dnsseeder) startCrawlers(resultsChan chan *result) {
tcount := len(s.theList)
s.mtx.RLock()
defer s.mtx.RUnlock()
tcount := uint32(len(s.theList))
if tcount == 0 {
if config.debug {
log.Printf("%s - debug - startCrawlers fail: no node available\n", s.name)
@@ -175,69 +177,40 @@ func (s *dnsseeder) startCrawlers(resultsChan chan *result) {
return
}
// struct to hold config options for each status
var crawlers = []struct {
desc string
status uint32
maxCount uint32 // max goroutines to start for this status type
totalCount uint32 // stats count of this type
started uint32 // count of goroutines started for this type
delay int64 // number of second since last try
}{
{"statusRG", statusRG, s.maxStart[statusRG], 0, 0, s.delay[statusRG]},
{"statusCG", statusCG, s.maxStart[statusCG], 0, 0, s.delay[statusCG]},
{"statusWG", statusWG, s.maxStart[statusWG], 0, 0, s.delay[statusWG]},
{"statusNG", statusNG, s.maxStart[statusNG], 0, 0, s.delay[statusNG]},
}
s.mtx.RLock()
defer s.mtx.RUnlock()
// step through each of the status types RG, CG, WG, NG
for _, c := range crawlers {
// range on a map will not return items in the same order each time
// so this is a random'ish selection
for _, nd := range s.theList {
if nd.status != c.status {
continue
}
started := make([]uint32, maxStatusTypes)
totals := make([]uint32, maxStatusTypes)
// stats count
c.totalCount++
// range on a map will not return items in the same order each time
// so this is a random'ish selection
for _, nd := range s.theList {
if nd.crawlActive == true {
continue
}
totals[nd.status]++
if c.started >= c.maxCount {
continue
}
if nd.crawlActive == true {
continue
}
if (time.Now().Unix() - c.delay) <= nd.lastTry.Unix() {
continue
}
// capture the node status
ns := nd.status
nd.crawlActive = true
nd.crawlStart = time.Now()
// all looks good so start a go routine to crawl the remote node
go crawlNode(resultsChan, s, nd)
c.started++
// do we already have enough started at this status
if started[ns] >= s.maxStart[ns] {
continue
}
if config.stats {
log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, c.desc, c.totalCount, c.started)
// don't crawl a node to quickly
if (time.Now().Unix() - s.delay[ns]) <= nd.lastTry.Unix() {
continue
}
// update the global stats in another goroutine to free the main goroutine
// for other work
go updateNodeCounts(s, c.status, c.totalCount, c.started)
// all looks good so start a go routine to crawl the remote node
go crawlNode(resultsChan, s, nd)
started[ns]++
}
if config.stats {
log.Printf("%s: crawlers started. total clients: %d\n", s.name, tcount)
}
// update the global stats in another goroutine to free the main goroutine
// for other work
go updateNodeCounts(s, tcount, started, totals)
// returns and read lock released
}

Loading…
Cancel
Save