![]() |
|
|
|
#1 |
|
Hi,
I have the following script (see below) which queries various sites for statistics about a list of domains. The problem is that 1 in 3 attempts to run it result in "Segmentation Fault". Sometimes it will work fine 10 times in a row, other times it will segfault 3 times in a row before it works. How can I tell what's causing the segfault ? It doesn't happen all the time, but when it does I'm pretty sure it's $thr->join; that does it. (if I uncomment the print statements, it will say "Waiting for thread number 1 to join" followed immediately by "Segmentation Fault".. Chris #!/usr/bin/t-perl -W use strict; use threads; use Thread::Queue; use LWP::UserAgent; my $dls = './domains.txt'; # List of domain names my $out = './results.csv'; # Where to output results to my @nme = ('Yahoo','Alexa','Overture'); my @url = ('http://search.yahoo.com/search?p=link%3Ahttp%3A%2F%2Fdomain.com&ei=UTF-8&f r=FP-tab-web-t&x=wrt', 'http://www.alexa.com/data/ds/linksin?q=link:domain.com&url=', 'http://inventory.overture.com/d/searchinventory/suggestion/?mkt=us&lang=en_ US&term='); my @rgx = (qr/of about ((\d|,)+)/, qr/Sites \s+1\s+-\s+\d+\s+of\s+((\d|,)+)/i, qr/color=E8E8E8> (\d+)/); my $t1 = time(); open (FILE, "<$dls") || die "Unable to open $dls: $!"; my @domains = <FILE>; close (FILE); my @stream; my $cthreads; # Start 5 new threads per search engine: for (my $i=0;$i<=$#url;$i++){ $stream[$i] = new Thread::Queue; for (0..4){ my $tid = $i*$_; my $qnm = "q".($i+1); $cthreads->[$tid] = threads->new(\&fetchStats, $stream[$i], $url[$i], $rgx[$i], $nme[$i],$qnm); #print "Thread number ".($tid + 1)." created.\n"; } } # Add our domains to the queue: foreach my $dom (@domains){ chomp($dom); #print "Domain '$dom' added to the queue.\n"; foreach my $str (@stream){ $str->enqueue($dom); } } # Join all our threads (except ourselves): foreach my $thr (threads->list) { if ($thr->tid && !threads::equal($thr, threads->self) ){ #print "Waiting for thread number ".$thr->tid." to join\n"; $thr->join; #print "Thread number ".$thr->tid." has joined.\n"; } } my $t2 = time(); my $t3 = $t2-$t1; print "Total execution time: $t3 seconds\n"; sub fetchStats { my ($upstream,$url,$rgx,$nme,$qnm) = @_; while (my $dom = $upstream->dequeue){ my $ua = new LWP::UserAgent; $ua->parse_head(0); $ua->agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0 $ua->proxy('http','http://59.18.143.176:8080/'); $ua->timeout(2); $ua->max_size(9400); #print "Checking $dom\n"; my $response = $ua->get($url.$dom); if (!$response->is_success){ #$response = $ua->get($url.$dom); #print "Retrying $dom...\n"; } if ($response->is_success){ my $txt = $response->content; $txt =~ s/\r|\n//gs; my $hits = ($txt =~ $rgx) ? $1 : 0; print "$dom ($nme): $hits hits\n"; }else{ print "$dom ($nme): timeout\n"; } if ($upstream->pending < 1){ $upstream->enqueue(undef); } } } Skeleton Man |
|
|