#!/usr/local/bin/perl5 -w
# Name: mybin/http-graph   Author: js-cgi@inwap.com   12-Nov-2001
# Purpose: Converts bandwidth report from http-sum to a graph
#
# The resulting image has three lines:
#   green  = thousands of hits per day, updated once per day.  Stairstep shape
#   orange = bandwidth per 2 hours (12 samples per day).  Very jaggy.
#   black  = average bandwidth for 24 hours (12 hours in past to 12 in future).

use strict;
use warnings;
use Getopt::Long;
use GD::Graph::lines;

$| = 1;    # Unbuffered STDOUT

# NOTE(review): the original text between "use constant USAGE =>" and the
# "Days to graph" check was lost (only "< < 3 || die ..." survived).  The
# USAGE and HEADER text, the option names, the default title and the
# requirement that --output be given are a RECONSTRUCTION.  Confirm them
# against the deployed script before relying on them.
use constant USAGE => <<'EOM';
Usage: http-graph [--days=N] [--title=TEXT] --output=FILE.png [cachefile] http-sum-file
       Use "-" as a file name to read http-sum output from STDIN.
       When two or more files are given, the first is the cache file and
       is rewritten with the combined results.
EOM

# First line written to the cache file.  It starts with "#" so that it can
# never match the date pattern used when the cache is read back in.
use constant HEADER =>
    "# Date     Day  MB per 2 hours (12 per day) = KBytes Hits\n";

# Package variables (not lexicals) because the rest of the script reads them.
our $maxdays = 60;                      # Keep 60 or so days on the graph
our $title   = 'Web server traffic';    # NOTE(review): placeholder default
our $imgout;                            # Name of the PNG file to create
our $cachefile;                         # Summary of previous runs, or 0
our @lines;                             # One http-sum line per date, sorted

GetOptions(
    'days=i'   => \$maxdays,
    'title=s'  => \$title,
    'output=s' => \$imgout,
) or die USAGE;
defined $imgout or die USAGE;

$maxdays > 3 or die "Days to graph should be greater than 3, aborting";
@ARGV or die USAGE;    # Must explicitly specify "-" to read from STDIN
$cachefile = @ARGV > 1 ? $ARGV[0] : 0;    # If two or more files, 1st is cache

# All the files specified on the command line are expected to have been
# created from the output of mybin/http-sum.  The first one is a summary
# of the previous runs, the second one is http-sum for today.  The results
# will be combined and written to the first file name.
my %hits_on;     # date => largest hit count seen for that date
my %line_for;    # date => the input line that carried that hit count

while (my $line = <>) {    # Read from 1 or more input files, OK if out of sequence
    # This program reads files created by the http-sum program.
    # 2000/03/31 Fri .68 .43 .39 .73 1.1 1.8 1.2 1.7 .72 1.5 1.3 .60 = 11,708 906
    # Some older log files have yyyymmdd instead of yyyy/mm/dd for the date.
    $line =~ s{^(\d\d\d\d)(\d\d)(\d\d)\s}{$1/$2/$3 };    # Convert old format to new

    # Anything that is not a dated line ending in a hit count is skipped.
    my ($date, $hits) = $line =~ m{^(\d{4}/\d{2}/\d{2}).*\s+(\d+)$}
        or next;

    # If duplicates, use the line with the most hits (ignore short counts).
    next if exists $hits_on{$date} and $hits_on{$date} > $hits;
    $hits_on{$date}  = $hits;
    $line_for{$date} = $line;    # Including trailing "\n"
}

# yyyy/mm/dd keys sort chronologically with a plain string sort.
@lines = map { $line_for{$_} } sort keys %line_for;
#pop @lines if $cachefile;    # Last line is only a partial day

# The cache file keeps growing.  Another cron job should keep its size down.
# Rewrite the cache file with the merged history.  Failures here are only
# warned about: the graph can still be drawn from the data held in memory.
if ($cachefile) {
    if (open my $cache_fh, '>', $cachefile) {
        print {$cache_fh} HEADER(), @lines
            or warn "print() failed to $cachefile: $!\n";
        close $cache_fh
            or warn "close() failed to $cachefile: $!\n";
        #print "Stored ",scalar @lines," days statistics in $cachefile\n";
    }
    else {
        # Do not print to or close a handle that never opened.
        warn "open() failed to $cachefile: $!\n";
    }
}

# Nothing to plot: stop before touching $lines[0] / $lines[-1].
@lines or die "No http-sum data lines found in the input, aborting\n";

splice @lines, 0, @lines - $maxdays if @lines > $maxdays;    # Keep 60 or so days
my $first_date = substr $lines[0],  0, 10;
my $last_date  = substr $lines[-1], 0, 10;

# Expand every daily line into 12 two-hour samples.  The four arrays stay
# parallel: one X label, one MB value and one hits value per sample.
my (@dates, @mb, @hits, @avg);
my $value = 0;    # Last good 2-hour figure, reused where a sample is missing

for my $line (@lines) {
    # 2000/03/31 Fri .68 .43  -  .73 1.1 1.8 1.2 1.7 .72 1.5 1.3 .60 = 11,708 906
    # 2001/11/02 Fri 9.4 9.8 11. 19. 20. 20. 28. 36.160. 79. 77. 38. = 514,341 86993
    # The 4th capture (kbytes) is not used.
    my ($date, $day, $mb, undef, $hits) =
        $line =~ m%^(\S+) (...)(.*)=\s*(\S+)\s+(\S+)%;
    unless ($mb) {
        # $. is not meaningful here (all input has already been read), so
        # the offending line itself is shown instead of a line number.
        warn "Skipping line with an invalid format:\n$line";
        next;
    }

    my @samples = unpack('A4' x 12, $mb);             # 12 per day, 4 columns each
    my $label   = substr($date, 5, 5) . " $day";      # Skip "yyyy/", use "mm/dd"

    for my $slot (0 .. 11) {
        $value = $samples[$slot]
            if defined $samples[$slot] && $samples[$slot] =~ /^\s*[\d.]+$/;
        push @dates, $label;
        push @mb,    $value;          # Use previous $value if sample is " - "
        push @hits,  $hits / 1000;    # Same daily figure for all 12 samples
    }
}

# Average each sample with the 6 before and the 6 after it (13 samples).
# Near either end, positions outside the data reuse the current sample.
my $total_mb   = 0;
my $total_hits = 0;
for my $i (0 .. $#mb) {
    my $sum = 0;
    for my $offset (-6 .. 6) {
        my $n = $i + $offset;
        $n = $i if $n < 0 || $n > $#mb;
        $sum += $mb[$n];
    }
    $avg[$i]     = $sum / 13;
    $total_mb   += $avg[$i];
    $total_hits += $hits[$i];
}

# Every line was rejected above; also guards the divisions below.
@dates or die "No usable http-sum data lines found in the input, aborting\n";

# Spikes that are 100 times the average will make the graph unviewable.
# Step 1: Clip values that vary wildly from the average.
# Step 2: Clip values to something that graphs well.
for my $pass (1 .. 2) {
    my $mean_mb   = $total_mb / @dates;
    my $mean_hits = $total_hits / @dates;
    my $max = 6 * ($mean_mb > $mean_hits ? $mean_mb : $mean_hits);

    # Pull the ceiling down to a round number.
    $max = 100 if $max > 100;    # 100 MB in 2 hours = 1.2 GB/day = 36 GB/month
    $max = 50  if $max > 50 && $max < 100;
    $max = 25  if $max > 25 && $max < 50;
    $max = 10  if $max > 10 && $max < 25;
    $max = 5   if $max > 5  && $max < 10;

    # Clip all three series and re-total so the second pass works from
    # the clipped values.
    $total_mb = $total_hits = 0;
    for my $i (0 .. $#dates) {
        $mb[$i]   = $max if $mb[$i]   > $max;
        $avg[$i]  = $max if $avg[$i]  > $max;
        $hits[$i] = $max if $hits[$i] > $max;
        $total_mb   += $mb[$i];
        $total_hits += $hits[$i];
    }
}

# Feed the data into GD::Graph::lines.  The data sets are in the same
# order as the colours and legend entries below.
my @plotdata = (\@dates, \@mb, \@avg, \@hits);
my $graph    = GD::Graph::lines->new(400, 200);
my $label_days = 1 + int(@lines / 33);    # Label every other day for 33 to 65 days
$graph->set(
    x_labels_vertical => 1,
    x_label_skip      => $label_days * 12,    # 12 samples per day
    transparent       => 0,
    bgclr             => "white",
    title             => "$title, $first_date to $last_date",
    dclrs             => [qw(lorange black dgreen)],
);
$graph->set_legend('Megabytes per 2 hours', '24-hour Average MB',
                   '1000s of hits per day');

# plot() returns a false value on failure; report the reason rather than
# failing later with a method call on undef.
my $gd = $graph->plot(\@plotdata)
    or die "Cannot plot graph: " . $graph->error . "\n";

open(my $img_fh, '>', $imgout) or die "Cannot create $imgout: $!\n";
binmode $img_fh;
print {$img_fh} $gd->png or die "Cannot write $imgout: $!\n";
close $img_fh            or die "Cannot write $imgout: $!\n";    # Buffered errors surface here
exit;