#!/usr/bin/perl
use strict;
use warnings;
use POSIX qw(mktime);
use Socket;

####################################################################
# UNIQUE VISITORS - A very simple utility to parse Apache log files
#                   and print a listing of unique visiting domains.
# Copyleft 2005 - Nathan E. Pralle
#
# DESCRIPTION: This utility parses an Apache log file and determines
#   how many unique domains visited your site, then compiles
#   a list of them (good for directing to an email or logfile).
#   It uses a cache file to speed up the process as well (not
#   having to do DNS lookups all the time).  It only handles
#   the first level of a domain, i.e. blah.mchsi.com and dink.mchsi.com
#   will just be listed as 2 entries from mchsi.com.
#
# SYNTAX: perl unique_visitors.pl [days]
#   Where [days] is the number of days back you want to look
#   (defaults to 0, i.e. today).
#   I usually run mine at 12:30am and use a '1' to get the previous day.
#
# EXTRA: You must have a file available called "unique_domains.dat"
#   in the same location as this binary and chmod'ed to 777.
#   This is the cache file for this binary.  I recommend:
#   'touch unique_domains.dat;chmod 777 unique_domains.dat'
#   NOTE: the cache path is relative, so it is resolved against the
#   working directory the script is started from.
#
# CONTACT: Questions, comments, etc.
#   http://www.nathanpralle.com/contact.html
####################################################################

########################
# CONFIGURATION OPTIONS
#full path to your Apache access_log file
my $logfile="/path/to/access_log";
#IP-to-hostname cache file (tab-separated "ip<TAB>host" lines)
my $cachefile="unique_domains.dat";
#######################

# Number of days to look back; must be a non-negative integer.
my $offset = shift || 0;
if ($offset !~ /\A[0-9]+\z/) {
    die("usage: perl unique_visitors.pl [days]\n");
}

# Work out the target calendar date.  mktime() normalises an out-of-range
# day of month (zero or negative), so month lengths and year roll-over are
# handled for us.  Anchoring at noon keeps a DST shift from landing the
# result on the wrong day.
my @months = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
my @now    = localtime;
my $target_epoch = mktime(0, 0, 12, $now[3] - $offset, $now[4], $now[5]);
if (!defined $target_epoch) {
    die("can't compute a date $offset day(s) back\n");
}
my @target     = localtime($target_epoch);
my $year       = $target[5] + 1900;
my $shortmonth = $months[$target[4]];
my $day        = pad($target[3], 2);
my $formatted  = "$day/$shortmonth/$year";

# Prefix of the log timestamp for the target day, e.g. "[09/Mar/2005:".
# Assumes the standard Apache %t timestamp; matching the bracket and the
# trailing colon avoids false hits on dates embedded in URLs or referrers.
my $datestamp = "[$formatted:";

my %iparray;          # reduced domain (or bare IP) => number of hits
my %domainlookups;    # client address => hostname ("ip => ip" if unresolved)
my $counter    = 0;   # log lines seen for the target day
my $hitcounter = 0;   # of those, how many were answered from the cache

# Load the lookup cache.  A missing or unreadable cache is not fatal: we
# simply start empty and the file is (re)written at the end of the run.
if (open(my $cache_in, '<', $cachefile)) {
    while (my $entry = <$cache_in>) {
        chomp $entry;
        my ($ip, $domain) = split(/\t/, $entry);
        # Skip blank or malformed lines instead of caching undef.
        next unless defined $ip && length $ip && defined $domain;
        $domainlookups{$ip} = $domain;
    }
    close($cache_in);
}
else {
    warn("can't read cache file $cachefile ($!); starting with an empty cache\n");
}

# Stream the log one line at a time rather than slurping it into memory.
open(my $log_in, '<', $logfile) or die("can't open logfile $logfile: $!\n");
while (my $line = <$log_in>) {
    next if index($line, $datestamp) < 0;
    $counter++;

    # The client address is the first space-separated field of the line.
    my ($client) = split(/ /, $line, 2);

    my $host = $domainlookups{$client};
    if (defined $host) {
        $hitcounter++;
    }
    else {
        # inet_aton() returns undef for anything it cannot turn into an
        # IPv4 address; only attempt the reverse lookup when it succeeded.
        my $iaddr = inet_aton($client);
        $host = gethostbyaddr($iaddr, AF_INET) if defined $iaddr;
    }

    my $domainstring;
    if ($host) {
        $domainlookups{$client} = $host;
        $domainstring = base_domain($host);
    }
    else {
        # Unresolvable: report the raw address and cache it as its own
        # "hostname" so the failed lookup is not repeated.
        $domainstring = $client;
        $domainlookups{$client} = $client;
    }
    $iparray{$domainstring}++;
}
close($log_in);

# Persist the (possibly extended) cache.
open(my $cache_out, '>', $cachefile)
    or die("Can't open datafile for writing! ($cachefile: $!)\n");
foreach my $keyitem (sort keys %domainlookups) {
    print {$cache_out} "$keyitem\t$domainlookups{$keyitem}\n";
}
# Buffered write errors only surface at close, so check it.
close($cache_out) or die("Can't write datafile $cachefile: $!\n");

print "Visitors on $shortmonth $day, $year: $counter ($hitcounter cached hits)\n\n";
foreach my $key (sort hashValueAscendingNum (keys(%iparray))) {
    print pad($iparray{$key}, 4)." $key\n";
}
exit;

# pad($thing, $len): left-pad $thing with "0" characters until it is at
# least $len characters long.  Longer values are returned unchanged.
sub pad {
    my ($thing, $len) = @_;
    my $missing = $len - length($thing);
    return $missing > 0 ? ('0' x $missing) . $thing : $thing;
}

# base_domain($host): reduce a hostname to the domain used for grouping.
#   - A value ending in a digit is treated as an IP address and returned
#     unchanged.
#   - If the last label is shorter than 3 characters (e.g. a two-letter
#     country code) the last three labels are kept, otherwise the last two.
#   - Hosts with fewer labels than that are returned with all the labels
#     they have, instead of wrapping around to repeat a label.
sub base_domain {
    my ($host) = @_;
    return $host if $host =~ /[0-9]$/;

    my @labels = split(/\./, $host);
    return $host unless @labels;

    my $want = length($labels[-1]) < 3 ? 3 : 2;
    $want = scalar(@labels) if $want > @labels;
    return join('.', @labels[-$want .. -1]);
}

# Sort comparator: ascending by hit count in %iparray, ties broken by
# name so the report order is deterministic.
sub hashValueAscendingNum {
    return $iparray{$a} <=> $iparray{$b} || $a cmp $b;
}