#!/usr/bin/perl -w
#
# copyright 2000 by Gottfried Szing e9625460@stud3.tuwien.ac.at
#
# version 0.15-1
# 
# Author: Gottfried Szing e9625460@stud3.tuwien.ac.at
# Maintainer: Gottfried Szing e9625460@stud3.tuwien.ac.at

# best viewed under vi with tabstop=4

package main;

my $version 	= "0.16-1";		    # script version, printed in the startup banner - dont touch this ;-)

# ============== CONFIGURATION START =========================
# Connection settings; they are interpolated into the DBI DSN
# "DBI:mysql:database=...;host=...;port=..." when the script connects.
my $serverName 	= "localhost";		# host name of the MySQL server
my $serverPort 	= "3306";
my $serverUser 	= "root";			# user and password handed to DBI->connect
my $serverPass 	= "";
my $serverDb 	= "apache2";		# name of the database (not of a table)

# mail notification
# $notify_mail_on: when false, send_mail() returns without sending anything.
# $errortrigger:   checkdbstate() sends a mail once its error counter reaches this value.
# $smtp_host:      handed to real_send_mail as both the sender's and the receiver's SMTP host.
# $adminmail:      used as both the sender and the recipient address of the notification.
my $notify_mail_on	= 0;
my $errortrigger	= 20;
my $smtp_host		= "localhost";
my $adminmail		= "root\@localhost";
# =============== CONFIGURATION END ==========================

# =============== MAIN BEGIN ==========================
use strict;
use DBI;		# db-connection
use DBI::DBD;
use Time::ParseDate;

# unbuffered STDOUT so the progress dots show up immediately
$| = 1;

# human readable start time, shown again in the final statistics
my $start	= scalar(localtime);
chomp($start);

# month abbreviation as found in the log => month number (kept as a string)
my %monthtab;
@monthtab{ qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) } = map { "$_" } (1 .. 12);

# stay quiet unless at least one file name was given on the command line,
# i.e. be quiet if input comes from stdin
my $quiet	= (@ARGV != 0) ? 0 : 1;

# Connect to the database. Stop right here with the DBI error text when the
# connection fails; without this check the script only died later with an
# unhelpful "Can't call method ... on an undefined value".
my $dbh 	= DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass)
	or die "Cannot connect to database $serverDb\@$serverName:$serverPort: "
		. (defined $DBI::errstr ? $DBI::errstr : "unknown error") . "\n";

# create table-objects: one Entry object per lookup table. Each constructor
# call runs a select on its table (see Entry::init) to fill the id cache.
unless ($quiet)
{
	print "Initializing sub tables...  ";
}
my $Chost			= Entry->new($dbh, "host");
my $Cmethod			= Entry->new($dbh, "method");
my $Crequest		= Entry->new($dbh, "request");
my $Chttpversion	= Entry->new($dbh, "httpversion");
my $Creferrer		= Entry->new($dbh, "referrer");
my $Cagent			= Entry->new($dbh, "agent");
my $Chostip			= Entry->new($dbh, "hostip");
my $Ccookie			= Entry->new($dbh, "cookie");
my $Cerrorentry		= Entry->new($dbh, "errorentry");
my $Ccgi			= Entry->new($dbh, "cgi");
my $Cusertrack		= Entry->new($dbh, "usertrack");

my $Cvirtserverid	= Entry->new($dbh, "virtualid");
my $Cphysserverid	= Entry->new($dbh, "physicalid");

# startup banner (only when not running quietly)
unless ($quiet)
{
	print "done.\n\n";

	print <<EOF;
log2mysql version $version - Written by Gottfried Szing (e9625460\@stud3.tuwien.ac.at)

This version is for non-commercial use only.

Database:  $serverDb\@$serverName:$serverPort

Progress (. == 100 lines in log):
--------------------------------

EOF
}

# the log line currently being processed
my $line;

# status counters for the final statistics
my ($totalcount, $errorcount, $totalimported) = (0,0,0);

# data fields of the current log line. The list repetition initialises every
# variable; the former hand-written lists contained '"". ""' (a concatenation
# instead of a comma) and therefore left the last variables undefined.
my ($id, $host, $ident, $remoteuser, $datetime, $method, $request, $httpversion, $response, $size, $referer, $agent, $hostip, $cookie, $cgi, $virt, $usertrack) =
   ("") x 17;
my ($hostid, $methodid, $requestid, $httpversionid, $referrerid, $agentid, $hostipid, $cookieid, $cgiid, $virtid, $usertrackid) =
   (0) x 11;

# timeconvert
my ($day, $month, $year, $timestamp, $newdate);

# check insert
my $dberrorcount	= 0;
my $insertid;
my $optional;
# progress character: "x" while lines are skipped because they are already in
# the database, switched to "." by the import loop once real imports start
my $dotsign = "x";
# newest request time already stored in the transfer table (epoch seconds, 0 if none)
my $maxindb = &getMaxTime();

# while true, each line's date is compared against $maxindb (incremental
# import); the import loop clears it at the first line newer than $maxindb
my $checkdate = 1;

# Main import loop.
#
# Reads the log line by line from the files named on the command line, or
# from stdin when none were given. Every line is parsed as common or combined
# log format (optionally with a trailing usertrack field), its strings are
# mapped to ids through the Entry objects and one row is inserted into the
# "transfer" table. Lines that cannot be parsed or inserted are stored in the
# "errorlog" table and counted in $errorcount.
INPUT: while (defined($line = <>))
{
	chomp($line);

	$totalcount++;

	# keep user informed
	print "$dotsign" 			if ( ($totalcount % 100) == 0 && ! $quiet);
	print " $totalcount\n" 		if ( ($totalcount % 5000) == 0 && ! $quiet);

	# host ident user [time] "request" response size "referrer" "agent"
	unless ($line =~ m!(.*?)\s+(.*?)\s+(.*?)\s+\[(.*?)\]\s+"(.*?)"\s+([\d\-]+)\s+([\d\-]+)(\s+.*)*!)
	{
		$errorcount++;
		print "\nLog format error: [$line]\n"	unless ($quiet);

		$line = $dbh->quote($line);
		$id		= $Cerrorentry->getid($dbh, "Unknow log format");
		$dbh->do("insert into errorlog values ('', $line, '$id', NOW())");
		# same DB health check as on the other error paths
		&checkdbstate();

		next INPUT;
	}

	# the first seven entries in the logs are always the same
	($host, $ident, $remoteuser, $datetime, $request, $response, $size, $referer, $agent, $usertrack) =
			($1, $2, $3, $4, $5, $6, $7, "-", "-", "-");

	$optional = $8 || "";

	# combined log? The two patterns are mutually exclusive because a quoted
	# field cannot contain a double quote. Empty fields are stored as "-".
	if ($optional =~ m!^ "([^"]*?)" "([^"]*?)" "([^"]*?)"$!)
	{
		# referer, agent AND an usertrack field
		my ($ref, $ua, $track) = ($1, $2, $3);
		$referer	= length($ref)   ? $ref   : "-";
		$agent		= length($ua)    ? $ua    : "-";
		$usertrack	= length($track) ? $track : "-";
	}
	elsif ($optional =~ m!^ "([^"]*?)" "([^"]*?)"$!)
	{
		# referer and agent only; usertrack keeps its "-" default
		my ($ref, $ua) = ($1, $2);
		$referer	= length($ref) ? $ref : "-";
		$agent		= length($ua)  ? $ua  : "-";
	}

	# check time, expected as day/Mon/year:hh:mm:ss +zone. The zone offset is
	# matched but not used. A month name that is not in %monthtab is treated
	# like an unparsable date instead of producing an undefined month.
	my $dateok = ($datetime =~ m!(\d+)/(\w+)/(\d+):([\d:]+) ([+-]\d+)!);
	if ($dateok)
	{
		($day, $month, $year, $timestamp) = ($1, $2, $3, $4);
		$month = $monthtab{ucfirst(lc($month))};
	}
	unless ($dateok && defined($month))
	{
		$errorcount++;
		print "\nNot inserted: [$line]\n"	unless ($quiet);

		$line 	= $dbh->quote($line);
		$id		= $Cerrorentry->getid($dbh, "Unknown date format");
		$dbh->do("insert into errorlog values ('', $line, '$id', NOW())");
		&checkdbstate();

		next INPUT;
	}
	$newdate    = "$year-$month-$day $timestamp";

	# for incremental import: skip everything that is not newer than the
	# newest row already in the database; stop checking at the first new line
	if ($checkdate)
	{
		my $secs = &parsedate($newdate);
		next INPUT if ($maxindb >= $secs);
		$checkdate = 0;
		$dotsign = ".";
	}

	# quote bad characters - thnx to Bip Thelin <bip@razorfish.com>
	# rest of fields are quoted via Entry.pm
	$remoteuser = "" if ($remoteuser eq "-");
	$ident		= "" if ($ident eq "-");
	$remoteuser = $dbh->quote($remoteuser);
	$ident 		= $dbh->quote($ident);
	$remoteuser = "'-'" if ($remoteuser eq "''");
	$ident		= "'-'" if ($ident eq "''");

	# check request because its often "-" instead of "METHOD URL HTTPVERSION" (why!?)
	if ($request =~ m!^(.*?)\s+([^ ]*?)(\s+HTTP/.*?)*$!i)
	{
		($method, $request, $httpversion) = ($1, $2, $3 || " HTTP/1.0");
		$cgi = "-";

		# split off the query string
		if ($request =~ m/^(.*)\?(.*)$/)
		{
			($request, $cgi) = ($1, $2);
		}

		# Only take $1 when this match succeeded; an unchecked match would
		# leave a stale capture (method or path) in $httpversion.
		$httpversion = ($httpversion =~ m! HTTP/(\d\.\d)!) ? $1 : "-";
	}
	else
	{
		($method, $request, $cgi, $httpversion) = ("-", "-", "-", "-");
	}

	# now get ids
	$httpversionid	= $Chttpversion->getid($dbh, $httpversion);
	$methodid		= $Cmethod->getid($dbh, $method);
	$referrerid		= $Creferrer->getid($dbh, $referer);
	$requestid		= $Crequest->getid($dbh, $request);
	$hostid			= $Chost->getid($dbh, $host);
	$agentid		= $Cagent->getid($dbh, $agent);
	$hostipid		= $Chostip->getid($dbh, $hostip);
	$cgiid			= $Ccgi->getid($dbh, $cgi);
	$usertrackid	= $Cusertrack->getid($dbh, $usertrack);

	#$virtid			= $Cvirtserverid->getid($dbh, $virtserver);
	#$physid			= $Cphysserverid->getid($dbh, $physserver);

	my $rv = $dbh->do("insert into transfer (id, time, methodid, requestid, httpversionid, cgiid,
									response, size, referrerid, agentid, remoteuser, ident, hostid, usertrackid) 
							values ('', '$newdate', '$methodid', '$requestid', '$httpversionid', '$cgiid',
									'$response', '$size', '$referrerid', '$agentid', $remoteuser, $ident, '$hostid', '$usertrackid')");

	&checkdbstate();

	# the driver may leave the insert id undefined after a failed statement
	$insertid = $dbh->{'mysql_insertid'} || 0;

	if ($rv && $insertid > 0)
	{
		# count only rows that really made it into the table
		$totalimported++;
	}
	else
	{
		$errorcount++;
		print "\nNot inserted: [$line]\n"	unless ($quiet);

		$line = $dbh->quote($line);
		$id		= $Cerrorentry->getid($dbh, "Error occured durring insert");
		$dbh->do("insert into errorlog values ('', $line, '$id', NOW())");
		&checkdbstate();
	}
}


# Final statistics (only when not running quietly), then shutdown.
unless ($quiet)
{
	# Let the server count the rows. The former "SELECT * FROM transfer"
	# transferred the whole table just to read the statement's row count.
	my ($numRows) = $dbh->selectrow_array("SELECT COUNT(*) FROM transfer");
	$numRows = "unknown" unless (defined $numRows);

	print "\n\n";

	my $stop	= localtime;
	chomp($stop);

	print <<EOF;
	Statistics:
		
	   started at:  $start
	   ended at:    $stop

	   Total records in db:   $numRows
	   Total lines in log:    $totalcount
	   Total lines imported:  $totalimported
	   Lines with errors:     $errorcount


EOF
}

# close the database-connection
$dbh->disconnect();

exit 0;


# getMaxTime
#
# Returns the newest value of the "time" column in the transfer table as
# seconds since the epoch, or 0 when the table is empty or the query fails.
# Takes no arguments; uses the file-level database handle $dbh.
#
# Note: MySQL requires the opening parenthesis to follow a built-in function
# name without whitespace (unless the IGNORE_SPACE SQL mode is on), so the
# statement must read "max(" and not "max (".
sub getMaxTime
{
	my ($max) = $dbh->selectrow_array("select max(unix_timestamp(time)) from transfer");
	return $max || 0;
}


# checkdbstate
#
# Looks at the state code of the last statement executed on the file-level
# handle $dbh. A non-empty state counts as one database error: it is reported
# on STDERR and, once $errortrigger errors have accumulated, a notification
# is handed to send_mail() and the counter starts over.
#
# Database errors are counted in $dberrorcount. They must not be mixed into
# $errorcount: that variable feeds the "Lines with errors" statistic and
# would be reset to 0 here whenever a mail goes out.
#
# Takes no arguments and returns nothing useful.
sub checkdbstate
{
	my $dbstate = $dbh->state;

	# an empty state means the last statement succeeded
	return if (!defined($dbstate) || $dbstate eq "");

	$dberrorcount++;
	print STDERR "Encountered DB error - current counter: $dberrorcount\n";

	if ($dberrorcount >= $errortrigger)
	{
		&send_mail("I have encountered $dberrorcount DB errors.\n\nYou should check the MysqlDB\n");
		$dberrorcount = 0;
	}

	# reconnecting is intentionally left disabled:
	#$dbh 	= DBI->connect("DBI:mysql:database=$serverDb;host=$serverName;port=$serverPort",$serverUser,$serverPass);
	return;
}

# send_mail
#
# Sends $msg to the administrator through smtpmail::real_send_mail, using
# $adminmail as sender and recipient and $smtp_host as SMTP host on both
# sides. Does nothing unless $notify_mail_on is set.
#
# Parameters:
#   $msg = body of the notification mail
#
# The sub is declared without a prototype: the former empty prototype "()"
# claimed that it takes no arguments although it needs the message text.
sub send_mail
{
	my ($msg) = @_;

	return unless ($notify_mail_on);

	$msg = "" unless (defined $msg);
	smtpmail::real_send_mail($adminmail, $smtp_host, $adminmail, $smtp_host, "ApachedDB error", $msg);
}

# ============== ENTRY MODULE =========================
# the Entry module is now located within the script to ease
# the installation for the user
#
package Entry;

# new(CLASS, DBH, TABLE)
#
# Creates the object for one lookup table: remembers the table name, starts
# with an empty value => id cache and lets init() fill that cache through
# the given database handle. Returns the new object.
sub new
{
	my $class = shift;
	my ($dbh, $table) = @_;

	my $self = bless({ TABLE => $table, ENTRY => {} }, $class);
	$self->init($dbh);

	return $self;
}

# init(DBH)
#
# Loads all rows of the object's table ("select ID, <table> from <table>")
# and stores them in the cache $self->{ENTRY} as value => ID.
#
# The rows are simply fetched until the statement is exhausted. The row
# count reported by the statement handle is not consulted: for a SELECT it
# is not guaranteed to be known before the rows have been fetched, so a
# filled table could be mistaken for an empty one. The cache stays empty
# when the statement cannot be prepared or executed.
#
# Returns nothing.
sub init
{
	my ($self, $dbh) = @_;
	my $table = $self->{TABLE};

	my $cur = $dbh->prepare("select ID, $table from $table");
	return unless ($cur && $cur->execute);

	while (my ($id, $entry) = $cur->fetchrow_array())
	{
		# a NULL value is cached under the empty string
		$entry = "" unless (defined $entry);
		$self->{ENTRY}->{$entry} = $id;
	}

	$cur->finish();
	return;
}

# getid(DBH, KEY)
#
# Returns the id belonging to KEY in this object's table. A positive id that
# is already cached is returned right away. Otherwise KEY is inserted into
# the table, and the insert id reported by the handle is cached and returned.
sub getid
{
	my ($self, $dbh, $key) = @_;
	my $cache = $self->{ENTRY};

	# cache hit: only positive ids count as known
	my $known = $cache->{"$key"};
	return $known if ($known && $known > 0);

	# unknown value: store it and remember the id the database assigned
	my $table  = $self->{TABLE};
	my $quoted = $dbh->quote($key);
	$dbh->do("insert into $table values ('', $quoted)");

	my $fresh = $dbh->{'mysql_insertid'};
	$cache->{"$key"} = $fresh;

	return $fresh;
}

1;


############################################################
#                       SMTPMAIL_LIB.PL
# This script was written by Gunther Birznieks.
# Date Created: 2-22-96
# Date Last Modified: 5-5-96
#
package smtpmail;

# Use the Sockets library for TCP/IP Communications
use Socket;
# Platform switch read by read_sock: with "NT" the socket is read without
# polling first, with any other value read_sock polls with select() and so
# honours its timeout. It used to be hard-coded to "NT", which disabled the
# timeout on every platform; now only native Windows gets the "NT" behaviour.
my $mail_os = ($^O eq 'MSWin32') ? "NT" : "UNIX";

############################################################
#
# subroutine: real_send_mail
#   Usage:
#     real_send_mail("me@myhouse.com","myhouse.com","you@yourhouse.com",
#     "yourhouse.com", "Mysubject", "My message");
#
#   Parameters:
#     $fromuser = Full Email address of sender
#     $fromsmtp = Full Internet Address of sender's SMTP Server
#                 (sent as the HELO argument)
#     $touser   = Full Email address of receiver
#     $tosmtp   = Full Internet Address of receiver's SMTP Server
#                 (the host that is connected to)
#     $subject  = Subject of message
#     $messagebody = Body of message including newlines.
#
#   Output:
#     1 after the SMTP dialogue has been run, nothing (empty return)
#     when the host cannot be resolved or the connection cannot be
#     set up. Problems are reported with warn(); the replies of the
#     server are read but not evaluated.
#
#   Notes:
#     Every line is sent with a CRLF line ending and body lines that
#     start with a dot are dot-stuffed, as the SMTP DATA phase
#     requires.
############################################################

sub real_send_mail {
    my ($fromuser, $fromsmtp, $touser, $tosmtp,
        $subject, $messagebody) = @_;

    my $crlf = "\015\012";

    $messagebody = "Subject: $subject\n\n" . $messagebody;

    # protocol and port numbers, with the well known values as fallback
    my $protocol  = (getprotobyname('tcp'))[2];
    my $smtp_port = (getservbyname('smtp', 'tcp'))[2];
    $smtp_port = 25 unless (defined($smtp_port) && $smtp_port =~ /^\d+$/);
    $protocol  = 6  unless (defined($protocol)  && $protocol  =~ /^\d+$/);

    # packed address of the receiving SMTP server
    my $ipaddress = (gethostbyname($tosmtp))[4];
    unless (defined $ipaddress) {
        warn "Can't resolve SMTP host $tosmtp\n";
        return;
    }

    # build the socket addresses with the Socket module instead of a
    # hand-written pack template, whose layout differs between platforms
    my $packconnectip  = pack_sockaddr_in($smtp_port, $ipaddress);
    my $packthishostip = pack_sockaddr_in(0, INADDR_ANY);

    unless (socket(S, AF_INET, SOCK_STREAM, $protocol)) {
        warn "Can't make socket:$!\n";
        return;
    }

    unless (bind(S, $packthishostip)) {
        warn "Can't bind:$!\n";
        close S;
        return;
    }

    unless (connect(S, $packconnectip)) {
        warn "Can't connect socket:$!\n";
        close S;
        return;
    }

    # send exactly the bytes we print, unbuffered, and put the previously
    # selected output handle back afterwards
    binmode(S);
    my $previous = select(S);
    $| = 1;
    select($previous);

    # greeting of the server
    read_sock(*S, 6);

    # envelope: one reply is read after each command
    foreach my $command ("HELO $fromsmtp",
                         "MAIL From:<$fromuser>",
                         "RCPT To:<$touser>",
                         "DATA") {
        print S $command . $crlf;
        read_sock(*S, 6);
    }

    # message: normalise the line endings to CRLF and double a leading dot
    # so that no body line can be taken for the end-of-data marker
    $messagebody =~ s/\015?\012/$crlf/g;
    $messagebody =~ s/^\./../mg;

    print S $messagebody . $crlf;
    print S "." . $crlf;
    read_sock(*S, 6);

    print S "QUIT" . $crlf;

    close S;

    return 1;
}

############################################################
#
# subroutine: read_sock
#   Usage:
#     read_sock(SOCKET_HANDLE, $timeout);
#
#   Parameters:
#     SOCKET_HANDLE = Handle to an allocated Socket
#     $timeout = amount of time read_sock is allowed to
#                wait for input before timing out
#                (measured in seconds)
#
#   Output:
#     Buffer containing what was read from the socket: at most
#     1024 bytes from a single read, or the empty string when
#     nothing was read (timeout, read error, end of file).
#
############################################################

sub read_sock {
    my ($handle, $endtime) = @_;
    my $buf = "";

    # turn the timeout into an absolute point in time
    $endtime += time;

    # bit vector for select() with only the bit of our socket set
    my $rin = '';
    vec($rin, fileno($handle), 1) = 1;

    # nothing is known to be readable yet
    my $nfound = 0;

    # Loop until we time out or something can be read
    while (($endtime > time) && ($nfound <= 0)) {
        # With $mail_os set to "NT" the socket is not polled: the read
        # below is attempted right away and may block. This is important
        # on systems where select cannot be used for polling.
        $nfound = 1;
        if ($mail_os ne "NT") {
            # poll for 0.2 seconds to see if there is anything to read
            my $rout;
            $nfound = select($rout = $rin, undef, undef, .2);
        }
    }

    if ($nfound > 0) {
        my $localbuf = "";
        my $length = sysread($handle, $localbuf, 1024);
        # sysread returns undef on error and 0 at end of file
        if (defined($length) && $length > 0) {
            $buf .= $localbuf;
        }
    }

    return $buf;
}

1;

