
//
// $Id: convertLog.cc,v 1.2 2002/11/30 01:38:45 dredd Exp $
//
// $Source: /cvsroot/hammerhead/hammerhead/utils/convertLog.cc,v $
// $Revision: 1.2 $
// $Date: 2002/11/30 01:38:45 $
// $State: Exp $
//
// Author: Jon Gifford
//
// Purpose: Convert a log file into a set of scenarios
//

#include <time.h>
#include <stdio.h>
#include <iostream.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#include "dictionary.h"
#include "str.h"

#ifndef SunOS
#include "hrtime.h"
#endif

extern char * strptime (const char * buf, const char * format, struct tm * tm);

//
// Per-address bookkeeping kept while the log is scanned. One instance is
// created for each distinct address token and updated each time another
// request from that address is read.
//
class knownIp 
{
public: 
    // Give the scalar members defined values so a freshly created entry
    // never exposes indeterminate data before the caller fills it in.
    knownIp() : ipCount(0), ipTime(0), scenarioNum(0) { }

    String ipAddress;   // address token exactly as it appeared in the log
    int ipCount;        // number of requests seen so far from this address
    time_t ipTime;      // timestamp of the most recent request
    String ipRequest;   // request text of the most recent request
    long scenarioNum;   // scenario number assigned to the most recent request
};
    
// Deliberately empty: does nothing with the pointer it is handed.
// NOTE(review): presumably a per-element cleanup hook looked up by the
// Dictionary template - confirm against dictionary.h.
void
destroy(knownIp **)
{
}
// Think-time cutoff. It is compared against the gap between two consecutive
// requests from the same address, computed as a difference of mktime()
// results (seconds on POSIX systems, where time_t counts seconds).
// printScenario() only links a scenario to its successor when the think
// time is >= 0 and below this value; main() treats larger gaps as the start
// of a new connection.
#define MAXTHINK 60

//
// Write one scenario record to outF.
//
// Lines emitted, in order:
//   #<ipA> #<cnt>
//   N<scenarioNum, hex>
//   R<ipR>
//   S<nextScenarioNum, hex>   } only when 0 <= thinkT < MAXTHINK
//   T<thinkT * 1000000>       }
//   X0 when scenarioNum == -1, otherwise X1
//   .
//
void
printScenario(FILE *outF, const String& ipA, const String& ipR, long thinkT, int cnt,
              long scenarioNum, long nextScenarioNum)
{
    // Header: originating address, request count, scenario name and request.
    fprintf(outF, "#%s #%d\nN%lx\nR%s\n", 
            ipA.c_str(), cnt, scenarioNum, ipR.c_str());

    // The successor link and think time are written only for think times
    // inside [0, MAXTHINK).
    const bool thinkInRange = !(thinkT < 0 || thinkT >= MAXTHINK);
    if (thinkInRange)
    {
        // NOTE(review): the factor presumably converts seconds to
        // microseconds - confirm against the scenario file format.
        const long scaledThink = thinkT * 1000000;
        fprintf(outF, "S%lx\nT%ld\n", 
                nextScenarioNum, scaledThink);
    }

    // Scenario number -1 is the marker callers use for synthetic entries.
    int xValue = 1;
    if (scenarioNum == -1)
    {
        xValue = 0;
    }
    fprintf(outF, "X%d\n.\n", xValue);
}


//
// Entry point.
//
// Usage: convertLog logFileName scenarioFileName
//   An argument whose first character is '-' selects stdin / stdout.
//
// Each input line is expected to look like
//   <address> ... [<dd/Mon/yyyy:hh:mm:ss ...>] ... "<request>" ...
// For every parsable line one scenario is eventually written, numbered by
// the line number it came from. Scenarios from the same address are chained
// to each other while the gap between them stays inside [0, MAXTHINK).
// A synthetic "GET / HTTP/1.0" scenario (number -1) is written in front of
// every new chain. A summary is printed on stderr at the end.
//
// Exit status: 0 on success, 1 bad usage, 2 input cannot be opened,
//              3 output cannot be opened, 4 error writing the output.
//
int
main(int argc, char *argv[])
{
    if (argc != 3)
    {
        cerr << "Usage: " << argv[0] << " logFileName scenarioFileName\n";
        return 1;
    }
    
    FILE *inFile;
    if (argv[1][0] == '-')
    {
        // stdin redirect
        inFile = stdin;
    }
    else
    {
        inFile = fopen(argv[1], "r");
        if (inFile == NULL)
        {
            cerr << "Error: Unable to open log file: " << argv[1] << endl;
            return 2;
        }
    }

    FILE *outFile;
    if (argv[2][0] == '-')
    {
        // stdout
        outFile = stdout;
    }
    else
    {
        outFile = fopen(argv[2], "w");
        if (outFile == NULL)
        {
            cerr << "Error: unable to open scenario file: " << argv[2] << endl;
            return 3;
        }
    }

    // Most recent request seen for each address, keyed by the address token.
    Dictionary<knownIp *> knownIps;

    int lineNum = 0;
    int created = 0;
    int longestSeq = 0;

    hrtime_t fileStart = gethrtime();
    char inBuf[BUFSIZ];
    // NOTE: a line longer than BUFSIZ-1 characters is returned by fgets() in
    // several pieces, and each piece is counted and parsed as its own line.
    while (fgets(inBuf, BUFSIZ, inFile))
    {
        lineNum++;
        if (lineNum % 5000 == 0)
        {
            // Periodic progress report: lines read, addresses known,
            // elapsed time and lines per unit of elapsed time.
            hrtime_t currT = gethrtime();
            double elapsed = (currT - fileStart)/1e9;
            double scenPerSec = lineNum/elapsed;
            fprintf(stderr, " %d (%d,%.0f,%.0f)", 
                    lineNum, knownIps.num_elements(),
                    elapsed, scenPerSec);
        }

        // Remove the trailing newline only when there is one. The final
        // line of a file (or an over-long line) may not end in '\n', and an
        // empty string must not be indexed at [-1].
        size_t len = strlen(inBuf);
        if (len > 0 && inBuf[len - 1] == '\n')
        {
            inBuf[--len] = '\0';
        }
        if (len == 0)
        {
            // blank line
            continue;
        }

        // First space-delimited token is the address. It is used verbatim
        // as the key; it is neither resolved nor validated.
        char *ipStr = strtok(inBuf, " ");
        if (ipStr == 0)
        {
            // line consisted only of spaces
            cerr << "\nWarning: No IP address found"
                 << " - Ignoring line " << lineNum << "\n";
            continue;
        }
        String source = ipStr;

        // now parse the rest of the line...
        // Skip everything up to '[', then take the text up to ']'.
        (void) strtok(0, "[");
        char *timeStr = strtok(0, "]");
        if (timeStr == 0)
        {
            cerr << "\nWarning: No Date/Time string found"
                 << " - Ignoring line " << lineNum << "\n";
            continue;
        }

        // Skip the text before the opening quote, then take the quoted
        // request.
        (void) strtok(0, "\"");
        char *request = strtok(0, "\"");
        if (request == 0)
        {
            cerr << "\nWarning: No request string found"
                 << " - Ignoring line " << lineNum << "\n";
            continue;
        }

        // strptime() fills in only the fields named by the format, so start
        // from a zeroed structure and let mktime() decide about daylight
        // saving time (tm_isdst < 0) instead of reading indeterminate data.
        struct tm cTm;
        memset(&cTm, 0, sizeof(cTm));
        cTm.tm_isdst = -1;
        char *nextC = strptime(timeStr, "%d/%b/%Y:%T", &cTm);

        if (nextC == 0)
        {
            cerr << "\nWarning: Illegal Date/Time string " << timeStr
                 << " - Ignoring line " << lineNum << "\n";
            continue;
        }
        time_t cTime = mktime(&cTm);

        knownIp *ip;
        if (knownIps.exists(ipStr))
        {
            ip = knownIps[ipStr];

            // The previous request from this address can be written now that
            // its successor (this line) and the gap between them are known.
            long deltaT = cTime - ip->ipTime;
            printScenario(outFile, ip->ipAddress, ip->ipRequest, 
                          deltaT, ip->ipCount, ip->scenarioNum, lineNum);

            // printScenario() writes the link to this line only when
            // 0 <= deltaT < MAXTHINK. In every other case this line starts a
            // new chain and needs its own entry scenario, otherwise nothing
            // would ever reference it. Using the exact complement of
            // printScenario()'s test also covers deltaT == MAXTHINK and
            // negative gaps from out-of-order log entries.
            const bool linkedToPrevious = (deltaT >= 0 && deltaT < MAXTHINK);
            if (!linkedToPrevious)
            {
                // assume new connection if think time too long...
                // initial request to / is not being logged, so fake it
                // whenever a new ip address is found.
                printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0",
                              0, ip->ipCount, -1, lineNum);
            }
            ip->ipCount++;
        }
        else
        {
            ip = new knownIp();
            knownIps[ipStr] = ip;
            
            ip->ipAddress = source;
            ip->ipCount = 1;

            // initial request to / is not being logged, so fake it
            // whenever a new ip address is found.
            printScenario(outFile, ip->ipAddress, "GET / HTTP/1.0",
                          0, 0, -1, lineNum);
        }

        // Remember this request; it is written when the next request from
        // the same address arrives, or after the end of the input.
        ip->ipTime = cTime;
        ip->ipRequest = request;
        ip->scenarioNum = lineNum;

        created++;
    }

    // Write the last pending request of every address (think time -1 means
    // no successor link) and find the longest request sequence.
    knownIp **allElements = knownIps.element_array();
    for (int i = 0; i < knownIps.num_elements(); i++)
    {
        knownIp *ip = allElements[i];
        printScenario(outFile, ip->ipAddress, ip->ipRequest, -1, 
                      ip->ipCount, ip->scenarioNum, 0);
        if (ip->ipCount > longestSeq)
        {
            longestSeq = ip->ipCount;
        }
    }

    cerr << "\n\nSUMMARY: Scenarios created\t" << created
         << "\n\t Input lines discarded\t" << lineNum - created
         << "\n\t Unique IP addresses\t" << knownIps.size()
         << "\n\t Longest Sequence\t" << longestSeq
         << "\n\t Sequences:\n\t\tLength\tNumber\t%";

    // Histogram of sequence lengths. Entries that have been counted are
    // swapped out of the live part of the array and freed, so each pass
    // scans only the entries that are still uncounted.
    int arrayEnd = knownIps.num_elements();
    for (int j = 1; j <= longestSeq; j++) 
    {
        int cnt = 0;
        for (int i = 0; i < arrayEnd; )
        {
            knownIp *ip = allElements[i];
            if (ip->ipCount == j)
            {
                cnt++;
                allElements[i] = allElements[--arrayEnd];
                delete ip;
            }
            else
                i++;
        }
        if (cnt > 0)
        {
            // Share of all created scenarios that belong to sequences of
            // this length, as a percentage truncated to three decimals.
            cerr << "\n\t\t" << j << "\t" << cnt << "\t" 
                 << (int )(((double)cnt*j/(double)created)*100000)/1000.0;
        }
    }
    // NOTE(review): if element_array() allocates with new[], this should be
    // delete[] - confirm against dictionary.h before changing it.
    delete allElements;
    cerr << "\n\n";

    // Report output that could not be written instead of exiting silently
    // with a truncated scenario file.
    int exitStatus = 0;
    if (fflush(outFile) != 0 || ferror(outFile))
    {
        cerr << "Error: failed writing scenario output\n";
        exitStatus = 4;
    }
    if (outFile != stdout)
    {
        fclose(outFile);
    }
    if (inFile != stdin)
    {
        fclose(inFile);
    }
    return exitStatus;
}
