Code Search for Developers
 
 
  

RegexCommonLogParser.java from Tea Stats at Krugle


Show RegexCommonLogParser.java syntax highlighted

/*
 * This file is distributed under the GPL v2 as part of teastats site statistics package
 * http://teastats.sourceforge.net
 */
package net.time4tea.webstats.parser;

import net.time4tea.webstats.record.Page;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

import java.net.URISyntaxException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based {@link LogParser} that turns one access-log line into a {@link Page}.
 *
 * <p>The expected line layout, as captured by the regular expression, is:
 * <pre>
 *   client vhost user [date] "method uri protocol" status bytes "referer" "user-agent"
 * </pre>
 * Capture groups are numbered 1..11 in that order; group 7 (the protocol,
 * e.g. {@code HTTP/1.0}) is matched but not stored.
 *
 * <p>Originally richja Feb 8, 2006
 */
public class RegexCommonLogParser implements LogParser {

    /** Timestamp layout inside the square brackets, e.g. {@code 08/Feb/2006:12:34:56 +0000}. */
    private static final String FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";

    /** Regular expression describing a whole log line; see the class comment for the group layout. */
    private static final String PATTERN = "^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+\\[(.*?)\\]\\s+\"(.*?)\\s+(.*?)\\s+(.*?)\"\\s+(\\d+)\\s+(\\S+)\\s+\"(.*?)\"\\s+\"(.*?)\"";

    /**
     * Formatter for the bracketed timestamp. The locale is pinned to English because
     * the {@code MMM} token would otherwise be interpreted using the JVM default locale,
     * and English month abbreviations such as "Feb" would fail to parse elsewhere.
     */
    private static final DateTimeFormatter DATE_FORMATTER =
            DateTimeFormat.forPattern(FORMAT).withLocale(Locale.ENGLISH);

    /** Compiled once and shared; compiling per instance is unnecessary work. */
    private static final Pattern LINE_PATTERN = Pattern.compile(PATTERN);

    /** Value stored as the URI when the logged request URI is rejected by {@link Page}. */
    private static final String INVALID_URI = "invalid";

    /** Value stored as the referer when the logged referer is rejected by {@link Page}. */
    private static final String NO_REFERER = "-";

    public RegexCommonLogParser() {
    }

    /**
     * Parses a single log line.
     *
     * @param input the raw log line; {@code null} is treated as an unparseable line
     * @return a populated {@link Page}, or {@code null} when the line does not match
     *         the expected layout (a diagnostic is printed to standard output)
     * @throws ParseException declared for the {@link LogParser} contract; not thrown
     *         directly here (NOTE(review): may originate from a {@code Page} setter - confirm)
     * @throws IllegalArgumentException if the line matches but its bracketed timestamp
     *         cannot be parsed with the expected date layout
     */
    public Page parseLine(String input) throws ParseException {
        if (input == null) {
            return unparseable(input);
        }

        Matcher matcher = LINE_PATTERN.matcher(input);
        if (!matcher.find()) {
            return unparseable(input);
        }

        Page record = new Page();

        record.setClientAddress(matcher.group(1));
        record.setVirtualHost(matcher.group(2));
        record.setUsername(matcher.group(3));
        record.setDate(DATE_FORMATTER.parseDateTime(matcher.group(4)).toInstant());
        record.setMethod(matcher.group(5));
        applyUri(record, matcher.group(6));

        // Group 7 is the protocol (e.g. HTTP/1.0) and is intentionally not stored.

        record.setStatusCode(matcher.group(8));
        record.setBytes(matcher.group(9));
        applyReferer(record, matcher.group(10));
        record.setUserAgent(matcher.group(11));

        return record;
    }

    /** Reports a line that could not be parsed and yields the "no record" result. */
    private static Page unparseable(String input) {
        System.out.println("Can't parse " + input);
        return null;
    }

    /** Stores the request URI, substituting a placeholder when the logged value is rejected. */
    private static void applyUri(Page record, String uri) {
        try {
            record.setUriString(uri);
        }
        catch (URISyntaxException e) {
            try {
                record.setUriString(INVALID_URI);
            }
            catch (URISyntaxException ignored) {
                // Best effort: even the placeholder was rejected, so the URI is left
                // as it was rather than discarding the rest of the record.
            }
        }
    }

    /** Stores the referer, substituting a placeholder when the logged value is rejected. */
    private static void applyReferer(Page record, String referer) {
        try {
            record.setReferer(referer);
        }
        catch (URISyntaxException e) {
            try {
                record.setReferer(NO_REFERER);
            }
            catch (URISyntaxException ignored) {
                // Best effort: even the placeholder was rejected, so the referer is left
                // as it was rather than discarding the rest of the record.
            }
        }
    }
}




See more files for this project here

Tea Stats

Web log analyzer... Written in OO Perl, provides the usual host / page analysis. Can also do site graphing using graphviz, browser, os, worm and search engine identification, and country and session tracking.

Project homepage: http://sourceforge.net/projects/teastats
Programming language(s): Java
License: other

  CommonLogParser.java
  LogParser.java
  ParseException.java
  RegexCommonLogParser.java
  TimingParser.java
  TimingParserTest.java