// RegexCommonLogParser.java from Tea Stats at Krugle
// Show RegexCommonLogParser.java syntax highlighted
/*
* This file is distributed under the GPL v2 as part of teastats site statistics package
* http://teastats.sourceforge.net
*/
package net.time4tea.webstats.parser;
import net.time4tea.webstats.record.Page;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses access-log lines with a single regular expression and converts each
 * matching line into a {@link Page} record.
 *
 * <p>A line is expected to carry eleven whitespace-separated fields: client
 * address, virtual host, username, a bracketed timestamp, a quoted request
 * ({@code method uri protocol}), status code, byte count, a quoted referer
 * and a quoted user agent.
 *
 * <p>Originally richja Feb 8, 2006
 */
public class RegexCommonLogParser implements LogParser {

    /** Joda-Time pattern of the bracketed timestamp, e.g. {@code 08/Feb/2006:13:45:10 +0000}. */
    private static final String FORMAT = "dd/MMM/yyyy:HH:mm:ss Z";

    /**
     * Regular expression for one log line. Capture groups:
     * 1 client address, 2 virtual host, 3 username, 4 timestamp (without the
     * brackets), 5 request method, 6 request URI, 7 protocol, 8 status code,
     * 9 byte count, 10 referer, 11 user agent.
     */
    private static final String PATTERN = "^(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+\\[(.*?)\\]\\s+\"(.*?)\\s+(.*?)\\s+(.*?)\"\\s+(\\d+)\\s+(\\S+)\\s+\"(.*?)\"\\s+\"(.*?)\"";

    /**
     * Timestamp parser. The locale is pinned to English because the month
     * abbreviation ("Feb", "Oct", ...) would otherwise be matched against the
     * JVM default locale and fail on non-English systems.
     */
    private static final DateTimeFormatter DATE_FORMAT =
            DateTimeFormat.forPattern(FORMAT).withLocale(Locale.ENGLISH);

    /** Compiled once and shared; a fresh {@link Matcher} is created per call. */
    private static final Pattern LINE_PATTERN = Pattern.compile(PATTERN);

    /** Creates a parser; all parsing state is held in shared constants. */
    public RegexCommonLogParser() {
    }

    /**
     * Parses a single log line.
     *
     * @param input the raw log line; may be {@code null}
     * @return the populated record, or {@code null} when the line is
     *         {@code null}, does not match the expected layout, or carries a
     *         timestamp that cannot be parsed (a "Can't parse" message is
     *         printed to standard output in those cases)
     * @throws ParseException declared on the signature; this method contains
     *         no {@code throw} of its own (presumably kept for the
     *         {@code LogParser} contract - verify against the interface)
     */
    public Page parseLine(String input) throws ParseException {
        if (input == null) {
            return unparseable(input);
        }

        Matcher matcher = LINE_PATTERN.matcher(input);
        if (!matcher.find()) {
            return unparseable(input);
        }

        Page record = new Page();
        record.setClientAddress(matcher.group(1));
        record.setVirtualHost(matcher.group(2));
        record.setUsername(matcher.group(3));
        try {
            record.setDate(DATE_FORMAT.parseDateTime(matcher.group(4)).toInstant());
        }
        catch (IllegalArgumentException e) {
            // The regex accepts any text between the brackets, so a malformed
            // timestamp is reported like any other unparseable line instead of
            // escaping as an unchecked exception.
            return unparseable(input);
        }
        record.setMethod(matcher.group(5));
        applyUri(record, matcher.group(6));
        // Group 7 is the protocol token (e.g. HTTP/1.0) and is not stored.
        record.setStatusCode(matcher.group(8));
        record.setBytes(matcher.group(9));
        applyReferer(record, matcher.group(10));
        record.setUserAgent(matcher.group(11));
        return record;
    }

    /** Reports a line that could not be turned into a record and yields {@code null}. */
    private static Page unparseable(String input) {
        System.out.println("Can't parse " + input);
        return null;
    }

    /** Stores the request URI, substituting the placeholder "invalid" when it is rejected. */
    private static void applyUri(Page record, String uri) {
        try {
            record.setUriString(uri);
        }
        catch (URISyntaxException e) {
            try {
                record.setUriString("invalid");
            }
            catch (URISyntaxException ignored) {
                // The placeholder is a fixed literal that is not expected to be
                // rejected; if it is, the record is left without a URI.
            }
        }
    }

    /** Stores the referer, substituting "-" when it is rejected. */
    private static void applyReferer(Page record, String referer) {
        try {
            record.setReferer(referer);
        }
        catch (URISyntaxException e) {
            try {
                record.setReferer("-");
            }
            catch (URISyntaxException ignored) {
                // The placeholder is a fixed literal that is not expected to be
                // rejected; if it is, the record is left without a referer.
            }
        }
    }
}
// See more files for this project here