Code Search for Developers
 
 
  

Teastats.java from Tea Stats at Krugle


Show Teastats.java syntax highlighted

/*
 * This file is distributed under the GPL v2 as part of teastats site statistics package
 * http://teastats.sourceforge.net
 */
package net.time4tea.webstats.main;

import com.maxmind.geoip.LookupService;
import com.opensymphony.oscache.general.GeneralCacheAdministrator;
import com.opensymphony.oscache.plugins.diskpersistence.HashDiskPersistenceListener;
import net.time4tea.webstats.analyser.composite.session.*;
import net.time4tea.webstats.analyser.extractor.CalendarFinder;
import net.time4tea.webstats.analyser.extractor.DayOfWeekFinder;
import net.time4tea.webstats.analyser.extractor.key.*;
import net.time4tea.webstats.analyser.extractor.value.SessionSizeValueFinder;
import net.time4tea.webstats.enhancer.geoip.LocationEnhancer;
import net.time4tea.webstats.enhancer.identify.*;
import net.time4tea.webstats.filter.http.StatusCodeFilter;
import net.time4tea.webstats.geoip.GeoIPLocationLookupDelegate;
import net.time4tea.webstats.identify.SearchEngineTermEnhancer;
import net.time4tea.webstats.jms.EnvironmentException;
import net.time4tea.webstats.parser.RegexCommonLogParser;
import net.time4tea.webstats.parser.TimingParser;
import net.time4tea.webstats.pipeline.Pipeline;
import net.time4tea.webstats.process.DelegatingSessionProcessor;
import net.time4tea.webstats.process.Processor;
import net.time4tea.webstats.process.ProcessorTimer;
import net.time4tea.webstats.record.Page;
import net.time4tea.webstats.session.Session;
import net.time4tea.webstats.source.FilteringPageSource;
import net.time4tea.webstats.source.NaiveHostBasedSessionConverter;
import net.time4tea.webstats.source.ParsingPageViewSource;
import net.time4tea.webstats.source.Source;
import net.time4tea.webstats.statistic.repo.CachingStatisticMapRepository;
import net.time4tea.webstats.statistic.repo.DiskBasedStatisticMapRepository;
import net.time4tea.webstats.statistic.repo.MonthlyStatisticMapRepository;
import net.time4tea.webstats.statistic.repo.StatisticMapRepository;
import net.time4tea.webstats.util.FileHelper;
import org.hamcrest.Matcher;
import static org.hamcrest.core.AllOf.allOf;
import static org.hamcrest.core.IsNot.not;
import static org.hamcrest.number.OrderingComparisons.lessThan;
import org.hamcrest.text.pattern.Patterns;
import static org.hamcrest.text.pattern.Patterns.*;
import org.joda.time.DateTimeFieldType;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Properties;

/**
 * Command-line driver that wires a log reader into a session-processing pipeline,
 * syncs the resulting statistics into the repository handed to the constructor and
 * then asks {@link Teasite} to generate its output from that repository.
 *
 * Initially by: james on 30-Jul-2006
 */
public class Teastats extends AbstractTea {
    /** Supplies the raw log lines; this class never closes it, the creator does. */
    private final BufferedReader reader;

    /**
     * @param reader     source of the log lines to analyse; not closed by this class
     * @param repository statistics repository passed on to {@link AbstractTea}
     */
    public Teastats(BufferedReader reader, StatisticMapRepository repository) {
        super(repository);
        this.reader = reader;
    }

    /**
     * Builds the page/session sources and the processor chain, runs the pipeline,
     * syncs the repository, prints timing and session count to standard out and
     * finally triggers {@link Teasite#generate()}.
     *
     * @throws Exception if any stage of the pipeline or the site generation fails
     */
    public void run() throws Exception {
        // Pages are parsed from the reader; anything matching status(302) is filtered out.
        Source<Page> pageViewSource = new FilteringPageSource(
                new ParsingPageViewSource(new TimingParser(new RegexCommonLogParser()), reader),
                not(status(302)));
        Source<Session> sessionSource = new NaiveHostBasedSessionConverter(pageViewSource);

        Properties cacheProperties = new Properties();
        cacheProperties.setProperty("cache.memory", "true");
        cacheProperties.setProperty("cache.capacity", "2000");
        cacheProperties.setProperty("cache.unlimited.disk", "true");
        cacheProperties.setProperty("cache.persistence.class", HashDiskPersistenceListener.class.getName());
        cacheProperties.setProperty("cache.path", new File(System.getProperty("java.io.tmpdir"),"hostip.cache").getAbsolutePath());

        // NOTE(review): 'admin' is only referenced by the commented-out HostnameEnhancer
        // line below. It is still constructed here because the constructor may have side
        // effects (e.g. initialising the cache) - confirm before removing.
        final GeneralCacheAdministrator admin = new GeneralCacheAdministrator(cacheProperties);

        // The instance initialiser registers the processing steps in the order they are listed.
        DelegatingSessionProcessor processor = new DelegatingSessionProcessor() {
            {
//                add(new DateFilter<Session>(new DateMidnight(2007,6,1).toInstant(), DateFilter.Comparison.GreaterThan));
                then(new LocationEnhancer(new GeoIPLocationLookupDelegate(new LookupService("src/resources/GeoLiteCity.dat"))));
                then(new CachingIdentifierEnhancer(new BrowserIdentifierEnhancer()));
                then(new CachingIdentifierEnhancer(new WormIdentifierEnhancer()));
                then(new CachingIdentifierEnhancer(new SearchEngineIdentifierEnhancer()));
                then(new CachingIdentifierEnhancer(new OSIdentifierEnhancer()));
                then(new CachingIdentifierEnhancer(new RobotIdentifierEnhancer()));
                then(new SearchEngineTermEnhancer());
//                add(new HostnameEnhancer(new OsCachingNameResolver(admin.getCache(), 30, new DnsNameResolver())));

                then(count("Page Hits", new ConstantKeyFinder<Session>(), new SessionSizeValueFinder()));
                then(new SessionCountAnalyser(repository));
                then(new AllHitsAnalyser(repository));

                // Worm sessions: counted, their pages counted, then passed to filter(discard(...)).
                then(count("Worms", when(sessionHasAttribute("worm")), counting("worm")));
                then(forPages(when(sessionHasAttribute("worm")), count("Worm Pages", path())));
                then(filter(discard(when(sessionHasAttribute("worm")))));

                // Robot sessions: same treatment as worms.
                then(count("Robots", when(sessionHasAttribute("robot")), counting("robot")));
                then(forPages(when(sessionHasAttribute("robot")), count("Robot Pages", path())));
                then(filter(discard(when(sessionHasAttribute("robot")))));

                // Sessions with fewer than two entries that carry a "searchengine" attribute
                // are tagged "bouncepage", counted, then passed to filter(discard(...)).
                then(tagWith("bouncepage", when(allOf(sessionSize(lessThan(2)), sessionHasAttribute("searchengine")))));
                then(count("Expensive Referals", when(sessionHasAttribute("bouncepage")), counting("searchengine")));
                then(count("Expensive Search Terms", when(sessionHasAttribute("bouncepage")), counting("searchterm")));
                then(count("Expensive Landing Pages", when(sessionHasAttribute("bouncepage")), firstPage()));
                then(filter(discard(when(sessionHasAttribute("bouncepage")))));

                then(count("Session Duration", new DurationBucketKeyFinder()));
                then(new SessionDurationAnalyser(repository));
                then(count("Session Bandwidth", new SessionBandwidthKeyFinder()));
                then(new HowManyClicksPerSessionAnalyser(repository));
                then(new SearchEngineTermWordAnalyser(repository));
                then(count("Search Engine Terms", when(sessionHasAttribute("searchterm")), counting("searchterm")));
                then(count("Search Engines", when(sessionHasAttribute("searchengine")), counting("searchengine")));

                // Per-page statistics.
                then(forPages(
                        count("Successful Pages", when(success()), path()),
                        count("Unsuccessful Pages", when(allOf(not(success()), not(status(404)))), path()),
                        count("Missing Pages", when(status(404)), path()),
                        count("Server Error Pages", when(status(500)), path()),
                        count("File Types", new FileTypeKeyFinder()),
                        count("Browsers Causing Errors", new StatusCodeFilter(400, 599), new BrowserKeyFinder()),
                        count("Response Codes", new StatusKeyFinder()),
                        count("Hour Of Day", new CalendarFinder<Page>(DateTimeFieldType.hourOfDay())),
                        count("Day Of Week", new DayOfWeekFinder<Page>()),
                        count("Day Of Month", new CalendarFinder<Page>(DateTimeFieldType.dayOfMonth())),
                        count("Methods", new MethodKeyFinder())
                ));

                then(count("Countries", when(sessionHasAttribute("location")), new LocationKeyFinder<Session>("countryName")));
                then(count("Cities", when(sessionHasAttribute("location")), new LocationKeyFinder<Session>("city")));

                then(count("Referers", discard(referredBy(sequence(Patterns.text("your.site.net"), zeroOrMore(anyCharacter())))), new RefererHostKeyFinder<Session>()));
                then(count("Operating System", counting("os")));
            }
        };

        ProcessorTimer timingProcessor = new ProcessorTimer("Pipeline", processor);
        Pipeline<Session> pipeline = new Pipeline<Session>(sessionSource, timingProcessor);
        pipeline.process();
        repository.sync();

        System.out.println("timingProcessor = " + timingProcessor);
        System.out.println("Got " + pipeline.count() + " sessions ");

        new Teasite(repository).generate();

    }

    /**
     * Adapts a matcher to a {@link Processor}: {@code process} returns exactly what
     * {@code matcher.matches} returns for the session.
     * NOTE(review): presumably a {@code false} result stops later steps from seeing
     * the session - confirm against DelegatingSessionProcessor.
     *
     * @param matcher decides the result for each session
     * @return a processor delegating to {@code matcher}
     */
    private Processor<Session> filter(final Matcher<Session> matcher) {
        return new Processor<Session>() {
            public boolean process(Session thing) throws EnvironmentException {
                return matcher.matches(thing);
            }
        };
    }

    /**
     * Analyses the log file named by {@code args[0]} into a freshly recreated
     * {@code repository} directory.
     *
     * @param args {@code args[0]} is the path of the log file to read
     * @throws IllegalArgumentException if no log file path is supplied
     * @throws Exception                if reading, analysing or generating fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: Teastats <access-log-file>");
        }

        // Open the log before touching the repository, so that an unreadable path
        // fails without wiping previously generated statistics.
        BufferedReader reader = new BufferedReader(new FileReader(args[0]));
        try {
            File repoDir = new File("repository");
            FileHelper.recursivelyDelete(repoDir);
            DiskBasedStatisticMapRepository ondisk = new DiskBasedStatisticMapRepository(repoDir);
//            StatisticMapRepository repository = new SingleStatisticMapRepository(new CachingStatisticMapRepository(ondisk));
            CachingStatisticMapRepository cachingRepository = new CachingStatisticMapRepository(ondisk);
            MonthlyStatisticMapRepository repository = new MonthlyStatisticMapRepository(cachingRepository);
            Teastats teastats = new Teastats(reader, repository);
            teastats.run();
        } finally {
            // The reader is created here, so it is released here whether or not run() succeeds.
            reader.close();
        }
    }
}




See more files for this project here

Tea Stats

Web log analyzer... Written in OO Perl, provides the usual host / page analysis. Can also do site graphing using graphviz, browser, os, worm and search engine identification, and country and session tracking.

Project homepage: http://sourceforge.net/projects/teastats
Programming language(s): Java
License: other

  AbstractTea.java
  Teasite.java
  Teastats.java