Code Search for Developers
 
 
  

Hits.java from Kneobase at Krugle


Show Hits.java syntax highlighted

/*
 * Created on 12/08/2004
 *
 */
package com.kneobase.search.query;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.QueryTermExtractor;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.WeightedTerm;

import com.kneobase.KneobaseException;
import com.kneobase.document.Document;

/**
 * Single, lazily merged view over the hits of one or more
 * {@link LuceneHitsContainer}s.
 *
 * <p>Global positions are resolved on demand: each step takes, among the next
 * unread hit of every container, the one with the highest score. The resulting
 * order is therefore descending by score only if every container already
 * returns its hits in descending score order (assumed; not verified here).</p>
 *
 * <p>Resolution mutates internal state and this class performs no
 * synchronization.</p>
 *
 * @author Ernesto De Santis
 */
public final class Hits {

    /** How many positions past the last resolved one are resolved in a single batch. */
    private static final int STEP_RESOLVER = 50;

    /** The underlying result sets being merged. */
    private LuceneHitsContainer[] hitsContainers;

    /** Highest global position resolved so far; meaningful only once {@link #resolvingPos} is true. */
    private int lastResolvedPos = 0;

    /** For each container, the index of its next hit that has not been merged yet. */
    private int[] nextHitToRead;

    /** For each global position: the container index first, the position inside it second. */
    private HitPosLocation[] hitPos;

    /** Whether at least one position has been resolved. */
    private boolean resolvingPos = false;

    /**
     * Creates a view over a single container.
     *
     * @param oneHitsContainer the only result set to expose
     * @throws IOException declared for API compatibility
     */
    public Hits(LuceneHitsContainer oneHitsContainer) throws IOException {
        initHits(new LuceneHitsContainer[] { oneHitsContainer });
    }

    /**
     * Creates a merged view over several containers.
     *
     * @param hitsContainer the result sets to merge
     * @throws IOException declared for API compatibility
     */
    public Hits(LuceneHitsContainer[] hitsContainer) throws IOException {
        initHits(hitsContainer);
    }

    /**
     * Stores the containers and sizes the position table to the total number
     * of hits reported by all of them.
     *
     * @param hitsContainers the result sets to merge
     */
    private void initHits(LuceneHitsContainer[] hitsContainers) {
        this.hitsContainers = hitsContainers;
        // A new int[] is zero-filled, so every read cursor starts at the first hit.
        nextHitToRead = new int[hitsContainers.length];

        int size = 0;
        for (int i = 0; i < hitsContainers.length; i++) {
            size += hitsContainers[i].getHits().length();
        }
        hitPos = new HitPosLocation[size];
    }

    /**
     * Returns the document at the given global position.
     *
     * @param i document index number
     * @return the i-th document of the merged set, carrying its score
     * @throws KneobaseException if {@code i} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public Document get(int i) throws KneobaseException, IOException {
        HitPosLocation location = locate(i);

        org.apache.lucene.document.Document luceneDoc =
            hitsContainers[location.hitsObject].getHits().doc(location.position);
        float docScore =
            hitsContainers[location.hitsObject].getHits().score(location.position);

        return new Document(luceneDoc, docScore);
    }

    /**
     * Highlights a field using the default number of fragments, fragment size,
     * fragment separator and formatter from {@link HighlighterConfiguration}.
     *
     * @param doc doc index
     * @param field field to highlight
     * @return the highlighted text, or {@code null} if the document has no text for the field
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field) throws KneobaseException, IOException {
        int maxNumFragmentsRequired = HighlighterConfiguration.DEFAULT_MAX_FRAGMENTS;
        int fragmentSize = HighlighterConfiguration.DEFAULT_FRAGMENTS_SIZE;

        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize);
    }

    /**
     * Highlights a field using the default fragment separator and formatter.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size in chars
     * @return the highlighted text, or {@code null} if the document has no text for the field
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize) throws KneobaseException, IOException {
        String textBetweenFragments = HighlighterConfiguration.DEFAULT_TEXT_BETWEEN_FRAGMENTS;
        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize, textBetweenFragments);
    }

    /**
     * Highlights a field using the default formatter.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size in chars
     * @param textBetweenFragments text to insert between fragments, usually "..."
     * @return the highlighted text, or {@code null} if the document has no text for the field
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize, String textBetweenFragments) throws KneobaseException, IOException {
        Formatter formatter = HighlighterConfiguration.getDefaultFormatter();
        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize, textBetweenFragments, formatter);
    }

    /**
     * Highlights the query terms inside one field of a hit.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size in chars
     * @param textBetweenFragments text to insert between fragments, usually "..."
     * @param formatter formatter used to mark the terms; when {@code null} the
     *        highlighter is built without an explicit formatter
     *
     * @return the highlighted text, or {@code null} if the document has no text for the field
     *
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize, String textBetweenFragments, Formatter formatter) throws KneobaseException, IOException {
        // get() validates the index and guarantees hitPos[doc] is resolved.
        Document document = get(doc);

        // Nothing to highlight: bail out before building any highlighter objects.
        String text = document.get(field);
        if (text == null) {
            return null;
        }

        LuceneHitsContainer hitsContainer = hitsContainers[hitPos[doc].hitsObject];
        org.apache.lucene.search.Query lQuery = hitsContainer.getQuery();

        // FIXME: highlighting the "body" field while the query targets "bulk" makes the
        // field-restricted scorer fail, so the unrestricted scorer is used instead.
        //QueryScorer queryScorer = getQueryScorer(lQuery, field);
        QueryScorer queryScorer = new QueryScorer(lQuery);

        Highlighter highlighter;
        if (formatter != null) {
            highlighter = new Highlighter(formatter, queryScorer);
        } else {
            highlighter = new Highlighter(queryScorer);
        }
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

        Analyzer analyzer = hitsContainer.getAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, textBetweenFragments);
    }

    /**
     * Returns the search score of the document at the given global position.
     *
     * @param i doc index
     * @return search score for the i-th document
     *
     * @throws KneobaseException if {@code i} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public float score(int i) throws KneobaseException, IOException {
        HitPosLocation location = locate(i);
        return hitsContainers[location.hitsObject].getHits().score(location.position);
    }

    /**
     * Returns the total number of hits, summed over all containers at
     * construction time.
     *
     * @return the number of hits
     */
    public int length() {
        return hitPos.length;
    }

    /**
     * Builds a QueryScorer that only highlights the terms searched in the
     * given field. A term is kept when the text {@code field:term} occurs in
     * the query's string form.
     *
     * <p>Currently not called: its only call site in
     * {@code getHighLightedText} is commented out.</p>
     *
     * @param lQuery query to extract terms from
     * @param field field to highlight
     * @return a scorer restricted to the terms of {@code field}
     */
    private QueryScorer getQueryScorer(org.apache.lucene.search.Query lQuery, String field) {
        String strQuery = lQuery.toString();
        String fieldPrefix = field + ":";
        WeightedTerm[] allTerms = QueryTermExtractor.getTerms(lQuery);

        ArrayList fieldTerms = new ArrayList();
        for (int i = 0; i < allTerms.length; i++) {
            if (strQuery.indexOf(fieldPrefix + allTerms[i].getTerm()) != -1) {
                fieldTerms.add(allTerms[i]);
            }
        }
        return new QueryScorer((WeightedTerm[]) fieldTerms.toArray(new WeightedTerm[0]));
    }

    /**
     * Returns the location of a global position, resolving it first if needed.
     *
     * @param pos global hit position
     * @return the container index and inner position for {@code pos}
     * @throws KneobaseException if {@code pos} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    private HitPosLocation locate(int pos) throws KneobaseException, IOException {
        if (!isResolvedPos(pos)) {
            resolvePos(pos);
        }
        return hitPos[pos];
    }

    /**
     * Tells whether a global position has already been resolved.
     *
     * @param pos global hit position
     * @return {@code true} if the location of {@code pos} is already known
     * @throws KneobaseException if {@code pos} is outside {@code [0, length())}
     */
    private boolean isResolvedPos(int pos) throws KneobaseException {
        if (pos < 0 || pos >= length()) {
            throw new KneobaseException("Invalid hit position: " + pos);
        }
        return hitPos[pos] != null;
    }

    /**
     * Resolves every global position from the first unresolved one up to the
     * target returned by {@link #getPosToBeResolved(int)}, so that
     * {@code pos} becomes available.
     *
     * <p>Progress is committed after each position. If reading a score fails
     * part-way, the read cursors and {@code lastResolvedPos} stay consistent
     * and a later call resumes where this one stopped.</p>
     *
     * @param pos global hit position that must end up resolved; callers
     *        validate it beforehand
     * @throws IOException if a score cannot be read from the underlying hits
     */
    private void resolvePos(int pos) throws IOException {
        if (resolvingPos && pos <= lastResolvedPos) {
            return;
        }

        // Computed once up front: it depends on lastResolvedPos, which is
        // updated inside the loop below.
        final int lastToResolve = getPosToBeResolved(pos);
        final int firstToResolve = resolvingPos ? lastResolvedPos + 1 : 0;

        for (int i = firstToResolve; i <= lastToResolve; i++) {
            int bestHits = -1;
            float bestScore = -1;
            for (int j = 0; j < nextHitToRead.length; j++) {
                if (nextHitToRead[j] >= hitsContainers[j].getHits().length()) {
                    continue; // this container has no unread hits left
                }
                float candidateScore = hitsContainers[j].getHits().score(nextHitToRead[j]);
                // The first available container is always accepted, so an exhausted
                // one can never be chosen by default; ties keep the earlier container.
                if (bestHits < 0 || candidateScore > bestScore) {
                    bestScore = candidateScore;
                    bestHits = j;
                }
            }
            if (bestHits < 0) {
                throw new IllegalStateException(
                    "No unread hits left while resolving position " + i + " of " + length());
            }

            hitPos[i] = new HitPosLocation(bestHits, nextHitToRead[bestHits]);
            nextHitToRead[bestHits]++;

            // Commit progress together with the cursor advance.
            lastResolvedPos = i;
            resolvingPos = true;
        }
    }

    /**
     * Returns the position up to which hits must be resolved when position
     * {@code pos} is requested: the larger of {@code pos} and one batch past
     * the last resolved position, capped at the last valid position.
     *
     * @param pos requested global hit position
     * @return the last position to resolve
     */
    private int getPosToBeResolved(int pos) {
        int batchEnd = lastResolvedPos + STEP_RESOLVER;
        if (batchEnd >= length()) {
            return length() - 1;
        }
        return Math.max(batchEnd, pos);
    }

    /**
     * Location of one merged hit: which container it comes from and its
     * position inside that container.
     */
    static class HitPosLocation {
        /** Index into the containers array. */
        public int hitsObject;
        /** Position of the hit inside its container. */
        public int position;

        public HitPosLocation(int hObject, int pos) {
            hitsObject = hObject;
            position = pos;
        }

    }

}





See more files for this project here

Kneobase

Kneobase is an enterprise search engine based on the Lucene search engine and the Spring framework. It enables full-text search across many different content sources. It is highly adaptable out of the box and has a pluggable architecture.

Project homepage: http://sourceforge.net/projects/kneobase
Programming language(s): Java,XML
License: other

  BooleanClause.java
  BooleanQuery.java
  CategoryFilter.java
  FieldFilter.java
  Filter.java
  FullTextQuery.java
  HighlighterConfiguration.java
  Hits.java
  LanguageFilter.java
  LuceneHitsContainer.java
  MultiFilter.java
  NaturalQuery.java
  Query.java
  QueryException.java
  QueryFilter.java
  SourceFilter.java
  Term.java
  TermQuery.java
  UnsupportedLanguageException.java
  WildcardQuery.java