// Hits.java
/*
* Created on 12/08/2004
*
*/
package com.kneobase.search.query;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.QueryTermExtractor;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.WeightedTerm;
import com.kneobase.KneobaseException;
import com.kneobase.document.Document;
/**
 * Merged view over the hits of one or more Lucene searches.
 * <p>
 * Every {@link LuceneHitsContainer} contributes its own hits. A global
 * position is mapped lazily, in batches, to a (container, position inside the
 * container) pair: at each step the container whose next unread hit has the
 * highest score supplies the next global position.
 * <p>
 * NOTE(review): the merged list is only globally ordered by score if every
 * container already returns its hits in descending score order - confirm
 * against LuceneHitsContainer.
 * <p>
 * Instances hold mutable resolution state and perform no synchronization.
 *
 * @author Ernesto De Santis
 */
public final class Hits {

    /** Look-ahead, in positions, added to the last resolved position when a new batch is resolved. */
    private static final int STEP_RESOLVER = 50;

    /** Sources whose hits are merged; indexed by {@link HitPosLocation#hitsObject}. */
    private LuceneHitsContainer[] hitsContainers;

    /** Highest global position resolved so far; only meaningful once {@link #resolvingPos} is true. */
    private int lastResolvedPos = 0;

    /** For each container, the index of its next hit that has no global position yet. */
    private int[] nextHitToRead;

    /** Global position to location (hits container first, position second); null while unresolved. */
    private HitPosLocation[] hitPos;

    /** True once at least one global position has been resolved. */
    private boolean resolvingPos = false;

    /**
     * Creates a view over the hits of a single container.
     *
     * @param oneHitsContainer the only source of hits
     * @throws IOException declared for callers; not thrown by the visible code
     */
    public Hits(LuceneHitsContainer oneHitsContainer) throws IOException {
        initHits(new LuceneHitsContainer[] { oneHitsContainer });
    }

    /**
     * Creates a merged view over the hits of several containers.
     *
     * @param hitsContainer the sources of hits to merge
     * @throws IOException declared for callers; not thrown by the visible code
     */
    public Hits(LuceneHitsContainer[] hitsContainer) throws IOException {
        initHits(hitsContainer);
    }

    /**
     * Stores the containers and sizes the position table to the total number
     * of hits across all of them.
     *
     * @param hitsContainers the sources of hits to merge
     */
    private void initHits(LuceneHitsContainer[] hitsContainers) {
        this.hitsContainers = hitsContainers;
        // A new int[] is zero-filled, so every container starts at its first hit.
        nextHitToRead = new int[hitsContainers.length];
        int size = 0;
        for (int i = 0; i < hitsContainers.length; i++) {
            size += hitsContainers[i].getHits().length();
        }
        hitPos = new HitPosLocation[size];
    }

    /**
     * Returns the document at the given global position.
     *
     * @param i document index number
     * @return the document at position {@code i}, built from the Lucene
     *         document and its score
     * @throws KneobaseException if {@code i} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public Document get(int i) throws KneobaseException, IOException {
        if (!isResolvedPos(i)) {
            resolvePos(i);
        }
        HitPosLocation location = hitPos[i];
        org.apache.lucene.document.Document luceneDoc =
                hitsContainers[location.hitsObject].getHits().doc(location.position);
        return new Document(luceneDoc, score(i));
    }

    /**
     * Highlights a field using the default fragment count, fragment size,
     * fragment separator and formatter from {@code HighlighterConfiguration}.
     *
     * @param doc doc index
     * @param field field to highlight
     * @return the highlighted text, or {@code null} if the document has no
     *         value for {@code field}
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field) throws KneobaseException, IOException {
        int maxNumFragmentsRequired = HighlighterConfiguration.DEFAULT_MAX_FRAGMENTS;
        int fragmentSize = HighlighterConfiguration.DEFAULT_FRAGMENTS_SIZE;
        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize);
    }

    /**
     * Highlights a field using the default fragment separator and formatter.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size passed to the fragmenter
     * @return the highlighted text, or {@code null} if the document has no
     *         value for {@code field}
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize)
            throws KneobaseException, IOException {
        String textBetweenFragments = HighlighterConfiguration.DEFAULT_TEXT_BETWEEN_FRAGMENTS;
        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize, textBetweenFragments);
    }

    /**
     * Highlights a field using the default formatter.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size passed to the fragmenter
     * @param textBetweenFragments text to insert between fragments, usually "..."
     * @return the highlighted text, or {@code null} if the document has no
     *         value for {@code field}
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize,
            String textBetweenFragments) throws KneobaseException, IOException {
        Formatter formatter = HighlighterConfiguration.getDefaultFormatter();
        return getHighLightedText(doc, field, maxNumFragmentsRequired, fragmentSize, textBetweenFragments,
                formatter);
    }

    /**
     * Highlights the query terms found in one field of a document.
     *
     * @param doc doc index
     * @param field field to highlight
     * @param maxNumFragmentsRequired fragments to return
     * @param fragmentSize fragment size passed to the fragmenter
     * @param textBetweenFragments text to insert between fragments, usually "..."
     * @param formatter formatter used to mark the matches; when {@code null}
     *        the highlighter's own default is used
     * @return the highlighted text, or {@code null} if the document has no
     *         value for {@code field}
     * @throws KneobaseException if {@code doc} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public String getHighLightedText(int doc, String field, int maxNumFragmentsRequired, int fragmentSize,
            String textBetweenFragments, Formatter formatter) throws KneobaseException, IOException {
        Document document = get(doc);
        String text = document.get(field);
        if (text == null) {
            // Nothing to highlight; skip building the scorer, highlighter and analyzer.
            return null;
        }

        // get(doc) above guarantees that hitPos[doc] is resolved.
        LuceneHitsContainer hitsContainer = hitsContainers[hitPos[doc].hitsObject];
        org.apache.lucene.search.Query lQuery = hitsContainer.getQuery();

        // FIXME: highlighting on "body" while the query runs on "bulk" makes the
        // field-restricted query scorer fail, so getQueryScorer(lQuery, field) is
        // not used here and every term of the query is scored instead.
        QueryScorer queryScorer = new QueryScorer(lQuery);

        Highlighter highlighter;
        if (formatter != null) {
            highlighter = new Highlighter(formatter, queryScorer);
        } else {
            highlighter = new Highlighter(queryScorer);
        }
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

        Analyzer analyzer = hitsContainer.getAnalyzer();
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        return highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, textBetweenFragments);
    }

    /**
     * Returns the search score of the document at the given global position.
     *
     * @param i doc index
     * @return search score for document {@code i}
     * @throws KneobaseException if {@code i} is outside {@code [0, length())}
     * @throws IOException if the underlying hits cannot be read
     */
    public float score(int i) throws KneobaseException, IOException {
        if (!isResolvedPos(i)) {
            resolvePos(i);
        }
        HitPosLocation location = hitPos[i];
        return hitsContainers[location.hitsObject].getHits().score(location.position);
    }

    /**
     * Returns the total number of hits across all containers.
     *
     * @return the number of hits
     */
    public int length() {
        return hitPos.length;
    }

    /**
     * Builds a QueryScorer that only highlights the terms searched in the
     * given field. Currently not called; see the FIXME in
     * {@code getHighLightedText}.
     * <p>
     * A term is kept when the text {@code field:term} occurs anywhere in the
     * string form of the query (plain substring match).
     *
     * @param lQuery query to extract terms from
     * @param field field to highlight
     * @return a scorer restricted to the terms of {@code field}
     */
    private QueryScorer getQueryScorer(org.apache.lucene.search.Query lQuery, String field) {
        String strQuery = lQuery.toString();
        WeightedTerm[] weightedTermTemp = QueryTermExtractor.getTerms(lQuery);
        ArrayList bodyQueryTerms = new ArrayList();
        for (int i = 0; i < weightedTermTemp.length; i++) {
            String term = weightedTermTemp[i].getTerm();
            if (strQuery.indexOf(field + ":" + term) != -1) {
                bodyQueryTerms.add(weightedTermTemp[i]);
            }
        }
        WeightedTerm[] weightedTerms = (WeightedTerm[]) bodyQueryTerms.toArray(new WeightedTerm[0]);
        return new QueryScorer(weightedTerms);
    }

    /**
     * Tells whether a global position already has a resolved location.
     *
     * @param pos global position to check
     * @return {@code true} if {@code pos} has been resolved
     * @throws KneobaseException if {@code pos} is outside {@code [0, length())}
     */
    private boolean isResolvedPos(int pos) throws KneobaseException {
        if (pos < 0 || pos >= length()) {
            throw new KneobaseException("Invalid hit position: " + pos);
        }
        return hitPos[pos] != null;
    }

    /**
     * Resolves every global position from the first unresolved one up to the
     * position returned by {@link #getPosToBeResolved(int)}, which is at least
     * {@code pos}.
     * <p>
     * Progress is recorded after each resolved position, so an IOException
     * raised part-way through a batch leaves the bookkeeping consistent with
     * the positions that were actually filled.
     *
     * @param pos valid global position that must end up resolved
     * @throws IOException if a score cannot be read from the underlying hits
     */
    private void resolvePos(int pos) throws IOException {
        if (resolvingPos && pos <= lastResolvedPos) {
            return;
        }
        // Computed once up front: the target depends on lastResolvedPos, which
        // is advanced inside the loop.
        final int lastToResolve = getPosToBeResolved(pos);
        final int firstToResolve = resolvingPos ? lastResolvedPos + 1 : 0;

        for (int i = firstToResolve; i <= lastToResolve; i++) {
            int bestHits = -1;
            float bestScore = 0;
            for (int j = 0; j < nextHitToRead.length; j++) {
                if (nextHitToRead[j] >= hitsContainers[j].getHits().length()) {
                    continue; // this container has no unread hits left
                }
                float candidateScore = hitsContainers[j].getHits().score(nextHitToRead[j]);
                // Strict comparison: on equal scores the earlier container wins.
                if (bestHits < 0 || candidateScore > bestScore) {
                    bestScore = candidateScore;
                    bestHits = j;
                }
            }
            if (bestHits < 0) {
                // Only reachable if a container reports fewer hits than it did
                // when the position table was sized.
                throw new IllegalStateException("No unread hit left to fill position " + i);
            }
            hitPos[i] = new HitPosLocation(bestHits, nextHitToRead[bestHits]);
            nextHitToRead[bestHits]++;
            lastResolvedPos = i;
            resolvingPos = true;
        }
    }

    /**
     * Returns the position up to which hits must be resolved when position
     * {@code pos} is requested: the larger of {@code pos} and
     * {@code lastResolvedPos + STEP_RESOLVER}, or the last valid position when
     * that look-ahead would reach the end of the hits.
     *
     * @param pos requested global position
     * @return the last position to resolve
     */
    private int getPosToBeResolved(int pos) {
        int batchEnd = lastResolvedPos + STEP_RESOLVER;
        if (batchEnd >= length()) {
            return length() - 1;
        }
        return Math.max(batchEnd, pos);
    }

    /**
     * Location of one merged hit: which container it comes from and its
     * position inside that container's hits.
     */
    static final class HitPosLocation {

        /** Index into the enclosing view's array of hits containers. */
        public final int hitsObject;

        /** Position of the hit inside the selected container's hits. */
        public final int position;

        public HitPosLocation(int hObject, int pos) {
            hitsObject = hObject;
            position = pos;
        }
    }
}
// See more files for this project here