Step 1: Download Apache Lucene version 2.4 from the Apache Lucene release archive.
Step 2:
The constructor of the TermScorer class has to be changed. The path to this file is lucene/search/TermScorer.java. The first change is
to add an IndexReader as an argument to the constructor, so that the scorer can access the documents in the collection.
/**
 * Constructs a TermScorer. Extends the stock Lucene 2.4 constructor with an
 * {@code IndexReader} parameter, kept on the instance so that per-document
 * term statistics can be read at scoring time.
 *
 * @param weight     the query weight this scorer belongs to
 * @param td         iterator over the documents containing the term
 * @param reader     reader for the index being searched (new parameter)
 * @param similarity similarity implementation forwarded to the superclass
 * @param norms      encoded field-length norms for the term's field
 * @throws IOException if reading the index fails
 */
TermScorer(Weight weight, TermDocs td, IndexReader reader, Similarity similarity,
           byte[] norms) throws IOException {
  super(similarity);
  this.weight = weight;
  this.termDocs = td;
  this.norms = norms;
  this.weightValue = weight.getValue();
  this.reader = reader;
  // Pre-compute tf(i) * weight for small frequencies, exactly as stock Lucene does.
  for (int idx = 0; idx < SCORE_CACHE_SIZE; idx++) {
    scoreCache[idx] = getSimilarity().tf(idx) * weightValue;
  }
}
public Scorer scorer(IndexReader reader)throws IOException {TermDocs termDocs = reader.termDocs(term); if (termDocs == null) return null; return new TermScorer(this , termDocs,reader,similarity, reader.norms(term.field()));} |
public float getAvgFreq()throws IOException{String field = "Search_Field";TermFreqVector tfv= this .reader.getTermFreqVector(this .doc(), field);if (tfv!= null ) {this .setUd(tfv.size());int[] tfs=tfv.getTermFrequencies();int sum=0;for(int i= 0;i < tfv.size(); i++ ){ sum=sum+tfs[i]; } float avgFreq=(float )sum/tfv.size();return avgFreq;} else{ return 0f;}} |
public float getLengthNorm(float slope,float pivot)throws IOException{float den=(float )((1-slope)*pivot + (slope*this .getUd()));den=( float ) Math.sqrt(den);float lengthNorm=1/den;return lengthNorm;} |
public void setUd(int unique){this .Ud=unique;} |
public int getUd(){return this.Ud; } |
public static float getPivot(IndexReader reader,String field)throws IOException{int sum=0;for( int i= 0;i < reader.numDocs(); i++){TermFreqVector tfv= reader.getTermFreqVector(i, field); if(tfv!= null ){sum=sum+tfv.size(); } } float pivot=(float )sum/reader.numDocs();//System.out.println ("pivot = " + pivot);return pivot;} //end of method
|
public float score()throws IOException {
/* We use comments in order to use the changed code instead of default code. int f = freqs[pointer]; float raw = // compute tf(f)*weight f < SCORE_CACHE_SIZE // check cache ? scoreCache[f] // cache hit : getSimilarity().tf(f)*weightValue; // cache miss return raw * Similarity.decodeNorm(norms[doc]); // normalize for field */ //This is the changed code int f = freqs[pointer];float num=(float ) (1+Math.log10(f));float den=(float )(1+Math.log10(this .getAvgFreq()));float pivot_tf=num/den;float raw =f < SCORE_CACHE_SIZE ? pivot_tf*weightValue : pivot_tf*weightValue; return raw *this .getLengthNorm(0.35f, 45.0f);} |
public class pivotSearch{public static float getPivot(IndexReader reader,String field)throws IOException{int sum=0;for( int i= 0;i < reader.numDocs(); i++){TermFreqVector tfv= reader.getTermFreqVector(i, field); if(tfv!= null ){sum=sum+tfv.size(); } } float pivot=(float )sum/reader.numDocs();System.out.println ("pivot = " + pivot);return pivot;} //end of method
public static void main (String args[])throws IOException, ParseException{String index="index";IndexReader reader= null ;try {reader=IndexReader.open(index); } catch (CorruptIndexException e1) {e1.printStackTrace();}catch (IOException e1) {e1.printStackTrace();}String field = "Search_Field"; //calculate and print the pivot value getPivot(reader,field); reader.close(); } //end of main } //end of class |
public class pivotSearch{public static float getPivot(IndexReader reader,String field)throws IOException{int sum=0;for( int i= 0;i < reader.numDocs(); i++){TermFreqVector tfv= reader.getTermFreqVector(i, field); if(tfv!= null ){sum=sum+tfv.size(); } } float pivot=(float )sum/reader.numDocs();System.out.println ("pivot = " + pivot);return pivot;} //end of method
public static void main (String args[])throws IOException, ParseException{String index="index";IndexReader reader= null ;try {reader=IndexReader.open(index); } catch (CorruptIndexException e1) {e1.printStackTrace();}catch (IOException e1) {e1.printStackTrace();}String field = "Search_Field"; //calculate and print the pivot value getPivot(reader,field); Searcher searcher = new IndexSearcher(reader); Analyzer analyzer = new StandardAnalyzer(); QueryParser parser = new QueryParser(field, analyzer); Query query = parser.parse("Cystic hygroma"); TopDocs tp=searcher.search(query, 10); ScoreDoc[] docs = tp.scoreDocs; for ( int i= 0;i<10; i++){System.out.println ("the document with id= " + docs[i].doc + " has score ="+docs[i].score);} reader.close(); } //end of main } //end of class |