Package org.apache.lucene.util

Examples of org.apache.lucene.util.PriorityQueue


            // nothing to highlight
            return createDefaultExcerpt(text, excerptStart, excerptEnd,
                    fragmentStart, fragmentEnd, surround * 2);
        }

        PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
        for (int i = 0; i < offsets.length; i++) {
            if (offsets[i].getEndOffset() <= text.length()) {
                FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
                for (int j = i + 1; j < offsets.length; j++) {
                    if (offsets[j].getEndOffset() > text.length()) {
                        break;
                    }
                    if (!fi.add(offsets[j], text)) {
                        break;
                    }
                }
                bestFragments.insert(fi);
            }
        }

        if (bestFragments.size() == 0) {
            return createDefaultExcerpt(text, excerptStart, excerptEnd,
                    fragmentStart, fragmentEnd, surround * 2);
        }

        // retrieve fragment infos from queue and fill into list, least
        // fragment comes out first
        List<FragmentInfo> infos = new LinkedList<FragmentInfo>();
        while (bestFragments.size() > 0) {
            FragmentInfo fi = (FragmentInfo) bestFragments.pop();
            infos.add(0, fi);
        }

        Map<TermVectorOffsetInfo, Object> offsetInfos = new IdentityHashMap<TermVectorOffsetInfo, Object>();
        // remove overlapping fragment infos
View Full Code Here


      (narrative==null?"":narrative)      + newline + newline +
      "</top>";
  }
 
  private String [] bestTerms(String field,int numTerms) throws IOException {
    PriorityQueue pq = new TermsDfQueue(numTerms);
    IndexReader ir = IndexReader.open(dir);
    try {
      int threshold = ir.maxDoc() / 10; // ignore words too common.
      TermEnum terms = ir.terms(new Term(field,""));
      while (terms.next()) {
        if (!field.equals(terms.term().field())) {
          break;
        }
        int df = terms.docFreq();
        if (df<threshold) {
          String ttxt = terms.term().text();
          pq.insert(new TermDf(ttxt,df));
        }
      }
    } finally {
      ir.close();
    }
    String res[] = new String[pq.size()];
    int i = 0;
    while (pq.size()>0) {
      TermDf tdf = (TermDf) pq.pop();
      res[i++] = tdf.word;
      System.out.println(i+".   word:  "+tdf.df+"   "+tdf.word);
    }
    return res;
  }
View Full Code Here

  /**
   * @see #retrieveInterestingTerms(java.io.Reader)
   */
  public String [] retrieveInterestingTerms(int docNum) throws IOException{
    ArrayList al = new ArrayList( maxQueryTerms);
    PriorityQueue pq = retrieveTerms(docNum);
    Object cur;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
      al.add( ar[ 0]); // the 1st entry is the interesting word
    }
    String[] res = new String[ al.size()];
    return (String[]) al.toArray( res);
View Full Code Here

   * @see #retrieveTerms(java.io.Reader)
   * @see #setMaxQueryTerms
   */
  public String[] retrieveInterestingTerms( Reader r) throws IOException {
    ArrayList al = new ArrayList( maxQueryTerms);
    PriorityQueue pq = retrieveTerms( r);
    Object cur;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
      al.add( ar[ 0]); // the 1st entry is the interesting word
    }
    String[] res = new String[ al.size()];
    return (String[]) al.toArray( res);
View Full Code Here

      (narrative==null?"":narrative)      + newline + newline +
      "</top>";
  }
 
  private String [] bestTerms(String field,int numTerms) throws IOException {
    PriorityQueue pq = new TermsDfQueue(numTerms);
    IndexReader ir = IndexReader.open(dir);
    try {
      int threshold = ir.maxDoc() / 10; // ignore words too common.
      TermEnum terms = ir.terms(new Term(field,""));
      while (terms.next()) {
        if (!field.equals(terms.term().field())) {
          break;
        }
        int df = terms.docFreq();
        if (df<threshold) {
          String ttxt = terms.term().text();
          pq.insert(new TermDf(ttxt,df));
        }
      }
    } finally {
      ir.close();
    }
    String res[] = new String[pq.size()];
    int i = 0;
    while (pq.size()>0) {
      TermDf tdf = (TermDf) pq.pop();
      res[i++] = tdf.word;
      System.out.println(i+".   word:  "+tdf.df+"   "+tdf.word);
    }
    return res;
  }
View Full Code Here

  /**
   * @see #retrieveInterestingTerms(java.io.Reader)
   */
  public String [] retrieveInterestingTerms(int docNum) throws IOException{
    ArrayList al = new ArrayList( maxQueryTerms);
    PriorityQueue pq = retrieveTerms(docNum);
    Object cur;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
      al.add( ar[ 0]); // the 1st entry is the interesting word
    }
    String[] res = new String[ al.size()];
    return (String[]) al.toArray( res);
View Full Code Here

   * @see #retrieveTerms(java.io.Reader)
   * @see #setMaxQueryTerms
   */
  public String[] retrieveInterestingTerms( Reader r) throws IOException {
    ArrayList al = new ArrayList( maxQueryTerms);
    PriorityQueue pq = retrieveTerms( r);
    Object cur;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
      al.add( ar[ 0]); // the 1st entry is the interesting word
    }
    String[] res = new String[ al.size()];
    return (String[]) al.toArray( res);
View Full Code Here

    /**
     * @see #retrieveInterestingTerms(java.io.Reader)
     */
    public String[] retrieveInterestingTerms(int docNum) throws IOException {
        ArrayList al = new ArrayList(maxQueryTerms);
        PriorityQueue pq = retrieveTerms(docNum);
        Object cur;
        // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
        int lim = maxQueryTerms;
        // we just want to return the top words
        while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
            al.add(ar[0]); // the 1st entry is the interesting word
        }
        String[] res = new String[al.size()];
        return (String[]) al.toArray(res);
View Full Code Here

     * @see #retrieveTerms(java.io.Reader)
     * @see #setMaxQueryTerms
     */
    public String[] retrieveInterestingTerms(Reader r) throws IOException {
        ArrayList al = new ArrayList(maxQueryTerms);
        PriorityQueue pq = retrieveTerms(r);
        Object cur;
        // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
        int lim = maxQueryTerms;
        // we just want to return the top words
        while (((cur = pq.pop()) != null) && lim-- > 0) {
            Object[] ar = (Object[]) cur;
            al.add(ar[0]); // the 1st entry is the interesting word
        }
        String[] res = new String[al.size()];
        return (String[]) al.toArray(res);
View Full Code Here

      {
         // nothing to highlight
         return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
      }

      PriorityQueue bestFragments = new FragmentInfoPriorityQueue(maxFragments);
      for (int i = 0; i < offsets.length; i++)
      {
         if (offsets[i].getEndOffset() <= text.length())
         {
            FragmentInfo fi = new FragmentInfo(offsets[i], surround * 2);
            for (int j = i + 1; j < offsets.length; j++)
            {
               if (offsets[j].getEndOffset() > text.length())
               {
                  break;
               }
               if (!fi.add(offsets[j], text))
               {
                  break;
               }
            }
            bestFragments.insert(fi);
         }
      }

      if (bestFragments.size() == 0)
      {
         return createDefaultExcerpt(text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, surround * 2);
      }

      // retrieve fragment infos from queue and fill into list, least
      // fragment comes out first
      List infos = new LinkedList();
      while (bestFragments.size() > 0)
      {
         FragmentInfo fi = (FragmentInfo)bestFragments.pop();
         infos.add(0, fi);
      }

      Map offsetInfos = new IdentityHashMap();
      // remove overlapping fragment infos
View Full Code Here

TOP

Related Classes of org.apache.lucene.util.PriorityQueue

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.