Package ivory.core.data.dictionary

Examples of ivory.core.data.dictionary.DefaultFrequencySortedDictionary


    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path termsFilePath = new Path("etc/trec-index-terms.dat");
    Path termIDsFilePath = new Path("etc/trec-index-termids.dat");
    Path idToTermFilePath = new Path("etc/trec-index-termid-mapping.dat");

    DefaultFrequencySortedDictionary dictionary =
        new DefaultFrequencySortedDictionary(termsFilePath, termIDsFilePath, idToTermFilePath, fs);

    assertEquals(312232, dictionary.size());
    assertEquals("page", dictionary.getTerm(1));
    assertEquals("time", dictionary.getTerm(2));
    assertEquals("will", dictionary.getTerm(3));
    assertEquals("year", dictionary.getTerm(4));
    assertEquals("nikaan", dictionary.getTerm(100000));

    assertEquals(1, dictionary.getId("page"));
    assertEquals(2, dictionary.getId("time"));
    assertEquals(3, dictionary.getId("will"));
    assertEquals(4, dictionary.getId("year"));
    assertEquals(100000, dictionary.getId("nikaan"));
   
    assertEquals(null, dictionary.getTerm(312233));

    Iterator<String> iter = dictionary.iterator();
    assertTrue(iter.hasNext());
    assertEquals("page", iter.next());
    assertTrue(iter.hasNext());
    assertEquals("time", iter.next());
    assertTrue(iter.hasNext());
    assertEquals("will", iter.next());
    assertTrue(iter.hasNext());
    assertEquals("year", iter.next());
    assertTrue(iter.hasNext());

    int cnt = 0;
    for (@SuppressWarnings("unused") String s : dictionary) {
      cnt++;
    }
    assertEquals(dictionary.size(), cnt);

    cnt = 0;
    iter = dictionary.iterator();
    while(iter.hasNext()) {
      cnt++;
      iter.next();
    }
    assertEquals(dictionary.size(), cnt);
  }
View Full Code Here


    postingsIndex = new IntPostingsForwardIndex(indexPath, fs);
    LOG.info(" - Number of terms: " + readCollectionTermCount());
    LOG.info("Done!");

    try {
      termidMap = new DefaultFrequencySortedDictionary(new Path(getIndexTermsData()),
          new Path(getIndexTermIdsData()), new Path(getIndexTermIdMappingData()), fs);
    } catch (Exception e) {
      throw new ConfigurationException("Error initializing dictionary!");
    }
View Full Code Here

    postingsIndex = new IntPostingsForwardIndex(indexPath, fs);
    LOG.info(" - Number of terms: " + readCollectionTermCount());
    LOG.info("Done!");

    try {
      termidMap = new DefaultFrequencySortedDictionary(new Path(getIndexTermsData()),
          new Path(getIndexTermIdsData()), new Path(getIndexTermIdMappingData()), fs);
    } catch (Exception e) {
      throw new ConfigurationException("Error initializing dictionary!");
    }
View Full Code Here

        LOG.info(" - id: " + pathMapping.get(termidsFile));
        LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
        LOG.info(" - df data: " + pathMapping.get(dfFile));

        try{
          dict = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
          dfTable = new DfTableArray(pathMapping.get(dfFile), FileSystem.getLocal(conf));
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error loading Terms File for dictionary from "+localFiles[0]);
View Full Code Here

        en2DeProbs = new TTable_monolithic_IFAs(fs2, new Path(e2fttableFile), true);
      } catch (IOException e) {
        e.printStackTrace();
     

      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
View Full Code Here

    eScoreFn = (ScoringModel) new Bm25();
    eScoreFn.setAvgDocLength(lang2AvgSentLen.get(eLang));        //average sentence length = heuristic based on De-En data
    eScoreFn.setDocCount(env.readCollectionDocumentCount());

    dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs);
    dfTable = new DfTableArray(new Path(env.getDfByTermData()), fs);
  }
View Full Code Here

    eScoreFn = (ScoringModel) new Bm25();
    eScoreFn.setAvgDocLength(lang2AvgSentLen.get(eLang));        //average sentence length = heuristic based on De-En data
    eScoreFn.setDocCount(env.readCollectionDocumentCount());

    dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), localFs);
    dfTable = new DfTableArray(new Path(env.getDfByTermData()), localFs);
  }
View Full Code Here

        LOG.info(" - id: " + pathMapping.get(termidsFile));
        LOG.info(" - idToTerms: " + pathMapping.get(idToTermFile));
        LOG.info(" - df data: " + pathMapping.get(dfFile));

        try{
          dict = new DefaultFrequencySortedDictionary(pathMapping.get(termsFile),
              pathMapping.get(termidsFile), pathMapping.get(idToTermFile), FileSystem.getLocal(conf));
          dfTable = new DfTableArray(pathMapping.get(dfFile), FileSystem.getLocal(conf));
        } catch (Exception e) {
          e.printStackTrace();
          throw new RuntimeException("Error loading Terms File for dictionary from "+localFiles[0]);
View Full Code Here

        en2DeProbs = new TTable_monolithic_IFAs(fs2, new Path(e2fttableFile), true);
      } catch (IOException e) {
        e.printStackTrace();
     

      DefaultFrequencySortedDictionary dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs2);
      DfTableArray dfTable = new DfTableArray(new Path(dfByIntFile), fs2);

      HMapIFW transDfTable = CLIRUtils.translateDFTable(eVocab_e2f, fVocab_e2f, en2DeProbs, dict, dfTable);

      SequenceFile.Writer writer = SequenceFile.createWriter(fs2, conf, new Path(transDfFile), IntWritable.class, FloatWritable.class);
View Full Code Here

    eScoreFn = (ScoringModel) new Bm25();
    eScoreFn.setAvgDocLength(lang2AvgSentLen.get(eLang));        //average sentence length = heuristic based on De-En data
    eScoreFn.setDocCount(env.readCollectionDocumentCount());

    dict = new DefaultFrequencySortedDictionary(new Path(env.getIndexTermsData()), new Path(env.getIndexTermIdsData()), new Path(env.getIndexTermIdMappingData()), fs);
    dfTable = new DfTableArray(new Path(env.getDfByTermData()), fs);
  }
View Full Code Here

TOP

Related Classes of ivory.core.data.dictionary.DefaultFrequencySortedDictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.