Package org.olat.core.commons.services.text.impl.nutch

Examples of org.olat.core.commons.services.text.impl.nutch.NGramProfile$NGramEntry


        String lang = (String) (alllanguages.nextElement());

        InputStream is = this.getClass().getResourceAsStream("_resources/" + lang + "." + NGramProfile.FILE_EXTENSION);

        if (is != null) {
          NGramProfile profile = new NGramProfile(lang, minLength, maxLength);
          try {
            profile.load(is);
            languages.add(profile);
            supportedLanguages.add(lang);
            List<NGramEntry> ngrams = profile.getSorted();
            for (int i=0; i<ngrams.size(); i++) {
                NGramEntry entry = ngrams.get(i);
                List<NGramEntry> registered = tmpIdx.get(entry);
                if (registered == null) {
                    registered = new ArrayList<NGramEntry>();
                    tmpIdx.put(entry, registered);
                }
                registered.add(entry);
                entry.setProfile(profile);
            }
            list.append(" " + lang + "(" + ngrams.size() + ")");
            is.close();
          } catch (IOException e1) {
            log.error("", e1);
          }
        }
      }
      // transform all ngrams lists to arrays for performances
      Iterator<NGramEntry> keys = tmpIdx.keySet().iterator();
      while (keys.hasNext()) {
        NGramEntry entry = keys.next();
        List<NGramEntry> l = tmpIdx.get(entry);
        if (l != null) {
          NGramEntry[] array = l.toArray(new NGramEntry[l.size()]);
          ngramsIdx.put(entry.getSeq(), array);
        }
      }
      log.info(list.toString());
      // Create the suspect profile
      suspect = new NGramProfile("suspect", minLength, maxLength);
    } catch (Exception e) {
      log.error("", e);
    }
  }
View Full Code Here


    while (iter.hasNext()) {
        searched = iter.next();
        NGramEntry[] ngrams = ngramsIdx.get(searched.getSeq());
        if (ngrams != null) {
            for (int j=0; j<ngrams.length; j++) {
                NGramProfile profile = ngrams[j].getProfile();
                Float pScore = scores.get(profile);
                if (pScore == null) {
                    pScore = new Float(0);
                }
                float plScore = pScore.floatValue();
                plScore += ngrams[j].getFrequency() + searched.getFrequency();
                scores.put(profile, new Float(plScore));
                if (plScore > topscore) {
                    topscore = plScore;
                    lang = profile.getName();
                }
            }
        }
    }
    return lang;
View Full Code Here

   * @param is is the stream to read
   * @param encoding is the encoding of stream
   */
  public static NGramProfile create(String name, InputStream is, String encoding) {

    NGramProfile newProfile = new NGramProfile(name, ABSOLUTE_MIN_NGRAM_LENGTH,
                                                     ABSOLUTE_MAX_NGRAM_LENGTH);
    BufferedInputStream bis = new BufferedInputStream(is);

    byte buffer[] = new byte[4096];
    StringBuilder text = new StringBuilder();
    int len;

    try {
      while ((len = bis.read(buffer)) != -1) {
        text.append(new String(buffer, 0, len, encoding));
      }
    } catch (IOException e) {
      e.printStackTrace();
    }

    newProfile.analyze(text);
    return newProfile;
  }
View Full Code Here

      case CREATE:

        File f = new File(filename);
        FileInputStream fis = new FileInputStream(f);
        NGramProfile newProfile = NGramProfile.create(profilename, fis, encoding);
        fis.close();
        f = new File(profilename + "." + FILE_EXTENSION);
        FileOutputStream fos = new FileOutputStream(f);
        newProfile.save(fos);
        System.out.println("new profile " + profilename + "." + FILE_EXTENSION + " was created.");
        break;

      case SIMILARITY:

        f = new File(filename);
        fis = new FileInputStream(f);
        newProfile = NGramProfile.create(filename, fis, encoding);
        newProfile.normalize();

        f = new File(filename2);
        fis = new FileInputStream(f);
        NGramProfile newProfile2 = NGramProfile.create(filename2, fis, encoding);
        newProfile2.normalize();
        System.out.println("Similarity is " + newProfile.getSimilarity(newProfile2));
        break;

      case SCORE:
        f = new File(filename);
        fis = new FileInputStream(f);
        newProfile = NGramProfile.create(filename, fis, encoding);

        f = new File(profilename + "." + FILE_EXTENSION);
        fis = new FileInputStream(f);
        NGramProfile compare = new NGramProfile(profilename,
                                                DEFAULT_MIN_NGRAM_LENGTH,
                                                DEFAULT_MAX_NGRAM_LENGTH);
        compare.load(fis);
        System.out.println("Score is " + compare.getSimilarity(newProfile));
        break;

      }

    } catch (Exception e) {
View Full Code Here

TOP

Related Classes of org.olat.core.commons.services.text.impl.nutch.NGramProfile$NGramEntry

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.