Examples of DictionaryLookup

morfologik.stemming.DictionaryLookup

Examples of morfologik.stemming.DictionaryLookup

    List<AnalyzedToken> taggerTokens;
    List<AnalyzedToken> lowerTaggerTokens;
    List<AnalyzedToken> upperTaggerTokens;
    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    final IStemmer dictLookup = new DictionaryLookup(getDictionary());


    Matcher matcher;
    for (String word : sentenceTokens) {
      String probeWord = word;


      // This loop happens when we need to retry probing the dictionary
      // which happens rarely when trying to remove suffixes -mañ, -se, etc.
      for (;;) {
        final List<AnalyzedToken> l = new ArrayList<>();
        final String lowerWord = probeWord.toLowerCase(conversionLocale);
        taggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(probeWord));
        lowerTaggerTokens = asAnalyzedTokenList(word, dictLookup.lookup(lowerWord));
        final boolean isLowercase = probeWord.equals(lowerWord);


        // Normal case.
        addTokens(taggerTokens, l);


        if (!isLowercase) {
          // Lowercase.
          addTokens(lowerTaggerTokens, l);
        }


        // Uppercase.
        if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
          if (isLowercase) {
            upperTaggerTokens = asAnalyzedTokenList(word,
                dictLookup.lookup(StringTools.uppercaseFirstChar(probeWord)));
            if (!upperTaggerTokens.isEmpty()) {
              addTokens(upperTaggerTokens, l);
            }
          }
          if (l.isEmpty()) {

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

          dictFile = new File(url.toURI());
        } catch (URISyntaxException e) {
          throw new RuntimeException("Could not load " + ENGLISH_DICT, e);
        }
        try {
          dictLookup = new DictionaryLookup(Dictionary.read(dictFile));
        } catch (IOException e) {
          throw new RuntimeException("Could not load " + dictFile, e);
        }
        return dictLookup;
    }

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

      throws IOException {
    initializeIfRequired();


    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    final IStemmer dictLookup = new DictionaryLookup(getDictionary());


    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(conversionLocale);
      final boolean isLowercase = word.equals(lowerWord);
      final boolean isMixedCase = StringTools.isMixedCase(word);
      List<AnalyzedToken> manualTaggerTokens=manualTagsAsAnalyzedTokenList(word, manualTagger.lookup(word));
      List<AnalyzedToken> manualLowerTaggerTokens=manualTagsAsAnalyzedTokenList(word, manualTagger.lookup(lowerWord));


      // normal case, manual tagger
      addTokens(manualTaggerTokens, l);
      // normal case, tagger dictionary
      if (manualTaggerTokens.isEmpty()) {
        addTokens(asAnalyzedTokenList(word, dictLookup.lookup(word)), l);
      }
      // tag non-lowercase words (alluppercase or startuppercase but not mixedcase)
      // with lowercase word tags
      if (!isLowercase && !isMixedCase) {
        // manual tagger
        addTokens(manualLowerTaggerTokens, l);
        // tagger dictionary
        if (manualLowerTaggerTokens.isEmpty()) {
          addTokens(asAnalyzedTokenList(word, dictLookup.lookup(lowerWord)), l);
        }
      }
      // additional tagging with prefixes
      if (l.isEmpty() && !isMixedCase) {
        addTokens(additionalTags(word), l);

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup


  @Override
  public List<AnalyzedToken> additionalTags(String word) {
    final IStemmer dictLookup;
    try {
      dictLookup = new DictionaryLookup(getDictionary());
    } catch (IOException e) {
      throw new RuntimeException("Could not load Catalan dictionary from " + getFileName(), e);
    }
    List<AnalyzedToken> additionalTaggedTokens = new ArrayList<>();
    //Any well-formed adverb with suffix -ment is tagged as an adverb (RG)

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

  public final String[] synthesize(final AnalyzedToken token,
      final String posTag) throws IOException {
    if (posTag == null) {
      return null;
    }
    final IStemmer synthesizer = new DictionaryLookup(getDictionary());
    boolean isNegated = false;
    if (token.getPOSTag() != null) {
      isNegated = posTag.indexOf(NEGATION_TAG) > 0
          || token.getPOSTag().indexOf(NEGATION_TAG) > 0
          && !(posTag.indexOf(COMP_TAG) > 0) && !(posTag.indexOf(SUP_TAG) > 0);

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

    if (posTagRegExp) {
      if (possibleTags == null) {
        possibleTags = SynthesizerTools.loadWords(JLanguageTool.getDataBroker().
            getFromResourceDirAsStream(TAGS_FILE_NAME));
      }
      final IStemmer synthesizer = new DictionaryLookup(getDictionary());
      final List<String> results = new ArrayList<>();


      boolean isNegated = false;
      if (token.getPOSTag() != null) {
        isNegated = posTag.indexOf(NEGATION_TAG) > 0

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

        }
        return ret;        
    }
    
    private IStemmer loadDictionary() throws IOException {
        IStemmer dictLookup = new DictionaryLookup(Dictionary.read(dictFile));
        return dictLookup;
    }

View Full Code Here

Examples of morfologik.stemming.DictionaryLookup

    List<AnalyzedToken> taggerTokens;
    List<AnalyzedToken> lowerTaggerTokens;
    List<AnalyzedToken> upperTaggerTokens;    
    final List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
    int pos = 0;
    final IStemmer morfologik = new DictionaryLookup(getDictionary());


    for (String word : sentenceTokens) {
      final List<AnalyzedToken> l = new ArrayList<>();
      final String lowerWord = word.toLowerCase(plLocale);
      taggerTokens = asAnalyzedTokenList(word, morfologik.lookup(word));
      lowerTaggerTokens = asAnalyzedTokenList(word, morfologik.lookup(lowerWord));       
      final boolean isLowercase = word.equals(lowerWord);


      //normal case
      addTokens(taggerTokens, l);


      if (!isLowercase) {
        //lowercase
        addTokens(lowerTaggerTokens, l);
      }


      //uppercase
      if (lowerTaggerTokens.isEmpty() && taggerTokens.isEmpty()) {
        if (isLowercase) {
          upperTaggerTokens = asAnalyzedTokenList(word, morfologik.lookup(StringTools
              .uppercaseFirstChar(word)));
          if (!upperTaggerTokens.isEmpty()) {
            addTokens(upperTaggerTokens, l);
          } else {
            l.add(new AnalyzedToken(word, null, null));

View Full Code Here

0 1 2

TOP

All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.