Examples of tokenize()


Examples of edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenizationService.tokenize()

      TokenizationService service = TokenizationService.getTokenizationService();
      Map<YKDocument,Concordance> map = new HashMap<YKDocument,Concordance>();
      for (YKDocument doc : docs) {
        // Tokenize only on a cache miss, then cache the result
        TokenList tl = tcache.getTokenList(doc);
        if (tl == null){
          tl = service.tokenize(doc);
          tcache.putTokenList(doc, tl);
        }
        // Build a concordance over the token list; n and wsize come from the caller
        Concordance c = yoshikoder.getDictionary().getConcordance(tl, n, wsize);
        map.put(doc, c);
      }

Examples of edu.stanford.nlp.process.PTBTokenizer.tokenize()

      int sNum = 0;
      int wNum = 0;

      // Tokenize the raw document text; the flags are tokenizeNLs = false, invertible = true
      PTBTokenizer<CoreLabel> ptb = PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(doc)), false, true);
      List<CoreLabel> words = ptb.tokenize();

      List<CoreLabel> result = new ArrayList<CoreLabel>();

      CoreLabel prev = null;
      String prevString = "";
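The tokenizer can also be driven on its own; a minimal sketch, assuming the usual imports (edu.stanford.nlp.ling.CoreLabel, edu.stanford.nlp.process.PTBTokenizer, java.io.*, java.util.List) — the sample text and variable names are illustrative:

      String text = "Dr. Smith paid $4.50 for coffee.";
      // newPTBTokenizer(reader, tokenizeNLs, invertible), as in the snippet above
      PTBTokenizer<CoreLabel> tok =
          PTBTokenizer.newPTBTokenizer(new BufferedReader(new StringReader(text)), false, true);
      for (CoreLabel label : tok.tokenize()) {
        System.out.println(label.word());   // one Penn Treebank token per line, e.g. "Dr.", "Smith", ...
      }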

Examples of edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer.tokenize()

                wordFilter = (WVTWordFilter) config.getComponentForStep(WVTConfiguration.STEP_WORDFILTER, d);
                stemmer = (WVTStemmer) config.getComponentForStep(WVTConfiguration.STEP_STEMMER, d);

                // Process the document: the nested calls run inside-out --
                // load, convert to plain text, convert characters, tokenize,
                // filter words, then stem

                TokenEnumeration tokens = stemmer.stem(wordFilter.filter(tokenizer.tokenize(charConverter.convertChars(infilter.convertToPlainText(loader.loadDocument(d), d), d), d), d), d);

                while (tokens.hasMoreTokens()) {
                    wordList.addWordOccurance(tokens.nextToken());
                }

Examples of edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer.tokenize()

                outputFilter = (WVTOutputFilter) config.getComponentForStep(WVTConfiguration.STEP_OUTPUT, d);

                // Process the document

                TokenEnumeration tokens = stemmer.stem(wordFilter.filter(tokenizer.tokenize(charConverter.convertChars(infilter.convertToPlainText(loader.loadDocument(d), d), d), d), d), d);

                while (tokens.hasMoreTokens()) {
                    wordList.addWordOccurance(tokens.nextToken());
                }

Examples of edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer.tokenize()

            vectorCreator = (WVTVectorCreator) config.getComponentForStep(WVTConfiguration.STEP_VECTOR_CREATION, d);

            // Process the document: same pipeline, but starting from an
            // in-memory StringReader rather than a loaded document

            TokenEnumeration tokens = stemmer.stem(wordFilter.filter(tokenizer.tokenize(charConverter.convertChars(new StringReader(text), d), d), d), d);

            while (tokens.hasMoreTokens()) {
                wordList.addWordOccurance(tokens.nextToken());
            }
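The nested one-liner can also be read step by step; a sketch of the same chain unrolled, assuming the usual WVTool intermediate types (a Reader flowing into tokenize(), a TokenEnumeration between the later stages) — variable names are illustrative:

            // Unrolled version of the nested call above (intermediate types assumed)
            Reader converted = charConverter.convertChars(new StringReader(text), d);
            TokenEnumeration rawTokens = tokenizer.tokenize(converted, d);
            TokenEnumeration filtered = wordFilter.filter(rawTokens, d);
            TokenEnumeration stemmed = stemmer.stem(filtered, d);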

Examples of edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer.tokenize()

                wordFilter = (WVTWordFilter) config.getComponentForStep(WVTConfiguration.STEP_WORDFILTER, d);
                stemmer = (WVTStemmer) config.getComponentForStep(WVTConfiguration.STEP_STEMMER, d);

                // Process the document

                TokenEnumeration tokens = stemmer.stem(wordFilter.filter(tokenizer.tokenize(charConverter.convertChars(infilter.convertToPlainText(loader.loadDocument(d), d), d), d), d), d);

                while (tokens.hasMoreTokens()) {
                    listener.processWord(tokens.nextToken());
                }

Examples of net.sf.saxon.expr.Tokenizer.tokenize()

  }

  public static String replaceNameInPathOrQuery( String pathOrQuery, String oldName, String newName ) throws Exception
  {
    Tokenizer t = new Tokenizer();
    // Lex the path or query, then walk the token stream until EOF
    t.tokenize( pathOrQuery, 0, -1, 1 );
    StringBuffer result = new StringBuffer();
    int lastIx = 0;

    while( t.currentToken != Token.EOF )
    {

Examples of net.sf.saxon.regex.RegularExpression.tokenize()

                err.setLocator(this);
                throw err;
            }

        }
        // Split the input string at each match of the regular expression
        return re.tokenize(input);
    }


    /**
     * Simple command-line interface for testing.

Examples of opennlp.ccg.lexicon.DefaultTokenizer.tokenize()

        lm.debugScore = true;
        int secs = (int) (System.currentTimeMillis() - start) / 1000;
        System.out.println("secs: " + secs);
        System.out.println();
        // Tokenize the input string into Word objects before scoring
        Tokenizer tokenizer = new DefaultTokenizer();
        List<Word> words = tokenizer.tokenize(tokens);
        System.out.println("scoring: " + tokens);
        System.out.println();
        lm.setWordsToScore(words, true);
        lm.prepareToScoreWords();
        double logprob = lm.logprob();
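The tokenizer itself needs none of the language-model setup; a minimal standalone sketch (imports from opennlp.ccg.lexicon and java.util assumed, the sample string is illustrative):

        Tokenizer tokenizer = new DefaultTokenizer();
        List<Word> words = tokenizer.tokenize("the quick brown fox");
        for (Word w : words) {
            System.out.println(w);   // each Word wraps one surface token
        }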