Examples of tokenize()


Examples of batch.internal.support.DelimitedLineTokenizer.tokenize()

public class DelimitedLineTokenizerTests extends TestCase {

  public void testDelimitedLineTokenizer() {
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    String[] line = tokenizer.tokenize("a,b,c");
    assertEquals(3, line.length);
  }

  public void testDelimitedLineTokenizerChar() {
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(' ');
View Full Code Here

Examples of client.net.sf.saxon.ce.regex.ARegularExpression.tokenize()

            // check that it's not a pattern that matches ""
            if (re.matches("")) {
                dynamicError("The regular expression in tokenize() must not be one that matches a zero-length string", "FORX0003", null);
            }

            return re.tokenize(input);

        } catch (XPathException err) {
            err.setErrorCode("FORX0002");
            err.setXPathContext(c);
            err.maybeSetLocation(this.getSourceLocator());
View Full Code Here

Examples of com.aliasi.tokenizer.Tokenizer.tokenize()

   public String getPOS(String sentence, boolean allTags)
   {
    StringBuffer xmlOutput =  new StringBuffer();
    char[] cs = sentence.toCharArray();
    Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(cs, 0, cs.length);
    String[] tokens = tokenizer.tokenize();
    String[] tags = decoder.firstBest(tokens); int len = tokens.length;
    for (int i = 0; i < len; i++)
    {
     //*-- set the adjective tags
     if (tags[i].startsWith("j") || tags[i].equals("cd") || tags[i].endsWith("od") )
View Full Code Here

Examples of com.aliasi.tokenizer.Tokenizer.tokenize()

   {
    //*-- extract the sentence boundaries
    if (in.length() > Constants.DOC_LENGTH_MAXLIMIT) in = in.substring(0, Constants.DOC_LENGTH_MAXLIMIT - 1);
    ArrayList<Token> tokenList = new ArrayList<Token>(); ArrayList<Token> whiteList = new ArrayList<Token>();
    Tokenizer tokenizer = TOKENIZER_FACTORY.tokenizer(in.toCharArray(), 0, in.length() );
    tokenizer.tokenize(tokenList, whiteList);
    tokens = new String[tokenList.size()]; tokenList.toArray(tokens);
    whites = new String[whiteList.size()]; whiteList.toArray(whites);

    sentenceBoundaries = SENTENCE_MODEL.boundaryIndices(tokens, whites);  
    int numPossibleSentences = sentenceBoundaries.length;
View Full Code Here

Examples of com.aliasi.tokenizer.Tokenizer.tokenize()

   public String[] tokenizer(String in)
   {  
    if (in.length() > Constants.DOC_LENGTH_MAXLIMIT) in = in.substring(0, Constants.DOC_LENGTH_MAXLIMIT - 1);
    ArrayList<Token> tokenList = new ArrayList<Token>(); ArrayList<Token> whiteList = new ArrayList<Token>();
    Tokenizer tokenizer = new StandardBgramTokenizerFactory().tokenizer(in.toCharArray(), 0, in.length() );
    tokenizer.tokenize(tokenList, whiteList);
    String[] tokens = new String[tokenList.size()]; tokenList.toArray(tokens);
    return(tokens);
   }
  
View Full Code Here

Examples of com.aliasi.tokenizer.Tokenizer.tokenize()

  private void tokenize() {
    tokenList.clear();
    whiteList.clear();
    Tokenizer tokenizer = tokenizerFactory.tokenizer(text.toCharArray(),
        0, text.length());
    tokenizer.tokenize(tokenList, whiteList);
//    System.out.println(tokenList.size() + " TOKENS");
//    System.out.println(whiteList.size() + " WHITESPACES");
  }
 
  private void storeTokensInArrays() {
View Full Code Here

Examples of com.aliasi.tokenizer.Tokenizer.tokenize()

  private void tokenize() {
    tokenList.clear();
    whiteList.clear();
    Tokenizer tokenizer = tokenizerFactory.tokenizer(text.toCharArray(),
        0, text.length());
    tokenizer.tokenize(tokenList, whiteList);
//    System.out.println(tokenList.size() + " TOKENS");
//    System.out.println(whiteList.size() + " WHITESPACES");
  }
 
  private void storeTokensInArrays() {
View Full Code Here

Examples of com.atilika.kuromoji.AbstractTokenizer.tokenize()

    }
    System.out.println("AbstractTokenizer ready.  Provide input text and press RET.");
    BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
    String line;
    while ((line = reader.readLine()) != null) {
      List<Token> result = tokenizer.tokenize(line);
      for (Token token : result) {
        System.out.println(token.getSurfaceForm() + "\t"
            + token.getAllFeatures());
      }
    }
View Full Code Here

Examples of com.github.pmerienne.trident.ml.preprocessing.EnglishTokenizer.tokenize()

  @Test
  public void testWithSmallWiki() {
    EnglishTokenizer tokenizer = new EnglishTokenizer();

    KLDClassifier kldClassifier = new KLDClassifier(2);
    kldClassifier.update(0, tokenizer.tokenize(NOSQL_WIKI));
    kldClassifier.update(0, tokenizer.tokenize(MYSQL_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(LILIUM_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(ROSE_WIKI));

    assertEquals(0, (int) kldClassifier.classify(tokenizer.tokenize(DATABASE_WIKI)));
View Full Code Here

Examples of com.github.pmerienne.trident.ml.preprocessing.EnglishTokenizer.tokenize()

  public void testWithSmallWiki() {
    EnglishTokenizer tokenizer = new EnglishTokenizer();

    KLDClassifier kldClassifier = new KLDClassifier(2);
    kldClassifier.update(0, tokenizer.tokenize(NOSQL_WIKI));
    kldClassifier.update(0, tokenizer.tokenize(MYSQL_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(LILIUM_WIKI));
    kldClassifier.update(1, tokenizer.tokenize(ROSE_WIKI));

    assertEquals(0, (int) kldClassifier.classify(tokenizer.tokenize(DATABASE_WIKI)));
    assertEquals(1, (int) kldClassifier.classify(tokenizer.tokenize(FLOWER_WIKI)));
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., which is owned by Oracle Inc. Contact coftware#gmail.com.