Package org.apache.uima.examples.tagger.trainAndTest

Examples of org.apache.uima.examples.tagger.trainAndTest.Token


    //    for (int i=0; i<sentences.size(); i++){ // iterate over sentences
         
             List<Token> tokens2 = new ArrayList<Token>(tokens.size());
            
             for (int x=0; x<tokens.size(); x++){ // iterate over tokens of the sentence with their corresponding POS
               Token current_token = (Token)tokens.get(x);
             
               if(current_token.pos.startsWith("N")){
                 current_token.pos="Noun";
               }
               if(current_token.pos.startsWith("V")){
                 current_token.pos="Verb";
               }
               if(current_token.pos.startsWith("ADJ")){
                 current_token.pos="Adjective";
               }
               if(current_token.pos.startsWith("P")){
                 current_token.pos="Pronoun";
               }
               if(current_token.pos.startsWith("KO")){
                 current_token.pos="Conjunction";
               }
               if(current_token.pos.startsWith("AP")){
                 current_token.pos="Preposition";
               }
               if(current_token.pos.startsWith("PTK")){
                 current_token.pos="PTK";
               }
               if(current_token.pos.startsWith("ADV")){
                 current_token.pos="Adverb";
               }
              
               if(current_token.pos.startsWith("ART")){
                 current_token.pos="Article";
               }
              
               if(current_token.pos.startsWith("ITJ")){
                 current_token.pos="Interjection";
               }
              
               Token zwischen = new Token(current_token.word, current_token.pos);
      
               tokens2.add(zwischen);
             }
            return tokens2;
           
View Full Code Here


    List<Token> tokens2 = new ArrayList<Token>(tokens.size());

    for (int x = 0; x < tokens.size(); x++) { // iterate over tokens of the sentence with their
      // corresponding POS
      Token current_token = (Token)tokens.get(x);
      String[] z = new String[2];
      if (current_token.pos.contains("+")) {
        z = current_token.pos.split("[+]");
        current_token.pos = z[0];
      }
      // for cases like : BEZ*
      if (current_token.pos.contains("*") && !(current_token.pos.startsWith("*"))) {
        z[0] = current_token.pos.replace("*", "");
        current_token.pos = z[0];
      }

      // for: *-h1
      if (current_token.pos.startsWith("*")) {
        z[0] = "*";
        current_token.pos = z[0];
      }
      if (current_token.pos.contains("-") && !(current_token.pos.startsWith("--"))) {
        z = current_token.pos.split("[-]");
        current_token.pos = z[0];
      }
      if (current_token.pos.startsWith("--")) {
        z[0] = "--";
        current_token.pos = z[0];
      }

      Token zwischen = new Token(current_token.word, current_token.pos);

      tokens2.add(zwischen);
    }
    return tokens2;

View Full Code Here

        Integer i = 0;
        while ( itPOS.hasNext() ) {
          Annotation token = itPOS.next();
          // Create a new token to be learned and add it to the list,
          // if the POS value is relevant (not null)
          Token tokenTmp = new Token();
          tokenTmp.word  = token.getCoveredText();
          tokenTmp.pos   = token.getStringValue(featPOS);
          if (tokenTmp.pos != null) {
            c++;
            theLearnedTokens.add(tokenTmp);
View Full Code Here

TOP

Related Classes of org.apache.uima.examples.tagger.trainAndTest.Token

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.