Source Code of org.apache.ctakes.core.ae.SectionSegmentAnnotator

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.ctakes.core.ae;


import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.HashMap;


import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;




import org.apache.ctakes.core.resource.FileResource;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.core.util.DocumentSection;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import findstruct.Section;
import findstruct.StructFinder;


/**
 * Creates a single segment annotation that spans the entire document. This is
 * useful for running a TAE without a CasInitializer that would normally create
 * the segment annotations.
 * 
 * @author Mayo Clinic
 */
public class SectionSegmentAnnotator extends JCasAnnotator_ImplBase {
  private String segmentId;
  private StructFinder structureFinder;


  private String templateFile= null;
  Logger logger = Logger.getLogger(this.getClass());


  public HashMap<Integer, DocumentSection> sections;


  public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);


    try {
      templateFile = ((FileResource)aContext.getResourceObject("template")).getFile().getAbsolutePath();
      structureFinder = new StructFinder();
    }catch(Exception e ){
      logger.error("Error reading template file: " + e.getMessage());
    }


    segmentId = (String) aContext.getConfigParameterValue("SegmentID");
    if (segmentId == null) {
      segmentId = "SIMPLE_SEGMENT";
    }
  }


  /**
   * Entry point for processing.
   * Identify all the sections of the medical record
   */
  public void process(JCas jCas) throws AnalysisEngineProcessException {


    String text = jCas.getDocumentText();


    if (text == null) {
      String docId = DocumentIDAnnotationUtil.getDocumentID(jCas);
      throw new AnalysisEngineProcessException("text is null for docId="
          + docId, null);
    }


    // use the API to get the list of sections.
    try{
      ArrayList<Section> foundSections = structureFinder.execute(text, new FileInputStream(templateFile));


      // iterate over the ordered sections...
      int index = 0;
      for (Section sct : foundSections) {
        String nodeName = sct.getHeader();
        String content  = sct.getContent();


        if(nodeName== null || nodeName.trim().isEmpty() || 
            content == null || content.trim().isEmpty())
          continue;


        //      String[] splitContent = content.split("\n");
        //      int endLine = startLine + splitContent.length;


        index = text.indexOf(content, index);


        Segment segment = new Segment(jCas);
        segment.setBegin(index);
        segment.setEnd(index+content.length());
        segment.setId(sct.getHeader());
        segment.addToIndexes();
        index = index + content.length();
        //      DocumentSection section = 
        //          new DocumentSection(startLine, endLine, content);
        //      section.setSectionName(nodeName);
        //      sections.put(startLine, section);
        //
        //      startLine = endLine ;
      }
    } catch(FileNotFoundException e) {
      e.printStackTrace();
      Segment seg = new Segment(jCas);
      seg.setBegin(0);
      seg.setEnd(text.length());
      seg.setId(segmentId);
      seg.addToIndexes();
    }
  }
}


//class StructFinder {
//
//  /** Creates a new instance of StructFinder */
//  public StructFinder() {
//  }
//
//  /**
//   * Main method that takes in the content of a file to process
//   * and the input stream of a template of section names 
//   * and returns the section names found in the given file
//   * @param wholeFile
//   * @param templateContent
//   * 
//   * @return a list with the found sections
//   */
//  public ArrayList<Section> execute(String wholeFile, 
//      InputStream templateContent) {
//    ArrayList<Section> foundSections = new ArrayList<Section>();
//
//    String templateFileName = null;
//    StructModel template = null;
//
//
//    SAXBuilder parser = new SAXBuilder();
//    try {
//      template = new StructModel(parser.build(templateContent));
//    } catch (JDOMException e) {
//      System.err.println("Error parsing template file "
//          + templateFileName + ": " + e);
//    }
//
//    if (template!=null) {
//      if (wholeFile!=null) { 
//        Element e = template.process(wholeFile);
//
//        for(Object el : e.getContent()) {
//          // find the type of the element
//          if (el.getClass().equals(Text.class)) {
//            Section sct = new Section("root", ((Text)el).getText());
//            foundSections.add(sct);
//          }
//          else if (el.getClass().equals(Element.class)) {
//            Element foundElement = ((Element)el);
//            Section sct = new Section(foundElement.getName(), 
//                foundElement.getText());
//            foundSections.add(sct);
//          }
//        }
//      }
//    }
//
//    return foundSections;
//  }
//}
Source Code of org.apache.ctakes.core.ae.SectionSegmentAnnotator

Related Classes of org.apache.ctakes.core.ae.SectionSegmentAnnotator