Package org.apache.ctakes.necontexts

Source Code of org.apache.ctakes.necontexts.ContextAnnotator

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.ctakes.necontexts;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import org.apache.ctakes.core.util.JCasUtil;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.apache.uima.resource.ResourceInitializationException;


/**
* The context annotator iterates through focus annotations and analyzes the
* surrounding context of each. The context is defined by a scope, window, and a
* maximum size (if applicable). The context can be further refined by boundary
* conditions as specified by the context analyzer used. For each focus
* annotation, a context is collected as a set of context annotations for each
* scope that is to be applied. The context annotations are passed to a context
* analyzer for analysis. If the context analyzer finds something of interest in
* the context annotations, then it will generate a context hit. Context hits
* are then passed to a context hit consumer which will perform an action on the
* hit such as updating an existing annotation or creating a new one.
*/
public class ContextAnnotator extends JCasAnnotator_ImplBase {
  // LOG4J logger named after the runtime class (getClass()), so a subclass
  // logs under its own class name.
  private Logger iv_logger = Logger.getLogger(getClass().getName());

  /**
   * "MaxLeftScopeSize" is a required, single, integer parameter that
   * specifies the maximum size of the left scope, i.e. the maximum number of
   * context annotations collected to the left of a focus annotation.
   */
  public static final String MAX_LEFT_SCOPE_SIZE_PARAM = "MaxLeftScopeSize";
  /**
   * "MaxRightScopeSize" is a required, single, integer parameter that
   * specifies the maximum size of the right scope, i.e. the maximum number of
   * context annotations collected to the right of a focus annotation.
   */
  public static final String MAX_RIGHT_SCOPE_SIZE_PARAM = "MaxRightScopeSize";
  /**
   * "ScopeOrder" is a required, multiple, string parameter that specifies the
   * order that the scopes should be processed in. Possible values are "LEFT",
   * "MIDDLE", "RIGHT", and "ALL". Any other value is rejected when the
   * parameter is parsed.
   */
  public static final String SCOPE_ORDER_PARAM = "ScopeOrder";

  /**
   * "WindowAnnotationClass" is a required, single, string parameter that
   * specifies the annotation type of the windows that specify the hard
   * boundaries of scopes. Note that the entire window may not be used
   * depending on the location of the focus annotation, the maximum scope
   * size, and the boundary conditions specified by the context analyzer. A
   * window encompasses all of the scopes (left, middle, and right). Examples
   * of likely window types would be:
   * <ul>
   * <li>DocumentAnnotation</li>
   * <li>SentenceAnnotation</li>
   * <li>SegmentAnnotation</li>
   * <li>...</li>
   * </ul>
   *
   * @see DocumentAnnotation
   * @see edu.mayo.bmi.common.type.Sentence
   * @see edu.mayo.bmi.common.type.Segment
   *
   */
  public static final String WINDOW_ANNOTATION_CLASS_PARAM = "WindowAnnotationClass";
  /**
   * "FocusAnnotationClass" is a required, single, string parameter that
   * specifies the annotation type of the focus annotations that are going to
   * be examined by this annotator. Examples of likely focus types would be:
   * <ul>
   * <li>NamedEntityAnnotation</li>
   * <li>Token</li>
   * <li>...</li>
   * </ul>
   *
   * @see edu.mayo.bmi.common.type.NamedEntity
   * @see edu.mayo.bmi.common.type.BaseToken
   */

  public static final String FOCUS_ANNOTATION_CLASS_PARAM = "FocusAnnotationClass";
  /**
   * "ContextAnnotationClass" is a required, single, string parameter that
   * specifies the annotation type of the context annotations (often "tokens")
   * that make up the context relative to a focus annotation within a scope
   * that is being examined. The context annotations are examined for context
   * hits by the context analyzer. Examples of likely context annotation types
   * would be:
   * <ul>
   * <li>BaseToken</li>
   * <li>WordToken</li>
   * <li>NamedEntity</li>
   * </ul>
   *
   * @see edu.mayo.bmi.common.type.BaseToken
   * @see edu.mayo.bmi.common.type.WordToken
   * @see edu.mayo.bmi.common.type.NamedEntity
   *
   */

  public static final String CONTEXT_ANNOTATION_CLASS_PARAM = "ContextAnnotationClass";

  /**
   * "ContextAnalyzerClass" is a required, single, string parameter that
   * specifies the context analyzer class that determines if a "hit" is found
   * within a processed scope. The class is instantiated reflectively through
   * its no-argument constructor.
   *
   * @see ContextAnalyzer
   */
  public static final String CONTEXT_ANALYZER_CLASS_PARAM = "ContextAnalyzerClass";
  /**
   * "ContextHitConsumerClass" is a required, single, string parameter that
   * specifies the context hit consumer class that will process context hits
   * that are found. The class is instantiated reflectively through its
   * no-argument constructor.
   *
   * @see ContextHitConsumer
   */
  public static final String CONTEXT_HIT_CONSUMER_CLASS_PARAM = "ContextHitConsumerClass";

  /**
   * Provides context annotations inside the window that end at or before the
   * beginning of the focus annotation, limited by the maximum left scope size
   * and by the boundary conditions of the context analyzer.
   */
  public static final int LEFT_SCOPE = 1;
  /**
   * Provides context annotations that are "inside" the focus annotation. For
   * example, if the focus annotation type is a named entity mention and the
   * context annotation is a token type, then the middle scope will examine
   * the tokens that fall within the named entity mention.
   */
  public static final int MIDDLE_SCOPE = 2;
  /**
   * Provides context annotations inside the window that begin at or after the
   * end of the focus annotation, limited by the maximum right scope size and
   * by the boundary conditions of the context analyzer.
   */
  public static final int RIGHT_SCOPE = 3;
  /**
   * The ALL_SCOPE scope provides the context annotations that are found in
   * all three of the other scopes (LEFT, MIDDLE, and RIGHT), concatenated in
   * that order.
   */
  public static final int ALL_SCOPE = 4;

  /** Maximum number of context annotations collected for the left scope. */
  protected int leftScopeSize;
  /** Maximum number of context annotations collected for the right scope. */
  protected int rightScopeSize;

  /**
   * Scope identifiers (LEFT_SCOPE, MIDDLE_SCOPE, RIGHT_SCOPE, ALL_SCOPE) in
   * the order in which they are processed; filled by parseScopeOrder.
   */
  protected List<Integer> scopes = new ArrayList<Integer>();

  /** Analyzer that inspects the context annotations of a scope for hits. */
  protected ContextAnalyzer contextAnalyzer;
  /** Consumer that acts on the context hits reported by the analyzer. */
  protected ContextHitConsumer contextConsumer;

  // Type identifiers, as returned by JCasUtil.getType, for the configured
  // window, focus and context annotation classes.
  int windowType;
  int focusType;
  int contextType;

  public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
    super.initialize(uimaContext);

    try {
      leftScopeSize = ((Integer) uimaContext.getConfigParameterValue(MAX_LEFT_SCOPE_SIZE_PARAM)).intValue();
      rightScopeSize = ((Integer) uimaContext.getConfigParameterValue(MAX_RIGHT_SCOPE_SIZE_PARAM)).intValue();

      String[] scopeOrderArr = (String[]) uimaContext.getConfigParameterValue(SCOPE_ORDER_PARAM);

      parseScopeOrder(scopeOrderArr);

      String contextAnalyzerClassName = (String) uimaContext.getConfigParameterValue(CONTEXT_ANALYZER_CLASS_PARAM);
      String contextConsumerClassName = (String) uimaContext.getConfigParameterValue(CONTEXT_HIT_CONSUMER_CLASS_PARAM);

      contextAnalyzer = (ContextAnalyzer) Class.forName(contextAnalyzerClassName).newInstance();
      contextAnalyzer.initialize(uimaContext);
      contextConsumer = (ContextHitConsumer) Class.forName(contextConsumerClassName).newInstance();

      windowType = JCasUtil.getType((String) uimaContext.getConfigParameterValue(WINDOW_ANNOTATION_CLASS_PARAM));
      focusType = JCasUtil.getType((String) uimaContext.getConfigParameterValue(FOCUS_ANNOTATION_CLASS_PARAM));
      contextType = JCasUtil.getType((String) uimaContext.getConfigParameterValue(CONTEXT_ANNOTATION_CLASS_PARAM));

    } catch (Exception e) {
      throw new ResourceInitializationException(e);
    }
  }

  void parseScopeOrder(String[] scopeStrings) throws AnnotatorConfigurationException {
    scopes.clear();
    for (int i = 0; i < scopeStrings.length; i++) {
      if (scopeStrings[i].equals("LEFT")) {
        scopes.add(new Integer(LEFT_SCOPE));
      } else if (scopeStrings[i].equals("MIDDLE")) {
        scopes.add(new Integer(MIDDLE_SCOPE));
      } else if (scopeStrings[i].equals("RIGHT")) {
        scopes.add(new Integer(RIGHT_SCOPE));
      } else if (scopeStrings[i].equals("ALL")) {
        scopes.add(new Integer(ALL_SCOPE));
      } else {
        Exception e = new Exception("Invalid scope value: " + scopeStrings[i]);
        throw new AnnotatorConfigurationException(e);
      }
    }
    iv_logger.info("SCOPE ORDER: " + scopes);
  }

  public void process(JCas jCas) throws AnalysisEngineProcessException {
    try {
      FSIterator windowIterator = jCas.getAnnotationIndex(windowType).iterator();
      while (windowIterator.hasNext()) {
        Annotation window = (Annotation) windowIterator.next();
        List<Annotation> focusList = constrainToWindow(jCas, focusType, window);

        // why is this list reversed?
        Collections.reverse(focusList);

        Iterator<Integer> scopeIterator = scopes.iterator();
        while (scopeIterator.hasNext()) {
          int scope = scopeIterator.next();
          Iterator<Annotation> focusIterator = focusList.iterator();
          while (focusIterator.hasNext()) {
            Annotation focus = focusIterator.next();
            List<Annotation> scopeContextAnnotations = getScopeContextAnnotations(jCas, focus, window,
                scope);
            ContextHit contextHit = contextAnalyzer.analyzeContext(scopeContextAnnotations, scope);
            if (contextHit != null) {
              contextConsumer.consumeHit(jCas, focus, scope, contextHit);
            }
          }
        }
      }
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }

  }

  protected List<Annotation> getScopeContextAnnotations(JCas jCas, Annotation focus, Annotation window, int scope)
      throws AnalysisEngineProcessException {
    List<Annotation> scopeContextAnnotations = new ArrayList<Annotation>();
    switch (scope) {
    case LEFT_SCOPE:
      scopeContextAnnotations = getLeftScopeContextAnnotations(jCas, focus, window);
      break;
    case MIDDLE_SCOPE:
      scopeContextAnnotations = getMiddleScopeContextAnnotations(jCas, focus);
      break;
    case RIGHT_SCOPE:
      scopeContextAnnotations = getRightScopeContextAnnotations(jCas, focus, window);
      break;
    case ALL_SCOPE:
      scopeContextAnnotations.addAll(getLeftScopeContextAnnotations(jCas, focus, window));
      scopeContextAnnotations.addAll(getMiddleScopeContextAnnotations(jCas, focus));
      scopeContextAnnotations.addAll(getRightScopeContextAnnotations(jCas, focus, window));
      break;
    }
    return scopeContextAnnotations;
  }

  protected List<Annotation> getLeftScopeContextAnnotations(JCas jCas, Annotation focus, Annotation window)
      throws AnalysisEngineProcessException {

    List<Annotation> scopeContextAnnotations = new ArrayList<Annotation>();

    // if focus is not completely contained inside the window annotation,
    // then return empty list.
    if (focus.getBegin() < window.getBegin() || focus.getEnd() > window.getEnd())
      return scopeContextAnnotations;

    FSIterator subiterator = jCas.getAnnotationIndex(contextType).subiterator(window);
    subiterator.moveTo(focus);
    subiterator.moveToNext();
    if (!subiterator.isValid())
      subiterator.moveTo(focus);

    while (scopeContextAnnotations.size() < leftScopeSize) {
      subiterator.moveToPrevious();
      if (subiterator.isValid()) {
        Annotation contextAnnotation = (Annotation) subiterator.get();
        if (contextAnnotation.getEnd() > focus.getBegin()) {
          continue;
        }
        if (!contextAnalyzer.isBoundary(contextAnnotation, LEFT_SCOPE)) {
          scopeContextAnnotations.add(contextAnnotation);
        } else {
          break;
        }
      } else {
        break;
      }
    }
    Collections.reverse(scopeContextAnnotations);
    return scopeContextAnnotations;
  }

  protected List<Annotation> getRightScopeContextAnnotations(JCas jCas, Annotation focus, Annotation window)
      throws AnalysisEngineProcessException {

    List<Annotation> scopeContextAnnotations = new ArrayList<Annotation>();

    // if focus is not completely contained inside the window annotation,
    // then return empty list.
    if (focus.getBegin() < window.getBegin() || focus.getEnd() > window.getEnd())
      return scopeContextAnnotations;

    FSIterator subiterator = jCas.getAnnotationIndex(contextType).subiterator(window);
    subiterator.moveTo(focus);
    subiterator.moveToPrevious();
    if (!subiterator.isValid())
      subiterator.moveTo(focus);

    while (scopeContextAnnotations.size() < rightScopeSize) {
      subiterator.moveToNext();
      if (subiterator.isValid()) {
        Annotation contextAnnotation = (Annotation) subiterator.get();
        if (contextAnnotation.getBegin() < focus.getEnd()) {
          continue;
        }
        if (!contextAnalyzer.isBoundary(contextAnnotation, RIGHT_SCOPE)) {
          scopeContextAnnotations.add(contextAnnotation);
        } else {
          break;
        }
      } else {
        break;
      }
    }
    return scopeContextAnnotations;
  }

  protected List<Annotation> getMiddleScopeContextAnnotations(JCas jCas, Annotation focus)
      throws AnalysisEngineProcessException {

    List<Annotation> scopeContextAnnotations = new ArrayList<Annotation>();

    FSIterator subiterator = jCas.getAnnotationIndex(contextType).subiterator(focus);
    while (subiterator.hasNext()) {
      scopeContextAnnotations.add((Annotation) subiterator.next());
    }
    if (scopeContextAnnotations.size() == 0 && JCasUtil.getType(focus.getClass()) == contextType)
      scopeContextAnnotations.add(focus);
    else if (scopeContextAnnotations.size() == 0) {
      TypeSystem typeSystem = jCas.getTypeSystem();
      Type superType = jCas.getType(focusType).casType;
      Type subType = focus.getType();
      if (typeSystem.subsumes(superType, subType))
        scopeContextAnnotations.add(focus);
    }
    return scopeContextAnnotations;
  }

  /**
   * Gets a list of annotations within the specified window annotation.
   *
   * @param annotItr
   * @param window
   * @param jcas
   * @return
   * @throws Exception
   */
  private List<Annotation> constrainToWindow(JCas jCas, int type, Annotation window) {

    List<Annotation> list = new ArrayList<Annotation>();

    FSIterator subiterator = jCas.getAnnotationIndex(type).subiterator(window);

    while (subiterator.hasNext()) {
      Annotation annot = (Annotation) subiterator.next();
      list.add(annot);
    }
    return list;
  }

}
TOP

Related Classes of org.apache.ctakes.necontexts.ContextAnnotator

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.