Package org.apache.uima.cas.impl

Source Code of org.apache.uima.cas.impl.CASMgrSerializer

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.uima.cas.impl;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.admin.FSIndexComparator;
import org.apache.uima.cas.admin.LinearTypeOrder;
import org.apache.uima.internal.util.IntVector;
import org.apache.uima.internal.util.SymbolTable;

/**
* Container for serialized CAS typing information. Contains information about the type system, as
* well as the index repository. If more than one CAS that use the same type system and index
* repository need to be serialized, this information needs to be serialized only once.
*/
public class CASMgrSerializer implements Serializable {

  // Constants identifying the source of the serialized data, so that different things can be
  // done depending on the source (like name mapping).
  public static final int SOURCE_JEDI = 0;

  public static final int SOURCE_TAF = 1;

  static final long serialVersionUID = 5549299679614131956L;

  // Implementation note: when making changes, keep in mind that numbering of
  // types and features starts at 1, not 0. This makes book-keeping in the
  // various arrays a bit tricky at times.

  // ///////////////////////////////////////////////////////////////////////////
  // Encoding of index repository. Inherited index specifications are not
  // encoded explicitly. I.e., the fact that tokens are in any index that
  // annotations are in is implicit.

  /**
   * The default type order of the index repository, as returned by the repository's default
   * {@link org.apache.uima.cas.admin.LinearTypeOrder LinearTypeOrder}.
   */
  public int[] typeOrder;

  /**
   * The index identifiers (labels). Note that more than one identifier can refer to the same
   * index. Labels whose index uses the
   * {@link org.apache.uima.cas.FSIndex#DEFAULT_BAG_INDEX DEFAULT_BAG_INDEX} strategy are not
   * recorded.
   */
  public String[] indexNames;

  /**
   * A mapping from index names to index IDs. We have that
   * <code>indexNames.length == nameToIndexMap.length</code> and for each value <code>v</code> in
   * <code>nameToIndexMap</code>, <code>0 &lt;= v &lt; indexingStrategy.length</code>. If this
   * field is <code>null</code> when the index repository is rebuilt, the identity mapping is
   * substituted.
   */
  public int[] nameToIndexMap;

  // Note: a per-index type array used to be serialized here; it is no longer part of the
  // encoding (the type of each index is stored as the first cell of its comparator instead):
  // public int[] indexTypes;
  /**
   * For each index, the indexing strategy of that index. Current options are
   * {@link org.apache.uima.cas.FSIndex#SORTED_INDEX SORTED_INDEX},
   * {@link org.apache.uima.cas.FSIndex#SET_INDEX SET_INDEX} and
   * {@link org.apache.uima.cas.FSIndex#BAG_INDEX BAG_INDEX}. There is one entry per distinct
   * index, so <code>indexingStrategy.length == comparatorIndex.length</code>.
   */
  public int[] indexingStrategy;

  /**
   * For each index, where the corresponding comparator starts in the
   * {@link #comparators comparators} field. There is one entry per distinct index, so
   * <code>comparatorIndex.length == indexingStrategy.length</code>.
   */
  public int[] comparatorIndex;

  /**
   * Encodings of the actual comparators. Each comparator occupies an odd number of cells: one for
   * the type, then key/comparison pairs. A feature key is encoded with its type system code; a
   * type-order key is encoded as <code>0</code>. Comparison operations are encoded with
   * {@link org.apache.uima.cas.admin.FSIndexComparator#STANDARD_COMPARE STANDARD_COMPARE} and
   * {@link org.apache.uima.cas.admin.FSIndexComparator#REVERSE_STANDARD_COMPARE REVERSE_STANDARD_COMPARE}.
   */
  public int[] comparators;

  // ///////////////////////////////////////////////////////////////////////////
  // Type system encoding.

  /**
   * A list of type names (symbol table). Note: numbering of types starts at <code>1</code>, and
   * we index the names according to their internal code. That means that
   * <code>typeNames[0] == null</code>.
   */
  public String[] typeNames = null;

  /**
   * A list of feature names (symbol table). Note: numbering of features starts at <code>1</code>,
   * and we index the names according to their internal code. That means that
   * <code>featureNames[0] == null</code>.
   */
  public String[] featureNames = null;

  /**
   * Type inheritance information: for each type other than the top type, we provide the parent in
   * the inheritance scheme. We use the internal type codes for indexing, which means that cells 0
   * (no type) and 1 (top type doesn't inherit from anything) are not used.
   */
  public int[] typeInheritance;

  /**
   * Feature declarations. For each feature code <code>i</code> (which is an integer >= 1),
   * <code>featDecls[i*3]</code> is the domain type code, <code>featDecls[i*3+1]</code> is the
   * range type code, and <code>featDecls[i*3+2]</code> is the multipleReferencesAllowed flag (0
   * or 1). The array is indexed directly by feature code, so the three cells belonging to code 0
   * are unused.
   */
  public int[] featDecls;

  /**
   * The internal code of the top type. Optional, used for sanity checks. No method of this class
   * assigns this field.
   */
  public int topTypeCode;

  /**
   * The offsets for features. Optional, used for sanity checks. Since feature numbering starts at
   * 1, the length of the array is 1 + number of features. No method of this class assigns this
   * field.
   */
  public int[] featureOffsets;

  /**
   * A list of type codes for the string subtypes.
   */
  public int[] stringSubtypes;

  /**
   * The string values for the string subtypes. Start and end positions for the values for the
   * individual types are in <code>stringSubtypeValuePos</code>.
   */
  public String[] stringSubtypeValues;

  /**
   * The start positions of the string value subarrays of <code>stringSubtypeValues</code>.
   * <code>stringSubtypeValuePos.length ==
   * stringSubtypes.length</code>. For each <code>i &lt;
   * stringSubtypes.length</code>,
   * <code>stringSubtypeValuePos[i]</code> is the start of the string values for
   * <code>stringSubtypes[i]</code>. The values of a subtype end where those of the next subtype
   * start; the values of the last subtype run to the end of <code>stringSubtypeValues</code>.
   */
  public int[] stringSubtypeValuePos;

  // ////////////////////////////////////////////////////////////////////////////
  // Other stuff

  /**
   * The source of the serialized data, one of {@link #SOURCE_JEDI} or {@link #SOURCE_TAF}. Set
   * this appropriately. NOTE(review): within this class the field is only referenced from
   * commented-out code; confirm whether external readers still depend on it.
   */
  public int source = SOURCE_JEDI;

  // public int source = SOURCE_TAF;

  /**
   * Constructor for CASMgrSerializer.
   */
  public CASMgrSerializer() {
    super();
  }

  /**
   * Serialize index repository.
   *
   * @param ir
   *          The index repository to be serialized.
   */
  public void addIndexRepository(FSIndexRepositoryImpl ir) {
    // Encode the type order.
    this.typeOrder = ir.getDefaultTypeOrder().getOrder();
    // Collect the index labels in a list, as we don't know how many there
    // are.
    final List<String> names = new ArrayList<String>();
    // Create an iterator over the names.
    final Iterator<String> namesIt = ir.getLabels();
    // Add the names to the list, filtering out auto-indexes.
    while (namesIt.hasNext()) {
      String name = (String) namesIt.next();
      if (ir.getIndex(name).getIndexingStrategy() != FSIndex.DEFAULT_BAG_INDEX) {
        names.add(name);
      }
    }
    // Now we know how many labels there are.
    final int numNames = names.size();
    // Create the array for the labels.
    this.indexNames = new String[numNames];
    String label;
    // Fill the name array.
    for (int i = 0; i < numNames; i++) {
      // Get the next label.
      label = (String) names.get(i);
      // Add the label.
      this.indexNames[i] = label;
    }
    // Create a vector of the indexes, and build the name-to-index map.
    this.nameToIndexMap = new int[numNames];
    Vector<FSIndex<FeatureStructure>> indexVector = new Vector<FSIndex<FeatureStructure>>();
    FSIndex<FeatureStructure> index;
    int pos;
    for (int i = 0; i < numNames; i++) {
      index = ir.getIndex(this.indexNames[i]);
      pos = indexVector.indexOf(index);
      if (pos < 0) {
        indexVector.add(index);
        pos = indexVector.size() - 1;
      }
      this.nameToIndexMap[i] = pos;
    }
    // Now we know how many indexes there are.
    final int numIndexes = indexVector.size();
    // Create the array with index types.
    // this.indexTypes = new int[numIndexes];
    // for (int i = 0; i < numIndexes; i++) {
    // // This looks ugly, but it just records the type code for each index.
    // indexTypes[i] =
    // ((TypeImpl) ((FSIndex) indexVector.get(i)).getType()).getCode();
    // }
    // Create the array with the indexing strategy.
    this.indexingStrategy = new int[numIndexes];
    for (int i = 0; i < numIndexes; i++) {
      this.indexingStrategy[i] = indexVector.get(i).getIndexingStrategy();
    }

    // Create the array for the comparator index.
    this.comparatorIndex = new int[numIndexes];
    // Put the comparators in an IntVector since we don't know how long it
    // will get.
    IntVector comps = new IntVector();
    // Represent the current position in comparator array. Use to build
    // the comparator index.
    int compPos = 0;
    int numCompFeats;
    FSIndexComparator comp;
    for (int i = 0; i < numIndexes; i++) {
      // Set the comparator index to the current position in comparator
      // array.
      this.comparatorIndex[i] = compPos;
      // Get the comparator.
      comp = ((FSIndexImpl) indexVector.get(i)).getComparator();
      // Encode the type of the comparator.
      comps.add(((TypeImpl) comp.getType()).getCode());
      // How many keys in the comparator?
      numCompFeats = comp.getNumberOfKeys();
      for (int j = 0; j < numCompFeats; j++) {
        // Encode key feature.
        switch (comp.getKeyType(j)) {
          case FSIndexComparator.FEATURE_KEY: {
            comps.add(((FeatureImpl) comp.getKeyFeature(j)).getCode());
            break;
          }
          case FSIndexComparator.TYPE_ORDER_KEY: {
            comps.add(0);
            break;
          }
          default: {
            // assert(false);
            throw new RuntimeException("Internal serialization error.");
          }
        }
        // Encode key comparator.
        comps.add(comp.getKeyComparator(j));
      }
      // Compute start of next comparator.
      compPos += 1 + (2 * numCompFeats);
    }
    // Set the comparator array.
    this.comparators = comps.toArray();
  }

  public void addTypeSystem(TypeSystemImpl ts) {
    this.typeNames = symbolTable2StringArray(ts.getTypeNameST());
    encodeTypeInheritance(ts);
    encodeFeatureDecls(ts);
    encodeStringSubtypes(ts);
  }

  private void encodeStringSubtypes(TypeSystemImpl ts) {
    Vector<Type> list = getStringSubtypes(ts);
    final int size = list.size();
    this.stringSubtypes = new int[size];
    this.stringSubtypeValuePos = new int[size];
    List<String> strVals = new ArrayList<String>();
    StringTypeImpl type;
    int pos = 0, typeCode;
    String[] stringSet;
    for (int i = 0; i < size; i++) {
      type = (StringTypeImpl) list.get(i);
      typeCode = type.getCode();
      this.stringSubtypes[i] = typeCode;
      this.stringSubtypeValuePos[i] = pos;
      stringSet = ts.ll_getStringSet(typeCode);
      pos += stringSet.length;
      for (int j = 0; j < stringSet.length; j++) {
        strVals.add(stringSet[j]);
      }
    }
    this.stringSubtypeValues = new String[strVals.size()];
    for (int i = 0; i < strVals.size(); i++) {
      this.stringSubtypeValues[i] = (String) strVals.get(i);
    }
  }

  private Vector<Type> getStringSubtypes(TypeSystemImpl ts) {
    return ts.getDirectlySubsumedTypes(ts.getType(CAS.TYPE_NAME_STRING));
  }

  // Encode a symbol table (list of strings) as an array of strings. Note: if
  // numbering in the symbol table starts at a point greater than 0, cells up
  // to that point will be null. Symbol tables may not start at less than 0.
  static String[] symbolTable2StringArray(SymbolTable st) {
    final int max = st.size();
    // This should be 1 for all cases we're interested in.
    final int offset = st.getStart();
    String[] ar = new String[max + offset];
    Arrays.fill(ar, 0, offset, null);
    int j = offset;
    for (int i = 0; i < max; i++) {
      ar[j] = st.getSymbol(j);
      ++j;
    }
    return ar;
  }

  private void encodeFeatureDecls(TypeSystemImpl ts) {
    final int max = ts.getSmallestFeature() + ts.getNumberOfFeatures();
    this.featureNames = new String[max];
    this.featDecls = new int[max * 3];
    Feature f;
    for (int i = ts.getSmallestFeature(); i < max; i++) {
      f = ts.ll_getFeatureForCode(i);
      this.featureNames[i] = f.getShortName();
      this.featDecls[i * 3] = ((TypeImpl) f.getDomain()).getCode();
      this.featDecls[(i * 3) + 1] = ((TypeImpl) f.getRange()).getCode();
      this.featDecls[(i * 3) + 2] = f.isMultipleReferencesAllowed() ? 1 : 0;
    }
  }

  private void encodeTypeInheritance(TypeSystemImpl ts) {
    final int max = ts.getSmallestType() + ts.getNumberOfTypes();
    this.typeInheritance = new int[max];
    TypeImpl parent;
    // The smallest type is top, which doesn't inherit.
    for (int i = ts.getSmallestType() + 1; i < max; i++) {
      parent = (TypeImpl) ts.getParent(ts.ll_getTypeForCode(i));
      this.typeInheritance[i] = parent.getCode();
    }
  }

  // Ouch.
  private int isStringSubtype(int type) {
    for (int i = 0; i < this.stringSubtypes.length; i++) {
      if (this.stringSubtypes[i] == type) {
        return i;
      }
    }
    return -1;
  }

  private String[] getSubarray(String[] array, int from, int to) {
    String[] sub = new String[to - from];
    for (int i = from; i < to; i++) {
      sub[i - from] = array[i];
    }
    return sub;
  }

  private String[] getStringArray(int pos) {
    int end;
    if (pos == this.stringSubtypes.length - 1) {
      // last entry in list, get all the rest
      end = this.stringSubtypeValues.length;
    } else {
      // else get up to the next entry
      end = this.stringSubtypeValuePos[pos + 1];
    }
    return getSubarray(this.stringSubtypeValues, this.stringSubtypeValuePos[pos], end);
  }

  public TypeSystemImpl getTypeSystem() {
    final TypeSystemImpl ts = new TypeSystemImpl();
    // First, add the top type.
//    ts.addTopType(CAS.TYPE_NAME_TOP);  // does nothing, top type already there
    // HashMap nameMap = null;
    // Temporary. The name map will go away completely.
//    HashMap nameMap = new HashMap();
    // if (source == SOURCE_TAF) {
    // nameMap = cas.mapTafTypesToCASTypes();
    // }
    String name;
//    int parent;
    // Now add all the other types.
//    if (this.source == SOURCE_TAF) {
//      for (int i = 2; i < this.typeNames.length; i++) {
//        parent = this.typeInheritance[i];
//        name = CASImpl.mapName(this.typeNames[i], nameMap);
//        // Check if the type we're adding is a subtype of string, in
//        // which case
//        // we need to call a different type system api.
//        int pos = isStringSubtype(i);
//        if (pos >= 0) {
//          ts.addStringSubtype(name, getStringArray(pos));
//        } else {
//          ts.addType(name, parent);
//        }
//      }
//    } else {
      for (int i = 2; i < this.typeNames.length; i++) {
        name = this.typeNames[i];
        int pos = isStringSubtype(i);
        if (pos >= 0) {
          ts.addStringSubtype(name, getStringArray(pos));
        } else if (TypeSystemImpl.isArrayTypeNameButNotBuiltIn(name)) {
            ts.getArrayType(ts.getType(TypeSystemImpl.getArrayComponentName(name)));
        } else {
            ts.addType(name, this.typeInheritance[i]);
        }
      }
//    }

    // Add feature declarations.
    final int max = this.featureNames.length;
    for (int i = 1; i < max; i++) {
//      if (this.source == SOURCE_TAF) {
//        name = CASImpl.mapName(this.featureNames[i], nameMap);
//      } else {
        name = this.featureNames[i];
//      }
      ts.addFeature(name, this.featDecls[i * 3], this.featDecls[(i * 3) + 1],
                    this.featDecls[(i * 3) + 2] == 1);
    }
    return ts;
  }

  public FSIndexRepositoryImpl getIndexRepository(CASImpl cas) {
    final FSIndexRepositoryImpl ir = new FSIndexRepositoryImpl(cas);
    // Get the type order.
    ir.setDefaultTypeOrder(LinearTypeOrderBuilderImpl.createTypeOrder(this.typeOrder, cas
            .getTypeSystem()));
    FSIndexComparator comp;
    final int max = this.indexNames.length;
    int pos = 0, next, maxComp;
    Type type;
    Feature feat;
    if (this.nameToIndexMap == null) {
      this.nameToIndexMap = new int[max];
      for (int i = 0; i < max; i++) {
        this.nameToIndexMap[i] = i;
      }
    }
    for (int i = 0; i < max; i++) {
      comp = ir.createComparator();
      // assert(pos == comparatorIndex[i]);
      pos = this.comparatorIndex[this.nameToIndexMap[i]];
      type = cas.getTypeSystemImpl().ll_getTypeForCode(this.comparators[pos]);
      comp.setType(type);
      ++pos;
      next = this.nameToIndexMap[i] + 1;
      if (next < max) {
        maxComp = this.comparatorIndex[next];
      } else {
        maxComp = this.comparators.length;
      }
      TypeSystemImpl tsi = (TypeSystemImpl) cas.getTypeSystem();
      while (pos < maxComp) {
        // System.out.println("Type system: " +
        // cas.getTypeSystem().toString());
        if (this.comparators[pos] > 0) {
          feat = tsi.ll_getFeatureForCode(this.comparators[pos]);
          // assert(feat != null);
          // System.out.println("Adding feature: " + feat.getName());
          ++pos;
          comp.addKey(feat, this.comparators[pos]);
          // assert(rc >= 0);
        } else {
          LinearTypeOrder order = ir.getDefaultTypeOrder();
          ++pos;
          comp.addKey(order, this.comparators[pos]);
        }
        ++pos;
      }
      ir.createIndex(comp, this.indexNames[i], this.indexingStrategy[this.nameToIndexMap[i]]);
    }
    ir.commit();
    return ir;
  }

}
TOP

Related Classes of org.apache.uima.cas.impl.CASMgrSerializer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.