// Package: com.ibm.icu.text
//
// Source code of com.ibm.icu.text.CollationParsedRuleBuilder

/**
*******************************************************************************
* Copyright (C) 1996-2011, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/
package com.ibm.icu.text;

import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.ibm.icu.impl.IntTrieBuilder;
import com.ibm.icu.impl.Norm2AllModes;
import com.ibm.icu.impl.Normalizer2Impl;
import com.ibm.icu.impl.TrieBuilder;
import com.ibm.icu.impl.TrieIterator;
import com.ibm.icu.impl.UCharacterProperty;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UCharacterCategory;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.VersionInfo;

/**
* Class for building a collator from a list of collation rules. This class uses CollationRuleParser.
*
* @author Syn Wee Quek
* @since release 2.2, June 11 2002
*/
final class CollationParsedRuleBuilder {
  // package private constructors ------------------------------------------

  /**
   * Constructor
   *
   * @param rules
   *            collation rules
   * @exception ParseException
   *                thrown when argument rules have an invalid syntax
   */
  CollationParsedRuleBuilder(final String rules) throws ParseException {
    // Parse the rule string up front; assembleTokenList() performs the
    // syntax validation and throws ParseException on malformed rules.
    m_parser_ = new CollationRuleParser(rules);
    m_parser_.assembleTokenList();
    // Collation element iterator over the root UCA, reused later when
    // generating CEs for tailored elements.
    m_utilColEIter_ = RuleBasedCollator.UCA_.getCollationElementIterator("");
  }

  // package private inner classes -----------------------------------------

  /**
   * Inverse UCA wrapper
   */
  static class InverseUCA {
    // package private constructor ---------------------------------------

    InverseUCA() {
    }

    // package private data member ---------------------------------------

    /**
     * Flat inverse table: 3 ints per entry - a CE, its continuation CE,
     * and a third value (not read in this class).
     */
    int m_table_[];
    /**
     * Continuation characters referenced by the inverse table
     */
    char m_continuations_[];

    /**
     * UCA version of inverse UCA table
     */
    VersionInfo m_UCA_version_;

    // package private method --------------------------------------------

    /**
     * Returns the previous inverse ces of the argument ces
     *
     * @param ce
     *            ce to test
     * @param contce
     *            continuation ce to test
     * @param strength
     *            collation strength
     * @param prevresult
     *            an array to store the return results previous inverse ce and previous inverse continuation ce
     * @return index in the inverse table of the previous ce, or -1 if the
     *         argument ces could not be found (prevresult[0] is then set to
     *         NULLORDER)
     */
    final int getInversePrevCE(int ce, int contce, final int strength, final int prevresult[]) {
      int result = findInverseCE(ce, contce);

      if (result < 0) {
        prevresult[0] = CollationElementIterator.NULLORDER;
        return -1;
      }

      // keep only the bytes significant at the requested strength
      ce &= STRENGTH_MASK_[strength];
      contce &= STRENGTH_MASK_[strength];

      prevresult[0] = ce;
      prevresult[1] = contce;

      // walk backwards until the masked ce pair changes or index 0 is hit
      while ((prevresult[0] & STRENGTH_MASK_[strength]) == ce && (prevresult[1] & STRENGTH_MASK_[strength]) == contce && result > 0) {
        // this condition should prevent falling off the edge of the
        // world
        // here, we end up in a singularity - zero
        prevresult[0] = m_table_[3 * (--result)];
        prevresult[1] = m_table_[3 * result + 1];
      }
      return result;
    }

    /**
     * Returns the strongest strength level, counting down from TERTIARY,
     * at which the two masked ce pairs compare equal; PRIMARY (0) if they
     * differ at every level.
     */
    final int getCEStrengthDifference(final int CE, final int contCE, final int prevCE, final int prevContCE) {
      int strength = Collator.TERTIARY;
      while (((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength]))
          && (strength != 0)) {
        strength--;
      }
      return strength;
    }

    /**
     * Compares two ce pairs level by level (primaries, then secondaries,
     * then tertiaries) as unsigned values. A second ce that is not a
     * continuation is treated as zero.
     *
     * @return 0 when both pairs are identical, otherwise the result of
     *         Utility.compareUnsigned on the first differing level
     */
    private int compareCEs(final int source0, final int source1, final int target0, final int target1) {
      int s1 = source0, s2, t1 = target0, t2;
      if (RuleBasedCollator.isContinuation(source1)) {
        s2 = source1;
      } else {
        s2 = 0;
      }
      if (RuleBasedCollator.isContinuation(target1)) {
        t2 = target1;
      } else {
        t2 = 0;
      }

      int s = 0, t = 0;
      if (s1 == t1 && s2 == t2) {
        return 0;
      }
      // combine the primary halves of both ces into one 32-bit value
      s = (s1 & 0xFFFF0000) | ((s2 & 0xFFFF0000) >>> 16);
      t = (t1 & 0xFFFF0000) | ((t2 & 0xFFFF0000) >>> 16);
      if (s == t) {
        // primaries equal - compare combined secondaries
        s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00) >> 8;
        t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00) >> 8;
        if (s == t) {
          // secondaries equal - compare combined tertiaries
          s = (s1 & 0x000000FF) << 8 | (s2 & 0x000000FF);
          t = (t1 & 0x000000FF) << 8 | (t2 & 0x000000FF);
          return Utility.compareUnsigned(s, t);
        } else {
          return Utility.compareUnsigned(s, t);
        }
      } else {
        return Utility.compareUnsigned(s, t);
      }
    }

    /**
     * Finding the inverse CE of the argument CEs.
     *
     * NOTE(review): this binary search always returns a non-negative
     * index, even when no exact match exists; callers' "result &lt; 0"
     * checks appear to be defensive only.
     *
     * @param ce
     *            CE to be tested
     * @param contce
     *            continuation CE
     * @return index of the inverse CE in m_table_
     */
    int findInverseCE(final int ce, final int contce) {
      int bottom = 0;
      int top = m_table_.length / 3;
      int result = 0;

      while (bottom < top - 1) {
        result = (top + bottom) >> 1;
        int first = m_table_[3 * result];
        int second = m_table_[3 * result + 1];
        int comparison = compareCEs(first, second, ce, contce);
        if (comparison > 0) {
          top = result;
        } else if (comparison < 0) {
          bottom = result;
        } else {
          break;
        }
      }

      return result;
    }

    /**
     * Getting gap offsets in the inverse UCA
     *
     * @param listheader
     *            parsed token lists
     * @exception Exception
     *                thrown when error occurs while finding the collation gaps
     */
    void getInverseGapPositions(final CollationRuleParser.TokenListHeader listheader) throws Exception {
      // reset all the gaps
      CollationRuleParser.Token token = listheader.m_first_;
      int tokenstrength = token.m_strength_;

      for (int i = 0; i < 3; i++) {
        listheader.m_gapsHi_[3 * i] = 0;
        listheader.m_gapsHi_[3 * i + 1] = 0;
        listheader.m_gapsHi_[3 * i + 2] = 0;
        listheader.m_gapsLo_[3 * i] = 0;
        listheader.m_gapsLo_[3 * i + 1] = 0;
        listheader.m_gapsLo_[3 * i + 2] = 0;
        listheader.m_numStr_[i] = 0;
        listheader.m_fStrToken_[i] = null;
        listheader.m_lStrToken_[i] = null;
        listheader.m_pos_[i] = -1;
      }

      if ((listheader.m_baseCE_ >>> 24) >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_
          && (listheader.m_baseCE_ >>> 24) <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) {
        // implicits - compute the gap directly from the implicit CE
        // generator instead of the inverse table
        listheader.m_pos_[0] = 0;
        int t1 = listheader.m_baseCE_;
        int t2 = listheader.m_baseContCE_;
        listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
        listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
        listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
        int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_ | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16;
        primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(RuleBasedCollator.impCEGen_.getRawFromImplicit(primaryCE) + 1);

        t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505;
        t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_;

        // if (listheader.m_baseCE_ < 0xEF000000) {
        // // first implicits have three byte primaries, with a gap of
        // // one so we esentially need to add 2 to the top byte in
        // // listheader.m_baseContCE_
        // t2 += 0x02000000;
        // }
        // else {
        // // second implicits have four byte primaries, with a gap of
        // // IMPLICIT_LAST2_MULTIPLIER_
        // // Now, this guy is not really accessible here, so until we
        // // find a better way to pass it around, assume that the gap
        // is 1
        // t2 += 0x00020000;
        // }
        listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
        listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
        listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
      } else if (listheader.m_indirect_ == true && listheader.m_nextCE_ != 0) {
        // indirect positioning - the gap is [baseCE, nextCE)
        listheader.m_pos_[0] = 0;
        int t1 = listheader.m_baseCE_;
        int t2 = listheader.m_baseContCE_;
        listheader.m_gapsLo_[0] = mergeCE(t1, t2, Collator.PRIMARY);
        listheader.m_gapsLo_[1] = mergeCE(t1, t2, Collator.SECONDARY);
        listheader.m_gapsLo_[2] = mergeCE(t1, t2, Collator.TERTIARY);
        t1 = listheader.m_nextCE_;
        t2 = listheader.m_nextContCE_;
        listheader.m_gapsHi_[0] = mergeCE(t1, t2, Collator.PRIMARY);
        listheader.m_gapsHi_[1] = mergeCE(t1, t2, Collator.SECONDARY);
        listheader.m_gapsHi_[2] = mergeCE(t1, t2, Collator.TERTIARY);
      } else {
        // general case: find each strength's gap via the inverse table
        while (true) {
          if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
            listheader.m_pos_[tokenstrength] = getInverseNext(listheader, tokenstrength);
            if (listheader.m_pos_[tokenstrength] >= 0) {
              listheader.m_fStrToken_[tokenstrength] = token;
            } else {
              // The CE must be implicit, since it's not in the
              // table
              // Error
              throw new Exception("Internal program error");
            }
          }

          while (token != null && token.m_strength_ >= tokenstrength) {
            if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) {
              listheader.m_lStrToken_[tokenstrength] = token;
            }
            token = token.m_next_;
          }
          if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) {
            // check if previous interval is the same and merge the
            // intervals if it is so
            if (listheader.m_pos_[tokenstrength] == listheader.m_pos_[tokenstrength + 1]) {
              listheader.m_fStrToken_[tokenstrength] = listheader.m_fStrToken_[tokenstrength + 1];
              listheader.m_fStrToken_[tokenstrength + 1] = null;
              listheader.m_lStrToken_[tokenstrength + 1] = null;
              listheader.m_pos_[tokenstrength + 1] = -1;
            }
          }
          if (token != null) {
            tokenstrength = token.m_strength_;
          } else {
            break;
          }
        }
        // convert the found table positions into lo/hi gap CE triples
        for (int st = 0; st < 3; st++) {
          int pos = listheader.m_pos_[st];
          if (pos >= 0) {
            int t1 = m_table_[3 * pos];
            int t2 = m_table_[3 * pos + 1];
            listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2, Collator.PRIMARY);
            listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2, Collator.SECONDARY);
            listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 | (t2 & 0x3f) << 16;
            // pos --;
            // t1 = m_table_[3 * pos];
            // t2 = m_table_[3 * pos + 1];
            t1 = listheader.m_baseCE_;
            t2 = listheader.m_baseContCE_;

            listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, Collator.PRIMARY);
            listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, Collator.SECONDARY);
            listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24 | (t2 & 0x3f) << 16;
          }
        }
      }
    }

    /**
     * Gets the next CE in the inverse table
     *
     * @param listheader
     *            token list header
     * @param strength
     *            collation strength
     * @return index of the next ce, or -1 when the base ce is not found
     */
    private final int getInverseNext(final CollationRuleParser.TokenListHeader listheader, final int strength) {
      int ce = listheader.m_baseCE_;
      int secondce = listheader.m_baseContCE_;
      int result = findInverseCE(ce, secondce);

      if (result < 0) {
        return -1;
      }

      ce &= STRENGTH_MASK_[strength];
      secondce &= STRENGTH_MASK_[strength];

      int nextce = ce;
      int nextcontce = secondce;

      // scan forward until the masked ce pair changes; no explicit bounds
      // check - presumably relies on the table's last entries differing,
      // TODO confirm
      while ((nextce & STRENGTH_MASK_[strength]) == ce && (nextcontce & STRENGTH_MASK_[strength]) == secondce) {
        nextce = m_table_[3 * (++result)];
        nextcontce = m_table_[3 * result + 1];
      }

      listheader.m_nextCE_ = nextce;
      listheader.m_nextContCE_ = nextcontce;

      return result;
    }
  }

  // package private data members ------------------------------------------

  /**
   * Inverse UCA singleton, loaded once by the static initializer below.
   */
  static final InverseUCA INVERSE_UCA_;

  /**
   * Error message: versions of the UCA and the inverse UCA do not match
   */
  private static final String INV_UCA_VERSION_MISMATCH_ = "UCA versions of UCA and inverse UCA should match";

  /**
   * Error message: the UCA (or the inverse UCA) could not be loaded
   */
  private static final String UCA_NOT_INSTANTIATED_ = "UCA is not instantiated!";

  /**
   * Initializing the inverse UCA: load it, then verify that its version
   * matches the UCA's version.
   */
  static {
    InverseUCA temp = null;
    try {
      temp = CollatorReader.getInverseUCA();
    } catch (IOException e) {
      // ignored deliberately: temp stays null and is reported below as
      // UCA_NOT_INSTANTIATED_
    }
    /*
     * try { String invdat = "/com/ibm/icu/impl/data/invuca.icu";
     * InputStream i =
     * CollationParsedRuleBuilder.class.getResourceAsStream(invdat);
     * BufferedInputStream b = new BufferedInputStream(i, 110000);
     * INVERSE_UCA_ = CollatorReader.readInverseUCA(b); b.close();
     * i.close(); } catch (Exception e) { e.printStackTrace(); throw new
     * RuntimeException(e.getMessage()); }
     */

    if (temp != null && RuleBasedCollator.UCA_ != null) {
      if (!temp.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) {
        throw new RuntimeException(INV_UCA_VERSION_MISMATCH_);
      }
    } else {
      throw new RuntimeException(UCA_NOT_INSTANTIATED_);
    }

    INVERSE_UCA_ = temp;
  }

  // package private methods -----------------------------------------------

  /**
   * Parse and sets the collation rules in the argument collator
   *
   * @param collator
   *            to set
   * @exception Exception
   *                thrown when internal program error occurs
   */
  void setRules(final RuleBasedCollator collator) throws Exception {
    if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) {
      // we have a set of rules, let's make something of it
      assembleTailoringTable(collator);
    } else { // no rules, but no error either must be only options
      // We will init the collator from UCA
      collator.setWithUCATables();
    }
    // And set only the options
    m_parser_.setDefaultOptionsInCollator(collator);
  }

  /**
   * Copies the UCA collation elements for code points in [start, end] into
   * the build table, for every code point that has no mapping yet (or whose
   * contraction entry is missing its starting element).
   *
   * @param t
   *            build table to fill
   * @param start
   *            first code point, inclusive
   * @param end
   *            last code point, inclusive
   */
  private void copyRangeFromUCA(final BuildTable t, final int start, final int end) {
    int u = 0;
    for (u = start; u <= end; u++) {
      // if ((CE = ucmpe32_get(t.m_mapping, u)) == UCOL_NOT_FOUND
      int CE = t.m_mapping_.getValue(u);
      if (CE == CE_NOT_FOUND_
      // this test is for contractions that are missing the starting
      // element. Looks like latin-1 should be done before
      // assembling the table, even if it results in more false
      // closure elements
          || (isContractionTableElement(CE) && getCE(t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) {
        // m_utilElement_.m_uchars_ = str.toString();
        m_utilElement_.m_uchars_ = UCharacter.toString(u);
        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
        m_utilElement_.m_prefix_ = 0;
        m_utilElement_.m_CELength_ = 0;
        m_utilElement_.m_prefixChars_ = null;
        m_utilColEIter_.setText(m_utilElement_.m_uchars_);
        // collect every CE the UCA iterator produces for this code
        // point; note CE is reused here as the iterator's current value
        while (CE != CollationElementIterator.NULLORDER) {
          CE = m_utilColEIter_.next();
          if (CE != CollationElementIterator.NULLORDER) {
            m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
          }
        }
        addAnElement(t, m_utilElement_);
      }
    }
  }

  /**
   * 2. Eliminate the negative lists by doing the following for each non-null negative list: o if previousCE(baseCE, strongestN) != some
   * ListHeader X's baseCE, create new ListHeader X o reverse the list, add to the end of X's positive list. Reset the strength of the
   * first item you add, based on the stronger strength levels of the two lists.
   *
   * 3. For each ListHeader with a non-null positive list: o Find all character strings with CEs between the baseCE and the next/previous
   * CE, at the strength of the first token. Add these to the tailoring. ? That is, if UCA has ... x <<< X << x' <<< X' < y ..., and the
   * tailoring has & x < z... ? Then we change the tailoring to & x <<< X << x' <<< X' < z ...
   *
   * It is possible that this part should be done even while constructing list The problem is that it is unknown what is going to be the
   * strongest weight. So we might as well do it here o Allocate CEs for each token in the list, based on the total number N of the
   * largest level difference, and the gap G between baseCE and nextCE at that level. The relation * between the last item and nextCE is
   * the same as the strongest strength. o Example: baseCE < a << b <<< q << c < d < e * nextCE(X,1) ? There are 3 primary items: a, d, e.
   * Fit them into the primary gap. Then fit b and c into the secondary gap between a and d, then fit q into the tertiary gap between b
   * and c. o Example: baseCE << b <<< q << c * nextCE(X,2) ? There are 2 secondary items: b, c. Fit them into the secondary gap. Then fit
   * q into the tertiary gap between b and c. o When incrementing primary values, we will not cross high byte boundaries except where
   * there is only a single-byte primary. That is to ensure that the script reordering will continue to work.
   *
   * @param collator
   *            the rule based collator to update
   * @exception Exception
   *                thrown when internal program error occurs
   */
  void assembleTailoringTable(final RuleBasedCollator collator) throws Exception {

    for (int i = 0; i < m_parser_.m_resultLength_; i++) {
      // now we need to generate the CEs
      // We stuff the initial value in the buffers, and increase the
      // appropriate buffer according to strength
      if (m_parser_.m_listHeader_[i].m_first_ != null) {
        // if there are any elements
        // due to the way parser works, subsequent tailorings
        // may remove all the elements from a sequence, therefore
        // leaving an empty tailoring sequence.
        initBuffers(m_parser_.m_listHeader_[i]);
      }
    }

    if (m_parser_.m_variableTop_ != null) {
      // stuff the variable top value
      m_parser_.m_options_.m_variableTopValue_ = m_parser_.m_variableTop_.m_CE_[0] >>> 16;
      // unlink the variable-top token from its doubly-linked token list
      if (m_parser_.m_variableTop_.m_listHeader_.m_first_ == m_parser_.m_variableTop_) { // first in list
        m_parser_.m_variableTop_.m_listHeader_.m_first_ = m_parser_.m_variableTop_.m_next_;
      }
      if (m_parser_.m_variableTop_.m_listHeader_.m_last_ == m_parser_.m_variableTop_) {
        // last in list
        m_parser_.m_variableTop_.m_listHeader_.m_last_ = m_parser_.m_variableTop_.m_previous_;
      }
      if (m_parser_.m_variableTop_.m_next_ != null) {
        m_parser_.m_variableTop_.m_next_.m_previous_ = m_parser_.m_variableTop_.m_previous_;
      }
      if (m_parser_.m_variableTop_.m_previous_ != null) {
        m_parser_.m_variableTop_.m_previous_.m_next_ = m_parser_.m_variableTop_.m_next_;
      }
    }

    BuildTable t = new BuildTable(m_parser_);

    // After this, we have assigned CE values to all regular CEs now we
    // will go through list once more and resolve expansions, make
    // UCAElements structs and add them to table
    for (int i = 0; i < m_parser_.m_resultLength_; i++) {
      // now we need to generate the CEs
      // We stuff the initial value in the buffers, and increase the
      // appropriate buffer according to strength */
      createElements(t, m_parser_.m_listHeader_[i]);
    }

    m_utilElement_.clear();

    // add latin-1 stuff
    copyRangeFromUCA(t, 0, 0xFF);

    // add stuff for copying
    if (m_parser_.m_copySet_ != null) {
      int i = 0;
      for (i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) {
        copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i), m_parser_.m_copySet_.getRangeEnd(i));
      }
    }

    // copy contractions from the UCA - this is felt mostly for cyrillic
    char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_;
    int maxUCAContractionLength = RuleBasedCollator.MAX_UCA_CONTRACTION_LENGTH;
    int offset = 0;
    while (conts[offset] != 0) {
      // A continuation is NUL-terminated and NUL-padded
      // except if it has the maximum length.
      int contractionLength = maxUCAContractionLength;
      while (contractionLength > 0 && conts[offset + contractionLength - 1] == 0) {
        --contractionLength;
      }
      int first = Character.codePointAt(conts, offset);
      int firstLength = Character.charCount(first);
      int tailoredCE = t.m_mapping_.getValue(first);
      Elements prefixElm = null;
      if (tailoredCE != CE_NOT_FOUND_) {
        boolean needToAdd = true;
        if (isContractionTableElement(tailoredCE)) {
          if (isTailored(t.m_contractions_, tailoredCE, conts, offset + firstLength) == true) {
            needToAdd = false;
          }
        }
        if (!needToAdd && isPrefix(tailoredCE) && conts[offset + 1] == 0) {
          // pre-context character in UCA
          // The format for pre-context character is
          // conts[0]: baseCP conts[1]:0 conts[2]:pre-context CP
          Elements elm = new Elements();
          elm.m_CELength_ = 0;
          elm.m_uchars_ = Character.toString(conts[offset]);
          // NOTE(review): copies m_utilElement_'s chars rather than
          // elm.m_uchars_ - looks suspicious, confirm against ICU4C
          elm.m_cPoints_ = m_utilElement_.m_uchars_;
          elm.m_prefixChars_ = Character.toString(conts[offset + 2]);
          elm.m_prefix_ = 0; // TODO(claireho) : confirm!
          prefixElm = t.m_prefixLookup_.get(elm);
          if ((prefixElm == null) || (prefixElm.m_prefixChars_.charAt(0) != conts[offset + 2])) {
            needToAdd = true;
          }
        }
        if (m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(first)) {
          needToAdd = false;
        }

        if (needToAdd == true) {
          // we need to add if this contraction is not tailored.
          if (conts[offset + 1] != 0) { // not precontext
            m_utilElement_.m_prefix_ = 0;
            m_utilElement_.m_prefixChars_ = null;
            m_utilElement_.m_uchars_ = new String(conts, offset, contractionLength);
            m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
            m_utilElement_.m_CELength_ = 0;
            m_utilColEIter_.setText(m_utilElement_.m_uchars_);
          } else { // add a pre-context element
            int preKeyLen = 0;
            m_utilElement_.m_uchars_ = Character.toString(conts[offset]);
            m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
            m_utilElement_.m_CELength_ = 0;
            m_utilElement_.m_prefixChars_ = Character.toString(conts[offset + 2]);
            if (prefixElm == null) {
              m_utilElement_.m_prefix_ = 0;
            } else { // TODO (claireho): confirm!
              // NOTE(review): self-assignment is a no-op; the
              // commented-out line below suggests prefixElm.m_prefix_
              // was intended - confirm
              m_utilElement_.m_prefix_ = m_utilElement_.m_prefix_;
              // m_utilElement_.m_prefix_= prefixElm.m_prefix_;
            }
            m_utilColEIter_.setText(m_utilElement_.m_prefixChars_);
            while (m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
              // count number of keys for pre-context char.
              preKeyLen++;
            }
            m_utilColEIter_.setText(m_utilElement_.m_prefixChars_ + m_utilElement_.m_uchars_);
            // Skip the keys for prefix character, then copy the
            // rest to el.
            while ((preKeyLen-- > 0) && m_utilColEIter_.next() != CollationElementIterator.NULLORDER) {
              continue;
            }

          }
          // collect the remaining CEs from the iterator into the element
          while (true) {
            int CE = m_utilColEIter_.next();
            if (CE != CollationElementIterator.NULLORDER) {
              m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = CE;
            } else {
              break;
            }
          }
          addAnElement(t, m_utilElement_);
        }
      } else if (m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(first)) {
        copyRangeFromUCA(t, first, first);
      }

      offset += maxUCAContractionLength;
    }

    // Add completely ignorable elements
    processUCACompleteIgnorables(t);

    // canonical closure
    canonicalClosure(t);

    // still need to produce compatibility closure
    assembleTable(t, collator);
  }

  // private inner classes -------------------------------------------------

  private static class CEGenerator {
    // package private data members --------------------------------------

    WeightRange m_ranges_[];
    int m_rangesLength_;
    int m_byteSize_;
    int m_start_;
    int m_limit_;
    int m_maxCount_;
    int m_count_;
    int m_current_;
    int m_fLow_; // forbidden Low
    int m_fHigh_; // forbidden High

    // package private constructor ---------------------------------------

    CEGenerator() {
      m_ranges_ = new WeightRange[7];
      for (int i = 6; i >= 0; i--) {
        m_ranges_[i] = new WeightRange();
      }
    }
  }

  private static class WeightRange implements Comparable<WeightRange> {
    // public methods ----------------------------------------------------

    /**
     * Compares this object with target
     *
     * @param target
     *            object to compare with
     * @return 0 if equals, 1 if this is > target, -1 otherwise
     */
    public int compareTo(final WeightRange target) {
      return Utility.compareUnsigned(m_start_, target.m_start_);
    }

    /**
     * Initialize
     */
    public void clear() {
      m_start_ = 0;
      m_end_ = 0;
      m_length_ = 0;
      m_count_ = 0;
      m_length2_ = 0;
      m_count2_ = 0;
    }

    // package private data members --------------------------------------

    int m_start_;
    int m_end_;
    int m_length_;
    int m_count_;
    int m_length2_;
    int m_count2_;

    // package private constructor ---------------------------------------

    WeightRange() {
      clear();
    }

    /**
     * Copy constructor. Cloneable is troublesome, needs to check for exception
     *
     * @param source
     *            to clone
     */
    WeightRange(final WeightRange source) {
      m_start_ = source.m_start_;
      m_end_ = source.m_end_;
      m_length_ = source.m_length_;
      m_count_ = source.m_count_;
      m_length2_ = source.m_length2_;
      m_count2_ = source.m_count2_;
    }
  }

  private static class MaxJamoExpansionTable {
    // package private data members --------------------------------------

    List<Integer> m_endExpansionCE_;
    // vector of booleans
    List<Boolean> m_isV_;
    byte m_maxLSize_;
    byte m_maxVSize_;
    byte m_maxTSize_;

    // package private constructor ---------------------------------------

    MaxJamoExpansionTable() {
      m_endExpansionCE_ = new ArrayList<Integer>();
      m_isV_ = new ArrayList<Boolean>();
      m_endExpansionCE_.add(Integer.valueOf(0));
      m_isV_.add(Boolean.FALSE);
      m_maxLSize_ = 1;
      m_maxVSize_ = 1;
      m_maxTSize_ = 1;
    }

    MaxJamoExpansionTable(final MaxJamoExpansionTable table) {
      m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
      m_isV_ = new ArrayList<Boolean>(table.m_isV_);
      m_maxLSize_ = table.m_maxLSize_;
      m_maxVSize_ = table.m_maxVSize_;
      m_maxTSize_ = table.m_maxTSize_;
    }
  }

  private static class MaxExpansionTable {
    // package private constructor --------------------------------------

    MaxExpansionTable() {
      m_endExpansionCE_ = new ArrayList<Integer>();
      m_expansionCESize_ = new ArrayList<Byte>();
      m_endExpansionCE_.add(Integer.valueOf(0));
      m_expansionCESize_.add(Byte.valueOf((byte) 0));
    }

    MaxExpansionTable(final MaxExpansionTable table) {
      m_endExpansionCE_ = new ArrayList<Integer>(table.m_endExpansionCE_);
      m_expansionCESize_ = new ArrayList<Byte>(table.m_expansionCESize_);
    }

    // package private data member --------------------------------------

    List<Integer> m_endExpansionCE_;
    List<Byte> m_expansionCESize_;
  }

  private static class BasicContractionTable {
    // package private constructors -------------------------------------

    BasicContractionTable() {
      m_CEs_ = new ArrayList<Integer>();
      m_codePoints_ = new StringBuilder();
    }

    // package private data members -------------------------------------

    StringBuilder m_codePoints_;
    List<Integer> m_CEs_;
  }

  private static class ContractionTable {
    // package private constructor --------------------------------------

    /**
     * Builds a contraction table
     *
     * @param mapping
     */
    ContractionTable(final IntTrieBuilder mapping) {
      m_mapping_ = mapping;
      m_elements_ = new ArrayList<BasicContractionTable>();
      m_CEs_ = new ArrayList<Integer>();
      m_codePoints_ = new StringBuilder();
      m_offsets_ = new ArrayList<Integer>();
      m_currentTag_ = CE_NOT_FOUND_TAG_;
    }

    /**
     * Copies a contraction table. Not all data will be copied into their own object.
     *
     * @param table
     */
    ContractionTable(final ContractionTable table) {
      m_mapping_ = table.m_mapping_;
      m_elements_ = new ArrayList<BasicContractionTable>(table.m_elements_);
      m_codePoints_ = new StringBuilder(table.m_codePoints_);
      m_CEs_ = new ArrayList<Integer>(table.m_CEs_);
      m_offsets_ = new ArrayList<Integer>(table.m_offsets_);
      m_currentTag_ = table.m_currentTag_;
    }

    // package private data members ------------------------------------

    /**
     * Vector of BasicContractionTable
     */
    List<BasicContractionTable> m_elements_;
    IntTrieBuilder m_mapping_;
    StringBuilder m_codePoints_;
    List<Integer> m_CEs_;
    List<Integer> m_offsets_;
    int m_currentTag_;
  }

  /**
   * Private class for combining mark table. The table is indexed by the class value(0-255).
   */
  private static class CombinClassTable {
    /**
     * accumulated numbers of combining marks.
     */
    int[] index = new int[256];

    /**
     * code point array for combining marks.
     */
    char[] cPoints;

    /**
     * size of cPoints.
     */
    int size;

    // constructor
    CombinClassTable() {
      cPoints = null;
      size = 0;
      pos = 0;
      curClass = 1;
    }

    /**
     * Copy the combining mark table from ccc and index in compact way.
     *
     * @param cps
     *            : code point array
     * @param size
     *            : size of ccc
     * @param index
     *            : index of combining classes(0-255)
     */
    void generate(final char[] cps, final int numOfCM, final int[] ccIndex) {
      int count = 0;

      cPoints = new char[numOfCM];
      for (int i = 0; i < 256; i++) {
        for (int j = 0; j < ccIndex[i]; j++) {
          cPoints[count++] = cps[(i << 8) + j];
        }
        index[i] = count;
      }
      size = count;
    }

    /**
     * Get first CM(combining mark) with the combining class value cClass.
     *
     * @param cClass
     *            : combining class value.
     * @return combining mark codepoint or 0 if no combining make with class value cClass
     */
    char GetFirstCM(final int cClass) {
      curClass = cClass;
      if (cPoints == null || cClass == 0 || index[cClass] == index[cClass - 1]) {
        return 0;
      }
      pos = 1;
      return cPoints[index[cClass - 1]];
    }

    /**
     * Get next CM(combining mark) with the combining class value cClass. Return combining mark codepoint or 0 if no next CM.
     */
    char GetNextCM() {
      if (cPoints == null || index[curClass] == (index[curClass - 1] + pos)) {
        return 0;
      }
      return cPoints[index[curClass - 1] + (pos++)];
    }

    // private data members
    int pos;
    int curClass;
  }

  private static final class BuildTable implements TrieBuilder.DataManipulate {
    // package private methods ------------------------------------------

    /**
     * For construction of the Trie tables. Has to be labeled public
     *
     * @param cp
     *            The value of the code point.
     * @param offset
     *            The value of the offset.
     * @return data offset or 0
     */
    public int getFoldedValue(int cp, final int offset) {
      int limit = cp + 0x400;
      while (cp < limit) {
        int value = m_mapping_.getValue(cp);
        boolean inBlockZero = m_mapping_.isInZeroBlock(cp);
        int tag = getCETag(value);
        if (inBlockZero == true) {
          cp += TrieBuilder.DATA_BLOCK_LENGTH;
        } else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ || tag == CE_NOT_FOUND_TAG_))) {
          // These are values that are starting in either UCA
          // (IMPLICIT_TAG) or in the tailorings (NOT_FOUND_TAG).
          // Presence of these tags means that there is nothing in
          // this position and that it should be skipped.
          return RuleBasedCollator.CE_SPECIAL_FLAG_ | (CE_SURROGATE_TAG_ << 24) | offset;
        } else {
          ++cp;
        }
      }
      return 0;
    }

    // package private constructor --------------------------------------

    /**
     * Returns a table
     */
    BuildTable(final CollationRuleParser parser) {
      m_collator_ = new RuleBasedCollator();
      m_collator_.setWithUCAData();
      MaxExpansionTable maxet = new MaxExpansionTable();
      MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable();
      m_options_ = parser.m_options_;
      m_expansions_ = new ArrayList<Integer>();
      // Do your own mallocs for the structure, array and have linear
      // Latin 1
      int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_ | (CE_NOT_FOUND_TAG_ << 24);
      // temporary fix for jb3822, 0x100000 -> 30000
      m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue, trieinitialvalue, true);
      m_prefixLookup_ = new HashMap<Elements, Elements>();
      // uhash_open(prefixLookupHash, prefixLookupComp);
      m_contractions_ = new ContractionTable(m_mapping_);
      // copy UCA's maxexpansion and merge as we go along
      m_maxExpansions_ = maxet;
      // adding an extra initial value for easier manipulation
      for (int i = 0; i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i++) {
        maxet.m_endExpansionCE_.add(Integer.valueOf(RuleBasedCollator.UCA_.m_expansionEndCE_[i]));
        maxet.m_expansionCESize_.add(Byte.valueOf(RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i]));
      }
      m_maxJamoExpansions_ = maxjet;

      m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_];
      m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_];
      Arrays.fill(m_unsafeCP_, (byte) 0);
      Arrays.fill(m_contrEndCP_, (byte) 0);
    }

    /**
     * Duplicating a BuildTable. Not all data will be duplicated into their own object.
     *
     * @param table
     *            to clone
     */
    BuildTable(final BuildTable table) {
      m_collator_ = table.m_collator_;
      m_mapping_ = new IntTrieBuilder(table.m_mapping_);
      m_expansions_ = new ArrayList<Integer>(table.m_expansions_);
      m_contractions_ = new ContractionTable(table.m_contractions_);
      m_contractions_.m_mapping_ = m_mapping_;
      m_options_ = table.m_options_;
      m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_);
      m_maxJamoExpansions_ = new MaxJamoExpansionTable(table.m_maxJamoExpansions_);
      m_unsafeCP_ = new byte[table.m_unsafeCP_.length];
      System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0, m_unsafeCP_.length);
      m_contrEndCP_ = new byte[table.m_contrEndCP_.length];
      System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0, m_contrEndCP_.length);
    }

    // package private data members -------------------------------------

    RuleBasedCollator m_collator_;
    IntTrieBuilder m_mapping_;
    List<Integer> m_expansions_;
    ContractionTable m_contractions_;
    // UCATableHeader image;
    CollationRuleParser.OptionSet m_options_;
    MaxExpansionTable m_maxExpansions_;
    MaxJamoExpansionTable m_maxJamoExpansions_;
    byte m_unsafeCP_[];
    byte m_contrEndCP_[];
    Map<Elements, Elements> m_prefixLookup_;
    CombinClassTable cmLookup = null;
  }

  private static class Elements {
    // package private data members -------------------------------------

    String m_prefixChars_;
    int m_prefix_;
    String m_uchars_;
    /**
     * Working string
     */
    String m_cPoints_;
    /**
     * Offset to the working string
     */
    int m_cPointsOffset_;
    /**
     * These are collation elements - there could be more than one - in case of expansion
     */
    int m_CEs_[];
    int m_CELength_;
    /**
     * This is the value element maps in original table
     */
    int m_mapCE_;
    int m_sizePrim_[];
    int m_sizeSec_[];
    int m_sizeTer_[];
    boolean m_variableTop_;
    boolean m_caseBit_;

    // package private constructors -------------------------------------

    /**
     * Package private constructor
     */
    Elements() {
      m_sizePrim_ = new int[128];
      m_sizeSec_ = new int[128];
      m_sizeTer_ = new int[128];
      m_CEs_ = new int[256];
      m_CELength_ = 0;
    }

    /**
     * Package private constructor
     */
    Elements(final Elements element) {
      m_prefixChars_ = element.m_prefixChars_;
      m_prefix_ = element.m_prefix_;
      m_uchars_ = element.m_uchars_;
      m_cPoints_ = element.m_cPoints_;
      m_cPointsOffset_ = element.m_cPointsOffset_;
      m_CEs_ = element.m_CEs_;
      m_CELength_ = element.m_CELength_;
      m_mapCE_ = element.m_mapCE_;
      m_sizePrim_ = element.m_sizePrim_;
      m_sizeSec_ = element.m_sizeSec_;
      m_sizeTer_ = element.m_sizeTer_;
      m_variableTop_ = element.m_variableTop_;
      m_caseBit_ = element.m_caseBit_;
    }

    // package private methods -------------------------------------------

    /**
     * Initializing the elements
     */
    public void clear() {
      m_prefixChars_ = null;
      m_prefix_ = 0;
      m_uchars_ = null;
      m_cPoints_ = null;
      m_cPointsOffset_ = 0;
      m_CELength_ = 0;
      m_mapCE_ = 0;
      Arrays.fill(m_sizePrim_, 0);
      Arrays.fill(m_sizeSec_, 0);
      Arrays.fill(m_sizeTer_, 0);
      m_variableTop_ = false;
      m_caseBit_ = false;
    }

    /**
     * Hashcode calculation for token
     *
     * @return the hashcode
     */
    @Override
    public int hashCode() {
      String str = m_cPoints_.substring(m_cPointsOffset_);
      return str.hashCode();
    }

    /**
     * Equals calculation
     *
     * @param target
     *            Object to compare
     * @return true if target is the same as this object
     */
    @Override
    public boolean equals(final Object target) {
      if (target == this) {
        return true;
      }
      if (target instanceof Elements) {
        Elements t = (Elements) target;
        int size = m_cPoints_.length() - m_cPointsOffset_;
        if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) {
          return t.m_cPoints_.regionMatches(t.m_cPointsOffset_, m_cPoints_, m_cPointsOffset_, size);
        }
      }
      return false;
    }
  }

  // private data member ---------------------------------------------------

  /**
   * Maximum strength used in CE building
   */
  private static final int CE_BASIC_STRENGTH_LIMIT_ = 3;
  /**
   * Maximum collation strength
   */
  private static final int CE_STRENGTH_LIMIT_ = 16;
  /**
   * Strength mask array, used in inverse UCA
   */
  private static final int STRENGTH_MASK_[] = { 0xFFFF0000, 0xFFFFFF00, 0xFFFFFFFF };
  /**
   * CE value for not found
   */
  private static final int CE_NOT_FOUND_ = 0xF0000000;
  /**
   * CE tag for not found
   */
  private static final int CE_NOT_FOUND_TAG_ = 0;
  /**
   * This code point results in an expansion
   */
  private static final int CE_EXPANSION_TAG_ = 1;
  /**
   * Start of a contraction
   */
  private static final int CE_CONTRACTION_TAG_ = 2;
  /*
   * Thai character - do the reordering
   */
  // private static final int CE_THAI_TAG_ = 3;
  /*
   * Charset processing, not yet implemented
   */
  // private static final int CE_CHARSET_TAG_ = 4;
  /**
   * Lead surrogate that is tailored and doesn't start a contraction
   */
  private static final int CE_SURROGATE_TAG_ = 5;
  /*
   * AC00-D7AF
   */
  // private static final int CE_HANGUL_SYLLABLE_TAG_ = 6;
  /*
   * D800-DBFF
   */
  // private static final int CE_LEAD_SURROGATE_TAG_ = 7;
  /*
   * DC00-DFFF
   */
  // private static final int CE_TRAIL_SURROGATE_TAG_ = 8;
  /*
   * 0x3400-0x4DB5, 0x4E00-0x9FA5, 0xF900-0xFA2D
   */
  // private static final int CE_CJK_IMPLICIT_TAG_ = 9;
  private static final int CE_IMPLICIT_TAG_ = 10;
  private static final int CE_SPEC_PROC_TAG_ = 11;
  /**
   * This is a three byte primary with starting secondaries and tertiaries. It fits in a single 32 bit CE and is used instead of expansion
   * to save space without affecting the performance (hopefully)
   */
  private static final int CE_LONG_PRIMARY_TAG_ = 12;
  /**
   * Unsafe UChar hash table table size. Size is 32 bytes for 1 bit for each latin 1 char + some power of two for hashing the rest of the
   * chars. Size in bytes
   */
  private static final int UNSAFECP_TABLE_SIZE_ = 1056;
  /**
   * Mask value down to "some power of two" -1. Number of bits, not num of bytes.
   */
  // 0x1fff selects 8192 hash bits; with the 256 Latin-1 bits this fills the
  // 1056-byte (8448-bit) table exactly.
  private static final int UNSAFECP_TABLE_MASK_ = 0x1fff;
  /**
   * Case values
   */
  private static final int UPPER_CASE_ = 0x80;
  private static final int MIXED_CASE_ = 0x40;
  private static final int LOWER_CASE_ = 0x00;
  /*
   * Initial table size
   */
  // private static final int INIT_TABLE_SIZE_ = 1028;
  /*
   * Header size, copied from ICU4C, to be changed when that value changes
   */
  // private static final int HEADER_SIZE_ = 0xC4;
  /**
   * Contraction table new element indicator
   */
  private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF;
  /**
   * Parser for the rules
   */
  private CollationRuleParser m_parser_;
  /**
   * Utility UCA collation element iterator
   */
  private CollationElementIterator m_utilColEIter_;
  /**
   * Utility data members
   */
  // One CE generator per basic strength (primary/secondary/tertiary).
  private CEGenerator m_utilGens_[] = { new CEGenerator(), new CEGenerator(), new CEGenerator() };
  // Scratch CE buffer, one slot per basic strength.
  private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_];
  private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_];
  private Elements m_utilElement_ = new Elements();
  private Elements m_utilElement2_ = new Elements();
  private CollationRuleParser.Token m_utilToken_ = new CollationRuleParser.Token();
  private int m_utilCountBuffer_[] = new int[6];
  private long m_utilLongBuffer_[] = new long[5];
  private WeightRange m_utilLowerWeightRange_[] = { new WeightRange(), new WeightRange(), new WeightRange(), new WeightRange(),
      new WeightRange() };
  private WeightRange m_utilUpperWeightRange_[] = { new WeightRange(), new WeightRange(), new WeightRange(), new WeightRange(),
      new WeightRange() };
  private WeightRange m_utilWeightRange_ = new WeightRange();
  // NFC normalization implementation; used for FCD16 lookups during element creation.
  private final Normalizer2Impl m_nfcImpl_ = Norm2AllModes.getNFCInstance().impl;
  private CanonicalIterator m_utilCanIter_ = new CanonicalIterator("");
  private StringBuilder m_utilStringBuffer_ = new StringBuilder("");
  // Flag indicating a combining marks table is required or not.
  // NOTE(review): static mutable state shared by all builder instances;
  // concurrent builds could interleave updates — confirm builders run single-threaded.
  private static boolean buildCMTabFlag = false;

  // private methods -------------------------------------------------------

  /**
   * Counts how many CEs must be inserted at each strength, seeds the CE
   * generators from the inverse-UCA gap positions, and assigns CEs to every
   * token in the list.
   *
   * @param listheader
   *            parsed rule tokens
   * @exception Exception
   *                thrown when internal error occurs
   */
  private void initBuffers(final CollationRuleParser.TokenListHeader listheader) throws Exception {
    CollationRuleParser.Token token = listheader.m_last_;
    Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0);

    // Backward pass: compute m_toInsert_ for each token — the number of CEs
    // still needed at that token's strength from that point on.
    token.m_toInsert_ = 1;
    m_utilIntBuffer_[token.m_strength_] = 1;
    while (token.m_previous_ != null) {
      if (token.m_previous_.m_strength_ < token.m_strength_) {
        // going up
        m_utilIntBuffer_[token.m_strength_] = 0;
        m_utilIntBuffer_[token.m_previous_.m_strength_]++;
      } else if (token.m_previous_.m_strength_ > token.m_strength_) {
        // going down
        m_utilIntBuffer_[token.m_previous_.m_strength_] = 1;
      } else {
        m_utilIntBuffer_[token.m_strength_]++;
      }
      token = token.m_previous_;
      token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
    }

    token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_];
    INVERSE_UCA_.getInverseGapPositions(listheader);

    token = listheader.m_first_;
    int fstrength = Collator.IDENTICAL;
    int initstrength = Collator.IDENTICAL;

    // Start from the reset point's CE split into per-strength parts.
    m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_, listheader.m_baseContCE_, Collator.PRIMARY);
    m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_, listheader.m_baseContCE_, Collator.SECONDARY);
    m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_, listheader.m_baseContCE_, Collator.TERTIARY);
    // Forward pass: when the strength goes up (stronger difference), re-seed
    // the generators from the gap arrays; otherwise draw the next generated
    // weight at the token's strength.
    while (token != null) {
      fstrength = token.m_strength_;
      if (fstrength < initstrength) {
        initstrength = fstrength;
        if (listheader.m_pos_[fstrength] == -1) {
          // no gap at this strength — fall back to a stronger one
          while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) {
            fstrength--;
          }
          if (listheader.m_pos_[fstrength] == -1) {
            throw new Exception("Internal program error");
          }
        }
        if (initstrength == Collator.TERTIARY) {
          // starting with tertiary
          m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
          m_utilCEBuffer_[Collator.SECONDARY] = listheader.m_gapsLo_[fstrength * 3 + 1];
          m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator(m_utilGens_[Collator.TERTIARY], listheader.m_gapsLo_,
              listheader.m_gapsHi_, token, fstrength);
        } else if (initstrength == Collator.SECONDARY) {
          // secondaries
          m_utilCEBuffer_[Collator.PRIMARY] = listheader.m_gapsLo_[fstrength * 3];
          m_utilCEBuffer_[Collator.SECONDARY] = getCEGenerator(m_utilGens_[Collator.SECONDARY], listheader.m_gapsLo_,
              listheader.m_gapsHi_, token, fstrength);
          m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(m_utilGens_[Collator.TERTIARY], token, Collator.TERTIARY);
        } else {
          // primaries
          m_utilCEBuffer_[Collator.PRIMARY] = getCEGenerator(m_utilGens_[Collator.PRIMARY], listheader.m_gapsLo_,
              listheader.m_gapsHi_, token, fstrength);
          m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(m_utilGens_[Collator.SECONDARY], token, Collator.SECONDARY);
          m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(m_utilGens_[Collator.TERTIARY], token, Collator.TERTIARY);
        }
      } else {
        if (token.m_strength_ == Collator.TERTIARY) {
          m_utilCEBuffer_[Collator.TERTIARY] = getNextGenerated(m_utilGens_[Collator.TERTIARY]);
        } else if (token.m_strength_ == Collator.SECONDARY) {
          m_utilCEBuffer_[Collator.SECONDARY] = getNextGenerated(m_utilGens_[Collator.SECONDARY]);
          m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(m_utilGens_[Collator.TERTIARY], token, Collator.TERTIARY);
        } else if (token.m_strength_ == Collator.PRIMARY) {
          m_utilCEBuffer_[Collator.PRIMARY] = getNextGenerated(m_utilGens_[Collator.PRIMARY]);
          m_utilCEBuffer_[Collator.SECONDARY] = getSimpleCEGenerator(m_utilGens_[Collator.SECONDARY], token, Collator.SECONDARY);
          m_utilCEBuffer_[Collator.TERTIARY] = getSimpleCEGenerator(m_utilGens_[Collator.TERTIARY], token, Collator.TERTIARY);
        }
      }
      doCE(m_utilCEBuffer_, token);
      token = token.m_next_;
    }
  }

  /**
   * Get the next generated ce
   *
   * @param g
   *            ce generator
   * @return next generated ce
   */
  private int getNextGenerated(final CEGenerator g) {
    g.m_current_ = nextWeight(g);
    return g.m_current_;
  }

  /**
   * @param g
   *            CEGenerator
   * @param token
   *            rule token
   * @param strength
   * @return ce generator
   * @exception Exception
   *                thrown when internal error occurs
   */
  private int getSimpleCEGenerator(final CEGenerator g, final CollationRuleParser.Token token, final int strength) throws Exception {
    int high, low, count = 1;
    int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF;

    if (strength == Collator.SECONDARY) {
      low = RuleBasedCollator.COMMON_TOP_2_ << 24;
      high = 0xFFFFFFFF;
      count = 0xFF - RuleBasedCollator.COMMON_TOP_2_;
    } else {
      low = RuleBasedCollator.BYTE_COMMON_ << 24; // 0x05000000;
      high = 0x40000000;
      count = 0x40 - RuleBasedCollator.BYTE_COMMON_;
    }

    if (token.m_next_ != null && token.m_next_.m_strength_ == strength) {
      count = token.m_next_.m_toInsert_;
    }

    g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, g.m_ranges_);
    g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24;

    if (g.m_rangesLength_ == 0) {
      throw new Exception("Internal program error");
    }
    return g.m_current_;
  }

  /**
   * Combines 2 ce into one with respect to the argument strength
   *
   * @param ce1
   *            first ce
   * @param ce2
   *            second ce
   * @param strength
   *            strength to use
   * @return combined ce
   */
  private static int mergeCE(int ce1, int ce2, final int strength) {
    int mask = RuleBasedCollator.CE_TERTIARY_MASK_;
    if (strength == Collator.SECONDARY) {
      mask = RuleBasedCollator.CE_SECONDARY_MASK_;
    } else if (strength == Collator.PRIMARY) {
      mask = RuleBasedCollator.CE_PRIMARY_MASK_;
    }
    ce1 &= mask;
    ce2 &= mask;
    switch (strength) {
    case Collator.PRIMARY:
      return ce1 | ce2 >>> 16;
    case Collator.SECONDARY:
      return ce1 << 16 | ce2 << 8;
    default:
      return ce1 << 24 | ce2 << 16;
    }
  }

  /**
   * Seeds a CE generator from the inverse-UCA gap arrays for the token's
   * strength, clamping the range away from reserved weight bytes, and returns
   * the first generated weight.
   *
   * @param g
   *            CEGenerator
   * @param lows
   *            low gap array
   * @param highs
   *            high gap array
   * @param token
   *            rule token
   * @param fstrength
   * @return the first generated weight for this range
   * @exception Exception
   *                thrown when internal error occurs
   */
  private int getCEGenerator(final CEGenerator g, final int lows[], final int highs[], final CollationRuleParser.Token token,
      final int fstrength) throws Exception {
    int strength = token.m_strength_;
    int low = lows[fstrength * 3 + strength];
    int high = highs[fstrength * 3 + strength];
    int maxbyte = 0;
    if (strength == Collator.TERTIARY) {
      maxbyte = 0x3F;
    } else if (strength == Collator.PRIMARY) {
      maxbyte = 0xFE;
    } else {
      maxbyte = 0xFF;
    }

    int count = token.m_toInsert_;

    // Empty or inverted gap at this strength: widen using the first stronger
    // level that has a real gap.
    if (Utility.compareUnsigned(low, high) >= 0 && strength > Collator.PRIMARY) {
      int s = strength;
      while (true) {
        s--;
        // NOTE(review): s is decremented before the bounds check below; if no
        // stronger level has a gap, lows[fstrength * 3 + s] is read with s < 0
        // before the throw is reached — confirm inputs guarantee a gap exists.
        if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) {
          if (strength == Collator.SECONDARY) {
            if (low < (RuleBasedCollator.COMMON_TOP_2_ << 24)) {
              // Override if low range is less than
              // UCOL_COMMON_TOP2.
              low = RuleBasedCollator.COMMON_TOP_2_ << 24;
            }
            high = 0xFFFFFFFF;
          } else {
            if (low < RuleBasedCollator.COMMON_BOTTOM_3 << 24) {
              // Override if low range is less than
              // UCOL_COMMON_BOT3.
              low = RuleBasedCollator.COMMON_BOTTOM_3 << 24;
            }
            high = 0x40000000;
          }
          break;
        }
        if (s < 0) {
          throw new Exception("Internal program error");
        }
      }
    }
    if (0 <= low && low < 0x02000000) {  // unsigned comparison < 0x02000000
      // We must not use CE weight byte 02, so we set it as the minimum lower bound.
      // See http://site.icu-project.org/design/collation/bytes
      low = 0x02000000;
    }

    if (strength == Collator.SECONDARY) { // similar as simple
      // Keep the range out of the common-secondary region.
      if (Utility.compareUnsigned(low, RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0
          && Utility.compareUnsigned(low, RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
        low = RuleBasedCollator.COMMON_TOP_2_ << 24;
      }
      if (Utility.compareUnsigned(high, RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0
          && Utility.compareUnsigned(high, RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) {
        high = RuleBasedCollator.COMMON_TOP_2_ << 24;
      }
      if (Utility.compareUnsigned(low, RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) {
        g.m_rangesLength_ = allocateWeights(RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, high, count, maxbyte, g.m_ranges_);
        g.m_current_ = nextWeight(g);
        // g.m_current_ = RuleBasedCollator.COMMON_BOTTOM_2_ << 24;
        return g.m_current_;
      }
    }

    g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, g.m_ranges_);
    if (g.m_rangesLength_ == 0) {
      throw new Exception("Internal program error");
    }
    g.m_current_ = nextWeight(g);
    return g.m_current_;
  }

  /**
   * Packs the per-strength CE parts into 32-bit CEs on the token (with
   * continuation CEs for the overflow bytes) and fills in the case bits.
   *
   * @param ceparts
   *            list of collation elements parts
   * @param token
   *            rule token
   * @exception Exception
   *                thrown when forming case bits for expansions fails
   */
  private void doCE(final int ceparts[], final CollationRuleParser.Token token) throws Exception {
    // this one makes the table and stuff
    // int noofbytes[] = new int[3];
    for (int i = 0; i < 3; i++) {
      // noofbytes[i] = countBytes(ceparts[i]);
      m_utilIntBuffer_[i] = countBytes(ceparts[i]);
    }

    // Here we have to pack CEs from parts.
    // Each packed CE carries 2 primary bytes (hence cei << 1 against the
    // primary byte count), 1 secondary byte and 6 tertiary bits.
    int cei = 0;
    int value = 0;

    while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1] || cei < m_utilIntBuffer_[2]) {
      if (cei > 0) {
        // every CE after the first is a continuation
        value = RuleBasedCollator.CE_CONTINUATION_MARKER_;
      } else {
        value = 0;
      }

      if ((cei << 1) < m_utilIntBuffer_[0]) {
        value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) << 16;
      }
      if (cei < m_utilIntBuffer_[1]) {
        value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8;
      }

      if (cei < m_utilIntBuffer_[2]) {
        value |= ((ceparts[2] >> (32 - ((cei + 1) << 3))) & 0x3F);
      }
      token.m_CE_[cei] = value;
      cei++;
    }
    if (cei == 0) { // totally ignorable
      token.m_CELength_ = 1;
      token.m_CE_[0] = 0;
    } else { // there is at least something
      token.m_CELength_ = cei;
    }

    // Case bits handling for expansion
    if (token.m_CE_[0] != 0) { // case bits should be set only for
                  // non-ignorables
      token.m_CE_[0] &= 0xFFFFFF3F; // Clean the case bits field
      int cSize = (token.m_source_ & 0xFF000000) >>> 24;
      int startoftokenrule = token.m_source_ & 0x00FFFFFF;

      if (cSize > 1) {
        // Do it manually
        String tokenstr = token.m_rules_.substring(startoftokenrule, startoftokenrule + cSize);
        token.m_CE_[0] |= getCaseBits(tokenstr);
      } else {
        // Copy it from the UCA
        int caseCE = getFirstCE(token.m_rules_.charAt(startoftokenrule));
        token.m_CE_[0] |= (caseCE & 0xC0);
      }
    }
  }

  /**
   * Count the number of non-zero bytes used in the ce
   *
   * @param ce
   * @return number of non-zero bytes used in ce
   */
  private static final int countBytes(final int ce) {
    int mask = 0xFFFFFFFF;
    int result = 0;
    while (mask != 0) {
      if ((ce & mask) != 0) {
        result++;
      }
      mask >>>= 8;
    }
    return result;
  }

  /**
   * We are ready to create collation elements
   *
   * @param t
   *            build table to insert
   * @param lh
   *            rule token list header
   */
  private void createElements(final BuildTable t, final CollationRuleParser.TokenListHeader lh) {
    CollationRuleParser.Token tok = lh.m_first_;
    m_utilElement_.clear();
    while (tok != null) {
      // first, check if there are any expansions
      // if there are expansions, we need to do a little bit more
      // processing since parts of expansion can be tailored, while
      // others are not
      if (tok.m_expansion_ != 0) {
        int len = tok.m_expansion_ >>> 24;
        int currentSequenceLen = len;
        int expOffset = tok.m_expansion_ & 0x00FFFFFF;
        m_utilToken_.m_source_ = currentSequenceLen | expOffset;
        m_utilToken_.m_rules_ = m_parser_.m_source_;

        while (len > 0) {
          currentSequenceLen = len;
          while (currentSequenceLen > 0) {
            m_utilToken_.m_source_ = (currentSequenceLen << 24) | expOffset;
            CollationRuleParser.Token expt = m_parser_.m_hashTable_.get(m_utilToken_);
            if (expt != null && expt.m_strength_ != CollationRuleParser.TOKEN_RESET_) {
              // expansion is tailored
              int noOfCEsToCopy = expt.m_CELength_;
              for (int j = 0; j < noOfCEsToCopy; j++) {
                tok.m_expCE_[tok.m_expCELength_ + j] = expt.m_CE_[j];
              }
              tok.m_expCELength_ += noOfCEsToCopy;
              // never try to add codepoints and CEs.
              // For some odd reason, it won't work.
              expOffset += currentSequenceLen; // noOfCEsToCopy;
              len -= currentSequenceLen; // noOfCEsToCopy;
              break;
            } else {
              currentSequenceLen--;
            }
          }
          if (currentSequenceLen == 0) {
            // couldn't find any tailored subsequence, will have to
            // get one from UCA. first, get the UChars from the
            // rules then pick CEs out until there is no more and
            // stuff them into expansion
            m_utilColEIter_.setText(m_parser_.m_source_.substring(expOffset, expOffset + 1));
            while (true) {
              int order = m_utilColEIter_.next();
              if (order == CollationElementIterator.NULLORDER) {
                break;
              }
              tok.m_expCE_[tok.m_expCELength_++] = order;
            }
            expOffset++;
            len--;
          }
        }
      } else {
        tok.m_expCELength_ = 0;
      }

      // set the ucaelement with obtained values
      m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_;

      // copy CEs
      System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0, tok.m_CELength_);
      System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_, tok.m_CELength_, tok.m_expCELength_);

      // copy UChars
      // We kept prefix and source kind of together, as it is a kind of a
      // contraction.
      // However, now we have to slice the prefix off the main thing -
      m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
      m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
      if (tok.m_prefix_ != 0) {
        // we will just copy the prefix here, and adjust accordingly in
        // the addPrefix function in ucol_elm. The reason is that we
        // need to add both composed AND decomposed elements to the
        // unsafe table.
        int size = tok.m_prefix_ >> 24;
        int offset = tok.m_prefix_ & 0x00FFFFFF;
        m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring(offset, offset + size);
        size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
        offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24);
        m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, offset + size);
      } else {
        m_utilElement_.m_prefixChars_ = null;
        int offset = tok.m_source_ & 0x00FFFFFF;
        int size = tok.m_source_ >>> 24;
        m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, offset + size);
      }
      m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;

      boolean containCombinMarks = false;
      for (int i = 0; i < m_utilElement_.m_cPoints_.length() - m_utilElement_.m_cPointsOffset_; i++) {
        if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) {
          t.m_collator_.m_isJamoSpecial_ = true;
          break;
        }
        if (!buildCMTabFlag) {
          // check combining class
          int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(i))// TODO: review for handling supplementary characters
          if ((fcd & 0xff) == 0) {
            // reset flag when current char is not combining mark.
            containCombinMarks = false;
          } else {
            containCombinMarks = true;
          }
        }
      }

      if (!buildCMTabFlag && containCombinMarks) {
        buildCMTabFlag = true;
      }

      /***
       * // Case bits handling m_utilElement_.m_CEs_[0] &= 0xFFFFFF3F; // Clean the case bits field if
       * (m_utilElement_.m_cPoints_.length() - m_utilElement_.m_cPointsOffset_ > 1) { // Do it manually m_utilElement_.m_CEs_[0] |=
       * getCaseBits(m_utilElement_.m_cPoints_); } else { // Copy it from the UCA int caseCE =
       * getFirstCE(m_utilElement_.m_cPoints_.charAt(0)); m_utilElement_.m_CEs_[0] |= (caseCE & 0xC0); }
       ***/
      // and then, add it
      addAnElement(t, m_utilElement_);
      tok = tok.m_next_;
    }
  }

  /**
   * Testing if the string argument has case
   *
   * @param src
   *            string
   * @return the case for this char array
   * @exception Exception
   *                thrown when internal program error occurs
   */
  private final int getCaseBits(String src) throws Exception {
    int uCount = 0;
    int lCount = 0;
    src = Normalizer.decompose(src, true);
    m_utilColEIter_.setText(src);
    for (int i = 0; i < src.length(); i++) {
      m_utilColEIter_.setText(src.substring(i, i + 1));
      int order = m_utilColEIter_.next();
      if (RuleBasedCollator.isContinuation(order)) {
        throw new Exception("Internal program error");
      }
      if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) == UPPER_CASE_) {
        uCount++;
      } else {
        char ch = src.charAt(i);
        if (UCharacter.isLowerCase(ch)) {
          lCount++;
        } else {
          if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) {
            lCount++;
          }
        }
      }
    }

    if (uCount != 0 && lCount != 0) {
      return MIXED_CASE_;
    } else if (uCount != 0) {
      return UPPER_CASE_;
    } else {
      return LOWER_CASE_;
    }
  }

  /**
   * Converts a char to the uppercase Kana
   *
   * @param ch
   *            character to convert
   * @return the converted Kana character
   */
  private static final char toLargeKana(char ch) {
    if (0x3042 < ch && ch < 0x30ef) { // Kana range
      switch (ch - 0x3000) {
      case 0x41:
      case 0x43:
      case 0x45:
      case 0x47:
      case 0x49:
      case 0x63:
      case 0x83:
      case 0x85:
      case 0x8E:
      case 0xA1:
      case 0xA3:
      case 0xA5:
      case 0xA7:
      case 0xA9:
      case 0xC3:
      case 0xE3:
      case 0xE5:
      case 0xEE:
        ch++;
        break;
      case 0xF5:
        ch = 0x30AB;
        break;
      case 0xF6:
        ch = 0x30B1;
        break;
      }
    }
    return ch;
  }

  /**
   * Converts a char to the lowercase Kana
   *
   * @param ch
   *            character to convert
   * @return the converted Kana character
   */
  private static final char toSmallKana(char ch) {
    if (0x3042 < ch && ch < 0x30ef) { // Kana range
      switch (ch - 0x3000) {
      case 0x42:
      case 0x44:
      case 0x46:
      case 0x48:
      case 0x4A:
      case 0x64:
      case 0x84:
      case 0x86:
      case 0x8F:
      case 0xA2:
      case 0xA4:
      case 0xA6:
      case 0xA8:
      case 0xAA:
      case 0xC4:
      case 0xE4:
      case 0xE6:
      case 0xEF:
        ch--;
        break;
      case 0xAB:
        ch = 0x30F5;
        break;
      case 0xB1:
        ch = 0x30F6;
        break;
      }
    }
    return ch;
  }

  /**
   * This should be connected to special Jamo handling.
   */
  private int getFirstCE(final char ch) {
    m_utilColEIter_.setText(UCharacter.toString(ch));
    return m_utilColEIter_.next();
  }

  /**
   * Adds a read rule element to the build table, while testing for
   * existence. Computes the element's mapping CE (handling two-CE long
   * primaries, general expansions, digits, and prefixes) and then adds the
   * element - plus any canonically equivalent contraction forms - to the
   * table.
   *
   * @param t
   *            build table
   * @param element
   *            rule element to add; its {@code m_mapCE_} is computed here
   * @return ce that was stored for the element
   */
  private int addAnElement(final BuildTable t, final Elements element) {
    List<Integer> expansions = t.m_expansions_;
    element.m_mapCE_ = 0;

    if (element.m_CELength_ == 1) {
      // single CE - map it directly
      element.m_mapCE_ = element.m_CEs_[0];

    } else {
      // unfortunately, it looks like we have to look for a long primary
      // here since in canonical closure we are going to hit some long
      // primaries from the first phase, and they will come back as
      // continuations/expansions destroying the effect of the previous
      // opitimization. A long primary is a three byte primary with
      // starting secondaries and tertiaries. It can appear in long runs
      // of only primary differences (like east Asian tailorings) also,
      // it should not be an expansion, as expansions would break with
      // this
      if (element.m_CELength_ == 2 // a two CE expansion
          && RuleBasedCollator.isContinuation(element.m_CEs_[1])
          // ... whose continuation carries only primary weights ...
          && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0
          // ... with a common secondary ...
          && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
          // ... and a common tertiary
          && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
      ) {
        element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
        // a long primary special
            | (CE_LONG_PRIMARY_TAG_ << 24)
            // first and second byte of primary
            | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
            // third byte of primary
            | ((element.m_CEs_[1] >> 24) & 0xFF);
      } else {
        // omitting expansion offset in builder
        // (HEADER_SIZE_ >> 2)
        int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
            | (addExpansion(expansions, element.m_CEs_[0]) << 4) & 0xFFFFF0;

        for (int i = 1; i < element.m_CELength_; i++) {
          addExpansion(expansions, element.m_CEs_[i]);
        }
        // lengths up to 0xF are stored inline in the CE; longer ones are
        // terminated with a 0 sentinel in the expansion list instead
        if (element.m_CELength_ <= 0xF) {
          expansion |= element.m_CELength_;
        } else {
          addExpansion(expansions, 0);
        }
        element.m_mapCE_ = expansion;
        setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1], (byte) element.m_CELength_, t.m_maxExpansions_);
        if (isJamo(element.m_cPoints_.charAt(0))) {
          // Jamo expansions require special processing in the collator
          t.m_collator_.m_isJamoSpecial_ = true;
          setMaxJamoExpansion(element.m_cPoints_.charAt(0), element.m_CEs_[element.m_CELength_ - 1], (byte) element.m_CELength_,
              t.m_maxJamoExpansions_);
        }
      }
    }

    // We treat digits differently - they are "uber special" and should be
    // processed differently if numeric collation is on.
    int uniChar = 0;
    if ((element.m_uchars_.length() == 2) && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) {
      uniChar = UCharacterProperty.getRawSupplementary(element.m_uchars_.charAt(0), element.m_uchars_.charAt(1));
    } else if (element.m_uchars_.length() == 1) {
      uniChar = element.m_uchars_.charAt(0);
    }

    // Here, we either have one normal CE OR mapCE is set. Therefore, we
    // stuff only one element to the expansion buffer. When we encounter a
    // digit and we don't do numeric collation, we will just pick the CE
    // we have and break out of case (see ucol.cpp ucol_prv_getSpecialCE
    // && ucol_prv_getSpecialPrevCE). If we picked a special, further
    // processing will occur. If it's a simple CE, we'll return due
    // to how the loop is constructed.
    if (uniChar != 0 && UCharacter.isDigit(uniChar)) {
      // prepare the element
      int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_
          | (CollationElementIterator.CE_DIGIT_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_) | 1;
      if (element.m_mapCE_ != 0) {
        // if there is an expansion, we'll pick it here
        expansion |= (addExpansion(expansions, element.m_mapCE_) << 4);
      } else {
        expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4);
      }
      element.m_mapCE_ = expansion;
    }

    // here we want to add the prefix structure.
    // I will try to process it as a reverse contraction, if possible.
    // prefix buffer is already reversed.

    if (element.m_prefixChars_ != null && element.m_prefixChars_.length() - element.m_prefix_ > 0) {
      // We keep the seen prefix starter elements in a hashtable we need
      // it to be able to distinguish between the simple codepoints and
      // prefix starters. Also, we need to use it for canonical closure.
      // m_utilElement2_ receives a shallow copy with the prefix
      // recomposed (NFC), used below to also register the composed form.
      m_utilElement2_.m_caseBit_ = element.m_caseBit_;
      m_utilElement2_.m_CELength_ = element.m_CELength_;
      m_utilElement2_.m_CEs_ = element.m_CEs_;
      m_utilElement2_.m_mapCE_ = element.m_mapCE_;
      // m_utilElement2_.m_prefixChars_ = element.m_prefixChars_;
      m_utilElement2_.m_sizePrim_ = element.m_sizePrim_;
      m_utilElement2_.m_sizeSec_ = element.m_sizeSec_;
      m_utilElement2_.m_sizeTer_ = element.m_sizeTer_;
      m_utilElement2_.m_variableTop_ = element.m_variableTop_;
      m_utilElement2_.m_prefix_ = element.m_prefix_;
      m_utilElement2_.m_prefixChars_ = Normalizer.compose(element.m_prefixChars_, false);
      m_utilElement2_.m_uchars_ = element.m_uchars_;
      m_utilElement2_.m_cPoints_ = element.m_cPoints_;
      m_utilElement2_.m_cPointsOffset_ = 0;

      if (t.m_prefixLookup_ != null) {
        Elements uCE = t.m_prefixLookup_.get(element);
        if (uCE != null) {
          // there is already a set of code points here
          element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element);
        } else { // no code points, so this spot is clean
          element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element);
          uCE = new Elements(element);
          uCE.m_cPoints_ = uCE.m_uchars_;
          t.m_prefixLookup_.put(uCE, uCE);
        }
        // if the composed prefix differs from the original, add it too
        if (m_utilElement2_.m_prefixChars_.length() != element.m_prefixChars_.length() - element.m_prefix_
            || !m_utilElement2_.m_prefixChars_.regionMatches(0, element.m_prefixChars_, element.m_prefix_,
                m_utilElement2_.m_prefixChars_.length())) {
          // do it!
          m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_, m_utilElement2_);
        }
      }
    }

    // We need to use the canonical iterator here
    // the way we do it is to generate the canonically equivalent strings
    // for the contraction and then add the sequences that pass FCD check
    if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1
        && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2 && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) && UTF16
            .isTrailSurrogate(element.m_cPoints_.charAt(1)))) {
      // this is a contraction, we should check whether a composed form
      // should also be included
      m_utilCanIter_.setSource(element.m_cPoints_);
      String source = m_utilCanIter_.next();
      while (source != null && source.length() > 0) {
        if (Normalizer.quickCheck(source, Normalizer.FCD, 0) != Normalizer.NO) {
          element.m_uchars_ = source;
          element.m_cPoints_ = element.m_uchars_;
          finalizeAddition(t, element);
        }
        source = m_utilCanIter_.next();
      }

      return element.m_mapCE_;
    } else {
      return finalizeAddition(t, element);
    }
  }

  /**
   * Adds an expansion ce to the expansion vector
   *
   * @param expansions
   *            vector to add to
   * @param value
   *            of the expansion
   * @return the current position of the new element
   */
  private static final int addExpansion(final List<Integer> expansions, final int value) {
    expansions.add(Integer.valueOf(value));
    return expansions.size() - 1;
  }

  /**
   * Looks for the maximum length of all expansion sequences ending with the same collation element. The size required for maxexpansion
   * and maxsize is returned if the arrays are too small.
   *
   * @param endexpansion
   *            the last expansion collation element to be added
   * @param expansionsize
   *            size of the expansion
   * @param maxexpansion
   *            data structure to store the maximum expansion data.
   * @return size of the maxexpansion and maxsize used.
   */
  private static int setMaxExpansion(final int endexpansion, final byte expansionsize, final MaxExpansionTable maxexpansion) {
    int start = 0;
    int limit = maxexpansion.m_endExpansionCE_.size();
    // CEs are ordered as unsigned 32-bit values; widen to long and mask so
    // the comparisons below behave unsigned
    long unsigned = (long) endexpansion;
    unsigned &= 0xFFFFFFFFl;

    // using binary search to determine if last expansion element is
    // already in the array
    int result = -1;
    if (limit > 0) {
      while (start < limit - 1) {
        int mid = (start + limit) >> 1;
        long unsignedce = (maxexpansion.m_endExpansionCE_.get(mid)).intValue();
        unsignedce &= 0xFFFFFFFFl;
        if (unsigned < unsignedce) {
          limit = mid;
        } else {
          start = mid;
        }
      }

      // loop invariant: start is the last index whose CE is <= endexpansion
      if ((maxexpansion.m_endExpansionCE_.get(start)).intValue() == endexpansion) {
        result = start;
      }
    }
    if (result > -1) {
      // found the ce in expansion, we'll just modify the size if it
      // is smaller
      Object currentsize = maxexpansion.m_expansionCESize_.get(result);
      if (((Byte) currentsize).byteValue() < expansionsize) {
        maxexpansion.m_expansionCESize_.set(result, Byte.valueOf(expansionsize));
      }
    } else {
      // we'll need to squeeze the value into the array. initial
      // implementation. shifting the subarray down by 1
      // (start + 1 keeps the list sorted relative to the invariant above)
      maxexpansion.m_endExpansionCE_.add(start + 1, Integer.valueOf(endexpansion));
      maxexpansion.m_expansionCESize_.add(start + 1, Byte.valueOf(expansionsize));
    }
    return maxexpansion.m_endExpansionCE_.size();
  }

  /**
   * Sets the maximum length of all jamo expansion sequences ending with the same collation element. The size required for maxexpansion
   * and maxsize is returned if the arrays are too small.
   *
   * @param ch
   *            the jamo codepoint
   * @param endexpansion
   *            the last expansion collation element to be added
   * @param expansionsize
   *            size of the expansion
   * @param maxexpansion
   *            data structure to store the maximum expansion data.
   * @returns size of the maxexpansion and maxsize used.
   */
  private static int setMaxJamoExpansion(final char ch, final int endexpansion, final byte expansionsize,
      final MaxJamoExpansionTable maxexpansion) {
    boolean isV = true;
    if (ch >= 0x1100 && ch <= 0x1112) {
      // determines L for Jamo, doesn't need to store this since it is
      // never at the end of a expansion
      if (maxexpansion.m_maxLSize_ < expansionsize) {
        maxexpansion.m_maxLSize_ = expansionsize;
      }
      return maxexpansion.m_endExpansionCE_.size();
    }

    if (ch >= 0x1161 && ch <= 0x1175) {
      // determines V for Jamo
      if (maxexpansion.m_maxVSize_ < expansionsize) {
        maxexpansion.m_maxVSize_ = expansionsize;
      }
    }

    if (ch >= 0x11A8 && ch <= 0x11C2) {
      isV = false;
      // determines T for Jamo
      if (maxexpansion.m_maxTSize_ < expansionsize) {
        maxexpansion.m_maxTSize_ = expansionsize;
      }
    }

    int pos = maxexpansion.m_endExpansionCE_.size();
    while (pos > 0) {
      pos--;
      if ((maxexpansion.m_endExpansionCE_.get(pos)).intValue() == endexpansion) {
        return maxexpansion.m_endExpansionCE_.size();
      }
    }
    maxexpansion.m_endExpansionCE_.add(Integer.valueOf(endexpansion));
    maxexpansion.m_isV_.add(isV ? Boolean.TRUE : Boolean.FALSE);

    return maxexpansion.m_endExpansionCE_.size();
  }

  /**
   * Adds a prefix to the table. The prefix is reversed and processed like a
   * backwards contraction under the SPEC_PROC tag.
   *
   * @param t
   *            build table to update
   * @param CE
   *            collation element to add
   * @param element
   *            rule element to add
   * @return modified ce
   */
  private int addPrefix(final BuildTable t, int CE, final Elements element) {
    // currently the longest prefix we're supporting in Japanese is two
    // characters long. Although this table could quite easily mimic
    // complete contraction stuff there is no good reason to make a general
    // solution, as it would require some error prone messing.
    ContractionTable contractions = t.m_contractions_;
    // save the code point fields; they are temporarily repointed at the
    // reversed prefix below and restored before returning
    String oldCP = element.m_cPoints_;
    int oldCPOffset = element.m_cPointsOffset_;

    contractions.m_currentTag_ = CE_SPEC_PROC_TAG_;
    // here, we will normalize & add prefix to the table.
    int size = element.m_prefixChars_.length() - element.m_prefix_;
    for (int j = 1; j < size; j++) {
      // First add NFD prefix chars to unsafe CP hash table
      // Unless it is a trail surrogate, which is handled algoritmically
      // and shouldn't take up space in the table.
      char ch = element.m_prefixChars_.charAt(j + element.m_prefix_);
      if (!UTF16.isTrailSurrogate(ch)) {
        unsafeCPSet(t.m_unsafeCP_, ch);
      }
    }

    // StringBuffer reversed = new StringBuffer();
    m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length());
    for (int j = 0; j < size; j++) {
      // prefixes are going to be looked up backwards
      // therefore, we will promptly reverse the prefix buffer...
      int offset = element.m_prefixChars_.length() - j - 1;
      m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset));
    }
    element.m_prefixChars_ = m_utilStringBuffer_.toString();
    element.m_prefix_ = 0;

    // the first codepoint is also unsafe, as it forms a 'contraction' with
    // the prefix
    if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) {
      unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0));
    }

    // treat the reversed prefix as the contraction string from here on
    element.m_cPoints_ = element.m_prefixChars_;
    element.m_cPointsOffset_ = element.m_prefix_;

    // Add the last char of the contraction to the contraction-end hash
    // table. unless it is a trail surrogate, which is handled
    // algorithmically and shouldn't be in the table
    if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(element.m_cPoints_.length() - 1))) {
      ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_.charAt(element.m_cPoints_.length() - 1));
    }
    // First we need to check if contractions starts with a surrogate
    // int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);

    // If there are any Jamos in the contraction, we should turn on special
    // processing for Jamos
    if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) {
      t.m_collator_.m_isJamoSpecial_ = true;
    }
    // then we need to deal with it
    // we could aready have something in table - or we might not
    if (!isPrefix(CE)) {
      // if it wasn't contraction, we wouldn't end up here
      int firstContractionOffset = addContraction(contractions, CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
      int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
      addContraction(contractions, firstContractionOffset, element.m_prefixChars_.charAt(element.m_prefix_), newCE);
      // the 0xFFFF sentinel keeps the original CE reachable when no
      // prefix character matches
      addContraction(contractions, firstContractionOffset, (char) 0xFFFF, CE);
      CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset);
    } else {
      // we are adding to existing contraction
      // there were already some elements in the table, so we need to add
      // a new contraction
      // Two things can happen here: either the codepoint is already in
      // the table, or it is not
      char ch = element.m_prefixChars_.charAt(element.m_prefix_);
      int position = findCP(contractions, CE, ch);
      if (position > 0) {
        // if it is we just continue down the chain
        int eCE = getCE(contractions, CE, position);
        int newCE = processContraction(contractions, element, eCE);
        setContraction(contractions, CE, position, ch, newCE);
      } else {
        // if it isn't, we will have to create a new sequence
        processContraction(contractions, element, CE_NOT_FOUND_);
        insertContraction(contractions, CE, ch, element.m_mapCE_);
      }
    }

    // restore the element's real code point fields
    element.m_cPoints_ = oldCP;
    element.m_cPointsOffset_ = oldCPOffset;

    return CE;
  }

  /**
   * Checks if the argument ce is a contraction
   *
   * @param CE
   *            collation element
   * @return true if argument ce is a contraction
   */
  private static final boolean isContraction(final int CE) {
    return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_);
  }

  /**
   * Checks if the argument ce has a prefix
   *
   * @param CE
   *            collation element
   * @return true if argument ce has a prefix
   */
  private static final boolean isPrefix(final int CE) {
    return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_);
  }

  /**
   * Checks if the argument ce is special
   *
   * @param CE
   *            collation element
   * @return true if argument ce is special
   */
  private static final boolean isSpecial(final int CE) {
    return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000;
  }

  /**
   * Checks if the argument ce has a prefix
   *
   * @param CE
   *            collation element
   * @return true if argument ce has a prefix
   */
  private static final int getCETag(final int CE) {
    return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> RuleBasedCollator.CE_TAG_SHIFT_;
  }

  /**
   * Gets the ce at position in contraction table
   *
   * @param table
   *            contraction table
   * @param position
   *            offset to the contraction table
   * @return ce
   */
  private static final int getCE(final ContractionTable table, int element, final int position) {
    element &= 0xFFFFFF;
    BasicContractionTable tbl = getBasicContractionTable(table, element);

    if (tbl == null) {
      return CE_NOT_FOUND_;
    }
    if (position > tbl.m_CEs_.size() || position == -1) {
      return CE_NOT_FOUND_;
    } else {
      return tbl.m_CEs_.get(position).intValue();
    }
  }

  /**
   * Sets the unsafe character
   *
   * @param table
   *            unsafe table
   * @param c
   *            character to be added
   */
  private static final void unsafeCPSet(final byte table[], final char c) {
    int hash = c;
    if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
      if (hash >= 0xd800 && hash <= 0xf8ff) {
        // Part of a surrogate, or in private use area.
        // These don't go in the table
        return;
      }
      hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
    }
    table[hash >> 3] |= (1 << (hash & 7));
  }

  /**
   * Sets the contraction end character
   *
   * @param table
   *            contraction end table
   * @param c
   *            character to be added
   */
  private static final void ContrEndCPSet(final byte table[], final char c) {
    int hash = c;
    if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) {
      hash = (hash & UNSAFECP_TABLE_MASK_) + 256;
    }
    table[hash >> 3] |= (1 << (hash & 7));
  }

  /**
   * Adds more contractions in table. If element is non existant, it creates on. Returns element handle
   *
   * @param table
   *            contraction table
   * @param element
   *            offset to the contraction table
   * @param codePoint
   *            codepoint to add
   * @param value
   * @return collation element
   */
  private static int addContraction(final ContractionTable table, int element, final char codePoint, final int value) {
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      tbl = addAContractionElement(table);
      element = table.m_elements_.size() - 1;
    }

    tbl.m_CEs_.add(Integer.valueOf(value));
    tbl.m_codePoints_.append(codePoint);
    return constructSpecialCE(table.m_currentTag_, element);
  }

  /**
   * Adds a contraction element to the table
   *
   * @param table
   *            contraction table to update
   * @return contraction
   */
  private static BasicContractionTable addAContractionElement(final ContractionTable table) {
    BasicContractionTable result = new BasicContractionTable();
    table.m_elements_.add(result);
    return result;
  }

  /**
   * Constructs a special ce
   *
   * @param tag
   *            special tag
   * @param CE
   *            collation element
   * @return a contraction ce
   */
  private static final int constructSpecialCE(final int tag, final int CE) {
    return RuleBasedCollator.CE_SPECIAL_FLAG_ | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 0xFFFFFF);
  }

  /**
   * Sets and inserts the element that has a contraction, recursing one code
   * point at a time through the element's contraction string.
   *
   * @param contractions
   *            contraction table
   * @param element
   *            contracting element; its m_cPointsOffset_ is advanced during
   *            the recursion and restored before returning
   * @param existingCE
   *            the CE currently occupying the spot being processed
   * @return contraction ce
   */
  private static int processContraction(final ContractionTable contractions, final Elements element, int existingCE) {
    int firstContractionOffset = 0;
    // end of recursion
    if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) {
      if (isContractionTableElement(existingCE) && getCETag(existingCE) == contractions.m_currentTag_) {
        // overwrite both sentinel slots ((char) 0 and 0xFFFF) with the
        // element's mapping CE
        changeContraction(contractions, existingCE, (char) 0, element.m_mapCE_);
        changeContraction(contractions, existingCE, (char) 0xFFFF, element.m_mapCE_);
        return existingCE;
      } else {
        // can't do just that. existingCe might be a contraction,
        // meaning that we need to do another step
        return element.m_mapCE_;
      }
    }

    // this recursion currently feeds on the only element we have...
    // We will have to copy it in order to accomodate for both backward
    // and forward cycles
    // we encountered either an empty space or a non-contraction element
    // this means we are constructing a new contraction sequence
    element.m_cPointsOffset_++;
    if (!isContractionTableElement(existingCE)) {
      // if it wasn't contraction, we wouldn't end up here
      firstContractionOffset = addContraction(contractions, CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, existingCE);
      int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
      addContraction(contractions, firstContractionOffset, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
      // the 0xFFFF sentinel keeps the original CE reachable when no
      // contraction character matches
      addContraction(contractions, firstContractionOffset, (char) 0xFFFF, existingCE);
      existingCE = constructSpecialCE(contractions.m_currentTag_, firstContractionOffset);
    } else {
      // we are adding to existing contraction
      // there were already some elements in the table, so we need to add
      // a new contraction
      // Two things can happen here: either the codepoint is already in
      // the table, or it is not
      int position = findCP(contractions, existingCE, element.m_cPoints_.charAt(element.m_cPointsOffset_));
      if (position > 0) {
        // if it is we just continue down the chain
        int eCE = getCE(contractions, existingCE, position);
        int newCE = processContraction(contractions, element, eCE);
        setContraction(contractions, existingCE, position, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
      } else {
        // if it isn't, we will have to create a new sequence
        int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
        insertContraction(contractions, existingCE, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
      }
    }
    // restore the offset for the caller
    element.m_cPointsOffset_--;
    return existingCE;
  }

  /**
   * Checks if CE belongs to the contraction table
   *
   * @param CE
   *            collation element to test
   * @return true if CE belongs to the contraction table
   */
  private static final boolean isContractionTableElement(final int CE) {
    return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_ || getCETag(CE) == CE_SPEC_PROC_TAG_);
  }

  /**
   * Gets the codepoint
   *
   * @param table
   *            contraction table
   * @param element
   *            offset to the contraction element in the table
   * @param codePoint
   *            code point to look for
   * @return the offset to the code point
   */
  private static int findCP(final ContractionTable table, final int element, final char codePoint) {
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      return -1;
    }

    int position = 0;
    while (codePoint > tbl.m_codePoints_.charAt(position)) {
      position++;
      if (position > tbl.m_codePoints_.length()) {
        return -1;
      }
    }
    if (codePoint == tbl.m_codePoints_.charAt(position)) {
      return position;
    } else {
      return -1;
    }
  }

  /**
   * Gets the contraction element out of the contraction table
   *
   * @param table
   *            contraction table
   * @param offset
   *            to the element in the contraction table
   * @return basic contraction element at offset in the contraction table
   */
  private static final BasicContractionTable getBasicContractionTable(final ContractionTable table, int offset) {
    offset &= 0xFFFFFF;
    if (offset == 0xFFFFFF) {
      return null;
    }
    return table.m_elements_.get(offset);
  }

  /**
   * Changes the contraction element
   *
   * @param table
   *            contraction table
   * @param element
   *            offset to the element in the contraction table
   * @param codePoint
   *            codepoint
   * @param newCE
   *            new collation element
   * @return basic contraction element at offset in the contraction table
   */
  private static final int changeContraction(final ContractionTable table, final int element, final char codePoint, final int newCE) {
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      return 0;
    }
    int position = 0;
    while (codePoint > tbl.m_codePoints_.charAt(position)) {
      position++;
      if (position > tbl.m_codePoints_.length()) {
        return CE_NOT_FOUND_;
      }
    }
    if (codePoint == tbl.m_codePoints_.charAt(position)) {
      tbl.m_CEs_.set(position, Integer.valueOf(newCE));
      return element & 0xFFFFFF;
    } else {
      return CE_NOT_FOUND_;
    }
  }

  /**
   * Sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle.
   *
   * @param table
   *            contraction table
   * @param element
   *            offset to the contraction table
   * @param offset
   * @param codePoint
   *            contraction character
   * @param value
   *            ce value
   * @return new contraction ce
   */
  private static final int setContraction(final ContractionTable table, int element, final int offset, final char codePoint,
      final int value) {
    element &= 0xFFFFFF;
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      tbl = addAContractionElement(table);
      element = table.m_elements_.size() - 1;
    }

    tbl.m_CEs_.set(offset, Integer.valueOf(value));
    tbl.m_codePoints_.setCharAt(offset, codePoint);
    return constructSpecialCE(table.m_currentTag_, element);
  }

  /**
   * Inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates
   * on.
   *
   * @param table
   *            contraction
   * @param element
   *            offset to the table contraction
   * @param codePoint
   *            code point
   * @param value
   *            collation element value
   * @return contraction collation element
   */
  private static final int insertContraction(final ContractionTable table, int element, final char codePoint, final int value) {
    element &= 0xFFFFFF;
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      tbl = addAContractionElement(table);
      element = table.m_elements_.size() - 1;
    }

    int offset = 0;
    while (tbl.m_codePoints_.charAt(offset) < codePoint && offset < tbl.m_codePoints_.length()) {
      offset++;
    }

    tbl.m_CEs_.add(offset, Integer.valueOf(value));
    tbl.m_codePoints_.insert(offset, codePoint);

    return constructSpecialCE(table.m_currentTag_, element);
  }

  /**
   * Finalize addition: stores the element's mapping CE in the build table,
   * routing it through the contraction table when the element spans more
   * than one code unit.
   *
   * @param t
   *            build table
   * @param element
   *            to add
   * @return the ce that previously occupied the element's spot (possibly
   *         CE_NOT_FOUND_), or the contraction ce built for it
   */
  private final static int finalizeAddition(final BuildTable t, final Elements element) {
    int CE = CE_NOT_FOUND_;
    // This should add a completely ignorable element to the
    // unsafe table, so that backward iteration will skip
    // over it when treating contractions.
    if (element.m_mapCE_ == 0) {
      for (int i = 0; i < element.m_cPoints_.length(); i++) {
        char ch = element.m_cPoints_.charAt(i);
        if (!UTF16.isTrailSurrogate(ch)) {
          unsafeCPSet(t.m_unsafeCP_, ch);
        }
      }
    }

    if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) {
      // we're adding a contraction
      int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_);
      CE = t.m_mapping_.getValue(cp);
      CE = addContraction(t, CE, element);
    } else {
      // easy case
      CE = t.m_mapping_.getValue(element.m_cPoints_.charAt(element.m_cPointsOffset_));

      if (CE != CE_NOT_FOUND_) {
        if (isContractionTableElement(CE)) {
          // adding a non contraction element (thai, expansion,
          // single) to already existing contraction
          if (!isPrefix(element.m_mapCE_)) {
            // we cannot reenter prefix elements - as we are going
            // to create a dead loop
            // Only expansions and regular CEs can go here...
            // Contractions will never happen in this place
            setContraction(t.m_contractions_, CE, 0, (char) 0, element.m_mapCE_);
            // This loop has to change the CE at the end of
            // contraction REDO!
            changeLastCE(t.m_contractions_, CE, element.m_mapCE_);
          }
        } else {
          // overwrite the existing simple mapping
          t.m_mapping_.setValue(element.m_cPoints_.charAt(element.m_cPointsOffset_), element.m_mapCE_);
          if (element.m_prefixChars_ != null && element.m_prefixChars_.length() > 0 && getCETag(CE) != CE_IMPLICIT_TAG_) {
            // Add CE for standalone precontext char.
            Elements origElem = new Elements();
            origElem.m_prefixChars_ = null;
            origElem.m_uchars_ = element.m_cPoints_;
            origElem.m_cPoints_ = origElem.m_uchars_;
            origElem.m_CEs_[0] = CE;
            origElem.m_mapCE_ = CE;
            origElem.m_CELength_ = 1;
            finalizeAddition(t, origElem);
          }
        }
      } else {
        // nothing mapped yet - install the element's CE directly
        t.m_mapping_.setValue(element.m_cPoints_.charAt(element.m_cPointsOffset_), element.m_mapCE_);
      }
    }
    return CE;
  }

  /**
   * Adds a contraction mapping for {@code element} to the build table, creating a new
   * contraction-table entry or extending an existing one, and returns the CE now mapped to
   * the contraction's first code point.
   * <p>
   * Note regarding surrogate handling: We are interested only in the single or leading
   * surrogates in a contraction. If a surrogate is somewhere else in the contraction, it is
   * going to be handled as a pair of code units, as it doesn't affect the performance AND
   * handling surrogates specially would complicate code way too much.
   *
   * @param t build table being populated
   * @param CE collation element currently mapped to the contraction's first code point
   *        (may be CE_NOT_FOUND_ or an existing contraction-table CE)
   * @param element tailoring element whose code points form the contraction
   * @return the (possibly new) CE mapped to the first code point
   */
  private static int addContraction(final BuildTable t, int CE, final Elements element) {
    ContractionTable contractions = t.m_contractions_;
    contractions.m_currentTag_ = CE_CONTRACTION_TAG_;

    // First we need to check if contractions starts with a surrogate
    int cp = UTF16.charAt(element.m_cPoints_, 0);
    int cpsize = 1;
    if (UCharacter.isSupplementary(cp)) {
      cpsize = 2;
    }
    // NOTE(review): this test uses the full string length while the code below is mostly
    // offset-relative; presumably m_cPointsOffset_ is 0 on entry -- confirm against callers.
    if (cpsize < element.m_cPoints_.length()) {
      // This is a real contraction, if there are other characters after
      // the first
      int size = element.m_cPoints_.length() - element.m_cPointsOffset_;
      for (int j = 1; j < size; j++) {
        // First add contraction chars to unsafe CP hash table
        // Unless it is a trail surrogate, which is handled
        // algorithmically and shouldn't take up space in the table.
        if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(element.m_cPointsOffset_ + j))) {
          unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(element.m_cPointsOffset_ + j));
        }
      }
      // Add the last char of the contraction to the contraction-end
      // hash table. unless it is a trail surrogate, which is handled
      // algorithmically and shouldn't be in the table
      if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(element.m_cPoints_.length() - 1))) {
        ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_.charAt(element.m_cPoints_.length() - 1));
      }

      // If there are any Jamos in the contraction, we should turn on
      // special processing for Jamos
      if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) {
        t.m_collator_.m_isJamoSpecial_ = true;
      }
      // then we need to deal with it
      // we could aready have something in table - or we might not
      // temporarily advance past the first code point; restored below
      element.m_cPointsOffset_ += cpsize;
      if (!isContraction(CE)) {
        // if it wasn't contraction, we wouldn't end up here
        // start a fresh contraction entry: slot for the no-suffix CE, the first
        // suffix char's chain, and the 0xFFFF sentinel carrying the original CE
        int firstContractionOffset = addContraction(contractions, CONTRACTION_TABLE_NEW_ELEMENT_, (char) 0, CE);
        int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
        addContraction(contractions, firstContractionOffset, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
        addContraction(contractions, firstContractionOffset, (char) 0xFFFF, CE);
        CE = constructSpecialCE(CE_CONTRACTION_TAG_, firstContractionOffset);
      } else {
        // we are adding to existing contraction
        // there were already some elements in the table, so we need to
        // add a new contraction
        // Two things can happen here: either the codepoint is already
        // in the table, or it is not
        int position = findCP(contractions, CE, element.m_cPoints_.charAt(element.m_cPointsOffset_));
        if (position > 0) {
          // if it is we just continue down the chain
          int eCE = getCE(contractions, CE, position);
          int newCE = processContraction(contractions, element, eCE);
          setContraction(contractions, CE, position, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
        } else {
          // if it isn't, we will have to create a new sequence
          int newCE = processContraction(contractions, element, CE_NOT_FOUND_);
          insertContraction(contractions, CE, element.m_cPoints_.charAt(element.m_cPointsOffset_), newCE);
        }
      }
      element.m_cPointsOffset_ -= cpsize;
      t.m_mapping_.setValue(cp, CE);
    } else if (!isContraction(CE)) {
      // this is just a surrogate, and there is no contraction
      t.m_mapping_.setValue(cp, element.m_mapCE_);
    } else {
      // fill out the first stage of the contraction with the surrogate
      // CE
      changeContraction(contractions, CE, (char) 0, element.m_mapCE_);
      changeContraction(contractions, CE, (char) 0xFFFF, element.m_mapCE_);
    }
    return CE;
  }

  /**
   * this is for adding non contractions
   *
   * @param table
   *            contraction table
   * @param element
   *            offset to the contraction table
   * @param value
   *            collation element value
   * @return new collation element
   */
  private static final int changeLastCE(final ContractionTable table, final int element, final int value) {
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      return 0;
    }

    tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, Integer.valueOf(value));
    return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF);
  }

  /**
   * Given a set of ranges calculated by allocWeights(), iterate through the weights. Sets the next weight in cegenerator.m_current_.
   *
   * @param cegenerator
   *            object that contains ranges weight range array and its rangeCount
   * @return the next weight
   */
  private static int nextWeight(final CEGenerator cegenerator) {
    if (cegenerator.m_rangesLength_ > 0) {
      // get maxByte from the .count field
      int maxByte = cegenerator.m_ranges_[0].m_count_;
      // get the next weight
      int weight = cegenerator.m_ranges_[0].m_start_;
      if (weight == cegenerator.m_ranges_[0].m_end_) {
        // this range is finished, remove it and move the following
        // ones up
        cegenerator.m_rangesLength_--;
        if (cegenerator.m_rangesLength_ > 0) {
          System.arraycopy(cegenerator.m_ranges_, 1, cegenerator.m_ranges_, 0, cegenerator.m_rangesLength_);
          cegenerator.m_ranges_[0].m_count_ = maxByte;
          // keep maxByte in ranges[0]
        }
      } else {
        // increment the weight for the next value
        cegenerator.m_ranges_[0].m_start_ = incWeight(weight, cegenerator.m_ranges_[0].m_length2_, maxByte);
      }
      return weight;
    }
    return -1;
  }

  /**
   * Increment the collation weight
   *
   * @param weight
   *            to increment
   * @param length
   * @param maxByte
   * @return new incremented weight
   */
  private static final int incWeight(int weight, int length, final int maxByte) {
    while (true) {
      int b = getWeightByte(weight, length);
      if (b < maxByte) {
        return setWeightByte(weight, length, b + 1);
      } else {
        // roll over, set this byte to BYTE_FIRST_TAILORED_ and
        // increment the previous one
        weight = setWeightByte(weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
        --length;
      }
    }
  }

  /**
   * Gets the weight byte
   *
   * @param weight
   * @param index
   * @return byte
   */
  private static final int getWeightByte(final int weight, final int index) {
    return (weight >> ((4 - index) << 3)) & 0xff;
  }

  /**
   * Set the weight byte in table
   *
   * @param weight
   * @param index
   * @param b
   *            byte
   */
  private static final int setWeightByte(final int weight, int index, final int b) {
    index <<= 3;
    // 0xffffffff except a 00 "hole" for the index-th byte
    int mask;
    if (index < 32) {
      mask = 0xffffffff >>> index;
    } else {
      // Do not use int>>>32 because that does not shift at all
      // while we need it to become 0.
      //
      // Java Language Specification (Third Edition) 15.19 Shift Operators:
      // "If the promoted type of the left-hand operand is int,
      // only the five lowest-order bits of the right-hand operand
      // are used as the shift distance.
      // It is as if the right-hand operand were subjected to
      // a bitwise logical AND operator & (§15.22.1) with the mask value 0x1f.
      // The shift distance actually used is therefore
      // always in the range 0 to 31, inclusive."
      mask = 0;
    }
    index = 32 - index;
    mask |= 0xffffff00 << index;
    return (weight & mask) | (b << index);
  }

  /**
   * Call getWeightRanges and then determine heuristically which ranges to use for a given number
   * of weights between (excluding) two limits.
   *
   * @param lowerLimit weight below the allocated weights (exclusive)
   * @param upperLimit weight above the allocated weights (exclusive)
   * @param n number of weights to allocate
   * @param maxByte highest usable byte value in a weight
   * @param ranges out-parameter receiving the chosen ranges, sorted by weight value; on
   *        success ranges[0].m_count_ is overwritten with maxByte for use by nextWeight()
   * @return the number of ranges written to {@code ranges}, or 0 if {@code n} weights
   *         cannot fit between the limits
   */
  private int allocateWeights(final int lowerLimit, final int upperLimit, final int n, final int maxByte, final WeightRange ranges[]) {
    // number of usable byte values 3..maxByte
    int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1;
    // [0] unused, [5] to make index checks unnecessary, m_utilCountBuffer_
    // countBytes to the power of index, m_utilLongBuffer_ for unsignedness
    // gcc requires explicit initialization
    m_utilLongBuffer_[0] = 1;
    m_utilLongBuffer_[1] = countBytes;
    m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes;
    m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes;
    m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes;
    int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte, countBytes, ranges);
    if (rangeCount <= 0) {
      return 0;
    }
    // what is the maximum number of weights with these ranges?
    // (each range of length L can be lengthened to 4 bytes, multiplying its
    // weight count by countBytes^(4-L))
    long maxCount = 0;
    for (int i = 0; i < rangeCount; ++i) {
      maxCount += (long) ranges[i].m_count_ * m_utilLongBuffer_[4 - ranges[i].m_length_];
    }
    if (maxCount < n) {
      return 0;
    }
    // set the length2 and count2 fields
    for (int i = 0; i < rangeCount; ++i) {
      ranges[i].m_length2_ = ranges[i].m_length_;
      ranges[i].m_count2_ = ranges[i].m_count_;
    }
    // try until we find suitably large ranges
    while (true) {
      // get the smallest number of bytes in a range
      int minLength = ranges[0].m_length2_;
      // sum up the number of elements that fit into ranges of each byte
      // length
      Arrays.fill(m_utilCountBuffer_, 0);
      for (int i = 0; i < rangeCount; ++i) {
        m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_;
      }
      // now try to allocate n elements in the available short ranges
      if (n <= m_utilCountBuffer_[minLength] + m_utilCountBuffer_[minLength + 1]) {
        // trivial cases, use the first few ranges
        maxCount = 0;
        rangeCount = 0;
        do {
          maxCount += ranges[rangeCount].m_count2_;
          ++rangeCount;
        } while (n > maxCount);
        break;
      } else if (n <= ranges[0].m_count2_ * countBytes) {
        // easy case, just make this one range large enough by
        // lengthening it once more, possibly split it
        rangeCount = 1;
        // calculate how to split the range between maxLength-1
        // (count1) and maxLength (count2)
        long power_1 = m_utilLongBuffer_[minLength - ranges[0].m_length_];
        long power = power_1 * countBytes;
        int count2 = (int) ((n + power - 1) / power);
        int count1 = ranges[0].m_count_ - count2;
        // split the range
        if (count1 < 1) {
          // lengthen the entire range to maxLength
          lengthenRange(ranges, 0, maxByte, countBytes);
        } else {
          // really split the range
          // create a new range with the end and initial and current
          // length of the old one
          rangeCount = 2;
          ranges[1].m_end_ = ranges[0].m_end_;
          ranges[1].m_length_ = ranges[0].m_length_;
          ranges[1].m_length2_ = minLength;
          // set the end of the first range according to count1
          int i = ranges[0].m_length_;
          int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1;
          // ranges[0].count and count1 may be >countBytes from
          // merging adjacent ranges; b > maxByte is possible
          if (b <= maxByte) {
            ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i, b);
          } else {
            ranges[0].m_end_ = setWeightByte(incWeight(ranges[0].m_start_, i - 1, maxByte), i, b - countBytes);
          }
          // set the bytes in the end weight at length + 1..length2
          // to maxByte
          b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8) | maxByte; // this used to be 0xffffffff
          ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i) | (b >>> (i << 3)) & (b << ((4 - minLength) << 3));
          // set the start of the second range to immediately follow
          // the end of the first one
          ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength, maxByte);
          // set the count values (informational)
          ranges[0].m_count_ = count1;
          ranges[1].m_count_ = count2;

          ranges[0].m_count2_ = (int) (count1 * power_1);
          // will be *countBytes when lengthened
          ranges[1].m_count2_ = (int) (count2 * power_1);

          // lengthen the second range to maxLength
          lengthenRange(ranges, 1, maxByte, countBytes);
        }
        break;
      }
      // no good match, lengthen all minLength ranges and iterate
      for (int i = 0; ranges[i].m_length2_ == minLength; ++i) {
        lengthenRange(ranges, i, maxByte, countBytes);
      }
    }

    if (rangeCount > 1) {
      // sort the ranges by weight values
      Arrays.sort(ranges, 0, rangeCount);
    }

    // set maxByte in ranges[0] for ucol_nextWeight()
    ranges[0].m_count_ = maxByte;

    return rangeCount;
  }

  /**
   * Updates the range length
   *
   * @param range
   *            weight range array
   * @param offset
   *            to weight range array
   * @param maxByte
   * @param countBytes
   * @return new length
   */
  private static final int lengthenRange(final WeightRange range[], final int offset, final int maxByte, final int countBytes) {
    int length = range[offset].m_length2_ + 1;
    range[offset].m_start_ = setWeightTrail(range[offset].m_start_, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
    range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length, maxByte);
    range[offset].m_count2_ *= countBytes;
    range[offset].m_length2_ = length;
    return length;
  }

  /**
   * Gets the weight
   *
   * @param weight
   * @param length
   * @param trail
   * @return new weight
   */
  private static final int setWeightTrail(final int weight, int length, final int trail) {
    length = (4 - length) << 3;
    return (weight & (0xffffff00 << length)) | (trail << length);
  }

  /**
   * take two CE weights and calculate the possible ranges of weights between the two limits,
   * excluding them. For weights with up to 4 bytes there are up to 2*4-1=7 ranges.
   *
   * @param lowerLimit weight below the ranges (exclusive); assumed non-zero
   * @param upperLimit weight above the ranges (exclusive); assumed non-zero
   * @param maxByte highest usable byte value
   * @param countBytes number of usable byte values
   * @param ranges out-parameter receiving the computed ranges, shortest first
   * @return number of ranges written to {@code ranges}; 0 if the limits leave no room
   */
  private int getWeightRanges(final int lowerLimit, final int upperLimit, final int maxByte, final int countBytes,
      final WeightRange ranges[]) {
    // assume that both lowerLimit & upperLimit are not 0
    // get the lengths of the limits
    int lowerLength = lengthOfWeight(lowerLimit);
    int upperLength = lengthOfWeight(upperLimit);
    if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) {
      return 0;
    }
    // check that neither is a prefix of the other
    if (lowerLength < upperLength) {
      if (lowerLimit == truncateWeight(upperLimit, lowerLength)) {
        return 0;
      }
    }
    // if the upper limit is a prefix of the lower limit then the earlier
    // test lowerLimit >= upperLimit has caught it
    // reset local variables
    // With the limit lengths of 1..4, there are up to 7 ranges for
    // allocation:
    // range minimum length
    // lower[4] 4
    // lower[3] 3
    // lower[2] 2
    // middle 1
    // upper[2] 2
    // upper[3] 3
    // upper[4] 4
    // We are now going to calculate up to 7 ranges.
    // Some of them will typically overlap, so we will then have to merge
    // and eliminate ranges.

    // We have to clean cruft from previous invocations
    // before doing anything. C++ already does that
    for (int length = 0; length < 5; length++) {
      m_utilLowerWeightRange_[length].clear();
      m_utilUpperWeightRange_[length].clear();
    }
    m_utilWeightRange_.clear();

    // lower ranges: for each byte length, everything strictly above the lower
    // limit that shares the limit's shorter prefix
    int weight = lowerLimit;
    for (int length = lowerLength; length >= 2; --length) {
      m_utilLowerWeightRange_[length].clear();
      int trail = getWeightByte(weight, length);
      if (trail < maxByte) {
        m_utilLowerWeightRange_[length].m_start_ = incWeightTrail(weight, length);
        m_utilLowerWeightRange_[length].m_end_ = setWeightTrail(weight, length, maxByte);
        m_utilLowerWeightRange_[length].m_length_ = length;
        m_utilLowerWeightRange_[length].m_count_ = maxByte - trail;
      }
      weight = truncateWeight(weight, length - 1);
    }
    m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1);

    // upper ranges: everything strictly below the upper limit that shares its
    // shorter prefix
    weight = upperLimit;
    // [0] and [1] are not used - this simplifies indexing,
    // m_utilUpperWeightRange_

    for (int length = upperLength; length >= 2; length--) {
      int trail = getWeightByte(weight, length);
      if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) {
        m_utilUpperWeightRange_[length].m_start_ = setWeightTrail(weight, length, RuleBasedCollator.BYTE_FIRST_TAILORED_);
        m_utilUpperWeightRange_[length].m_end_ = decWeightTrail(weight, length);
        m_utilUpperWeightRange_[length].m_length_ = length;
        m_utilUpperWeightRange_[length].m_count_ = trail - RuleBasedCollator.BYTE_FIRST_TAILORED_;
      }
      weight = truncateWeight(weight, length - 1);
    }
    m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1);

    // set the middle range
    m_utilWeightRange_.m_length_ = 1;
    if (Utility.compareUnsigned(m_utilWeightRange_.m_end_, m_utilWeightRange_.m_start_) >= 0) {
      // if (m_utilWeightRange_.m_end_ >= m_utilWeightRange_.m_start_) {
      m_utilWeightRange_.m_count_ = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) >>> 24) + 1;
    } else {
      // eliminate overlaps
      // remove the middle range
      m_utilWeightRange_.m_count_ = 0;
      // reduce or remove the lower ranges that go beyond upperLimit
      for (int length = 4; length >= 2; --length) {
        if (m_utilLowerWeightRange_[length].m_count_ > 0 && m_utilUpperWeightRange_[length].m_count_ > 0) {
          int start = m_utilUpperWeightRange_[length].m_start_;
          int end = m_utilLowerWeightRange_[length].m_end_;
          if (end >= start || incWeight(end, length, maxByte) == start) {
            // lower and upper ranges collide or are directly
            // adjacent: merge these two and remove all shorter
            // ranges
            start = m_utilLowerWeightRange_[length].m_start_;
            end = m_utilLowerWeightRange_[length].m_end_ = m_utilUpperWeightRange_[length].m_end_;
            // merging directly adjacent ranges needs to subtract
            // the 0/1 gaps in between;
            // it may result in a range with count>countBytes
            m_utilLowerWeightRange_[length].m_count_ = getWeightByte(end, length) - getWeightByte(start, length) + 1
                + countBytes * (getWeightByte(end, length - 1) - getWeightByte(start, length - 1));
            m_utilUpperWeightRange_[length].m_count_ = 0;
            while (--length >= 2) {
              m_utilLowerWeightRange_[length].m_count_ = m_utilUpperWeightRange_[length].m_count_ = 0;
            }
            break;
          }
        }
      }
    }

    // copy the ranges, shortest first, into the result array
    int rangeCount = 0;
    if (m_utilWeightRange_.m_count_ > 0) {
      ranges[0] = new WeightRange(m_utilWeightRange_);
      rangeCount = 1;
    }
    for (int length = 2; length <= 4; ++length) {
      // copy upper first so that later the middle range is more likely
      // the first one to use
      if (m_utilUpperWeightRange_[length].m_count_ > 0) {
        ranges[rangeCount] = new WeightRange(m_utilUpperWeightRange_[length]);
        ++rangeCount;
      }
      if (m_utilLowerWeightRange_[length].m_count_ > 0) {
        ranges[rangeCount] = new WeightRange(m_utilLowerWeightRange_[length]);
        ++rangeCount;
      }
    }
    return rangeCount;
  }

  /**
   * Truncates the weight with length
   *
   * @param weight
   * @param length
   * @return truncated weight
   */
  private static final int truncateWeight(final int weight, final int length) {
    return weight & (0xffffffff << ((4 - length) << 3));
  }

  /**
   * Length of the weight
   *
   * @param weight
   * @return length of the weight
   */
  private static final int lengthOfWeight(final int weight) {
    if ((weight & 0xffffff) == 0) {
      return 1;
    } else if ((weight & 0xffff) == 0) {
      return 2;
    } else if ((weight & 0xff) == 0) {
      return 3;
    }
    return 4;
  }

  /**
   * Increment the weight trail
   *
   * @param weight
   * @param length
   * @return new weight
   */
  private static final int incWeightTrail(final int weight, final int length) {
    return weight + (1 << ((4 - length) << 3));
  }

  /**
   * Decrement the weight trail
   *
   * @param weight
   * @param length
   * @return new weight
   */
  private static int decWeightTrail(final int weight, final int length) {
    return weight - (1 << ((4 - length) << 3));
  }

  /**
   * Gets the codepoint
   *
   * @param tbl
   *            contraction table
   * @param codePoint
   *            code point to look for
   * @return the offset to the code point
   */
  private static int findCP(final BasicContractionTable tbl, final char codePoint) {
    int position = 0;
    while (codePoint > tbl.m_codePoints_.charAt(position)) {
      position++;
      if (position > tbl.m_codePoints_.length()) {
        return -1;
      }
    }
    if (codePoint == tbl.m_codePoints_.charAt(position)) {
      return position;
    } else {
      return -1;
    }
  }

  /**
   * Finds a contraction ce
   *
   * @param table
   * @param element
   * @param ch
   * @return ce
   */
  private static int findCE(final ContractionTable table, final int element, final char ch) {
    if (table == null) {
      return CE_NOT_FOUND_;
    }
    BasicContractionTable tbl = getBasicContractionTable(table, element);
    if (tbl == null) {
      return CE_NOT_FOUND_;
    }
    int position = findCP(tbl, ch);
    if (position > tbl.m_CEs_.size() || position < 0) {
      return CE_NOT_FOUND_;
    }
    return tbl.m_CEs_.get(position).intValue();
  }

  /**
   * Checks if the string is tailored in the contraction
   *
   * @param table
   *            contraction table
   * @param element
   * @param array
   *            character array to check
   * @param offset
   *            array offset
   * @return true if it is tailored
   */
  private static boolean isTailored(final ContractionTable table, int element, final char array[], int offset) {
    while (array[offset] != 0) {
      element = findCE(table, element, array[offset]);
      if (element == CE_NOT_FOUND_) {
        return false;
      }
      if (!isContractionTableElement(element)) {
        return true;
      }
      offset++;
    }
    if (getCE(table, element, 0) != CE_NOT_FOUND_) {
      return true;
    } else {
      return false;
    }
  }

  /**
   * Assembles the final RuleBasedCollator from the build table: flattens the contraction
   * table, copies expansions, contractions, the compacted trie, and max-expansion data into
   * the collator, and merges the unsafe-codepoint and contraction-end bitsets with the UCA's.
   *
   * @param t build table holding the tailoring data
   * @param collator collator to update
   */
  private void assembleTable(final BuildTable t, final RuleBasedCollator collator) {
    IntTrieBuilder mapping = t.m_mapping_;
    List<Integer> expansions = t.m_expansions_;
    ContractionTable contractions = t.m_contractions_;
    MaxExpansionTable maxexpansion = t.m_maxExpansions_;

    // contraction offset has to be in since we are building on the
    // UCA contractions
    // int beforeContractions = (HEADER_SIZE_
    // + paddedsize(expansions.size() << 2)) >>> 1;
    collator.m_contractionOffset_ = 0;
    // flatten the per-entry contraction lists into parallel arrays and rewrite
    // contraction CEs to use the flattened offsets
    int contractionsSize = constructTable(contractions);

    // the following operation depends on the trie data. Therefore, we have
    // to do it before the trie is compacted
    // sets jamo expansions
    getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_, collator.m_isJamoSpecial_);

    // TODO: LATIN1 array is now in the utrie - it should be removed from
    // the calculation
    setAttributes(collator, t.m_options_);
    // copy expansions
    int size = expansions.size();
    collator.m_expansion_ = new int[size];
    for (int i = 0; i < size; i++) {
      collator.m_expansion_[i] = expansions.get(i).intValue();
    }
    // contractions block
    if (contractionsSize != 0) {
      // copy contraction index
      collator.m_contractionIndex_ = new char[contractionsSize];
      contractions.m_codePoints_.getChars(0, contractionsSize, collator.m_contractionIndex_, 0);
      // copy contraction collation elements
      collator.m_contractionCE_ = new int[contractionsSize];
      for (int i = 0; i < contractionsSize; i++) {
        collator.m_contractionCE_[i] = contractions.m_CEs_.get(i).intValue();
      }
    }
    // copy mapping table
    collator.m_trie_ = mapping.serialize(t, RuleBasedCollator.DataManipulate.getInstance());
    // copy max expansion table
    // not copying the first element which is a dummy
    // to be in synch with icu4c's builder, we continue to use the
    // expansion offset
    // omitting expansion offset in builder
    collator.m_expansionOffset_ = 0;
    size = maxexpansion.m_endExpansionCE_.size();
    collator.m_expansionEndCE_ = new int[size - 1];
    for (int i = 1; i < size; i++) {
      collator.m_expansionEndCE_[i - 1] = maxexpansion.m_endExpansionCE_.get(i).intValue();
    }
    collator.m_expansionEndCEMaxSize_ = new byte[size - 1];
    for (int i = 1; i < size; i++) {
      collator.m_expansionEndCEMaxSize_[i - 1] = maxexpansion.m_expansionCESize_.get(i).byteValue();
    }
    // Unsafe chars table. Finish it off, then copy it.
    unsafeCPAddCCNZ(t);
    // Or in unsafebits from UCA, making a combined table.
    for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
      t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i];
    }
    collator.m_unsafe_ = t.m_unsafeCP_;

    // Finish building Contraction Ending chars hash table and then copy it
    // out.
    // Or in unsafebits from UCA, making a combined table
    for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i++) {
      t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i];
    }
    collator.m_contractionEnd_ = t.m_contrEndCP_;
  }

  /**
   * Copies the parsed rule options into the collator's attributes.
   * <p>
   * latinOneFailed_ is set to true before the attributes change and reset to false at the
   * end. NOTE(review): presumably this brackets the update so the latin-1 fast path is not
   * used while the attributes are in a half-updated state -- confirm against
   * RuleBasedCollator's use of latinOneFailed_.
   *
   * @param collator collator whose attributes are to be set
   * @param option parsed option values to copy from
   */
  private static final void setAttributes(final RuleBasedCollator collator, final CollationRuleParser.OptionSet option) {
    collator.latinOneFailed_ = true;
    collator.m_caseFirst_ = option.m_caseFirst_;
    collator.setDecomposition(option.m_decomposition_);
    collator.setAlternateHandlingShifted(option.m_isAlternateHandlingShifted_);
    collator.setCaseLevel(option.m_isCaseLevel_);
    collator.setFrenchCollation(option.m_isFrenchCollation_);
    collator.m_isHiragana4_ = option.m_isHiragana4_;
    collator.setStrength(option.m_strength_);
    collator.m_variableTopValue_ = option.m_variableTopValue_;
    collator.m_reorderCodes_ = option.m_scriptOrder_;
    collator.latinOneFailed_ = false;
  }

  /**
   * Flattens the per-entry contraction lists into the table's parallel code point / CE
   * arrays and rewrites every contraction CE (both inside the flattened data and in the
   * mapping trie) to reference the new flattened offsets.
   *
   * @param table contraction table to flatten in place
   * @return total number of flattened elements, or 0 if the table is empty
   */
  private int constructTable(final ContractionTable table) {
    // See how much memory we need
    int tsize = table.m_elements_.size();
    if (tsize == 0) {
      return 0;
    }
    // compute the flattened offset of each entry
    table.m_offsets_.clear();
    int position = 0;
    for (int i = 0; i < tsize; i++) {
      table.m_offsets_.add(Integer.valueOf(position));
      position += table.m_elements_.get(i).m_CEs_.size();
    }
    table.m_CEs_.clear();
    table.m_codePoints_.delete(0, table.m_codePoints_.length());
    // Now stuff the things in
    StringBuilder cpPointer = table.m_codePoints_;
    List<Integer> CEPointer = table.m_CEs_;
    for (int i = 0; i < tsize; i++) {
      BasicContractionTable bct = table.m_elements_.get(i);
      int size = bct.m_CEs_.size();
      char ccMax = 0;
      char ccMin = 255;
      int offset = CEPointer.size();
      // slot 0 of each entry: the no-suffix CE; its code point slot receives a
      // header char (combining-class info) inserted below
      CEPointer.add(bct.m_CEs_.get(0));
      for (int j = 1; j < size; j++) {
        char ch = bct.m_codePoints_.charAt(j);
        // track min/max combining class over the entry's suffix characters
        char cc = (char) (UCharacter.getCombiningClass(ch) & 0xFF);
        if (cc > ccMax) {
          ccMax = cc;
        }
        if (cc < ccMin) {
          ccMin = cc;
        }
        cpPointer.append(ch);
        CEPointer.add(bct.m_CEs_.get(j));
      }
      // NOTE(review): "(ccMin == ccMax) ? 1 : 0 << 8" parses as
      // "(ccMin == ccMax) ? 1 : (0 << 8)" because << binds tighter than ?: (JLS 15.25),
      // so the all-same flag is OR'ed into the low (ccMax) byte instead of the high byte.
      // Probable precedence bug; the intent looks like
      // "((ccMin == ccMax ? 1 : 0) << 8) | ccMax". Left byte-identical here because the
      // value is consumed by the runtime collator's contraction processing -- confirm
      // before changing.
      cpPointer.insert(offset, (char) (((ccMin == ccMax) ? 1 : 0 << 8) | ccMax));
      // rewrite nested contraction CEs inside this entry to flattened offsets
      for (int j = 0; j < size; j++) {
        if (isContractionTableElement(CEPointer.get(offset + j).intValue())) {
          int ce = CEPointer.get(offset + j).intValue();
          CEPointer.set(offset + j,
              Integer.valueOf(constructSpecialCE(getCETag(ce), table.m_offsets_.get(getContractionOffset(ce)).intValue())));
        }
      }
    }

    // rewrite contraction CEs stored in the mapping trie to flattened offsets
    for (int i = 0; i <= 0x10FFFF; i++) {
      int CE = table.m_mapping_.getValue(i);
      if (isContractionTableElement(CE)) {
        CE = constructSpecialCE(getCETag(CE), table.m_offsets_.get(getContractionOffset(CE)).intValue());
        table.m_mapping_.setValue(i, CE);
      }
    }
    return position;
  }

  /**
   * Get contraction offset
   *
   * @param ce
   *            collation element
   * @return contraction offset
   */
  private static final int getContractionOffset(final int ce) {
    return ce & 0xFFFFFF;
  }

  /**
   * Gets the maximum Jamo expansion
   *
   * @param mapping
   *            trie table
   * @param maxexpansion
   *            maximum expansion table
   * @param maxjamoexpansion
   *            maximum jamo expansion table
   * @param jamospecial
   *            is jamo special?
   */
  private static void getMaxExpansionJamo(final IntTrieBuilder mapping, final MaxExpansionTable maxexpansion,
      final MaxJamoExpansionTable maxjamoexpansion, final boolean jamospecial) {
    int VBASE = 0x1161;
    int TBASE = 0x11A8;
    int VCOUNT = 21;
    int TCOUNT = 28;
    int v = VBASE + VCOUNT - 1;
    int t = TBASE + TCOUNT - 1;

    while (v >= VBASE) {
      int ce = mapping.getValue(v);
      if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
        setMaxExpansion(ce, (byte) 2, maxexpansion);
      }
      v--;
    }

    while (t >= TBASE) {
      int ce = mapping.getValue(t);
      if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) != RuleBasedCollator.CE_SPECIAL_FLAG_) {
        setMaxExpansion(ce, (byte) 3, maxexpansion);
      }
      t--;
    }
    // According to the docs, 99% of the time, the Jamo will not be special
    if (jamospecial) {
      // gets the max expansion in all unicode characters
      int count = maxjamoexpansion.m_endExpansionCE_.size();
      byte maxTSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_ + maxjamoexpansion.m_maxTSize_);
      byte maxVSize = (byte) (maxjamoexpansion.m_maxLSize_ + maxjamoexpansion.m_maxVSize_);

      while (count > 0) {
        count--;
        if ((maxjamoexpansion.m_isV_.get(count)).booleanValue() == true) {
          setMaxExpansion((maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), maxVSize, maxexpansion);
        } else {
          setMaxExpansion((maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), maxTSize, maxexpansion);
        }
      }
    }
  }

  /**
   * To the UnsafeCP hash table, add all chars with combining class != 0, plus the first
   * char of the NFC form of every prefix (precontext) entry. Optionally builds the
   * combining-mark lookup table ({@code t.cmLookup}) that groups BMP combining marks by
   * combining class.
   *
   * @param t build table whose m_unsafeCP_ (and possibly cmLookup) are updated
   */
  private final void unsafeCPAddCCNZ(final BuildTable t) {
    // "&" on booleans: build the CM table only when the flag is set and it does not exist yet
    boolean buildCMTable = (buildCMTabFlag & (t.cmLookup == null));
    char[] cm = null; // combining mark array
    // index[cc] = number of marks recorded so far for combining class cc;
    // cm is laid out as 256 buckets of 256 chars, bucket cc at (cc << 8)
    int[] index = new int[256];
    int count = 0;

    if (buildCMTable) {
      cm = new char[0x10000];
    }
    // NOTE(review): the loop condition excludes U+FFFF itself -- presumably intentional
    // (U+FFFF is a noncharacter used as a sentinel elsewhere in this builder), confirm.
    for (char c = 0; c < 0xffff; c++) {
      int fcd;
      if (UTF16.isLeadSurrogate(c)) {
        // for a lead surrogate, OR together the FCD values of all supplementary
        // code points it can start, so any non-zero data marks it unsafe
        fcd = 0;
        if (m_nfcImpl_.singleLeadMightHaveNonZeroFCD16(c)) {
          int supp = Character.toCodePoint(c, (char) 0xdc00);
          int suppLimit = supp + 0x400;
          while (supp < suppLimit) {
            fcd |= m_nfcImpl_.getFCD16FromNormData(supp++);
          }
        }
      } else {
        fcd = m_nfcImpl_.getFCD16(c);
      }
      // TODO: review for handling supplementary characters
      if (fcd >= 0x100 || // if the leading combining class(c) > 0 ||
          (UTF16.isLeadSurrogate(c) && fcd != 0)) {
        // c is a leading surrogate with some FCD data
        unsafeCPSet(t.m_unsafeCP_, c);
        if (buildCMTable) {
          // record c in its combining class bucket (trailing cc = low FCD byte);
          // assumes fewer than 256 marks per class so buckets do not overflow
          int cc = (fcd & 0xff);
          int pos = (cc << 8) + index[cc];
          cm[pos] = c;
          index[cc]++;
          count++;
        }
      }
    }

    if (t.m_prefixLookup_ != null) {
      Enumeration<Elements> els = Collections.enumeration(t.m_prefixLookup_.values());
      while (els.hasMoreElements()) {
        Elements e = els.nextElement();
        // codepoints here are in the NFD form. We need to add the
        // first code point of the NFC form to unsafe, because
        // strcoll needs to backup over them.
        // weiv: This is wrong! See the comment above.
        // String decomp = Normalizer.decompose(e.m_cPoints_, true);
        // unsafeCPSet(t.m_unsafeCP_, decomp.charAt(0));
        // it should be:
        String comp = Normalizer.compose(e.m_cPoints_, false);
        unsafeCPSet(t.m_unsafeCP_, comp.charAt(0));
      }
    }

    if (buildCMTable) {
      t.cmLookup = new CombinClassTable();
      t.cmLookup.generate(cm, count, index);
    }
  }

  /**
   * Create closure for one general-category range.
   * <p>
   * For each code point in {@code [start, limit)} that has a canonical
   * decomposition, if the collator orders the composed and decomposed forms
   * differently, an element mapping the composed form to the CEs of the
   * decomposed form (or to an existing prefix mapping's CE) is added to the
   * build table. Unassigned and private-use ranges are skipped entirely.
   *
   * @param t
   *            build table
   * @param collator
   *            RuleBasedCollator assembled from the table so far
   * @param colEl
   *            collation element iterator, reused to extract CEs
   * @param start
   *            first code point of the range (inclusive)
   * @param limit
   *            end of the range (exclusive)
   * @param type
   *            character type (general category) of the whole range
   * @return always true, so the caller's range enumeration continues
   */
  private boolean enumCategoryRangeClosureCategory(final BuildTable t, final RuleBasedCollator collator,
      final CollationElementIterator colEl, final int start, final int limit, final int type) {
    if (type != UCharacterCategory.UNASSIGNED && type != UCharacterCategory.PRIVATE_USE) {
      // if the range is assigned - we might ommit more categories later

      for (int u32 = start; u32 < limit; u32++) {
        String decomp = m_nfcImpl_.getDecomposition(u32);
        if (decomp != null) {
          String comp = UCharacter.toString(u32);
          // only add a closure entry when the two forms sort differently
          if (!collator.equals(comp, decomp)) {
            m_utilElement_.m_cPoints_ = decomp;
            m_utilElement_.m_prefix_ = 0;
            Elements prefix = t.m_prefixLookup_.get(m_utilElement_);
            if (prefix == null) {
              // no prefix context: collect the CEs of the decomposed form
              m_utilElement_.m_cPoints_ = comp;
              m_utilElement_.m_prefix_ = 0;
              m_utilElement_.m_prefixChars_ = null;
              colEl.setText(decomp);
              int ce = colEl.next();
              m_utilElement_.m_CELength_ = 0;
              while (ce != CollationElementIterator.NULLORDER) {
                m_utilElement_.m_CEs_[m_utilElement_.m_CELength_++] = ce;
                ce = colEl.next();
              }
            } else {
              // reuse the CE already mapped for the prefix context
              m_utilElement_.m_cPoints_ = comp;
              m_utilElement_.m_prefix_ = 0;
              m_utilElement_.m_prefixChars_ = null;
              m_utilElement_.m_CELength_ = 1;
              m_utilElement_.m_CEs_[0] = prefix.m_mapCE_;
              // This character uses a prefix. We have to add it
              // to the unsafe table, as it decomposed form is
              // already in. In Japanese, this happens for \u309e
              // & \u30fe
              // Since unsafeCPSet is static in ucol_elm, we are
              // going to wrap it up in the unsafeCPAddCCNZ
              // function
            }
            addAnElement(t, m_utilElement_);
          }
        }
      }
    }
    return true;
  }

  /**
   * Determine if a character is a Jamo
   *
   * @param ch
   *            character to test
   * @return true if ch is a Jamo, false otherwise
   */
  private static final boolean isJamo(final char ch) {
    return (ch >= 0x1100 && ch <= 0x1112) || (ch >= 0x1175 && ch <= 0x1161) || (ch >= 0x11A8 && ch <= 0x11C2);
  }

  /**
   * Produces canonical closure
   */
  private void canonicalClosure(final BuildTable t) {
    BuildTable temp = new BuildTable(t);
    assembleTable(temp, temp.m_collator_);
    // produce canonical closure
    CollationElementIterator coleiter = temp.m_collator_.getCollationElementIterator("");
    RangeValueIterator typeiter = UCharacter.getTypeIterator();
    RangeValueIterator.Element element = new RangeValueIterator.Element();
    while (typeiter.next(element)) {
      enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter, element.start, element.limit, element.value);
    }

    t.cmLookup = temp.cmLookup;
    temp.cmLookup = null;

    for (int i = 0; i < m_parser_.m_resultLength_; i++) {
      char baseChar, firstCM;
      // now we need to generate the CEs
      // We stuff the initial value in the buffers, and increase the
      // appropriate buffer according to strength */
      // createElements(t, m_parser_.m_listHeader_[i]);
      CollationRuleParser.Token tok = m_parser_.m_listHeader_[i].m_first_;
      m_utilElement_.clear();
      while (tok != null) {
        m_utilElement_.m_prefix_ = 0;// el.m_prefixChars_;
        m_utilElement_.m_cPointsOffset_ = 0; // el.m_uchars_;
        if (tok.m_prefix_ != 0) {
          // we will just copy the prefix here, and adjust accordingly
          // in
          // the addPrefix function in ucol_elm. The reason is that we
          // need to add both composed AND decomposed elements to the
          // unsafe table.
          int size = tok.m_prefix_ >> 24;
          int offset = tok.m_prefix_ & 0x00FFFFFF;
          m_utilElement_.m_prefixChars_ = m_parser_.m_source_.substring(offset, offset + size);
          size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24);
          offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24);
          m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, offset + size);
        } else {
          m_utilElement_.m_prefixChars_ = null;
          int offset = tok.m_source_ & 0x00FFFFFF;
          int size = tok.m_source_ >>> 24;
          m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, offset + size);
        }
        m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;

        baseChar = firstCM = 0; // reset
        for (int j = 0; j < m_utilElement_.m_cPoints_.length() - m_utilElement_.m_cPointsOffset_; j++) {

          int fcd = m_nfcImpl_.getFCD16(m_utilElement_.m_cPoints_.charAt(j))// TODO: review for handling supplementary characters
          if ((fcd & 0xff) == 0) {
            baseChar = m_utilElement_.m_cPoints_.charAt(j);
          } else {
            if ((baseChar != 0) && (firstCM == 0)) {
              firstCM = m_utilElement_.m_cPoints_.charAt(j); // first
                                      // combining
                                      // mark
            }
          }
        }

        if ((baseChar != 0) && (firstCM != 0)) {
          addTailCanonicalClosures(t, temp.m_collator_, coleiter, baseChar, firstCM);
        }
        tok = tok.m_next_;
      }
    }
  }

  private void addTailCanonicalClosures(final BuildTable t, final RuleBasedCollator m_collator, final CollationElementIterator colEl,
      final char baseChar, final char cMark) {
    if (t.cmLookup == null) {
      return;
    }
    CombinClassTable cmLookup = t.cmLookup;
    int[] index = cmLookup.index;
    int cClass = m_nfcImpl_.getFCD16(cMark) & 0xff// TODO: review for handling supplementary characters
    int maxIndex = 0;
    char[] precompCh = new char[256];
    int[] precompClass = new int[256];
    int precompLen = 0;
    Elements element = new Elements();

    if (cClass > 0) {
      maxIndex = index[cClass - 1];
    }
    for (int i = 0; i < maxIndex; i++) {
      StringBuilder decompBuf = new StringBuilder();
      decompBuf.append(baseChar).append(cmLookup.cPoints[i]);
      String comp = Normalizer.compose(decompBuf.toString(), false);
      if (comp.length() == 1) {
        precompCh[precompLen] = comp.charAt(0);
        precompClass[precompLen] = m_nfcImpl_.getFCD16(cmLookup.cPoints[i]) & 0xff// TODO: review for handling supplementary characters
        precompLen++;
        StringBuilder decomp = new StringBuilder();
        for (int j = 0; j < m_utilElement_.m_cPoints_.length(); j++) {
          if (m_utilElement_.m_cPoints_.charAt(j) == cMark) {
            decomp.append(cmLookup.cPoints[i]);
          } else {
            decomp.append(m_utilElement_.m_cPoints_.charAt(j));
          }
        }
        comp = Normalizer.compose(decomp.toString(), false);
        StringBuilder buf = new StringBuilder(comp);
        buf.append(cMark);
        decomp.append(cMark);
        comp = buf.toString();

        element.m_cPoints_ = decomp.toString();
        element.m_CELength_ = 0;
        element.m_prefix_ = 0;
        Elements prefix = t.m_prefixLookup_.get(element);
        element.m_cPoints_ = comp;
        element.m_uchars_ = comp;

        if (prefix == null) {
          element.m_prefix_ = 0;
          element.m_prefixChars_ = null;
          colEl.setText(decomp.toString());
          int ce = colEl.next();
          element.m_CELength_ = 0;
          while (ce != CollationElementIterator.NULLORDER) {
            element.m_CEs_[element.m_CELength_++] = ce;
            ce = colEl.next();
          }
        } else {
          element.m_cPoints_ = comp;
          element.m_prefix_ = 0;
          element.m_prefixChars_ = null;
          element.m_CELength_ = 1;
          element.m_CEs_[0] = prefix.m_mapCE_;
        }
        setMapCE(t, element);
        finalizeAddition(t, element);

        if (comp.length() > 2) {
          // This is a fix for tailoring contractions with accented
          // character at the end of contraction string.
          addFCD4AccentedContractions(t, colEl, comp, element);
        }
        if (precompLen > 1) {
          precompLen = addMultiCMontractions(t, colEl, element, precompCh, precompClass, precompLen, cMark, i, decomp.toString());
        }
      }
    }

  }

  /**
   * Computes and stores the mapped CE ({@code m_mapCE_}) for an element.
   * <p>
   * A two-CE expansion whose continuation carries only primary weights and
   * whose first CE has common secondary and tertiary weights is folded into
   * a single "long primary" special CE. Any other expansion is appended to
   * the expansion table and encoded as an expansion special CE holding the
   * table offset and (when it fits in a nibble) the length.
   *
   * @param t
   *            build table whose expansion and max-expansion tables are updated
   * @param element
   *            element whose m_CEs_/m_CELength_ are read and whose m_mapCE_ is set
   */
  private void setMapCE(final BuildTable t, final Elements element) {
    List<Integer> expansions = t.m_expansions_;
    element.m_mapCE_ = 0;

    if (element.m_CELength_ == 2 // a two CE expansion
        && RuleBasedCollator.isContinuation(element.m_CEs_[1])
        && (element.m_CEs_[1] & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) == 0 // continuation has only primaries
        && (((element.m_CEs_[0] >> 8) & 0xFF) == RuleBasedCollator.BYTE_COMMON_)
        // a common secondary
        && ((element.m_CEs_[0] & 0xFF) == RuleBasedCollator.BYTE_COMMON_)) { // and a common tertiary

      // Fold the three primary bytes into a single long-primary special CE.
      element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_
      // a long primary special
          | (CE_LONG_PRIMARY_TAG_ << 24)
          // first and second byte of primary
          | ((element.m_CEs_[0] >> 8) & 0xFFFF00)
          // third byte of primary
          | ((element.m_CEs_[1] >> 24) & 0xFF);
    } else {
      // omitting expansion offset in builder
      // (HEADER_SIZE_ >> 2)
      // Encode: special flag | expansion tag | (offset << 4), offset being
      // the index of the first CE just added to the expansion table.
      int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ | (CE_EXPANSION_TAG_ << RuleBasedCollator.CE_TAG_SHIFT_)
          | (addExpansion(expansions, element.m_CEs_[0]) << 4) & 0xFFFFF0;

      for (int i = 1; i < element.m_CELength_; i++) {
        addExpansion(expansions, element.m_CEs_[i]);
      }
      if (element.m_CELength_ <= 0xF) {
        // length fits in the low nibble of the CE
        expansion |= element.m_CELength_;
      } else {
        // too long for the nibble: terminate the expansion with a 0 CE instead
        addExpansion(expansions, 0);
      }
      element.m_mapCE_ = expansion;
      setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1], (byte) element.m_CELength_, t.m_maxExpansions_);
    }
  }

  /**
   * Adds closure mappings for contractions involving multiple combining
   * marks: combines each previously collected precomposed character with
   * the current mark (and vice versa), and for every combination that
   * composes to a single character adds an equivalent element. Newly
   * produced precomposed characters are appended to {@code precompCh}/
   * {@code precompClass} so later iterations can build on them.
   *
   * @param t
   *            build table
   * @param colEl
   *            collation element iterator reused to extract CEs
   * @param element
   *            scratch element, overwritten for each addition
   * @param precompCh
   *            precomposed characters collected so far (appended to in place)
   * @param precompClass
   *            combining classes parallel to precompCh (appended to in place)
   * @param maxComp
   *            number of valid entries in precompCh/precompClass on entry
   * @param cMark
   *            original combining mark of the tailored contraction
   * @param cmPos
   *            index of the current substitute mark in t.cmLookup.cPoints
   * @param decomp
   *            decomposed form of the contraction built by the caller
   * @return updated number of valid entries in precompCh/precompClass
   */
  private int addMultiCMontractions(final BuildTable t, final CollationElementIterator colEl, final Elements element,
      final char[] precompCh, final int[] precompClass, final int maxComp, final char cMark, final int cmPos, final String decomp) {

    CombinClassTable cmLookup = t.cmLookup;
    char[] combiningMarks = { cMark };
    int cMarkClass = UCharacter.getCombiningClass(cMark) & 0xFF;
    String comMark = new String(combiningMarks);
    int noOfPrecomposedChs = maxComp;

    for (int j = 0; j < maxComp; j++) {
      int count = 0;
      StringBuilder temp;

      // Two passes: first re-decompose the saved precomposed char plus the
      // substitute mark; second (only if precompClass[j] matches
      // cMarkClass) append precompCh[j] to the caller's decomposition.
      do {
        String newDecomp, comp;

        if (count == 0) { // Decompose the saved precomposed char.
          newDecomp = Normalizer.decompose(new String(precompCh, j, 1), false);
          temp = new StringBuilder(newDecomp);
          temp.append(cmLookup.cPoints[cmPos]);
          newDecomp = temp.toString();
        } else {
          temp = new StringBuilder(decomp);
          temp.append(precompCh[j]);
          newDecomp = temp.toString();
        }
        comp = Normalizer.compose(newDecomp, false);
        if (comp.length() == 1) {
          // the combination composes into one character: add a mapping
          // for (decomposed form + cMark) keyed on (composed + cMark)
          temp.append(cMark);
          element.m_cPoints_ = temp.toString();
          element.m_CELength_ = 0;
          element.m_prefix_ = 0;
          Elements prefix = t.m_prefixLookup_.get(element);
          element.m_cPoints_ = comp + comMark;
          if (prefix == null) {
            // no prefix context: collect CEs of the decomposed form
            element.m_prefix_ = 0;
            element.m_prefixChars_ = null;
            colEl.setText(temp.toString());
            int ce = colEl.next();
            element.m_CELength_ = 0;
            while (ce != CollationElementIterator.NULLORDER) {
              element.m_CEs_[element.m_CELength_++] = ce;
              ce = colEl.next();
            }
          } else {
            // reuse the CE already mapped for the prefix context
            element.m_cPoints_ = comp;
            element.m_prefix_ = 0;
            element.m_prefixChars_ = null;
            element.m_CELength_ = 1;
            element.m_CEs_[0] = prefix.m_mapCE_;
          }
          setMapCE(t, element);
          finalizeAddition(t, element);
          // remember the new precomposed character for later iterations
          // NOTE(review): no bounds check against precompCh.length (256);
          // confirm the caller's data cannot overflow it.
          precompCh[noOfPrecomposedChs] = comp.charAt(0);
          precompClass[noOfPrecomposedChs] = cMarkClass;
          noOfPrecomposedChs++;
        }
      } while (++count < 2 && (precompClass[j] == cMarkClass));
    }
    return noOfPrecomposedChs;
  }

  private void addFCD4AccentedContractions(final BuildTable t, final CollationElementIterator colEl, final String data,
      final Elements element) {
    String decomp = Normalizer.decompose(data, false);
    String comp = Normalizer.compose(data, false);

    element.m_cPoints_ = decomp;
    element.m_CELength_ = 0;
    element.m_prefix_ = 0;
    Elements prefix = t.m_prefixLookup_.get(element);
    if (prefix == null) {
      element.m_cPoints_ = comp;
      element.m_prefix_ = 0;
      element.m_prefixChars_ = null;
      element.m_CELength_ = 0;
      colEl.setText(decomp);
      int ce = colEl.next();
      element.m_CELength_ = 0;
      while (ce != CollationElementIterator.NULLORDER) {
        element.m_CEs_[element.m_CELength_++] = ce;
        ce = colEl.next();
      }
      addAnElement(t, element);
    }
  }

  private void processUCACompleteIgnorables(final BuildTable t) {
    TrieIterator trieiterator = new TrieIterator(RuleBasedCollator.UCA_.m_trie_);
    RangeValueIterator.Element element = new RangeValueIterator.Element();
    while (trieiterator.next(element)) {
      int start = element.start;
      int limit = element.limit;
      if (element.value == 0) {
        while (start < limit) {
          int CE = t.m_mapping_.getValue(start);
          if (CE == CE_NOT_FOUND_) {
            m_utilElement_.m_prefix_ = 0;
            m_utilElement_.m_uchars_ = UCharacter.toString(start);
            m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_;
            m_utilElement_.m_cPointsOffset_ = 0;
            m_utilElement_.m_CELength_ = 1;
            m_utilElement_.m_CEs_[0] = 0;
            addAnElement(t, m_utilElement_);
          }
          start++;
        }
      }
    }
  }
}
TOP

Related Classes of com.ibm.icu.text.CollationParsedRuleBuilder

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.