Package ORG.oclc.os.SRW.Normalization

Source Code of ORG.oclc.os.SRW.Normalization.SRUNormalizer

package ORG.oclc.os.SRW.Normalization;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.ibm.icu.text.CollationKey;
import com.ibm.icu.text.RuleBasedCollator;

/**
* author@ Jui-wen Chang Date: Mar. 22, 2006 Time: 10:20:43 PM
*/
public class SRUNormalizer {
  //public static final String ruleset = "[strength 1][alternate shifted] &[before 1]0 < ' ' &'\u20B1'=[variable top] ";
  public static final String ruleset = "[strength 1][alternate shifted]";
  public static RuleBasedCollator col = null;
  private static char[] hexChar = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
  static Log log=LogFactory.getLog(SRUNormalizer.class);

  static {
    try{
      col = new RuleBasedCollator (ruleset);
    } catch (Exception e){
      log.fatal("Customized ICU collation creation failed. Rule= " + ruleset + ". " + e.toString());
      System.err.println ("Customized collation creation failed.");
      e.printStackTrace();
    }   
  }
  public static StringBuffer toHexString ( byte[] b, int start, int len ){
    StringBuffer sb = new StringBuffer( (b.length-1) * 2 );
    for ( int i=start; i<len; i++ ){
      //   look up high nibble char
      sb.append( hexChar [( b[i] & 0xf0 ) >>> 4] );
      //   look up low nibble char
      sb.append( hexChar [b[i] & 0x0f] );
    }
    return sb;
  }
  private static byte[] getSortByteArray (String term)
  {
    CollationKey colKey = col.getCollationKey(term);
    return colKey.toByteArray();
  }
 
  public static byte[] getSortBytes (String term)
  {
    byte[] bytes = getSortByteArray(term);       //    bytes (with trailing x'00')
    byte[] retbytes = new byte[bytes.length-1];
    System.arraycopy(bytes, 0, retbytes, 0, bytes.length-1);
    return retbytes;
    }
  
  public static String getSrotString(String term) {
    byte[] bytes = getSortByteArray(term);       //    bytes (with trailing x'00')
    String retStr = new String(bytes, 0, bytes.length-1);
    return retStr;
  }
   
  public static String getSortHexString (String term)
  {
    byte[] bytes = getSortByteArray(term);       //    bytes (with trailing x'00')
    return toHexString(bytes, 0, bytes.length-1).toString();
   }
  
  public static String getSortHexString_word (String term) {
    byte[] bytes = getSortByteArray(term);       //    bytes (with trailing x'00')
    if (bytes.length<=0)
      return "";
    if ((int)bytes[0]>=38 && (int)bytes[0] <=93//remove leading none 0..z byte for non-roman word search
      return toHexString(bytes, 0, bytes.length-1).toString();
    return toHexString(bytes, 1, bytes.length-1).toString();
  }
 
  public static String getIndexString(String func, String term) {
    if (func.equalsIgnoreCase("upper"))
      return term.toUpperCase();
    if (func.equalsIgnoreCase("lower"))
      return term.toLowerCase();
    if (func.equalsIgnoreCase("ICUSort")) {
      String hexString = getSortHexString(term);
      if (hexString.length()>0)
        return hexString;
      return hexString;
    }
    return null;
  }

  public static String getIndexString_word(String func, String term) {
    if (func.equalsIgnoreCase("upper"))
      return term.toUpperCase();
    if (func.equalsIgnoreCase("lower"))
      return term.toLowerCase();
    if (func.equalsIgnoreCase("ICUSort")) {
      String hexString = getSortHexString_word(term);
      if (hexString.length()>0)
        return hexString;
      return hexString;
    }
    return null;
  }

  public static byte[] getIndexBytes(String func, String term) {
    if (func.equals("ICUSort"))
      return getSortBytes(term);
    return null
  }
  public static int getIndexInt(String func, String term) {
    return Integer.parseInt(term);
  }
  public static double getIndexDecimal(String func, String term) {
    return Double.parseDouble(term);
  }

 
}
TOP

Related Classes of ORG.oclc.os.SRW.Normalization.SRUNormalizer

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.