Package org.ictclas4j.bean

Examples of org.ictclas4j.bean.Dictionary


  public static PersonName chineseNameSplit(String word, PosTagger personTagger) {
    PersonName result = null;

    if (word != null && personTagger != null) {
      Dictionary personDict = personTagger.getUnknownDict();
      int len = word.length();
      if (len < 2 || len > 4)
        return null;
      String[] atoms = GFString.atomSplit(word);
      for (String s : atoms) {
        if (Utility.charType(s) != Utility.CT_CHINESE && Utility.charType(s) != Utility.CT_OTHER)
          return null;
      }

      String surName = null;
      int surNameLen = 2;
      if (len > 2)
        surName = word.substring(0, surNameLen);
      else if (len == 2)
        surName = word;
      if (!personDict.isExist(surName, 1)) {
        surNameLen = 1;
        if (len > 1)
          surName = word.substring(0, surNameLen);
        else if (len == 1)
          surName = word;
        if (!personDict.isExist(surName, 1)) {
          surName = null;
          surNameLen = 0;
        }
      }
      String giveName = word.substring(surNameLen);
      if (len > 3) {
        String temp = word.substring(surNameLen, surNameLen + 1);
        if (personDict.isExist(temp, 1)) {

          giveName = word.substring(surNameLen + 1);
        }
      }

      double freq = personDict.getFreq(surName, 1);
      String temp = giveName.substring(0, 1);
      double freq2 = personDict.getFreq(temp, 2);

      if (surNameLen != 2
          && ((surNameLen == 0 && len > 2) || giveName.length() > 2 || getForeignCharCount(word) >= 3
              && freq < personDict.getFreq("��", 1) / 40 && freq2 < personDict.getFreq("��", 2) / 20 || (freq < 10 && getForeignCharCount(giveName) == (len - surNameLen) / 2)))
        return null;
      if (len == 2 && personTagger.isGivenName(word))
        return null;
      result = new PersonName();
      result.setFirstName(surName);
View Full Code Here


    if (fileName != null) {
      this.coreDict = coreDict;
      if (type == Utility.TAG_TYPE.TT_NORMAL)
        this.unknownDict = coreDict;
      else {
        unknownDict = new Dictionary();
        unknownDict.load(fileName + ".dct");

      }
      context = new ContextStat();
      context.load(fileName + ".ctx");
View Full Code Here

  static Logger logger = Logger.getLogger(Segment.class);

  public Segment(int segPathCount) {
    this.segPathCount = segPathCount;
    coreDict = new Dictionary("data\\coreDict.dct");
    bigramDict = new Dictionary("data\\bigramDict.dct");
    personTagger = new PosTagger(Utility.TAG_TYPE.TT_PERSON, "data\\nr", coreDict);
    transPersonTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, "data\\tr", coreDict);
    placeTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, "data\\ns", coreDict);
    lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, "data\\lexical", coreDict);
View Full Code Here

TOP

Related Classes of org.ictclas4j.bean.Dictionary

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.