Package org.snu.ids.ha.util

Examples of org.snu.ids.ha.util.Hangul


   */
  public long getBasicPhonemeConds(String string)
  {
    long cond = 0;
    char lastCh = string.charAt(string.length() - 1);
    Hangul lastHg = Hangul.split(lastCh);

    // 자음 조건 설정
    if( lastHg.hasJong() ) {
      cond |= Condition.JAEUM;
    } else {
      cond |= Condition.MOEUM;
    }

View Full Code Here


    String stem = mCandidate.getExp();
    int stemLen = stem.length();
    String preStem = stem.substring(0, stemLen - 1);

    char lastCh = stem.charAt(stemLen - 1), preLastCh = 0, mo = 0;
    Hangul lastHg = Hangul.split(lastCh), preLastHg = null;
    if( stemLen > 1 ) {
      preLastCh = stem.charAt(stemLen - 2);
      preLastHg = Hangul.split(preLastCh);
    } else {
      preLastCh = 0;
    }

    String exp = null;
    MCandidate mCandidateClone = null;
   
    // TODO
    // 사 주다 -> 사+아+주+다 와 같이 한글자 어간 'ㅏ'로 끝나는 말
    // 2007-07-06 너무 많은 후보군들이 생성되버려서 문제 생김
    // 많이 사용되는 것만 따로 사전에 추가하도록 함
    // 2009-10-17 일단 넣어줌.
    if( stem.length() == 1 && !lastHg.hasJong() && lastHg.cho != 'ㅎ' ) {
      exp = stem;
      if( lastHg.jung == 'ㅏ' ) {
        mCandidateClone = mCandidate.copy();
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
        mCandidateClone.setExp(exp);
        mCandidateClone.setAutoExtd(true);
        mCandidateClone.clearHavingCondition();
        mCandidateClone.initHavingCond(exp);
        mCandidateClone.addHavingCond(Condition.AH);
        mCandidateClone.setRealDicLen((byte) exp.length());
        ret.add(mCandidateClone);
      } else if( lastHg.jung == 'ㅓ' ) {
        mCandidateClone = mCandidate.copy();
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
        mCandidateClone.setExp(exp);
        mCandidateClone.setAutoExtd(true);
        mCandidateClone.clearHavingCondition();
        mCandidateClone.initHavingCond(exp);
        mCandidateClone.addHavingCond(Condition.AH);
        mCandidateClone.setRealDicLen((byte) exp.length());
        ret.add(mCandidateClone);
      }
    }


    // 겹받침 'ㄶ'의 경우 'ㅎ'을 빼먹고 사용하는 경우가 많으므로 이를 처리해줌
    if( lastCh == '찮' || lastCh == '잖' ) {
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㄴ');
      mCandidateClone.setExp(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      mCandidateClone.decreaseNumOfPrfrdCond();
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.setExp(exp);
      ret.add(mCandidateClone);
    }


    // 과거형 붙여주기
    if( lastCh == '하' ) {
      // 했 -> 하였
      mCandidateClone = mCandidate.copy();
      exp = preStem + "했";
      mCandidateClone.add(new Morpheme("였", POSTag.EPT));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // 해 -> 하여
      mCandidateClone = mCandidate.copy();
      exp = preStem + "해";
      mCandidateClone.add(new Morpheme("여", POSTag.ECS));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);
     
      // 종결형
      mCandidateClone = mCandidate.copy();
      exp = preStem + "해";
      mCandidateClone.add(new Morpheme("여", POSTag.EFN));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // 형용사는 하지 -> 치 로 줄여질 수 있다.
      if( mCandidate.isTagOf(POSTag.VA | POSTag.VXA) ) {
        mCandidateClone = mCandidate.copy();
        exp = preStem + "치";
        mCandidateClone.add(new Morpheme("지", POSTag.ECS));
        mCandidateClone.setExp(exp);
        mCandidateClone.setAutoExtd(true);
        mCandidateClone.clearHavingCondition();
        mCandidateClone.initHavingCond(exp);
        mCandidateClone.setRealDicLen((byte) exp.length());
        ret.add(mCandidateClone);
      }
    }
    // '이'로 끝나는 말
    else if( !lastHg.hasJong() && lastHg.jung == 'ㅣ' ) {
      // ㅣ -> ㅣ었->ㅕㅆ
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, 'ㅕ', 'ㅆ');
      mCandidateClone.add(new Morpheme("었", POSTag.EPT));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // ㅣ -> ㅣ어->ㅕ
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, 'ㅕ', ' ');
      mCandidateClone.add(new Morpheme("어", POSTag.ECS));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.MOEUM | Condition.EUMSEONG | Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // ㅆ, ㅏㅆ, ㅐㅆ, ㅕㅆ  결합에 의한 어간 출력
    else if( !lastHg.hasJong() && MO_SET1.contains(lastHg.jung) ) {
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㅆ');
      mCandidateClone.add(new Morpheme("었", POSTag.EPT));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // '르'불규칙
    else if( lastCh == '르' ) {
      // 았
      mCandidateClone = mCandidate.copy();
      mCandidateClone.clearHavingCondition();
      if( preLastCh == '따' ) {
        exp = preStem + "랐";
        mCandidateClone.add(new Morpheme("았", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
      } else if( preLastCh == '푸' ) {
        exp = stem + "렀";
        mCandidateClone.add(new Morpheme("었", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
      } else {
        mo = getMoeum(lastHg, preLastHg);
        exp = stem.substring(0, stemLen - 2)
        + Hangul.combine(preLastHg.cho, preLastHg.jung, 'ㄹ')
        + Hangul.combine(lastHg.cho, mo, 'ㅆ');
        if( mo == 'ㅏ' ) {
          mCandidateClone.add(new Morpheme("았", POSTag.EPT));
        } else {
          mCandidateClone.add(new Morpheme("었", POSTag.EPT));
        }
        mCandidateClone.addHavingCond(Condition.EUT);
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setCandDicLen((byte)exp.length());
     
      ret.add(mCandidateClone);

      // 아
      mCandidateClone = mCandidate.copy();
      mCandidateClone.clearHavingCondition();
      if( preLastCh == '따' ) {
        exp = preStem + "라";
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
      } else if( preLastCh == '푸' ) {
        exp = stem + "러";
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
      } else {
        mo = getMoeum(lastHg, preLastHg);
        exp = stem.substring(0, stemLen - 2)
        + Hangul.combine(preLastHg.cho, preLastHg.jung, 'ㄹ')
        + Hangul.combine(lastHg.cho, mo, ' ');
        if( mo == 'ㅏ' ) {
          mCandidateClone.add(new Morpheme("아", POSTag.ECS));
          mCandidateClone.addHavingCond(Condition.AH);
        } else {
          mCandidateClone.add(new Morpheme("어", POSTag.ECS));
          mCandidateClone.addHavingCond(Condition.AH);
        }
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

    }
    // 'ㅡ' 결합에 의한 어간 출력
    else if( !lastHg.hasJong() && lastHg.jung == 'ㅡ' ) {
      // 양성으로 한번 결합
      mo = getMoeum(lastHg, preLastHg);
      mCandidateClone = mCandidate.copy();
      mCandidateClone.clearHavingCondition();
      exp = preStem + Hangul.combine(lastHg.cho, mo, 'ㅆ');
      if( mo == 'ㅏ' ) {
        mCandidateClone.add(new Morpheme("았", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
      } else {
        mCandidateClone.add(new Morpheme("었", POSTag.EPT));
        mCandidateClone.addHavingCond(Condition.EUT);
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // ㅓ, ㅏ
      mCandidateClone = mCandidate.copy();
      mCandidateClone.clearHavingCondition();
      exp = preStem + Hangul.combine(lastHg.cho, mo, ' ');
      if( mo == 'ㅏ' ) {
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
      } else {
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
        mCandidateClone.addHavingCond(Condition.AH);
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // ㅜ, ㅗ결합에 의한 어간 출력
    else if( !lastHg.hasJong() && MO_SET2.contains(lastHg.jung) ) {
      // 었, 았
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, getMoeum(lastHg, preLastHg), 'ㅆ');
      if( lastHg.jung == 'ㅜ' ) {
        mCandidateClone.add(new Morpheme("었", POSTag.EPT));
      } else {
        mCandidateClone.add(new Morpheme("았", POSTag.EPT));
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // 어, 아
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, getMoeum(lastHg, preLastHg), ' ');
      if( lastHg.jung == 'ㅜ' ) {
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
      } else {
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
      }
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // 겹모음 처리
    else if( !lastHg.hasJong() && lastHg.jung != 'ㅚ' ) {
      // 'ㅓ' 결합
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, 'ㅙ', ' ');
      mCandidateClone.add(new Morpheme("어", POSTag.ECS));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // '었' 결합
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, 'ㅙ', 'ㅆ');
      mCandidateClone.add(new Morpheme("었", POSTag.EPT));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }


    // ㅂ 불규칙
    // ㅂ불규칙 활용하는 어간의 마지막 어절
    // '뵙뽑씹업입잡접좁집' 들은 활용 안함~
    if( "갑겁겹곱굽깁깝껍꼽납눕답덥돕둡땁떱랍럽렵롭립맙맵밉볍섭쉽습엽줍쭙춥탑".indexOf(lastCh) > -1 ) {

      // ㅂ탈락된 음절 생성
      char bChar = Hangul.combine(lastHg.cho, lastHg.jung, ' ');

      // 럽은 '러운' 뿐만 아니라 짧게 '런' 등으로도 활용됨
      if( lastCh == '럽' ) {
        mCandidateClone = mCandidate.copy();
        exp = preStem + '런';
        mCandidateClone.add(new Morpheme("ㄴ", POSTag.ETD));
        mCandidateClone.setExp(exp);
        mCandidateClone.setAutoExtd(true);
        mCandidateClone.clearHavingCondition();
        mCandidateClone.initHavingCond(exp);
        mCandidateClone.decreaseNumOfPrfrdCond();
        mCandidateClone.setRealDicLen((byte)exp.length());
        ret.add(mCandidateClone);
      }

      // 워, 와
      mCandidateClone = mCandidate.copy();
      if( lastHg.jung == 'ㅗ') {
        mo = 'ㅘ';
        mCandidateClone.add(new Morpheme("아", POSTag.ECS));
      } else {
        mo = 'ㅝ';
        mCandidateClone.add(new Morpheme("어", POSTag.ECS));
      }
      exp = preStem + bChar + Hangul.combine('ㅇ', mo, ' ');
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.AH);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // 웠, 왔
      mCandidateClone = mCandidate.copy();
      mCandidateClone.clearHavingCondition();
      if( lastHg.jung == 'ㅗ') {
        mo = 'ㅘ';
        mCandidateClone.add(new Morpheme("았", POSTag.EPT));
      } else {
        mo = 'ㅝ';
        mCandidateClone.add(new Morpheme("었", POSTag.EPT));
      }
      exp = preStem + bChar + Hangul.combine('ㅇ', mo, 'ㅆ');
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.EUT);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // 우
      mCandidateClone = mCandidate.copy();
      exp = preStem + bChar + '우';
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      // ㄴ, ㄹ, ㅁ 에 의한 활용
      mCandidateClone = mCandidate.copy();
      exp = preStem + bChar + '운';
      mCandidateClone.add(new Morpheme("ㄴ", POSTag.ETD));
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      mCandidateClone = mCandidate.copy();
      mCandidateClone.add(new Morpheme("ㄹ", POSTag.ETD));
      exp = preStem + bChar + '울';
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);

      mCandidateClone = mCandidate.copy();
      mCandidateClone.add(new Morpheme("ㅁ", POSTag.ETN));
      exp = preStem + bChar + '움';
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.setRealDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // 'ㅅ' 불규칙
    else if( "젓짓긋낫붓잇".indexOf(lastCh) > -1 )
    {
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, ' ');
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.MINUS_SIOT);
      mCandidateClone.decreaseNumOfPrfrdCond();
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // 'ㄷ' 불규칙
    else if( lastHg.jong == 'ㄷ' ) {
      mCandidateClone = mCandidate.copy();
      exp = preStem + Hangul.combine(lastHg.cho, lastHg.jung, 'ㄹ');
      mCandidateClone.setExp(exp);
      mCandidateClone.setAutoExtd(true);
      mCandidateClone.clearHavingCondition();
      mCandidateClone.initHavingCond(exp);
      mCandidateClone.addHavingCond(Condition.MINUS_SIOT);
      mCandidateClone.decreaseNumOfPrfrdCond();
      mCandidateClone.setCandDicLen((byte)exp.length());
      ret.add(mCandidateClone);
    }
    // 그외 처리
    else if( !lastHg.hasJong() || lastHg.jong == 'ㄹ'
      // ㅎ 불규칙 처리
      || lastCh == '맣' || lastCh == '갛' || lastCh == '랗'
      )
    {
      // ㄴ, ㄹ, ㅁ, ㅂ 에 의한 활용
View Full Code Here

TOP

Related Classes of org.snu.ids.ha.util.Hangul

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.