Examples of SegNode


Examples of org.ictclas4j.bean.SegNode

            else
              break;
          }
        }
        if (end > start) {
          SegNode newsn = new SegNode();
          newsn.setRow(sns.get(start).getRow());
          newsn.setCol(sns.get(end - 1).getCol());
          newsn.setPos(pos);
          newsn.setWord(unknownFlags);
          newsn.setSrcWord(srcWord);
          double value = computePossibility(start, end - start + 1, sns);
          newsn.setWeight(value);
          segGraph.insert(newsn, true);
        }
      }
    }
  }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    }
  }

  private int getBestTag(ArrayList<SegNode> sns, int index) {
    if (sns != null && index >= 0 && index < sns.size()) {
      SegNode sn = sns.get(index);
      return getBestTag(sn);

    }

    return -1;
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    if (sns != null) {
      int size = sns.size();

      // Do not consider the start and end markers
      for (int i = size - 1, j = 0; i >= 0; i--) {
        SegNode sn = sns.get(i);
        ArrayList<AdjoiningPos> allPos = sn.getAllPos();
        if (allPos != null && allPos.size() > j) {
          AdjoiningPos pos = allPos.get(j);
          pos.setBest(true);
          j = pos.getPrev();
        } else if (i + 1 < size - 1) {
          int tag = getBestTag(sns.get(i + 1));
          AdjoiningPos pos = new AdjoiningPos(tag, 0);
          pos.setBest(true);
          sns.get(i).addPos(pos);
        }
        // If this word is tagged as a letter or a number, reset its POS to the tag of the candidate marked best
        if (sn.getPos() == POSTag.NOUN_LETTER || sn.getPos() == POSTag.NUM) {
          for (AdjoiningPos pos : allPos) {
            if (pos.isBest() && pos.getPos().getTag() > 0) {
              sn.setPos(pos.getPos().getTag());
              break;
            }
          }
        }
      }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

  private double computePossibility(int startPos, int length, ArrayList<SegNode> sns) {
    double retValue = 0, posPoss;

    if (sns != null && unknownDict != null && context != null) {
      for (int i = startPos; sns != null && i < startPos + length && i < sns.size(); i++) {
        SegNode sn = sns.get(i);
        int bestTag = getBestTag(sn);
        if (bestTag != -1) {
          int gbkID = sn.getGbkID();// dictLib.getGBKID(sn.getSrcWord());
          int freq = unknownDict.getFreq(sn.getSrcWord(), bestTag, gbkID);
          posPoss = Math.log((double) (context.getFreq(sn.getPos()) + 1));
          posPoss += -Math.log((double) (freq + 1));
          retValue += posPoss;
        }
      }
    }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

   * @param atoms
   * @return
   */
  public static SegGraph generate(ArrayList<Atom> atoms, DictLib dictLib) {
    SegGraph segGraph = null;
    SegNode sn = null;
    Atom atom = null;

    if (atoms != null && atoms.size() > 0 && dictLib != null) {
      segGraph = new SegGraph();
      Dictionary dict = dictLib.getCoreDict();

      // First, identify the parts of speech of the non-Chinese-character atoms
      for (int i = 0; i < atoms.size(); i++) {
        atom = atoms.get(i);
        String word = atom.getWord();
        if (atom.getPos() == Utility.CT_CHINESE)
          sn = new SegNode(i, i + 1, 0, 0, atom.getWord());
        else {
          int pos = 0;
          double value = Utility.MAX_FREQUENCE;

          switch (atom.getPos()) {
          case Utility.CT_INDEX:
          case Utility.CT_NUM:
            pos = -POSTag.NUM;// 'm'*256
            word = Utility.UNKNOWN_NUM;
            value = 0;
            break;
          case Utility.CT_DELIMITER:
            pos = POSTag.PUNC;// 'w'*256;
            break;
          case Utility.CT_LETTER:
            pos = -POSTag.NOUN_LETTER;//
            value = 0;
            word = Utility.UNKNOWN_LETTER;
            break;
          case Utility.CT_SINGLE:// 12021-2129-3121
            if (Utility.getCharCount("+-1234567890", atom.getWord()) == atom.getLen()) {
              pos = -POSTag.NUM;// 'm'*256
              word = Utility.UNKNOWN_NUM;
            } else {
              pos = -POSTag.NOUN_LETTER;//
              word = Utility.UNKNOWN_LETTER;
            }
            value = 0;
            break;
          default:
            pos = atom.getPos();// '?'*256;
            break;
          }

          int gbkID = dictLib.getGBKID(word);
          sn = new SegNode(i, i + 1, pos, value, word);
          sn.setGbkID(gbkID);
        }

        sn.setSrcWord(atom.getWord());
        segGraph.insert(sn, true);
      }

      StringBuffer words = new StringBuffer();
      for (int i = 0; i < atoms.size(); i++) {
        int j = i + 1;
        words.delete(0, words.length());
        words.append(atoms.get(i).getWord());

        // If the two atoms form "月份" (month), do not split them
        boolean flag = false;
        if (j < atoms.size()) {
          Atom a2 = atoms.get(j);
          if ("月".equals(words.toString()) && "份".equals(a2.getWord())) {
            segGraph.delete(i, j);
            segGraph.delete(i + 1, j + 1);
            words.append(a2.getWord());
            flag = true;
            j++;
          }
        }

        SegAtom sa = null;
        String word = words.toString();
        int gbkID = dictLib.getGBKID(word);
        int wordMaxLen = dict.getWordMaxLen(word, gbkID);
        for (; j <= atoms.size() && word.length() < wordMaxLen; j++) {
          word = words.toString();
          sa = dict.getSegAtom(word, gbkID);
          if (sa != null) {
            // 1���ڣ�1999��ĩ
            // if (word.length() == 2 && segGraph.getSize() > 0) {
            // SegNode g2 = segGraph.getLast();
            // if (Utility.isAllNum(g2.getWord()) ||
            // Utility.isAllChinese(g2.getWord())
            // && (g2.getWord().indexOf("��") == 0 ||
            // g2.getWord().indexOf("��") == 0)) {
            // if ("ĩ���е�ǰ���".indexOf(words.substring(1)) != -1)
            // break;
            // }
            // }
            // Only one part of speech for this word: store it on the node
            SegNode sg = null;
            if (sa.getPosCount() == 1) {
              Pos pos = sa.getPos(0);
              sg = new SegNode(i, j, pos.getTag(), sa.getTotalFreq(), word);
            } else
              sg = new SegNode(i, j, 0, sa.getTotalFreq(), word);
            sg.setSrcWord(word);
            sg.setGbkID(gbkID);
            segGraph.insert(sg, true);
          }

          if (j < atoms.size()) {
            String word2 = atoms.get(j).getWord();
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    if (seg != null && dict != null && biDict != null) {
      segGraph = new SegGraph();
      ArrayList<SegNode> sgs = seg.getSnList();

      for (int i = 0; sgs != null && i < sgs.size(); i++) {
        SegNode sg = sgs.get(i);
        if (sg.getPos() >= 0)
          curFreq = sg.getWeight();
        else {
          int gbkID = sg.getGbkID();// dictLib.getGBKID(sg.getWord());
          curFreq = dict.getFreq(sg.getWord(), 2, gbkID);
        }

        // Get all elements whose row value equals this element's column value
        ArrayList<SegNode> nextSgs = seg.getNextElements(i);
        for (SegNode graph : nextSgs) {
          String twoWords = sg.getWord();
          twoWords += Utility.WORD_SEGMENTER;
          twoWords += graph.getWord();
          int gbkID = sg.getGbkID();// dictLib.getGBKID(twoWords);

          // Compute the smoothed value between the two adjacent words
          // -log{a*P(Ci-1)+(1-a)P(Ci|Ci-1)} Note 0<a<1
          int twoFreq = biDict.getFreq(twoWords, 3, gbkID);
          double temp = (double) 1 / Utility.MAX_FREQUENCE;
          double value = smoothParam * (1 + curFreq) / (Utility.MAX_FREQUENCE + 80000);
          value += (1 - smoothParam) * ((1 - temp) * twoFreq / (1 + curFreq) + temp);
          value = -Math.log(value);

          if (value < 0) {
            value += sg.getFreq();
          }

          SegNode sg2 = new SegNode();
          // Position in the list of the word before the separator '@'
          int wordIndex = getWordIndex(sgs, sg);
          sg2.setRow(wordIndex);

          // Position in the list of the word after the separator '@'
          wordIndex = getWordIndex(sgs, graph);
          sg2.setCol(wordIndex);
          sg2.setWord(twoWords);
          sg2.setPos(sg.getPos());
          sg2.setWeight(value);
          sg2.setGbkID(gbkID);
          segGraph.insert(sg2, false);
        }
      }
    }
    return segGraph;
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

  /**
   * Creates a graph backed by the given list of segment nodes.
   *
   * <p>The list is stored by reference; no copy is made.
   *
   * @param snList the nodes that make up this graph
   *               NOTE(review): lookups in this class appear to assume the list is
   *               already ordered by row/column — confirm against callers
   */
  public SegGraph(ArrayList<SegNode> snList) {
    this.snList = snList;
  }

  public SegNode getElement(int row, int col) {
    SegNode result = new SegNode();
    result.setWeight(Utility.INFINITE_VALUE);
    // if (row > m_nRow || col > m_nCol)
    // return null;

    int index = 0;
    if (snList != null) {
      if (isRowFirst) {
        for (int i = 0; i < snList.size(); i++, index++) {
          SegNode sg = snList.get(i);
          if (row != -1 && sg.getRow() < row || col != -1 && sg.getRow() == row && sg.getCol() < col)
            continue;
          else
            break;
        }
      } else {
        for (int i = 0; i < snList.size(); i++, index++) {
          SegNode sg = snList.get(i);
          if (col != -1 && sg.getCol() < col || row != -1 && sg.getCol() == col && sg.getRow() < row)
            continue;
          else
            break;
        }
      }

      // Find it and return the value
      if (index < snList.size()) {
        SegNode sg = snList.get(index);
        if ((sg.getRow() == row || row == -1) && (sg.getCol() == col || col == -1))
          result = sg;
      }
    }
    return result;
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    if (sg != null) {
      if (snList == null)
        snList = new ArrayList<SegNode>();

      int i = 0;
      SegNode sgTemp = null;
      if (isRowFirst) {
        for (i = 0; i < snList.size(); i++) {
          sgTemp = snList.get(i);
          if (sgTemp.getRow() < sg.getRow() || sgTemp.getRow() == sg.getRow()
              && sgTemp.getCol() < sg.getCol())
            continue;
          else
            break;
        }
      } else {
        for (i = 0; i < snList.size(); i++) {
          sgTemp = snList.get(i);
          if (sgTemp.getCol() < sg.getCol() || sgTemp.getCol() == sg.getCol()
              && sgTemp.getRow() < sg.getRow())
            continue;
          else
            break;
        }
      }

      if (sgTemp != null && sgTemp.getRow() == sg.getRow() && sgTemp.getCol() == sg.getCol())
        sgTemp = sg;
      else if (i > 0)
        snList.add(i - 1, sg);

    }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

    ArrayList<SegNode> result = null;

    if (snList != null && snList.size() > 0 && curIndex >= 0) {
      result = new ArrayList<SegNode>();
      for (int i = 0; i < snList.size(); i++) {
        SegNode sg = snList.get(i);
        if (isColFirst) {
          if (sg.getCol() == curIndex)
            result.add(sg);
        } else {
          if (sg.getRow() == curIndex)
            result.add(sg);
        }

      }
    }
View Full Code Here

Examples of org.ictclas4j.bean.SegNode

            nPos = j;
            personName = "";
            // Get the possible person name
            while (nPos < j + patterns[k].length()) {
              SegNode sn = sns.get(nPos);
              if (sn.getPos() < 4
                  && unknownDict.getFreq(sn.getWord(), sn.getPos()) < Utility.LITTLE_FREQUENCY)
                personName += sn.getWord();
              nPos += 1;
            }
            if ("CDCD".equals(patterns[k])) {
              if (GetForeignCharCount(personName) > 0)
                j += patterns[k].length() - 1;
              continue;
            }

            SegNode usn = new SegNode();
            usn.setRow(sns.get(j).getRow());
            usn.setCol(sns.get(j + patterns[k].length() - 1).getCol());
            usn.setWord(unknownFlags);
            usn.setSrcWord(personName);
            double value = -Math.log(factor[k]) + computePossibility(j, patterns[k].length(), sns);
            usn.setPos(pos);
            usn.setValue(value);
            segGraph.insert(usn, true);

            j += patterns[k].length();
            bMatched = true;
          }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.