Package weka.classifiers.bayes.net.search.ci

Source Code of weka.classifiers.bayes.net.search.ci.ICSSearchAlgorithm$SeparationSet

/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* ICSSearchAlgorithm.java
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
*
*/


package weka.classifiers.bayes.net.search.ci;

import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.net.ParentSet;
import weka.core.Instances;
import weka.core.Option;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;

import java.io.FileReader;
import java.util.Enumeration;
import java.util.Vector;

/**
<!-- globalinfo-start -->
* This Bayes Network learning algorithm uses conditional independence tests to find a skeleton, finds V-nodes and applies a set of rules to find the directions of the remaining arrows.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -cardinality &lt;num&gt;
*  When determining whether an edge exists a search is performed
*  for a set Z that separates the nodes. MaxCardinality determines
*  the maximum size of the set Z. This greatly influences the
*  length of the search. (default 2)</pre>
*
* <pre> -mbc
*  Applies a Markov Blanket correction to the network structure,
*  after a network structure is learned. This ensures that all
*  nodes in the network are part of the Markov blanket of the
*  classifier node.</pre>
*
* <pre> -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
*  Score type (BAYES, BDeu, MDL, ENTROPY and AIC)</pre>
*
<!-- options-end -->
*
* @author Remco Bouckaert
* @version $Revision: 1.8 $
*/
public class ICSSearchAlgorithm
    extends CISearchAlgorithm {

    /** for serialization */
    static final long serialVersionUID = -2510985917284798576L;
 
    /**
     * returns the name of the attribute with the given index
     *
     * @param iAttribute the index of the attribute
     * @return the name of the attribute
     */
    String name(int iAttribute) {
      return m_instances.attribute(iAttribute).name();
    }
   
    /**
     * returns the number of attributes
     *
     * @return the number of attributes
     */
    int maxn() {
      return m_instances.numAttributes();
    }
   
    /** maximum size of separating set **/
    private int m_nMaxCardinality = 2;

    /**
     * sets the cardinality
     *
     * @param nMaxCardinality the max cardinality
     */
    public void setMaxCardinality(int nMaxCardinality) {
      m_nMaxCardinality = nMaxCardinality;
    }
   
    /**
     * returns the max cardinality
     *
     * @return the max cardinality
     */
    public int getMaxCardinality() {
      return m_nMaxCardinality;
    }
 

 
    class SeparationSet
        implements RevisionHandler {
     
        public int [] m_set;
       
        /**
         * constructor
         */
        public SeparationSet() {
      m_set= new int [getMaxCardinality() + 1];
        } // c'tor

        public boolean contains(int nItem) {
          for (int iItem = 0; iItem < getMaxCardinality() && m_set[iItem] != -1; iItem++) {
            if (m_set[iItem] == nItem) {
          return true;
        }
          }
      return false;
      } // contains
       
        /**
         * Returns the revision string.
         *
         * @return    the revision
         */
        public String getRevision() {
          return RevisionUtils.extract("$Revision: 1.8 $");
        }

    } // class sepset

  /**
   * Search for Bayes network structure using ICS algorithm
   * @param bayesNet datastructure to build network structure for
   * @param instances data set to learn from
   * @throws Exception if something goes wrong
   */
  protected void search(BayesNet bayesNet, Instances instances) throws Exception {
        // init
        m_BayesNet = bayesNet;
        m_instances = instances;

        boolean edges[][] = new boolean [maxn() + 1][];
    boolean [] [] arrows = new boolean [maxn() + 1][];
        SeparationSet [] [] sepsets = new SeparationSet [maxn() + 1][];
        for (int iNode = 0 ; iNode < maxn() + 1; iNode++) {
            edges[iNode] = new boolean[maxn()];
      arrows[iNode] = new boolean[maxn()];
            sepsets[iNode] = new SeparationSet[maxn()];
        }

        calcDependencyGraph(edges, sepsets);
        calcVeeNodes(edges, arrows, sepsets);
        calcArcDirections(edges, arrows);

    // transfrom into BayesNet datastructure
    for (int iNode = 0; iNode < maxn(); iNode++) {
      // clear parent set of AttributeX
      ParentSet oParentSet = m_BayesNet.getParentSet(iNode);
      while (oParentSet.getNrOfParents() > 0) {
        oParentSet.deleteLastParent(m_instances);
      }
      for (int iParent = 0; iParent < maxn(); iParent++) {
        if (arrows[iParent][iNode]) {
          oParentSet.addParent(iParent, m_instances);
        }
      }
    }
  } // search
 
 
  /** CalcDependencyGraph determines the skeleton of the BayesNetwork by
   * starting with a complete graph and removing edges (a--b) if it can
   * find a set Z such that a and b conditionally independent given Z.
   * The set Z is found by trying all possible subsets of nodes adjacent
   * to a and b, first of size 0, then of size 1, etc. up to size
   * m_nMaxCardinality
   * @param edges boolean matrix representing the edges
   * @param sepsets set of separating sets
   */
  void calcDependencyGraph(boolean[][] edges, SeparationSet[][] sepsets) {
    /*calc undirected graph a-b iff D(a,S,b) for all S)*/
    SeparationSet oSepSet;

    for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
      /*start with complete graph*/
      for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
        edges[iNode1][iNode2] = true;
      }
    }
    for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
      edges[iNode1][iNode1] = false;
    }

    for (int iCardinality = 0; iCardinality <= getMaxCardinality(); iCardinality++) {
      for (int iNode1 = 0; iNode1 <= maxn() - 2; iNode1++) {
        for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
          if (edges[iNode1][iNode2]) {
            oSepSet = existsSepSet(iNode1, iNode2, iCardinality, edges);
            if (oSepSet != null) {
              edges[iNode1][iNode2] = false;
              edges[iNode2][iNode1] = false;
              sepsets[iNode1][iNode2] = oSepSet;
              sepsets[iNode2][iNode1] = oSepSet;
              // report separating set
              System.err.print("I(" + name(iNode1) + ", {");
              for (int iNode3 = 0; iNode3 < iCardinality; iNode3++) {
                System.err.print(name(oSepSet.m_set[iNode3]) + " ");
              }
              System.err.print("} ," + name(iNode2) + ")\n");
            }
          }
        }
      }
      // report current state of dependency graph
      System.err.print(iCardinality + " ");
      for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
        System.err.print(name(iNode1) + " ");
      }
      System.err.print('\n');
      for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
        for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
          if (edges[iNode1][iNode2])
            System.err.print("X ");
          else
            System.err.print(". ");
        }
        System.err.print(name(iNode1) + " ");
        System.err.print('\n');
      }
    }
  } /*CalcDependencyGraph*/

  /** ExistsSepSet tests if a separating set Z of node a and b exists of given
   * cardiniality exists.
   * The set Z is found by trying all possible subsets of nodes adjacent
   * to both a and b of the requested cardinality.
   * @param iNode1 index of first node a
   * @param iNode2 index of second node b
   * @param nCardinality size of the separating set Z
   * @param edges
   * @return SeparationSet containing set that separates iNode1 and iNode2 or null if no such set exists
   */
    SeparationSet existsSepSet(int iNode1, int iNode2, int nCardinality, boolean [] [] edges)
    {
        /*Test if a separating set of node d and e exists of cardiniality k*/
//        int iNode1_, iNode2_;
        int iNode3, iZ;
    SeparationSet Z = new SeparationSet();
    Z.m_set[nCardinality] = -1;

//        iNode1_ = iNode1;
//        iNode2_ = iNode2;

    // find first candidate separating set Z
        if (nCardinality > 0) {
            Z.m_set[0] = next(-1, iNode1, iNode2, edges);
            iNode3 = 1;
            while (iNode3 < nCardinality) {
              Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
              iNode3++;
            }
        }

        if (nCardinality > 0) {
          iZ = maxn() - Z.m_set[nCardinality - 1] - 1;
        } else {
          iZ = 0;
        }
       

        while (iZ >= 0)
        { 
          //check if candidate separating set makes iNode2_ and iNode1_ independent
            if (isConditionalIndependent(iNode2, iNode1, Z.m_set, nCardinality))  {
                return Z;
            }
      // calc next candidate separating set
            if (nCardinality > 0) {
                Z.m_set[nCardinality - 1] = next(Z.m_set[nCardinality - 1], iNode1, iNode2, edges);
            }
            iZ = nCardinality - 1;  
            while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                iZ = nCardinality - 1;
                while (iZ >= 0 && Z.m_set[iZ] >= maxn()) {
                  iZ--;
                }
                if (iZ < 0) {
                    break;
                }
                Z.m_set[iZ] = next(Z.m_set[iZ], iNode1, iNode2, edges);
                for (iNode3 = iZ + 1; iNode3 < nCardinality; iNode3++) {
                    Z.m_set[iNode3] = next(Z.m_set[iNode3 - 1], iNode1, iNode2, edges);
                }
                iZ = nCardinality - 1;
            }
        }

        return null;
    /*ExistsSepSet*/

  /**
   * determine index of node that makes next candidate separating set
   * adjacent to iNode1 and iNode2, but not iNode2 itself
   * @param x index of current node
   * @param iNode1 first node
   * @param iNode2 second node (must be larger than iNode1)
   * @param edges skeleton so far
   * @return int index of next node adjacent to iNode1 after x
   */
  int next(int x, int iNode1, int iNode2, boolean [] [] edges)
  {
    x++;
    while (x < maxn() && (!edges[iNode1][x] || !edges[iNode2][x] ||x == iNode2)) {
      x++;
    }
    return x;
  /*next*/


  /** CalcVeeNodes tries to find V-nodes, i.e. nodes a,b,c such that
   * a->c<-b and a-/-b. These nodes are identified by finding nodes
   * a,b,c in the skeleton such that a--c, c--b and a-/-b and furthermore
   * c is not in the set Z that separates a and b
   * @param edges skeleton
   * @param arrows resulting partially directed skeleton after all V-nodes
   * have been identified
   * @param sepsets separating sets
   */
  void calcVeeNodes(
    boolean[][] edges,
    boolean[][] arrows,
    SeparationSet[][] sepsets) {

    // start with complete empty graph
    for (int iNode1 = 0; iNode1 < maxn(); iNode1++) {
      for (int iNode2 = 0; iNode2 < maxn(); iNode2++) {
        arrows[iNode1][iNode2] = false;
      }
    }

    for (int iNode1 = 0; iNode1 < maxn() - 1; iNode1++) {
      for (int iNode2 = iNode1 + 1; iNode2 < maxn(); iNode2++) {
        if (!edges[iNode1][iNode2]) { /*i nonadj j*/
          for (int iNode3 = 0; iNode3 < maxn(); iNode3++) {
            if ((iNode3 != iNode1
              && iNode3 != iNode2
              && edges[iNode1][iNode3]
              && edges[iNode2][iNode3])
              & (!sepsets[iNode1][iNode2].contains(iNode3))) {
              arrows[iNode1][iNode3] = true; /*add arc i->k*/
              arrows[iNode2][iNode3] = true; /*add arc j->k*/
            }
          }
        }
      }
    }
  } // CalcVeeNodes


  /** CalcArcDirections assigns directions to edges that remain after V-nodes have
   * been identified. The arcs are directed using the following rules:
     Rule 1: i->j--k & i-/-k => j->k
     Rule 2: i->j->k & i--k => i->k
     Rule 3  m
       /|\
      i | k  => m->j
  i->j<-k  \|/
        j
 
     Rule 4  m
       / \
      i---k  => i->m & k->m
    i->j   \ /
        j
     Rule 5: if no edges are directed then take a random one (first we can find)
   * @param edges skeleton
   * @param arrows resulting fully directed DAG
   */
  void calcArcDirections(boolean[][] edges, boolean[][] arrows) {
    /*give direction to remaining arcs*/
    int i, j, k, m;
    boolean bFound;

    do {
      bFound = false;

      /*Rule 1: i->j--k & i-/-k => j->k*/

      for (i = 0; i < maxn(); i++) {
        for (j = 0; j < maxn(); j++) {
          if (i != j && arrows[i][j]) {
            for (k = 0; k < maxn(); k++) {
              if (i != k
                && j != k
                && edges[j][k]
                && !edges[i][k]
                && !arrows[j][k]
                && !arrows[k][j]) {
                arrows[j][k] = true;
                bFound = true;
              }
            }
          }
        }
      }

      /*Rule 2: i->j->k & i--k => i->k*/

      for (i = 0; i < maxn(); i++) {
        for (j = 0; j < maxn(); j++) {
          if (i != j && arrows[i][j]) {
            for (k = 0; k < maxn(); k++) {
              if (i != k
                && j != k
                && edges[i][k]
                && arrows[j][k]
                && !arrows[i][k]
                && !arrows[k][i]) {
                arrows[i][k] = true;
                bFound = true;
              }
            }
          }
        }
      }

      /* Rule 3  m
               /|\
              i | k  => m->j
      i->j<-k  \|/
                j
      */
      for (i = 0; i < maxn(); i++) {
        for (j = 0; j < maxn(); j++) {
          if (i != j && arrows[i][j]) {
            for (k = 0; k < maxn(); k++) {
              if (k != i
                && k != j
                && arrows[k][j]
                && !edges[k][i]) {
                for (m = 0; m < maxn(); m++) {
                  if (m != i
                    && m != j
                    && m != k
                    && edges[m][i]
                    && !arrows[m][i]
                    && !arrows[i][m]
                    && edges[m][j]
                    && !arrows[m][j]
                    && !arrows[j][m]
                    && edges[m][k]
                    && !arrows[m][k]
                    && !arrows[k][m]) {
                    arrows[m][j] = true;
                    bFound = true;
                  }
                }
              }
            }
          }
        }
      }

      /* Rule 4  m
               / \
              i---k  => i->m & k->m
        i->j   \ /
                j
      */
      for (i = 0; i < maxn(); i++) {
        for (j = 0; j < maxn(); j++) {
          if (i != j && arrows[j][i]) {
            for (k = 0; k < maxn(); k++) {
              if (k != i
                && k != j
                && edges[k][j]
                && !arrows[k][j]
                && !arrows[j][k]
                && edges[k][i]
                && !arrows[k][i]
                && !arrows[i][k]) {
                for (m = 0; m < maxn(); m++) {
                  if (m != i
                    && m != j
                    && m != k
                    && edges[m][i]
                    && !arrows[m][i]
                    && !arrows[i][m]
                    && edges[m][k]
                    && !arrows[m][k]
                    && !arrows[k][m]) {
                    arrows[i][m] = true;
                    arrows[k][m] = true;
                    bFound = true;
                  }
                }
              }
            }
          }
        }
      }

      /*Rule 5: if no edges are directed then take a random one (first we can find)*/

      if (!bFound) {
        i = 0;
        while (!bFound && i < maxn()) {
          j = 0;
          while (!bFound && j < maxn()) {
            if (edges[i][j]
              && !arrows[i][j]
              && !arrows[j][i]) {
              arrows[i][j] = true;
              bFound = true;
            }
            j++;
          }
          i++;
        }
      }

    }
    while (bFound);

  } // CalcArcDirections

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector result = new Vector();
   
    result.addElement(new Option(
                "\tWhen determining whether an edge exists a search is performed \n"
              + "\tfor a set Z that separates the nodes. MaxCardinality determines \n"
              + "\tthe maximum size of the set Z. This greatly influences the \n"
              + "\tlength of the search. (default 2)",
        "cardinality", 1, "-cardinality <num>"));
   
    Enumeration en = super.listOptions();
    while (en.hasMoreElements())
      result.addElement(en.nextElement());
   
    return result.elements();
  } // listOption
 
  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -cardinality &lt;num&gt;
   *  When determining whether an edge exists a search is performed
   *  for a set Z that separates the nodes. MaxCardinality determines
   *  the maximum size of the set Z. This greatly influences the
   *  length of the search. (default 2)</pre>
   *
   * <pre> -mbc
   *  Applies a Markov Blanket correction to the network structure,
   *  after a network structure is learned. This ensures that all
   *  nodes in the network are part of the Markov blanket of the
   *  classifier node.</pre>
   *
   * <pre> -S [BAYES|MDL|ENTROPY|AIC|CROSS_CLASSIC|CROSS_BAYES]
   *  Score type (BAYES, BDeu, MDL, ENTROPY and AIC)</pre>
   *
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String        tmpStr;
   
    tmpStr = Utils.getOption("cardinality", options);
    if (tmpStr.length() != 0)
      setMaxCardinality(Integer.parseInt(tmpStr));
    else
            setMaxCardinality(2);
           
          super.setOptions(options);
  } // setOptions
 
  /**
   * Gets the current settings of the Classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */
  public String[] getOptions() {
    Vector        result;
    String[]      options;
    int           i;
   
    result  = new Vector();
    options = super.getOptions();
    for (i = 0; i < options.length; i++)
      result.add(options[i]);
   
    result.add("-cardinality");
    result.add("" + getMaxCardinality());
   
    return (String[]) result.toArray(new String[result.size()]);
  } // getOptions
 

  /**
   * @return a string to describe the MaxCardinality option.
   */
  public String maxCardinalityTipText() {
    return "When determining whether an edge exists a search is performed for a set Z "+
    "that separates the nodes. MaxCardinality determines the maximum size of the set Z. " +
    "This greatly influences the length of the search. Default value is 2.";
  } // maxCardinalityTipText

  /**
   * This will return a string describing the search algorithm.
   * @return The string.
   */
  public String globalInfo() {
    return "This Bayes Network learning algorithm uses conditional independence tests " +
    "to find a skeleton, finds V-nodes and applies a set of rules to find the directions " +
    "of the remaining arrows.";
  }

  /**
   * Returns the revision string.
   *
   * @return    the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.8 $");
  }

  /**
   * for testing the class
   *
   * @param argv the commandline parameters
   */
  static public void main(String [] argv) {
    try {
      BayesNet b = new BayesNet();
      b.setSearchAlgorithm( new ICSSearchAlgorithm());
      Instances instances = new Instances(new FileReader("C:\\eclipse\\workspace\\weka\\data\\contact-lenses.arff"));
      instances.setClassIndex(instances.numAttributes() - 1);
      b.buildClassifier(instances);
      System.out.println(b.toString());
    } catch (Exception e) {
      e.printStackTrace();
    }
  } // main

} // class ICSSearchAlgorithm
TOP

Related Classes of weka.classifiers.bayes.net.search.ci.ICSSearchAlgorithm$SeparationSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.