Package weka.associations

Source Code of weka.associations.LabeledItemSet

/*
*    This program is free software; you can redistribute it and/or modify
*    it under the terms of the GNU General Public License as published by
*    the Free Software Foundation; either version 2 of the License, or
*    (at your option) any later version.
*
*    This program is distributed in the hope that it will be useful,
*    but WITHOUT ANY WARRANTY; without even the implied warranty of
*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*    GNU General Public License for more details.
*
*    You should have received a copy of the GNU General Public License
*    along with this program; if not, write to the Free Software
*    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/*
* LabeledItemSet.java
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
*
*/

package weka.associations;

import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;

import java.io.Serializable;
import java.util.Enumeration;
import java.util.Hashtable;


/**
* Class for storing a set of items together with a  class label. Item sets are stored in a lexicographic
* order, which is determined by the header information of the set of instances
* used for generating the set of items. All methods in this class assume that
* item sets are stored in lexicographic order.
* The class provides the methods used for item sets in class association rule mining.
* Because every item set knows its class label the training set can be splitted up virtually.
*
* @author Stefan Mutter (mutter@cs.waikato.ac.nz)
* @version $Revision: 1.5 $
*/

public class LabeledItemSet
  extends ItemSet
  implements Serializable, RevisionHandler {

  /** for serialization */
  private static final long serialVersionUID = 4158771925518299903L;

    /** The class label. */
    protected int m_classLabel;

     /** The support of the rule. */
    protected int m_ruleSupCounter;   
 
    /**
     * Constructor
     *
     * @param totalTrans the total number of transactions
     * @param classLabel the class lebel
     */   
    public LabeledItemSet(int totalTrans, int classLabel){
 
  super(totalTrans);
  m_classLabel = classLabel;
    }

   

    /**
     * Deletes all item sets that don't have minimum support and have more than maximum support
     * @return the reduced set of item sets
     * @param maxSupport the maximum support
     * @param itemSets the set of item sets to be pruned
     * @param minSupport the minimum number of transactions to be covered
     */
  public static FastVector deleteItemSets(FastVector itemSets,
            int minSupport,
            int maxSupport) {

    FastVector newVector = new FastVector(itemSets.size());

    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
      if ((current.m_ruleSupCounter >= minSupport)
    && (current.m_ruleSupCounter <= maxSupport))
            newVector.addElement(current);
    }
    return newVector;
  }

    /**
   * Tests if two item sets are equal.
   *
   * @param itemSet another item set
   * @return true if this item set contains the same items as the given one
   */
  public final boolean equals(Object itemSet) {

    if (!(this.equalCondset(itemSet)))
      return false;
    if(m_classLabel != ((LabeledItemSet)itemSet).m_classLabel)
  return false;
   
    return true;
  }

  /**
   * Compares two item sets
   * @param itemSet an item set
   * @return true if the the item sets are equal, false otherwise
   */ 
    public final boolean equalCondset(Object itemSet) {

    if ((itemSet == null) || !(itemSet.getClass().equals(this.getClass()))) {
      return false;
    }
    if (m_items.length != ((ItemSet)itemSet).items().length)
      return false;
    for (int i = 0; i < m_items.length; i++)
      if (m_items[i] != ((ItemSet)itemSet).itemAt(i))
  return false;
    return true;
  }

    /**
   * Return a hashtable filled with the given item sets.
   *
   * @param itemSets the set of item sets to be used for filling the hash table
   * @param initialSize the initial size of the hashtable
   * @return the generated hashtable
   */
  public static Hashtable getHashtable(FastVector itemSets, int initialSize) {

    Hashtable hashtable = new Hashtable(initialSize);
    for (int i = 0; i < itemSets.size(); i++) {
      LabeledItemSet current = (LabeledItemSet)itemSets.elementAt(i);
      hashtable.put(current, new Integer(current.m_classLabel));
    }
  
    return hashtable;
  }



    /**
     * Merges all item sets in the set of (k-1)-item sets
     * to create the (k)-item sets and updates the counters.
     * @return the generated (k)-item sets
     * @param totalTrans the total number of transactions
     * @param itemSets the set of (k-1)-item sets
     * @param size the value of (k-1)
     */
  public static FastVector mergeAllItemSets(FastVector itemSets, int size,
              int totalTrans) {
     
      FastVector newVector = new FastVector();
      LabeledItemSet result;
      int numFound, k;
     
      for (int i = 0; i < itemSets.size(); i++) {
    LabeledItemSet first = (LabeledItemSet)itemSets.elementAt(i);
      out:
    for (int j = i+1; j < itemSets.size(); j++) {
        LabeledItemSet second = (LabeledItemSet)itemSets.elementAt(j);
        while(first.m_classLabel != second.m_classLabel){
      j++;
      if(j == itemSets.size())
          break out;
      second = (LabeledItemSet)itemSets.elementAt(j);
        }
        result = new LabeledItemSet(totalTrans,first.m_classLabel);
        result.m_items = new int[first.m_items.length];
       
        // Find and copy common prefix of size 'size'
        numFound = 0;
        k = 0;
        while (numFound < size) {
      if (first.m_items[k] == second.m_items[k]) {
          if (first.m_items[k] != -1)
        numFound++;
          result.m_items[k] = first.m_items[k];
      } else
          break out;
      k++;
        }
       
        // Check difference
        while (k < first.m_items.length) {
      if ((first.m_items[k] != -1) && (second.m_items[k] != -1))
          break;
      else {
          if (first.m_items[k] != -1)
        result.m_items[k] = first.m_items[k];
          else
        result.m_items[k] = second.m_items[k];
      }
      k++;
        }
        if (k == first.m_items.length) {
      result.m_ruleSupCounter = 0;
      result.m_counter = 0;
      newVector.addElement(result);
        }
    }
      }
     
    return newVector;
  }
   
  /**
   * Splits the class attribute away. Depending on the invert flag, the instances without class attribute or only the class attribute of all instances is returned
   * @param instances the instances
   * @param invert flag; if true only the class attribute remains, otherweise the class attribute is the only attribute that is deleted.
   * @throws Exception exception if instances cannot be splitted
   * @return Instances without the class attribute or instances with only the class attribute
   */ 
    public static Instances divide(Instances instances, boolean invert) throws Exception{
   
  Instances newInstances = new Instances(instances);
  if(instances.classIndex() < 0)
            throw new Exception("For class association rule mining a class attribute has to be specified.");
  if(invert){
    for(int i=0;i<newInstances.numAttributes();i++){
        if(i!=newInstances.classIndex()){
      newInstances.deleteAttributeAt(i);
      i--;
        }
    }
      return newInstances;
  }
  else{
      newInstances.setClassIndex(-1);
            newInstances.deleteAttributeAt(instances.classIndex());
      return newInstances;
  }
    }

  
   
     /**
      * Converts the header info of the given set of instances into a set
      * of item sets (singletons). The ordering of values in the header file
      * determines the lexicographic order. Each item set knows its class label.
      * @return a set of item sets, each containing a single item
      * @param instancesNoClass instances without the class attribute
      * @param classes the values of the class attribute sorted according to instances
      * @exception Exception if singletons can't be generated successfully
      */
    public static FastVector singletons(Instances instancesNoClass, Instances classes) throws Exception {

    FastVector cSet, setOfItemSets = new FastVector();
    LabeledItemSet current;
   
   
    //make singletons
    for (int i = 0; i < instancesNoClass.numAttributes(); i++) {
      if (instancesNoClass.attribute(i).isNumeric())
    throw new Exception("Can't handle numeric attributes!");
      for (int j = 0; j < instancesNoClass.attribute(i).numValues(); j++){
    for(int k =0; k < (classes.attribute(0)).numValues(); k++){
        current = new LabeledItemSet(instancesNoClass.numInstances(),k);
        current.m_items = new int[instancesNoClass.numAttributes()];
        for (int l = 0; l < instancesNoClass.numAttributes(); l++)
      current.m_items[l] = -1;
        current.m_items[i] = j;
        setOfItemSets.addElement(current);
    }     
      }
    }
    return setOfItemSets;
  }
   
   
 
    /**
   * Prunes a set of (k)-item sets using the given (k-1)-item sets.
   *
   * @param toPrune the set of (k)-item sets to be pruned
   * @param kMinusOne the (k-1)-item sets to be used for pruning
   * @return the pruned set of item sets
   */
  public static FastVector pruneItemSets(FastVector toPrune, Hashtable kMinusOne){

    FastVector newVector = new FastVector(toPrune.size());
    int help, j;

  
    for (int i = 0; i < toPrune.size(); i++) {
      LabeledItemSet current = (LabeledItemSet)toPrune.elementAt(i);
     
      for (j = 0; j < current.m_items.length; j++){
        if (current.m_items[j] != -1) {
      help = current.m_items[j];
      current.m_items[j] = -1;
      if(kMinusOne.get(current) != null && (current.m_classLabel == (((Integer)kMinusOne.get(current)).intValue())))
          current.m_items[j] = help;
                  else{
          current.m_items[j] = help;
          break;
      }
    }
      }
      if (j == current.m_items.length)
  newVector.addElement(current);
    }
    return newVector;
  }

   
  /**
   * Outputs the support for an item set.
   *
   * @return the support
   */
  public final int support() {

    return m_ruleSupCounter;
  }

 
 
     /**
      * Updates counter of item set with respect to given transaction.
      * @param instanceNoClass instances without the class attribute
      * @param instanceClass the values of the class attribute sorted according to instances
      */
  public final void upDateCounter(Instance instanceNoClass, Instance instanceClass) {

    
      if (containedBy(instanceNoClass)){
    m_counter++;
    if(this.m_classLabel == instanceClass.value(0))
        m_ruleSupCounter++;
      }
  }

  /**
   * Updates counter of a specific item set
   * @param itemSets an item sets
   * @param instancesNoClass instances without the class attribute
   * @param instancesClass the values of the class attribute sorted according to instances
   */ 
   public static void upDateCounters(FastVector itemSets, Instances instancesNoClass, Instances instancesClass){

    for (int i = 0; i < instancesNoClass.numInstances(); i++) {
        Enumeration enu = itemSets.elements();
  while (enu.hasMoreElements())
            ((LabeledItemSet)enu.nextElement()).upDateCounter(instancesNoClass.instance(i),instancesClass.instance(i));
    }

  }

   /**
    * Generates rules out of item sets
    * @param minConfidence the minimum confidence
    * @param noPrune flag indicating whether the rules are pruned accoridng to the minimum confidence value
    * @return a set of rules
    */  
    public final FastVector[] generateRules(double minConfidence, boolean noPrune) {

  FastVector premises = new FastVector(),consequences = new FastVector(),
      conf = new FastVector();
  FastVector[] rules = new FastVector[3];
  ItemSet premise, consequence;
 
    // Generate all rules with class in the consequence.
  premise = new ItemSet(m_totalTransactions);
  consequence = new ItemSet(m_totalTransactions);
  int[] premiseItems = new int[m_items.length];
  int[] consequenceItems = new int[1];
  System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
        consequence.setItem(consequenceItems);
        premise.setItem(premiseItems);
  consequence.setItemAt(m_classLabel,0);
  consequence.setCounter(this.m_ruleSupCounter);
  premise.setCounter(this.m_counter);
  premises.addElement(premise);
  consequences.addElement(consequence);
  conf.addElement(new Double((double)this.m_ruleSupCounter/(double)this.m_counter));
     
  rules[0] = premises;
  rules[1] = consequences;
  rules[2] = conf;
  if(!noPrune)
      pruneRules(rules, minConfidence);
 
   
  return rules;
    }
   
    /**
     * Returns the revision string.
     *
     * @return    the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.5 $");
    }
}
TOP

Related Classes of weka.associations.LabeledItemSet

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.