edu.indiana.iucbrf.feature.stats
Class InformationTheory

java.lang.Object
  extended by edu.indiana.iucbrf.feature.stats.InformationTheory

public class InformationTheory
extends java.lang.Object


Field Summary
protected  java.util.HashSet allKeys
           
protected  java.util.ArrayList cases
           
protected  Domain domain
           
protected  double entropyC
           
protected  java.util.HashSet keysOfInterest
           
protected  java.util.HashMap keyToGain
           
protected  java.util.HashMap keyToGainRatio
           
protected  FeatureKey maxGainKey
           
protected  FeatureKey maxGainRatioKey
           
protected  KeyToValueToDouble probKeyToCountMap
           
protected  FeatureKey solutionFeatureKey
           
protected  FeatureSpec solutionFeatureSpec
           
protected  java.util.HashMap solValueToCount
           
 
Constructor Summary
InformationTheory(Domain domain)
          Creates a new instance of InformationTheory.
InformationTheory(Domain domain, java.util.HashSet allKeys)
          Creates a new instance of InformationTheory.
 
Method Summary
 double calcEntropy(int numCases, java.util.HashMap myValueToCount)
          Calculates the entropy with the given value counts.
protected  void calcEntropyC()
          Calculates the entropy over the solutions of all the cases.
protected  double calcEntropyOnSubset(FeatureKey key, java.lang.String attributeValue, java.util.LinkedList remainingCases)
          Calculates the entropy of the set of cases where the attribute specified by key has value attributeValue.
protected  double calcGain(FeatureKey key)
           
protected  double calcGainRatio(FeatureKey key)
           
protected  void calcGainRatios()
           
protected  void calcGains()
          Calculates the gain for each problem attribute in the domain; the result is a HashMap mapping each FeatureKey to its gain (a Double).
protected  double calcSplitInfo(FeatureKey key)
           
protected  void eraseCalculations()
          Destroy the old calculations.
 double getEntropy()
           
 double getGain(FeatureKey key)
           
 double getGainRatio(FeatureKey key)
           
 java.util.HashMap getGainRatios()
           
 java.util.HashMap getGains()
           
 FeatureKey getMaxGainKey()
           
 FeatureKey getMaxGainRatioKey()
           
protected  void init(Domain domain, java.util.HashSet allKeys)
           
protected  void reinitCounts()
          Reinitialize the counts of occurrences of attribute values, to handle a new list of cases.
 void setCases(java.util.ArrayList cases)
          Reset the state of the InformationTheory object for a new list of cases.
 void setCases(java.util.ArrayList cases, java.util.HashSet keysOfInterest)
          Reset the state of the InformationTheory object for a new list of cases.
 java.lang.String toString()
           
protected  void updateCounts()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
 

Field Detail

domain

protected Domain domain

solutionFeatureKey

protected FeatureKey solutionFeatureKey

solutionFeatureSpec

protected FeatureSpec solutionFeatureSpec

allKeys

protected java.util.HashSet allKeys

cases

protected java.util.ArrayList cases

keysOfInterest

protected java.util.HashSet keysOfInterest

probKeyToCountMap

protected KeyToValueToDouble probKeyToCountMap

solValueToCount

protected java.util.HashMap solValueToCount

entropyC

protected double entropyC

keyToGain

protected java.util.HashMap keyToGain

keyToGainRatio

protected java.util.HashMap keyToGainRatio

maxGainKey

protected FeatureKey maxGainKey

maxGainRatioKey

protected FeatureKey maxGainRatioKey
Constructor Detail

InformationTheory

public InformationTheory(Domain domain)
Creates a new instance of InformationTheory. allKeys will be calculated from the domain, as all problem FeatureKeys.


InformationTheory

public InformationTheory(Domain domain,
                         java.util.HashSet allKeys)
Creates a new instance of InformationTheory.

Parameters:
allKeys - the maximal set of keys that will ever be considered
Method Detail

init

protected void init(Domain domain,
                    java.util.HashSet allKeys)

setCases

public void setCases(java.util.ArrayList cases,
                     java.util.HashSet keysOfInterest)
Reset the state of the InformationTheory object for a new list of cases.

Parameters:
keysOfInterest - The FeatureKeys corresponding to problem features to be considered.

setCases

public void setCases(java.util.ArrayList cases)
Reset the state of the InformationTheory object for a new list of cases. All problem features are considered.


getEntropy

public double getEntropy()
Returns:
The entropy over the solutions of the cases.

getGains

public java.util.HashMap getGains()
Returns:
HashMap mapping FeatureKey to gain for that feature.

getGain

public double getGain(FeatureKey key)

getMaxGainKey

public FeatureKey getMaxGainKey()

getGainRatios

public java.util.HashMap getGainRatios()
Returns:
HashMap mapping FeatureKey to gain ratio for that feature.

getGainRatio

public double getGainRatio(FeatureKey key)

getMaxGainRatioKey

public FeatureKey getMaxGainRatioKey()

eraseCalculations

protected void eraseCalculations()
Destroy the old calculations. They will be recomputed if their values are requested.


reinitCounts

protected void reinitCounts()
Reinitialize the counts of occurrences of attribute values, to handle a new list of cases. Called by updateCounts().


updateCounts

protected void updateCounts()

calcEntropyC

protected void calcEntropyC()
Calculates the entropy over the solutions of all the cases.


calcEntropy

public double calcEntropy(int numCases,
                          java.util.HashMap myValueToCount)
Calculates the entropy with the given value counts.

Parameters:
myValueToCount - maps feature value (String) to count (Double)

calcGains

protected void calcGains()
Calculates the gain for each problem attribute in the domain; the result is a HashMap mapping each FeatureKey to its gain (a Double).


calcGain

protected double calcGain(FeatureKey key)

calcEntropyOnSubset

protected double calcEntropyOnSubset(FeatureKey key,
                                     java.lang.String attributeValue,
                                     java.util.LinkedList remainingCases)
Calculates the entropy of the set of cases where the attribute specified by key has value attributeValue. Returns 0 if there are no such cases.


calcGainRatios

protected void calcGainRatios()

calcGainRatio

protected double calcGainRatio(FeatureKey key)

calcSplitInfo

protected double calcSplitInfo(FeatureKey key)

toString

public java.lang.String toString()
Overrides:
toString in class java.lang.Object