Two-Way Anova in IronPython QuickStart Sample

Illustrates how to use the TwoWayAnovaModel class to perform a two-way analysis of variance in IronPython.

View this sample in: C#, Visual Basic, F#

import numerics

from System import Array

from Extreme.Statistics import *

# Illustrates the use of the TwoWayAnovaModel class for performing 
# a two-way analysis of variance.

# This example investigates the effect of the color and shape
# of packages on the sales of the product. The data comes from
# 12 stores. Packages can be either red, green or blue in color.
# The shape can be either square or rectangular.

# Set up the data in an ADO.NET data table.
import clr
from System.Data import DataTable

# Table schema: column name paired with the Python type handed to
# Columns.Add for that column.
dataTable = DataTable()
for columnName, columnType in [
        ("Store", int),
        ("Color", str),
        ("Shape", str),
        ("Sales", float)]:
    dataTable.Columns.Add(columnName, columnType)

# One observation per store, in column order (Store, Color, Shape, Sales).
# Each color contributes two square and two rectangular packages.
observations = [
    # Blue packages
    [1, "Blue", "Square", 6],
    [2, "Blue", "Square", 14],
    [3, "Blue", "Rectangle", 19],
    [4, "Blue", "Rectangle", 17],
    # Red packages
    [5, "Red", "Square", 18],
    [6, "Red", "Square", 11],
    [7, "Red", "Rectangle", 20],
    [8, "Red", "Rectangle", 23],
    # Green packages
    [9, "Green", "Square", 7],
    [10, "Green", "Square", 11],
    [11, "Green", "Rectangle", 18],
    [12, "Green", "Rectangle", 10],
]
# Rows.Add is given a .NET object array, so each list is wrapped
# in Array[object] before being appended.
for observation in observations:
    dataTable.Rows.Add(Array[object](observation))

# Construct the OneWayAnova object.
anova = TwoWayAnovaModel(dataTable, "Color", "Shape", "Sales")
# Verify that the design is balanced:
if not anova.IsBalanced:
	print "The design is not balanced."
# Perform the calculation.
# The AnovaTable property gives us a classic anova table.
# We can write the table directly to the console:
print anova.AnovaTable
# A Cell object represents the data in a cell of the model, # i.e. the data related to one combination of levels of each factor. 
# We can use it to access the group means of our color groups.

# First we get the CategoricalScale object so we can easily iterate
# through the levels:
colorFactor = anova.GetFactor(0)
for level in colorFactor.GetLevels():
	print "Mean for square boxes group '{0}': {1:.4f}".format(level, anova.Cells[level, "Square"].Mean)
# We could have accessed the cells directly as well:
print "Variance for red, rectangular packages:", anova.Cells["Red", "Rectangle"].Variance
# The special index Cell.All permits us to summarize the data
# over all levels of a factor. For example, to get the means
# of the shape groups, we use:
shapeFactor = anova.GetFactor(1)
for level in shapeFactor.GetLevels():
	print "Mean for group '{0}': {1:.4f}".format(level, anova.Cells[Cell.All, level].Mean)

# We can get the summary data for the entire model 
# by using the 'Cell.All' value for both indices:
totalSummary = anova.Cells[Cell.All, Cell.All]
print "Summary data:"
print "# observations:", totalSummary.Count
print "Grand mean:     ", totalSummary.Mean