Sorting and Filtering in IronPython QuickStart Sample

Illustrates how to sort and filter data used for data analysis in IronPython.

View this sample in: C# Visual Basic F#

```Python
import numerics

import clr
clr.AddReference("System.Data")
from System.Data import *
from System.Data.OleDb import *

# Variable classes reside in the Extreme.Statistics namespace.
from Extreme.Statistics import *
    
# Illustrates sorting and filtering of data sets and variables.

def LoadTimeSeriesData():
	filename = r"..\Data\MicrosoftStock.xls"
	connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source="+filename+";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1'"
	cnn = None
	ds = DataSet()
	try:
		cnn = OleDbConnection(connectionString)
		cnn.Open()
		adapter = OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn)
		adapter.Fill(	ds)
	except ex as OleDbException:
		print ex.InnerException
	finally:
		if cnn != None:
			cnn.Close()
	return ds.Tables[0]

seriesTable = LoadTimeSeriesData()
timeSeries = VariableCollection(seriesTable)

# Next, we create some helper variables:
date = timeSeries["Date"]
high = timeSeries["High"]
low = timeSeries["Low"]
open = timeSeries["Open"]
close = timeSeries["Close"]
volume = timeSeries["Volume"]

# Let's print some basic statistics for the full data set:
print "Total # observations:", timeSeries.Observations.Count
print "Average volume: {0:.0f}".format(volume.Mean)
print "Total volume: {0:.0f}".format(volume.Sum)

#
# Filtering
#

# Next, we create a filter, selecting observations where the close price
# was greater than the open price:
filter = close.Filters.GreaterThan(open)
# and set the VariableCollection's Filter property.
timeSeries.Filter = filter

# Data is now filtered:
print "Filtered # observations:", timeSeries.Observations.Count
# Summary statistics apply only to the filtered data:
print "Average volume: {0:.0f}".format(volume.Mean)
print "Total volume: {0:.0f}".format(volume.Sum)

# Filters can be combined using set operations.
volumeFilter = volume.Filters.Between(200e+6, 300e+6)
print "Volume filtered #:", volumeFilter.FilteredLength
combinedFilter = Filter.Intersection(volumeFilter, filter)
# Alternatively: combinedFilter = volumeFilter & filter
print "Combined filtered #:", combinedFilter.FilteredLength
timeSeries.Filter = combinedFilter

#
# Sorting
#

# The simplest way to sort data is calling the Sort method 
# with the name of the variable to sort on:
timeSeries.Sort("High")
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f}".format(high[i])
print 

# We can also create a CollectionSortOrder object that
# defines the sort order over multiple fields/variables.
# We first create an order with one variable:
ordering = CollectionSortOrder(timeSeries, "High", SortOrder.Ascending)
# We then use the Then method repeatedly to add more sort fields:
ordering = ordering.Then("Date", SortOrder.Descending)

# And call the Sort method with the CollectionSortOrder object:
timeSeries.Filter = high.Filters.Between(25.11, 25.15)
# Sort orders are automatically combined with filters.
timeSeries.Sort(ordering)

print "Sorted on High, Date (desc.)"
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 

print "Sorted on High, Date"
timeSeries.Sort(CollectionSortOrder(high, SortOrder.Ascending) \
    .Then(date, SortOrder.Ascending))
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 

# The sort order remains, even if we change the filter:
timeSeries.Filter = high.Filters.Between(25.21, 25.25)
print "Same sort order, different data"
for i in range(timeSeries.Observations.Count):
    print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString())
print 
```