Sorting and Filtering in IronPython QuickStart Sample
Illustrates how to sort and filter data used for data analysis in IronPython.
View this sample in: C# Visual Basic F#
```Python import numerics import clr clr.AddReference("System.Data") from System.Data import * from System.Data.OleDb import * # Variable classes reside in the Extreme.Statistics namespace. from Extreme.Statistics import * # Illustrates sorting and filtering of data sets and variables. def LoadTimeSeriesData(): filename = r"..\Data\MicrosoftStock.xls" connectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source="+filename+";Extended Properties='Excel 8.0;HDR=Yes;IMEX=1'" cnn = None ds = DataSet() try: cnn = OleDbConnection(connectionString) cnn.Open() adapter = OleDbDataAdapter("Select * from [MicrosoftStock$]", cnn) adapter.Fill( ds) except ex as OleDbException: print ex.InnerException finally: if cnn != None: cnn.Close() return ds.Tables[0] seriesTable = LoadTimeSeriesData() timeSeries = VariableCollection(seriesTable) # Next, we create some helper variables: date = timeSeries["Date"] high = timeSeries["High"] low = timeSeries["Low"] open = timeSeries["Open"] close = timeSeries["Close"] volume = timeSeries["Volume"] # Let's print some basic statistics for the full data set: print "Total # observations:", timeSeries.Observations.Count print "Average volume: {0:.0f}".format(volume.Mean) print "Total volume: {0:.0f}".format(volume.Sum) # # Filtering # # Next, we create a filter, selecting observations where the close price # was greater than the open price: filter = close.Filters.GreaterThan(open) # and set the VariableCollection's Filter property. timeSeries.Filter = filter # Data is now filtered: print "Filtered # observations:", timeSeries.Observations.Count # Summary statistics apply only to the filtered data: print "Average volume: {0:.0f}".format(volume.Mean) print "Total volume: {0:.0f}".format(volume.Sum) # Filters can be combined using set operations. volumeFilter = volume.Filters.Between(200e+6, 300e+6) print "Volume filtered #:", volumeFilter.FilteredLength combinedFilter = Filter.Intersection(volumeFilter, filter) # Alternatively: combinedFilter = volumeFilter & filter print "Combined filtered #:", combinedFilter.FilteredLength timeSeries.Filter = combinedFilter # # Sorting # # The simplest way to sort data is calling the Sort method # with the name of the variable to sort on: timeSeries.Sort("High") for i in range(timeSeries.Observations.Count): print "{0:8.2f}".format(high[i]) print # We can also create a CollectionSortOrder object that # defines the sort order over multiple fields/variables. # We first create an order with one variable: ordering = CollectionSortOrder(timeSeries, "High", SortOrder.Ascending) # We then use the Then method repeatedly to add more sort fields: ordering = ordering.Then("Date", SortOrder.Descending) # And call the Sort method with the CollectionSortOrder object: timeSeries.Filter = high.Filters.Between(25.11, 25.15) # Sort orders are automatically combined with filters. timeSeries.Sort(ordering) print "Sorted on High, Date (desc.)" for i in range(timeSeries.Observations.Count): print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString()) print print "Sorted on High, Date" timeSeries.Sort(CollectionSortOrder(high, SortOrder.Ascending) \ .Then(date, SortOrder.Ascending)) for i in range(timeSeries.Observations.Count): print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString()) print # The sort order remains, even if we change the filter: timeSeries.Filter = high.Filters.Between(25.21, 25.25) print "Same sort order, different data" for i in range(timeSeries.Observations.Count): print "{0:8.2f} {1:11}".format(high[i], date[i].ToShortDateString()) print ```