Sorting and Filtering in C# QuickStart Sample

Illustrates how to sort and filter data used for data analysis in C#.

View this sample in: Visual Basic, F#, IronPython

using System;

using Numerics.NET.Data.Text;
using Numerics.NET.DataAnalysis;
using Numerics.NET;
using Numerics.NET.Statistics;
using Index = Numerics.NET.DataAnalysis.Index;
using Range = Numerics.NET.Range;

namespace Numerics.NET.QuickStart.CSharp
{
    /// <summary>
    /// Illustrates sorting and filtering of data frames.
    /// </summary>
    /// <remarks>
    /// The sample reads a CSV file of stock data into a data frame indexed by
    /// date, selects rows by key, by key range, by boolean mask and by nearest
    /// key, and finally sorts the frame on one of its columns.
    /// </remarks>
    class SortingAndFiltering
    {
        /// <summary>
        /// Entry point of the sample. Writes its results to the console and
        /// waits for the user to press Enter before returning.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // The license is verified at runtime. We're using
            // a 30 day trial key here. For more information, see
            //     https://numerics.net/trial-key
            Numerics.NET.License.Verify("64542-18980-57619-62268");

            // Build the relative path with Path.Combine so the platform's
            // directory separator is used. On Windows this produces exactly
            // "..\..\..\..\Data\MicrosoftStock.csv".
            var dataPath = System.IO.Path.Combine(
                "..", "..", "..", "..", "Data", "MicrosoftStock.csv");

            // We load the data into a data frame with a DateTime row index:
            var timeSeries = DelimitedTextFile.ReadDataFrame<DateTime>(
                dataPath, "Date");
            // The row index itself is available as a property
            // (shown for illustration; not used further in this sample):
            var date = timeSeries.RowIndex;

            // The following are all equivalent ways of getting
            // a strongly typed vector from a data frame
            // ('low' is shown for illustration; it is not used below):
            var open = timeSeries["Open"].As<double>();
            var close = timeSeries.GetColumn("Close");
            var high = timeSeries.GetColumn<double>("High");
            var low = (Vector<double>)timeSeries["Low"];

            var volume = timeSeries["Volume"].As<double>();

            // Let's print some basic statistics for the full data set:
            Console.WriteLine($"Total # observations: {timeSeries.RowCount}");
            Console.WriteLine($"Average volume: {volume.Mean():F0}");
            Console.WriteLine($"Total volume: {volume.Sum():F0}");

            //
            // Filtering
            //

            // Use the GetRows method to select subsets of rows.

            // You can use a sequence of keys:
            var subset = timeSeries.GetRows(new[] {
                    new DateTime(2000,3,1), new DateTime(2000,3,2) });

            // When the index is sorted, you can use a range:
            subset = timeSeries.GetRows(
                new DateTime(2000, 1, 1), new DateTime(2010, 1, 1));

            // Another option is to use a boolean mask. Here we select
            // observations where the close price was greater
            // than the open price:
            var filter = Vector.GreaterThan(close, open);
            // Then we can use the GetRows method:
            subset = timeSeries.GetRows(filter);
            // Data is now filtered:
            Console.WriteLine($"Filtered # observations: {subset.RowCount}");

            // Masks can be combined using logical operations.
            // This mask is true where the volume is at least 200 million
            // and strictly less than 300 million:
            var volumeFilter = volume.Map(x => 200e+6 <= x && x < 300e+6);
            Console.WriteLine($"Volume filtered #: {volumeFilter.CountTrue()}");
            var intersection = Vector.And(volumeFilter, filter);
            // 'union' and 'negation' only demonstrate the other logical
            // operations; they are not used further in this sample:
            var union = Vector.Or(volumeFilter, filter);
            var negation = Vector.Not(filter);

            Console.WriteLine($"Combined filtered #: {intersection.CountTrue()}");
            subset = timeSeries.GetRows(intersection);

            // When the row index is ordered, it is possible
            // to get the rows with the key nearest to the
            // supplied keys. The lookup keys start at 3:00 AM on
            // January 1, 2001 and recur daily, 100 in total:
            var startDate = new DateTime(2001, 1, 1, 3, 0, 0);
            var offsetDates = Index.CreateDateRange(startDate,
                100, Recurrence.Daily);
            subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward);

            //
            // Sorting
            //

            // The simplest way to sort data is calling the SortBy method
            // with the name of the variable to sort on:
            var sortedSeries = timeSeries.SortBy("High", SortOrder.Descending);
            // NOTE(review): Range(0, 4) presumably selects the first five
            // values (inclusive bounds), matching Top(5) below - confirm.
            var sortedHigh = sortedSeries.GetColumn("High")[new Range(0, 4)];
            Console.WriteLine("Largest 'High' values:");
            Console.WriteLine(sortedHigh.ToString("F2"));

            // If you just want the largest few items in a series,
            // you can use the Top or Bottom method:
            Console.WriteLine(high.Top(5).ToString("F2"));

            // Console.ReadLine waits for the Enter key, so the prompt
            // says so explicitly.
            Console.Write("Press Enter to exit.");
            Console.ReadLine();
        }
    }
}