Sorting and Filtering in Visual Basic QuickStart Sample

Illustrates how to sort and filter data used for data analysis in Visual Basic.

View this sample in: C# F# IronPython

Option Infer On

Imports Extreme.Data.Text
Imports Extreme.DataAnalysis
Imports Extreme.Mathematics
Imports Extreme.Statistics

    ' Illustrates sorting and filtering of data sets and variables.
    Module SortingAndFiltering

        Sub Main()

        ' The license is verified at runtime. We're using
        ' a demo license here. For more information, see
        ' https://numerics.net/trial-key
        Extreme.License.Verify("Demo license")

        ' We load the data into a data frame with a DateTime row index
        Dim timeSeries = DelimitedTextFile.ReadDataFrame(Of DateTime)(
                "..\..\..\..\Data\MicrosoftStock.csv", "Date")
        Dim dates = timeSeries.RowIndex

        ' The following are all equivalent ways of getting
        ' a strongly typed vector from a data frame
        Dim open = timeSeries("Open").As(Of Double)()
        Dim close = timeSeries.GetColumn("Close")
        Dim high = timeSeries.GetColumn(Of Double)("High")
        Dim low = CType(timeSeries("Low"), Vector(Of Double))

        Dim volume = timeSeries("Volume").As(Of Double)()

        ' Let's print some basic statistics for the full data set:
        Console.WriteLine("Total # observations: {0}", timeSeries.RowCount)
        Console.WriteLine("Average volume: {0:F0}", volume.Mean())
        Console.WriteLine("Total volume: {0:F0}", volume.Sum())

        '
        ' Filtering
        '

        ' Use the GetRows method to select subsets of rows.

        ' You can use a sequence of keys
        Dim subset = timeSeries.GetRows(
                {New DateTime(2000, 3, 1), New DateTime(2000, 3, 2)})

        ' When the index Is sorted, you can use a range
        subset = timeSeries.GetRows(
                New DateTime(2000, 1, 1), New DateTime(2010, 1, 1))

        ' Another option Is to use a boolean mask. Here we select
        ' observations where the close price was greater 
        ' than the open price
        Dim Filter = Vector.GreaterThan(close, open)
        ' Then we can use the GetRows method
        subset = timeSeries.GetRows(Filter)
        ' Data Is now filtered
        Console.WriteLine("Filtered # observations: {0}", subset.RowCount)

        ' Masks can be combined using logical operations
        Dim volumeFilter = volume.Map(Function(x) 200000000.0 <= x And x < 300000000.0)
        Console.WriteLine("Volume filtered #: {0}", volumeFilter.CountTrue())
        Dim intersection = Vector.And(volumeFilter, Filter)
        Dim union = Vector.Or(volumeFilter, Filter)
        Dim negation = Vector.Not(Filter)

        Console.WriteLine("Combined filtered #: {0}", intersection.CountTrue())
        subset = timeSeries.GetRows(intersection)

        ' When the row index Is ordered, it Is possible
        ' to get the rows with the key nearest to the 
        ' supplied keys
        Dim startDate = New DateTime(2001, 1, 1, 3, 0, 0)
        Dim offsetDates = Index.CreateDateRange(startDate,
                100, Recurrence.Daily)
        subset = timeSeries.GetNearestRows(offsetDates, Direction.Forward)

        '
        ' Sorting
        '

        ' The simplest way to sort data Is calling the Sort method 
        ' with the name of the variable to sort on
        Dim sortedSeries = timeSeries.SortBy("High", SortOrder.Descending)
        Dim highSorted = sortedSeries.GetColumn("High")(New Range(0, 4))
        Console.WriteLine("Largest 'High' values:")
        Console.WriteLine(highSorted.ToString("F2"))

        ' If you just want the largest few items in a series,
        ' you can use the Top Or Bottom method
        Console.WriteLine(high.Top(5).ToString("F2"))

        Console.Write("Press any key to exit.")
        Console.ReadLine()
    End Sub

End Module