Manipulating Columns in Visual Basic QuickStart Sample

Illustrates how to transform and manipulate the columns of a data frame in Visual Basic.

View this sample in: C# F#

Option Infer On

Imports Extreme.DataAnalysis
Imports Extreme.Mathematics

' Illustrates how to transform and manipulate the columns
' of a data frame.
Module ManipulatingColumns

    ' Entry point of the sample. Builds a date-indexed data frame from
    ' vectors created with Vector.CreateRandom, then walks through adding,
    ' renaming and mapping columns, replacing missing values with group
    ' means, and computing a per-row average both row-by-row and
    ' column-wise. Two vector slices are printed to the console.
    Sub Main()
        ' The license is verified at runtime. We're using
        ' a demo license here. For more information, see
        ' the licensing documentation of the Extreme Optimization library.
        Extreme.License.Verify("Demo license")

        ' Let's start with a data frame with a DateTime index:
        Const rowCount = 1000
        Dim dates = Index.CreateDateRange(New DateTime(2016, 1, 17), rowCount, Recurrence.Daily)
        ' The date range is passed as the row index of the new frame.
        Dim frame = DataFrame.FromColumns(New Dictionary(Of String, Object)() From {
                    {"values1", Vector.CreateRandom(rowCount)},
                    {"values2", Vector.CreateRandom(rowCount)}},
                    dates)

        ' The columns of a data frame are immutable,
        ' but the collection of columns is not.

        ' We can add columns. The key "vzlues3" is misspelled on purpose:
        ' it is corrected by the pattern-based rename below.
        frame.AddColumn("vzlues3", Vector.CreateRandom(rowCount))
        frame.AddColumn("values4", Vector.CreateRandom(rowCount))
        frame.AddColumn("values6", Vector.CreateRandom(rowCount))
        ' Rename a single column (again to a deliberately misspelled key)
        frame = frame.RenameColumn("values4", "vzlues5")
        ' Rename every column whose key starts with "vzlues": the first
        ' function selects the keys, the second produces the new key by
        ' replacing the 6-character prefix with "values".
        frame = frame.RenameColumns(
                Function(s) s.StartsWith("vzlues"),
                Function(s) "values" + s.Substring(6))
        ' And remove columns
        ' NOTE(review): no removal call follows in this sample - confirm
        ' whether one was intended here.

        ' You can transform a column and add the result
        ' in various places:
        ' As the last column
        frame.MapAndAppend(Of Double)("values1", Function(x) Vector.Cos(x), "cosValues1")
        ' After a specific column:
        frame.MapAndInsertAfter(Of Double)("values1", Function(x) Vector.Sin(x), "sinValues1")
        ' Replacing the column
        frame.MapAndReplace(Of Double)("values6", Function(x) Vector.Exp(x), "expValues6")

        ' The same operations can be performed on multiple columns
        ' at once:
        Dim columns = {"values1", "values2"}
        ' We can supply the keys for the new columns explicitly:
        Dim negColumns = {"-values1", "-values2"}
        frame.MapAndAppend(Of Double)(columns, Function(x) -x, negColumns)
        ' or as a function of the original key:
        frame.MapAndInsertAfter(Of Double)(columns, Function(x) 2.0 * x, Function(s) "2*" + s)

        ' A more complex example: replace missing values
        ' with the mean of a group.

        ' We create a categorical variable with 5 categories
        ' so we will have 5 group means.
        Dim group = frame.GetColumn("values1").Bin(5)
        ' and a variable that has some missing values:
        ' we work on a clone so the frame's own column is left untouched,
        ' and mark every element below 0.15 as missing (NaN).
        Dim withNAs = frame.GetColumn("values2").Clone _
                .SetValues(Double.NaN, Function(x) x < 0.15)
        Console.WriteLine(withNAs.GetSlice(0, 12))
        ' Now for the actual calculation, which has 3 steps:
        ' First, we compute the means for each group:
        Dim meansPerGroup = withNAs.AggregateBy(group, Aggregators.Mean)
        ' Next, create a vector with the means of the group
        ' that each element belongs to:
        Dim means = group.WithCategories(meansPerGroup)
        ' Finally, we replace the missing values with the corresponding
        ' elements from that vector.
        Dim withNAsReplaced = withNAs.ReplaceMissingValues(means)
        Console.WriteLine(withNAsReplaced.GetSlice(0, 12))

        ' Row-based operations

        ' Data frames are column-based data structures.
        ' Even though it is not recommended, it is possible
        ' to perform operations on rows:

        ' NOTE(review): a column keyed "values3" may already exist after the
        ' pattern-based rename above - confirm how AddColumn treats an
        ' existing key.
        frame.AddColumn("values3", Vector.CreateRandom(rowCount))
        Dim avg1 = Vector.Create(Of Double)(frame.RowCount)
        ' i tracks the position in avg1 that corresponds to the current row.
        Dim i = 0
        For Each row In frame.Rows
            avg1(i) = (row.Get(Of Double)("values1") +
                        row.Get(Of Double)("values2") +
                        row.Get(Of Double)("values3")) / 3
            i = i + 1
        Next
        frame.AddColumn("Average", avg1)

        ' Performing the operation directly on the columns
        ' is much more efficient:
        Dim avg2 = (frame.GetColumn("values1") +
                     frame.GetColumn("values2") +
                     frame.GetColumn("values3")) / 3.0
        frame.AddColumn("Average2", avg2)

        Console.Write("Press any key to exit.")
        ' Wait for input so the prompt above is meaningful and the
        ' console window stays open.
        Console.ReadLine()

    End Sub

End Module