Multiple Linear Regression in C# QuickStart Sample

Illustrates how to use the LinearRegressionModel class to perform a multiple linear regression in C#.

View this sample in: Visual Basic, F#, IronPython

using System;

using Extreme.DataAnalysis;
using Extreme.Mathematics;
using Extreme.Statistics;
using Extreme.Data.Text;

namespace Extreme.Numerics.QuickStart.CSharp
{
    /// <summary>
    /// Illustrates building multiple linear regression models using 
    /// the LinearRegressionModel class in the 
    /// Extreme.Statistics namespace of Extreme Numerics.NET.
    /// </summary>
    class MultipleRegression
    {
        /// <summary>
        /// Loads the sample data set, fits a multiple linear regression model
        /// for the "science" score, and writes the parameter estimates,
        /// confidence intervals, and goodness-of-fit statistics to the console.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The license is verified at runtime. We're using
            // a demo license here. For more information, see
            // the licensing documentation that ships with the library.
            Extreme.License.Verify("Demo license");

            // Multiple linear regression can be performed using 
            // the LinearRegressionModel class.

            // This QuickStart sample uses test score data for 200 high school
            // students, including science, math, and reading.

            // First, read the data from a file into a data frame. 
            var data = DelimitedTextFile.ReadDataFrame(@"..\..\..\..\Data\hsb2.csv");

            // Now create the regression model. Parameters are the data frame,
            // the name of the dependent variable, and a string array containing 
            // the names of the independent variables.
            var model = new LinearRegressionModel(data,
                "science", new string[] {"math", "female", "socst", "read"});

            // Alternatively, we can use a formula to describe the variables
            // in the model. The dependent variable goes on the left, the
            // independent variables on the right of the ~:
            var model2 = new LinearRegressionModel(data,
                "science ~ math + female + socst + read");

            // We can set model options now, such as whether to exclude 
            // the constant term:
            // model.NoIntercept = false;

            // The Fit method performs the actual regression analysis.
            // It must run before any of the results below are read.
            model.Fit();

            // The Parameters collection contains information about the regression 
            // parameters.
            Console.WriteLine("Variable              Value    Std.Error  t-stat  p-Value");
            foreach (var parameter in model.Parameters)
            {
                // Parameter objects have the following properties:
                Console.WriteLine("{0,-20}{1,10:F6}{2,10:F6}{3,8:F2} {4,7:F5}",
                    // Name, usually the name of the variable:
                    parameter.Name,
                    // Estimated value of the parameter:
                    parameter.Value,
                    // Standard error:
                    parameter.StandardError,
                    // The value of the t statistic for the hypothesis that the parameter
                    // is zero.
                    parameter.Statistic,
                    // Probability corresponding to the t statistic.
                    parameter.PValue);
            }

            // In addition to these properties, Parameter objects have 
            // a GetConfidenceInterval method that returns 
            // a confidence interval at a specified confidence level.
            // Notice that individual parameters can be accessed 
            // using their numeric index. Parameter 0 is the intercept, 
            // if it was included.
            Interval confidenceInterval = model.Parameters[0].GetConfidenceInterval(0.95);
            Console.WriteLine("95% confidence interval for intercept: {0:F4} - {1:F4}",
                confidenceInterval.LowerBound, confidenceInterval.UpperBound);
            // Parameters can also be accessed by name:
            confidenceInterval = model.Parameters.Get("math").GetConfidenceInterval(0.95);
            Console.WriteLine("95% confidence interval for 'math': {0:F4} - {1:F4}",
                confidenceInterval.LowerBound, confidenceInterval.UpperBound);

            // There is also a wealth of information about the analysis available
            // through various properties of the LinearRegressionModel object:
            Console.WriteLine("Residual standard error: {0:F3}", model.StandardError);
            Console.WriteLine("R-Squared:               {0:F4}", model.RSquared);
            Console.WriteLine("Adjusted R-Squared:      {0:F4}", model.AdjustedRSquared);
            Console.WriteLine("F-statistic:             {0:F4}", model.FStatistic);
            Console.WriteLine("Corresponding p-value:   {0:F5}", model.PValue);

            // Much of this data can be summarized in the form of an ANOVA table:
            Console.WriteLine(model.AnovaTable.ToString());

            // All this information can be printed using the Summarize method.
            // You will also see summaries using the library in C# interactive.
            Console.WriteLine(model.Summarize());

            // Keep the console window open until the user responds to the prompt.
            Console.Write("Press any key to exit.");
            Console.ReadKey();
        }
    }
}