Non-Parametric Tests in C# QuickStart Sample

Illustrates how to perform non-parametric tests like the Wilcoxon-Mann-Whitney test and the Kruskal-Wallis test in C#.

View this sample in: Visual Basic F# IronPython

using System;

using Extreme.Mathematics;
using Extreme.Statistics;
using Extreme.Statistics.Tests;

namespace Extreme.Numerics.Quickstart.CSharp {
    /// <summary>
    /// Demonstrates how to use non-parametric hypothesis tests
    /// like the Mann-Whitney (Wilcoxon) rank sum test, the
    /// Kruskal-Wallis test, and the runs test.
    /// </summary>
    /// <remarks>
    /// Every section follows the same pattern: build the sample data,
    /// construct the test object, then write the test statistic, the
    /// P-value, the significance level and the reject decision to the console.
    /// </remarks>
    class NonParametricTests {
        /// <summary>
        /// Entry point of the sample. Runs the Mann-Whitney test (twice, with
        /// two different ways of supplying the data), the Kruskal-Wallis test
        /// and the runs test, printing the results of each to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used by this sample.</param>
        static void Main(string[] args) {
            // The license is verified at runtime. We're using
            // a demo license here. For more information, see
            // https://numerics.net/trial-key
            Extreme.License.Verify("Demo license");

            //
            // Mann-Whitney test
            //

            Console.WriteLine("Mann-Whitney Test");

            // The Mann-Whitney test compares two samples to see if they were
            // drawn from the same distribution.

            // We use an example from McDonald, et al. (1996), who compared
            // the geographic variation in oyster DNA to the variation in
            // proteins. A significant difference in the samples would suggest
            // that natural selection played a role in the oyster diversification.

            // There are two ways to create a test with multiple samples.

            // The first is to put all the data in one variable,
            // and use a second variable to group the data in the first.
            Console.WriteLine("\nUsing grouping variable:");

            // The first 6 observations belong to the DNA group,
            // the remaining 14 to the Protein group (see 'groups' below).
            var values = Vector.Create(new double[] {
                -0.005, 0.116,-0.006, 0.095, 0.053, 0.003,
                -0.005, 0.016, 0.041, 0.016, 0.066,
                 0.163, 0.004, 0.049, 0.006, 0.058,
                -0.002, 0.015, 0.044, 0.024
            });
            // One group label per observation, in the same order as 'values'.
            var groups = Vector.Create(new Group[] {
                Group.DNA, Group.DNA, Group.DNA, Group.DNA, Group.DNA, Group.DNA,
                Group.Protein, Group.Protein, Group.Protein, Group.Protein, Group.Protein,
                Group.Protein, Group.Protein, Group.Protein, Group.Protein, Group.Protein,
                Group.Protein, Group.Protein, Group.Protein, Group.Protein
            }).AsCategorical();

            // With this data, we can create the test:
            var mw = new MannWhitneyTest<double>(values, groups);

            // We can obtain the value of the test statistic through the Statistic property,
            // and the corresponding P-value through the PValue property:
            Console.WriteLine("Test statistic: {0:F4}", mw.Statistic);
            Console.WriteLine("P-value:        {0:F4}", mw.PValue);

            // The significance level is the default value of 0.05:
            Console.WriteLine("Significance level:     {0:F2}", mw.SignificanceLevel);
            // Without arguments, Reject() gives the decision at that significance level:
            Console.WriteLine("Reject null hypothesis? {0}", mw.Reject() ? "yes" : "no");

            // The decision for the 0.01 significance level is obtained by explicitly
            // passing the significance level as a parameter to Reject:
            Console.WriteLine("Significance level:     {0:F2}", 0.01);
            Console.WriteLine("Reject null hypothesis? {0}", mw.Reject(0.01) ? "yes" : "no");


            // The second method is to put the data in different variables
            Console.WriteLine("\nUsing multiple variables:");

            // Same observations as above, now split into one vector per group.
            var dnaValues = Vector.Create(new double[] {
                -0.005, 0.116,-0.006, 0.095, 0.053, 0.003 });
            var proteinValues = Vector.Create(new double[] {
                -0.005, 0.016, 0.041, 0.016, 0.066,
                 0.163, 0.004, 0.049, 0.006, 0.058,
                -0.002, 0.015, 0.044, 0.024
            });

            // With this data, we can create the test:
            mw = new MannWhitneyTest<double>(dnaValues, proteinValues);

            // We can obtain the value of the test statistic through the Statistic property,
            // and the corresponding P-value through the PValue property:
            Console.WriteLine("Test statistic: {0:F4}", mw.Statistic);
            Console.WriteLine("P-value:        {0:F4}", mw.PValue);

            // The significance level is the default value of 0.05:
            Console.WriteLine("Significance level:     {0:F2}", mw.SignificanceLevel);
            // Without arguments, Reject() gives the decision at that significance level:
            Console.WriteLine("Reject null hypothesis? {0}", mw.Reject() ? "yes" : "no");

            //
            // Kruskal-Wallis test
            //

            Console.WriteLine("\nKruskal-Wallis Test\n");

            // The Kruskal-Wallis test is a generalization of the Mann-Whitney test
            // to more than 2 groups.

            // The following example was taken from the NIST Engineering Statistics Handbook
            // at http://www.itl.nist.gov/div898/handbook/prc/section4/prc41.htm

            // The data represents percentage quarterly growth
            // in 4 investment funds:
            var aValues = Vector.Create(new double[] { 4.2, 4.6, 3.9, 4.0 });
            var bValues = Vector.Create(new double[] { 3.3, 2.4, 2.6, 3.8, 2.8 });
            var cValues = Vector.Create(new double[] { 1.9, 2.4, 2.1, 2.7, 1.8 });
            var dValues = Vector.Create(new double[] { 3.5, 3.1, 3.7, 4.1, 4.4 });

            // We simply pass these variables to the constructor:
            var kw = new KruskalWallisTest(aValues, bValues, cValues, dValues);

            // We can obtain the value of the test statistic through the Statistic property,
            // and the corresponding P-value through the PValue property:
            Console.WriteLine("Test statistic: {0:F4}", kw.Statistic);
            Console.WriteLine("P-value:        {0:F4}", kw.PValue);

            // The significance level is the default value of 0.05:
            Console.WriteLine("Significance level:     {0:F2}", kw.SignificanceLevel);
            // Without arguments, Reject() gives the decision at that significance level:
            Console.WriteLine("Reject null hypothesis? {0}", kw.Reject() ? "yes" : "no");

            //
            // Runs test
            //

            Console.WriteLine("\nRuns Test\n");

            // The runs test is a test of randomness.

            // It compares the lengths of runs of the same value
            // in a sample to what would be expected.

            // The order of the observations matters here: the test
            // looks at consecutive repetitions of the same category.
            var genders = Vector.Create(new Gender[] {
                Gender.Male, Gender.Male, Gender.Male, Gender.Female, Gender.Female,
                Gender.Female, Gender.Male, Gender.Male, Gender.Male, Gender.Male,
                Gender.Female, Gender.Female, Gender.Male, Gender.Male, Gender.Male,
                Gender.Female, Gender.Female, Gender.Female, Gender.Female, Gender.Female,
                Gender.Female, Gender.Female, Gender.Male, Gender.Male, Gender.Female,
                Gender.Male, Gender.Male, Gender.Female, Gender.Female, Gender.Female,
                Gender.Female}).AsCategorical();

            var rt = new RunsTest<Gender>(genders);

            // We can obtain the value of the test statistic through the Statistic property,
            // and the corresponding P-value through the PValue property:
            Console.WriteLine("Test statistic: {0:F4}", rt.Statistic);
            Console.WriteLine("P-value:        {0:F4}", rt.PValue);

            // The significance level is the default value of 0.05:
            Console.WriteLine("Significance level:     {0:F2}", rt.SignificanceLevel);
            // Without arguments, Reject() gives the decision at that significance level:
            Console.WriteLine("Reject null hypothesis? {0}", rt.Reject() ? "yes" : "no");

            // Console.ReadLine only returns once Enter is pressed, so the prompt
            // asks for Enter rather than "any key". ReadLine is kept (instead of
            // switching to ReadKey) so the sample still ends cleanly when
            // standard input is redirected.
            Console.Write("Press Enter to exit.");
            Console.ReadLine();
        }
    }

    /// <summary>
    /// Labels used by the grouping variable of the Mann-Whitney example
    /// to assign each observation to one of the two samples.
    /// </summary>
    enum Group {
        /// <summary>Observation belongs to the DNA sample.</summary>
        DNA = 0,

        /// <summary>Observation belongs to the protein sample.</summary>
        Protein = 1
    }

    /// <summary>
    /// The two categories that make up the sequence analyzed
    /// in the runs test example.
    /// </summary>
    enum Gender {
        /// <summary>Male observation.</summary>
        Male = 0,

        /// <summary>Female observation.</summary>
        Female = 1
    }

}