r.net performance

Feb 16, 2013 at 4:40 PM
I've got a serious performance problem with R integration (R.NET 1.5). I use the following code:
    /// <summary>
    /// Pushes the three input arrays into R, runs a Type III <c>Anova</c> on the
    /// linear model <c>y ~ a * b</c> and copies three p-values into the ref arguments.
    /// </summary>
    /// <param name="samples">Response values; bound to the R symbol <c>samples</c>.</param>
    /// <param name="fRule">First factor; bound to the R symbol <c>fr</c>.</param>
    /// <param name="fGroup">Second factor; bound to the R symbol <c>fg</c>.</param>
    /// <param name="pRule">Receives entry 1 of the <c>Pr(&gt;F)</c> column.</param>
    /// <param name="pGroup">Receives entry 2 of the <c>Pr(&gt;F)</c> column.</param>
    /// <param name="pInteraction">Receives entry 3 of the <c>Pr(&gt;F)</c> column.</param>
    public static void computeAnova(double[] samples, double[] fRule, double[] fGroup, ref double pRule, ref double pGroup, ref double pInteraction )
    {
        const string anovaCall = "Anova(lm(y~a*b, data = data.frame(y = samples, a = fr, b = fg)), type=\"III\", singular.ok=TRUE)";

        // Each array is copied into a fresh R numeric vector and bound to a global symbol.
        var sampleVector = rEngine.CreateNumericVector(samples);
        rEngine.SetSymbol("samples", sampleVector);

        var ruleVector = rEngine.CreateNumericVector(fRule);
        rEngine.SetSymbol("fr", ruleVector);

        var groupVector = rEngine.CreateNumericVector(fGroup);
        rEngine.SetSymbol("fg", groupVector);

        var anovaTable = rEngine.Evaluate(anovaCall).AsList();
        var pValues = anovaTable["Pr(>F)"].AsNumeric();

        // Entry 0 is skipped — presumably the intercept row of the Type III table; TODO confirm.
        pRule = pValues[1];
        pGroup = pValues[2];
        pInteraction = pValues[3];
    }
The problem is that I need to call this function many, many times — say 50,000 times for a fairly small data set — and that takes about 15 minutes. The function itself runs for ~20 ms. I've checked that about half of that time is spent in SetSymbol, i.e. putting the vectors into R (each vector is 320 doubles in this example).

Is there any method of putting data into R faster? Unfortunately I cannot batch the computations.
Developer
Feb 17, 2013 at 10:11 PM
Hi,

Could you zip a small C# console application showing the issue and submit it, e.g. as an issue? (I think you should be able to attach it to the issue.) I could then run a profiler on it to confirm the hot spot and see if/what can be improved.
Feb 23, 2013 at 12:32 AM
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using RDotNet;
using System.IO;
using System.Diagnostics;

namespace r_net_integration_sample
{
/// <summary>
/// Console benchmark: repeatedly sends random data to an embedded R engine (via R.NET),
/// runs a sequential and a Type III ANOVA on it, and reports the time spent.
/// </summary>
class Program
{
    // Shared R engine. Stays (or is reset to) null when initialisation fails.
    private static REngine rEngine = null;
    public static int count = 0;

    /// <summary>
    /// Appends the R binary folder to PATH, starts the R engine, sets the contrast
    /// options and loads the <c>car</c> package. On failure the exception is written
    /// to stderr and the engine is left null so that callers can detect it.
    /// </summary>
    public static void init()
    {
        try
        {
            // Set the folder in which R.dll locates. The native library must match the
            // bitness of this process, so choose the x64 or i386 sub-folder accordingly.
            var envPath = Environment.GetEnvironmentVariable("PATH");
            var rBinRoot = @"C:\Program Files\R\R-2.15.2\bin";
            var rBinPath = Path.Combine(rBinRoot, Environment.Is64BitProcess ? "x64" : "i386");
            Environment.SetEnvironmentVariable("PATH", envPath + Path.PathSeparator + rBinPath);

            rEngine = REngine.CreateInstance("RDotNet");
            rEngine.Initialize();

            rEngine.Evaluate("options(contrasts=c(\"contr.sum\", \"contr.poly\"))");
            rEngine.Evaluate("library(car)");
        }
        catch (Exception e)
        {
            // Do not swallow the failure: report it and make sure no half-initialised
            // engine is used afterwards.
            Console.Error.WriteLine("Failed to initialise the R engine: {0}", e);
            rEngine = null;
        }
    }

    /// <summary>
    /// Binds the three arrays to the R symbols <c>samples</c>, <c>fr</c> and <c>fg</c>,
    /// then evaluates <c>anova(lm(...))</c> and <c>Anova(lm(...), type="III")</c> and
    /// copies the <c>Pr(&gt;F)</c> entries into the ref arguments.
    /// Does nothing when the engine has not been initialised.
    /// </summary>
    /// <param name="samples">Response values.</param>
    /// <param name="fRule">First factor.</param>
    /// <param name="fGroup">Second factor.</param>
    /// <param name="pRule1">Entry 0 of the <c>anova</c> p-value column.</param>
    /// <param name="pGroup1">Entry 1 of the <c>anova</c> p-value column.</param>
    /// <param name="pInteraction1">Entry 2 of the <c>anova</c> p-value column; NaN is replaced by 1.</param>
    /// <param name="pRule3">Entry 1 of the Type III <c>Anova</c> p-value column.</param>
    /// <param name="pGroup3">Entry 2 of the Type III <c>Anova</c> p-value column.</param>
    /// <param name="pInteraction3">Entry 3 of the Type III <c>Anova</c> p-value column; NaN is replaced by 1.</param>
    public static void computeAnova(double[] samples, double[] fRule, double[] fGroup,
ref double pRule1, ref double pGroup1, ref double pInteraction1,
ref double pRule3, ref double pGroup3, ref double pInteraction3)
    {
        if (rEngine == null)
        {
            return;
        }

        rEngine.SetSymbol("samples", rEngine.CreateNumericVector(samples));
        rEngine.SetSymbol("fr", rEngine.CreateNumericVector(fRule));
        rEngine.SetSymbol("fg", rEngine.CreateNumericVector(fGroup));

        var res = rEngine.Evaluate("anova(lm(y~a*b, data = data.frame(y = samples, a = fr, b = fg)))").AsList()["Pr(>F)"].AsNumeric();
        pRule1 = res[0];
        pGroup1 = res[1];
        pInteraction1 = res[2];

        if (double.IsNaN(pInteraction1))
        {
            pInteraction1 = 1;
        }

        // The Type III result is read starting at index 1 — presumably index 0 is the
        // intercept row; TODO confirm against the car::Anova output.
        res = rEngine.Evaluate("Anova(lm(y~a*b, data = data.frame(y = samples, a = fr, b = fg)), type=\"III\", singular.ok=TRUE)").AsList()["Pr(>F)"].AsNumeric();
        pRule3 = res[1];
        pGroup3 = res[2];
        pInteraction3 = res[3];

        if (double.IsNaN(pInteraction3))
        {
            pInteraction3 = 1;
        }
    }

    /// <summary>
    /// Runs 10000 ANOVA evaluations on freshly generated random data and prints the
    /// total and average time spent inside <see cref="computeAnova"/>.
    /// </summary>
    static void Main(string[] args)
    {
        double pRule1 = 0, pGroup1 = 0, pInteraction1 = 0, pRule3 = 0, pGroup3 = 0, pInteraction3 = 0;
        double[] samples;
        double[] fRule;
        double[] fGroup;
        Random r = new Random();

        init();

        if (rEngine == null)
        {
            // Without an engine computeAnova is a no-op and the timings would be meaningless.
            Console.Error.WriteLine("R engine is not available; benchmark aborted.");
            return;
        }

        // A single stopwatch accumulates across iterations (Start/Stop without reset),
        // so sub-millisecond parts of each call are not truncated away.
        Stopwatch sw = new Stopwatch();

        int i;
        for (i = 0; i < 10000; i++)
        {
            samples = new double[300];
            fRule = new double[300];
            fGroup = new double[300];

            for (int j = 0; j < samples.Length; j++)
            {
                samples[j] = r.NextDouble();
                // Random.Next(max) excludes max: Next(1) would always yield 0 and give
                // constant factors. Next(2) produces the two levels 0 and 1.
                fRule[j] = r.Next(2);
                fGroup[j] = r.Next(2);
            }

            sw.Start();
            computeAnova(samples, fRule, fGroup, ref pRule1, ref pGroup1, ref pInteraction1, ref pRule3, ref pGroup3, ref pInteraction3);
            sw.Stop();
        }

        long totalTime = sw.ElapsedMilliseconds;

        Console.WriteLine("Iterations: {0}\nTotal time: {1} ms\nAverage time: {2} ms", i, totalTime, sw.Elapsed.TotalMilliseconds / i);
    }
}
}



This should work as a sample, but it fails at rEngine = REngine.CreateInstance("RDotNet") with the error:
A first chance exception of type 'System.DllNotFoundException' occurred in RDotNet.NativeLibrary.dll
Dec 4, 2013 at 5:19 PM
So... After a big headache, I found a way to fix the problem "A first chance exception of type 'System.DllNotFoundException' occurred in RDotNet.NativeLibrary.dll" .

Basically, my Windows is 64-bit and I had installed the 64-bit version of R. - THAT WAS THE PROBLEM!!!

I had to reinstall R with 32-bit support and change my code as shown below:

// Point PATH at the 32-bit (i386) R binaries instead of the 64-bit (x64) ones.
// NOTE(review): presumably the folder has to match the bitness of the calling process — confirm.
// var rBinPath = @"C:\Program Files\R\R-3.0.2\bin\x64;";
          var rBinPath = @"C:\Program Files\R\R-3.0.2\bin\i386;";

I tested it using VB and C# (with support for 32 bits) under Visual Studio 2010 Professional and all worked fine.

Hope it can help!

Lowreno.