Sunday, August 21, 2016

Dynamically Invoke Methods Quickly, with InvokeHelpers.EfficientInvoke

In my previous blog post, I talked about Optimizing Dynamic Method Invokes in .NET. In this post, we will use that information to create a static helper method that is twice as fast as MethodInfo.Invoke.

Basically, we create and cache a delegate in a concurrent dictionary, and then cast both it and its arguments to dynamics and invoke them directly. The concurrent dictionary introduces overhead, but it is still more than twice as fast as calling MethodInfo.Invoke. Please note that this method is highly optimized to reduce the use of hash code lookups, property getters, closure allocations, and if checks.

Let's take a look at the code...

InvokeHelpers.EfficientInvoke

/// <summary>
/// Invokes methods by name through cached, compiled delegates instead of
/// <c>MethodInfo.Invoke</c>.
/// </summary>
/// <remarks>
/// Delegates are cached per (runtime type, method name) and receive the target
/// instance as their first argument. Caching per type rather than per instance
/// means the cache never keeps target objects alive, and two distinct targets
/// that happen to compare equal can never be dispatched to each other.
/// </remarks>
public static class InvokeHelpers
{
    private const string TooManyArgsMessage = "Invokes for more than 10 args are not yet implemented";

    private static readonly Type VoidType = typeof(void);

    private static readonly ConcurrentDictionary<Tuple<Type, string>, DelegatePair> DelegateMap
        = new ConcurrentDictionary<Tuple<Type, string>, DelegatePair>();

    // Held in a field so the hot path does not allocate a new factory delegate on every call.
    private static readonly Func<Tuple<Type, string>, DelegatePair> DelegateFactory = CreateDelegate;

    /// <summary>
    /// Invokes the public method named <paramref name="methodName"/> on <paramref name="obj"/>.
    /// </summary>
    /// <param name="obj">The target instance; its runtime type is used to resolve the method.</param>
    /// <param name="methodName">The name of the public method to invoke.</param>
    /// <param name="args">The arguments, in declaration order. Surplus trailing entries are ignored.</param>
    /// <returns>The method's return value, or <c>null</c> when the method returns <c>void</c>.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="obj"/> or <paramref name="methodName"/> is <c>null</c>.
    /// </exception>
    /// <exception cref="ArgumentException">Fewer arguments were supplied than the method declares.</exception>
    /// <exception cref="MissingMethodException">No public method with that name exists on the target's type.</exception>
    /// <exception cref="NotImplementedException">The method declares more than 10 parameters.</exception>
    public static object EfficientInvoke(object obj, string methodName, params object[] args)
    {
        if (obj == null)
        {
            throw new ArgumentNullException("obj");
        }

        if (methodName == null)
        {
            throw new ArgumentNullException("methodName");
        }

        var key = Tuple.Create(obj.GetType(), methodName);
        var delPair = DelegateMap.GetOrAdd(key, DelegateFactory);
        var del = delPair.Delegate;
        var argCount = delPair.ArgumentCount;

        if (argCount > 0 && (args == null || args.Length < argCount))
        {
            throw new ArgumentException(
                "Method '" + methodName + "' expects " + argCount + " argument(s), but fewer were supplied.",
                "args");
        }

        // Casting each argument to dynamic lets the runtime binder apply the same
        // implicit conversions a direct, statically typed call would get.
        if (delPair.HasReturnValue)
        {
            switch (argCount)
            {
                case 0: return del(obj);
                case 1: return del(obj, (dynamic)args[0]);
                case 2: return del(obj, (dynamic)args[0], (dynamic)args[1]);
                case 3: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2]);
                case 4: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3]);
                case 5: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4]);
                case 6: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5]);
                case 7: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6]);
                case 8: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7]);
                case 9: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7], (dynamic)args[8]);
                case 10: return del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7], (dynamic)args[8], (dynamic)args[9]);
                default: throw new NotImplementedException(TooManyArgsMessage);
            }
        }

        // A void delegate must be invoked as a statement; using its result would
        // fail at bind time, hence the separate switch.
        switch (argCount)
        {
            case 0: del(obj); break;
            case 1: del(obj, (dynamic)args[0]); break;
            case 2: del(obj, (dynamic)args[0], (dynamic)args[1]); break;
            case 3: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2]); break;
            case 4: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3]); break;
            case 5: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4]); break;
            case 6: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5]); break;
            case 7: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6]); break;
            case 8: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7]); break;
            case 9: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7], (dynamic)args[8]); break;
            case 10: del(obj, (dynamic)args[0], (dynamic)args[1], (dynamic)args[2], (dynamic)args[3], (dynamic)args[4], (dynamic)args[5], (dynamic)args[6], (dynamic)args[7], (dynamic)args[8], (dynamic)args[9]); break;
            default: throw new NotImplementedException(TooManyArgsMessage);
        }

        return null;
    }

    /// <summary>
    /// Builds the cached entry for one (type, method name) pair: a compiled delegate of
    /// shape <c>(object target, P1 a1, ..., Pn an) => ((T)target).Method(a1, ..., an)</c>.
    /// </summary>
    /// <param name="key">The target's runtime type and the method name.</param>
    /// <returns>The compiled delegate together with its parameter count and return-value flag.</returns>
    /// <exception cref="MissingMethodException">The type has no public method with that name.</exception>
    private static DelegatePair CreateDelegate(Tuple<Type, string> key)
    {
        var targetType = key.Item1;
        var method = targetType.GetMethod(key.Item2);

        if (method == null)
        {
            throw new MissingMethodException(targetType.FullName, key.Item2);
        }

        var parameters = method.GetParameters();

        // The target is typed as object so the delegate type stays usable by the
        // runtime binder even when the declaring type is not public.
        var targetParam = Expression.Parameter(typeof(object), "target");
        var argParams = parameters
            .Select(p => Expression.Parameter(p.ParameterType, p.Name))
            .ToArray();

        // Static methods take no instance; the target parameter is simply ignored.
        var instance = method.IsStatic
            ? null
            : Expression.Convert(targetParam, method.DeclaringType);

        var call = Expression.Call(instance, method, argParams);

        var signature = new[] { typeof(object) }
            .Concat(parameters.Select(p => p.ParameterType))
            .Concat(new[] { method.ReturnType })
            .ToArray();

        var lambdaParams = new[] { targetParam }
            .Concat(argParams)
            .ToArray();

        var newDelType = Expression.GetDelegateType(signature);
        var newDel = Expression.Lambda(newDelType, call, lambdaParams).Compile();

        return new DelegatePair(newDel, parameters.Length, method.ReturnType != VoidType);
    }

    /// <summary>
    /// Immutable cache entry: the compiled delegate plus the facts needed to pick
    /// the right invocation shape without further reflection.
    /// </summary>
    private class DelegatePair
    {
        public DelegatePair(dynamic del, int argumentCount, bool hasReturnValue)
        {
            Delegate = del;
            ArgumentCount = argumentCount;
            HasReturnValue = hasReturnValue;
        }

        // Stored as dynamic so the call sites above bind against the concrete delegate type at run time.
        public readonly dynamic Delegate;

        // Number of method parameters, excluding the leading target argument.
        public readonly int ArgumentCount;

        // False when the method returns void.
        public readonly bool HasReturnValue;
    }
}

Now let's take a look at some performance tests...

Unit Tests

/// <summary>
/// Micro-benchmarks comparing several ways of invoking <c>TestClass.TestMethod</c> by name.
/// Each test times one warm-up call and then <see cref="Iterations"/> calls, writes both
/// tick counts to the test output, and verifies the warm-up call's result.
/// </summary>
public class InvokeHelpersTests
{
    public const int Iterations = 1000000;

    // TestMethod(1, true) == 1 + 1; see Args and TestClass below.
    private const int ExpectedResult = 2;

    private static readonly TestClass Obj = new TestClass();
    private static readonly object[] Args = { 1, true };

    private readonly ITestOutputHelper _output;

    public InvokeHelpersTests(ITestOutputHelper output)
    {
        _output = output;
    }

    /// <summary>
    /// This is not realistic, because our million invokes do not simulate
    /// the cost of looking up the MethodInfo. However, for the sake of
    /// argument, this takes 930,136 ticks for one million iterations.
    /// </summary>
    [Fact]
    public void MethodInfoInvoke()
    {
        var methodInfo = Obj.GetType().GetMethod("TestMethod");

        var sw0 = Stopwatch.StartNew();
        var result = methodInfo.Invoke(Obj, Args);
        sw0.Stop();

        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < Iterations; i++)
        {
            methodInfo.Invoke(Obj, Args);
        }
        sw1.Stop();

        // Without this check the benchmark would pass even if the call returned garbage.
        Assert.Equal(ExpectedResult, (int)result);
        WriteTimings(sw0, sw1);
    }

    /// <summary>
    /// This use case is more realistic, where we look up the MethodInfo
    /// each iteration. This takes 1,370,052 ticks for one million iterations.
    /// </summary>
    [Fact]
    public void GetMethodInfoInvoke()
    {
        var sw0 = Stopwatch.StartNew();
        var result = Obj.GetType().GetMethod("TestMethod").Invoke(Obj, Args);
        sw0.Stop();

        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < Iterations; i++)
        {
            Obj.GetType().GetMethod("TestMethod").Invoke(Obj, Args);
        }
        sw1.Stop();

        Assert.Equal(ExpectedResult, (int)result);
        WriteTimings(sw0, sw1);
    }

    /// <summary>
    /// This is an apples to apples comparison of using a ConcurrentDictionary to cache
    /// the MethodInfo lookup. This takes 1,300,751 ticks for one million iterations.
    /// </summary>
    [Fact]
    public void MappedMethodInfoInvoke()
    {
        var sw0 = Stopwatch.StartNew();
        var result = MappedMethodInvokeHelpers.EfficientInvoke(Obj, "TestMethod", Args);
        sw0.Stop();

        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < Iterations; i++)
        {
            MappedMethodInvokeHelpers.EfficientInvoke(Obj, "TestMethod", Args);
        }
        sw1.Stop();

        Assert.Equal(ExpectedResult, (int)result);
        WriteTimings(sw0, sw1);
    }

    /// <summary>
    /// Here is our custom optimized solution, which beats ALL of the previous iterations by
    /// more than a factor of 2! This only takes 467,158 ticks for one million iterations.
    /// </summary>
    [Fact]
    public void EfficientInvoke()
    {
        var sw0 = Stopwatch.StartNew();
        var result = InvokeHelpers.EfficientInvoke(Obj, "TestMethod", Args);
        sw0.Stop();

        var sw1 = Stopwatch.StartNew();
        for (var i = 0; i < Iterations; i++)
        {
            InvokeHelpers.EfficientInvoke(Obj, "TestMethod", Args);
        }
        sw1.Stop();

        Assert.Equal(ExpectedResult, (int)result);
        WriteTimings(sw0, sw1);
    }

    /// <summary>
    /// Writes the warm-up tick count followed by the timed-loop tick count, one per line.
    /// </summary>
    private void WriteTimings(Stopwatch warmUp, Stopwatch timedLoop)
    {
        _output.WriteLine(warmUp.ElapsedTicks.ToString());
        _output.WriteLine(timedLoop.ElapsedTicks.ToString());
    }

    /// <summary>
    /// Minimal invocation target with one two-parameter method that returns a value.
    /// </summary>
    public class TestClass
    {
        /// <summary>Returns <paramref name="i"/> plus 1 when <paramref name="b"/> is true, otherwise plus 2.</summary>
        public int TestMethod(int i, bool b)
        {
            return i + (b ? 1 : 2);
        }
    }

    /// <summary>
    /// Baseline helper that caches only the <see cref="MethodInfo"/> lookup in a
    /// ConcurrentDictionary and still invokes through <c>MethodInfo.Invoke</c>.
    /// </summary>
    public static class MappedMethodInvokeHelpers
    {
        private static readonly ConcurrentDictionary<Tuple<string, object>, MethodInfo> DelegateMap
            = new ConcurrentDictionary<Tuple<string, object>, MethodInfo>();

        /// <summary>
        /// Looks up (or resolves and caches) the named method for <paramref name="obj"/>
        /// and invokes it with <paramref name="args"/> via reflection.
        /// </summary>
        public static object EfficientInvoke(object obj, string methodName, params object[] args)
        {
            var key = Tuple.Create(methodName, obj);
            var methodInfo = DelegateMap.GetOrAdd(key, GetMethodInfo);
            return methodInfo.Invoke(obj, args);
        }

        /// <summary>Resolves the method named by the key on the key object's runtime type.</summary>
        private static MethodInfo GetMethodInfo(Tuple<string, object> key)
        {
            return key.Item2.GetType().GetMethod(key.Item1);
        }
    }
}

Enjoy,
Tom

1 comment:

Real Time Web Analytics