Scalable Contains method for LINQ against a SQL backend

I’ve solved this problem with a little different approach a view month ago. Maybe it’s a good solution for you too.

I didn’t want my solution to change the query itself. So a ids.ChunkContains(p.Id) or a special WhereContains method was unfeasible. Also should the solution be able to combine a Contains with another filter as well as using the same collection multiple times.

db.TestEntities.Where(p => (ids.Contains(p.Id) || ids.Contains(p.ParentId)) && p.Name.StartsWith("Test"))

So I tried to encapsulate the logic in a special ToList method that could rewrite the Expression for a specified collection to be queried in chunks.

var ids = Enumerable.Range(1, 11);
var result = db.TestEntities.Where(p => Ids.Contains(p.Id) && p.Name.StartsWith ("Test"))
                                .ToChunkedList(ids,4);

To rewrite the expression tree I discovered all Contains Method calls from local collections in the query with a view helping classes.

private class ContainsExpression
{
    public ContainsExpression(MethodCallExpression methodCall)
    {
        this.MethodCall = methodCall;
    }

    public MethodCallExpression MethodCall { get; private set; }

    public object GetValue()
    {
        var parent = MethodCall.Object ?? MethodCall.Arguments.FirstOrDefault();
        return Expression.Lambda<Func<object>>(parent).Compile()();
    }

    public bool IsLocalList()
    {
        Expression parent = MethodCall.Object ?? MethodCall.Arguments.FirstOrDefault();
        while (parent != null) {
            if (parent is ConstantExpression)
                return true;
            var member = parent as MemberExpression;
            if (member != null) {
                parent = member.Expression;
            } else {
                parent = null;
            }
        }
        return false;
    }
}

private class FindExpressionVisitor<T> : ExpressionVisitor where T : Expression
{
    public List<T> FoundItems { get; private set; }

    public FindExpressionVisitor()
    {
        this.FoundItems = new List<T>();
    }

    public override Expression Visit(Expression node)
    {
        var found = node as T;
        if (found != null) {
            this.FoundItems.Add(found);
        }
        return base.Visit(node);
    }
}

public static List<T> ToChunkedList<T, TValue>(this IQueryable<T> query, IEnumerable<TValue> list, int chunkSize)
{
    var finder = new FindExpressionVisitor<MethodCallExpression>();
    finder.Visit(query.Expression);
    var methodCalls = finder.FoundItems.Where(p => p.Method.Name == "Contains").Select(p => new ContainsExpression(p)).Where(p => p.IsLocalList()).ToList();
    var localLists = methodCalls.Where(p => p.GetValue() == list).ToList();

If the local collection passed in the ToChunkedList method was found in the query expression, I replace the Contains call to the original list with a new call to a temporary list containing the ids for one batch.

if (localLists.Any()) {
    var result = new List<T>();
    var valueList = new List<TValue>();

    var containsMethod = typeof(Enumerable).GetMethods(BindingFlags.Static | BindingFlags.Public)
                        .Single(p => p.Name == "Contains" && p.GetParameters().Count() == 2)
                        .MakeGenericMethod(typeof(TValue));

    var queryExpression = query.Expression;

    foreach (var item in localLists) {
        var parameter = new List<Expression>();
        parameter.Add(Expression.Constant(valueList));
        if (item.MethodCall.Object == null) {
            parameter.AddRange(item.MethodCall.Arguments.Skip(1));
        } else {
            parameter.AddRange(item.MethodCall.Arguments);
        }

        var call = Expression.Call(containsMethod, parameter.ToArray());

        var replacer = new ExpressionReplacer(item.MethodCall,call);

        queryExpression = replacer.Visit(queryExpression);
    }

    var chunkQuery = query.Provider.CreateQuery<T>(queryExpression);


    for (int i = 0; i < Math.Ceiling((decimal)list.Count() / chunkSize); i++) {
        valueList.Clear();
        valueList.AddRange(list.Skip(i * chunkSize).Take(chunkSize));

        result.AddRange(chunkQuery.ToList());
    }
    return result;
}
// if the collection was not found return query.ToList()
return query.ToList();

Expression Replacer:

private class ExpressionReplacer : ExpressionVisitor {

    private Expression find, replace;

    public ExpressionReplacer(Expression find, Expression replace)
    {
        this.find = find;
        this.replace = replace;
    }

    public override Expression Visit(Expression node)
    {
        if (node == this.find)
            return this.replace;

        return base.Visit(node);
    }
}

Leave a Comment