Looking at each combination in jagged array

Here is IMO a good algorithm with a minimum allocations (avoids string concatenations):

public static class Algorithms
{
    public static IEnumerable<string> GetCombinations(this char[][] input)
    {
        var result = new char[input.Length]; 
        var indices = new int[input.Length];
        for (int pos = 0, index = 0; ;)
        {
            for (; pos < input.Length; pos++, index = 0)
            {
                indices[pos] = index;
                result[pos] = input[pos][index];
            }
            yield return new string(result);
            do
            {
                if (pos == 0) yield break;
                index = indices[--pos] + 1;
            }
            while (index >= input[pos].Length);
        }
    }
}

Usage:

char[][] myArray = new char[][]{
    new char[] {'A', 'B'},
    new char[] {'C', 'D'},
    new char[] {'E', 'F'}
};
var combinations = myArray.GetCombinations();

Basically it’s an unrolled implementation of something like this

from c1 in input[0]
from c2 in input[1]
...
from cN in input[N]
select new string(new [] { c1, c2, ..., cN })

P.S If you actually need char[] type result, simply change the signature to

public static IEnumerable<char[]> GetCombinations(this char[][] input)

and remove the new string from the yield.

But note that in such case the consumer of the enumerable should be responsible of making a copy of the combination array if it needs to store it. Yielding shared internal mutating array is bad (evil) from the public API design standpoint, but perfect for internal performance scenarios.

UPDATE: Since the question is about the performance, I’ve made a test to compare the string versions of the above algorithm (A) with the LINQ solution from Enigmativity answer(B). I’ve ran it against different number of 26 letter alphabet combinations from Release built exe outside the VS, and here are the results:

A: N=2 Count=         676 Time=00:00:00.0010139 Memory=     16K
B: N=2 Count=         676 Time=00:00:00.0042598 Memory=    233K

A: N=3 Count=      17,576 Time=00:00:00.0004310 Memory=    348K
B: N=3 Count=      17,576 Time=00:00:00.0126294 Memory=  2,185K

A: N=4 Count=     456,976 Time=00:00:00.0111155 Memory=  1,496K
B: N=4 Count=     456,976 Time=00:00:00.4019500 Memory=  2,104K

A: N=5 Count=  11,881,376 Time=00:00:00.2813208 Memory=  1,995K
B: N=5 Count=  11,881,376 Time=00:00:13.4492150 Memory=  2,014K

A: N=6 Count= 308,915,776 Time=00:00:07.5473890 Memory=  2,059K
B: N=6 Count= 308,915,776 Time=00:07:37.2985051 Memory=    455K

Here is the full test code in case someone is interested:

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;

namespace Samples
{
    public static class Algorithms
    {
        public static IEnumerable<string> GetCombinationsA(this char[][] input)
        {
            var result = new char[input.Length];
            var indices = new int[input.Length];
            for (int pos = 0, index = 0; ;)
            {
                for (; pos < input.Length; pos++, index = 0)
                {
                    indices[pos] = index;
                    result[pos] = input[pos][index];
                }
                yield return new string(result);
                do
                {
                    if (pos == 0) yield break;
                    index = indices[--pos] + 1;
                }
                while (index >= input[pos].Length);
            }
        }

        public static IEnumerable<string> GetCombinationsB(this char[][] input)
        {
            Func<IEnumerable<IEnumerable<char>>, IEnumerable<IEnumerable<char>>> combine = null;
            combine = css =>
                        from c in css.First()
                        from cs in css.Skip(1).Any()
                            ? combine(css.Skip(1))
                            : new[] { Enumerable.Empty<char>() }
                        select new[] { c }.Concat(cs);
            return combine(input).Select(c => String.Join("", c));
        }
    }

    class Program
    {
        class Algorithm
        {
            public string Name;
            public Func<char[][], IEnumerable<string>> Func;
        }

        static void Main(string[] args)
        {
            Thread.CurrentThread.CurrentCulture = CultureInfo.InvariantCulture;
            Algorithm[] algorithms =
            {
                new Algorithm { Name = "A", Func = Algorithms.GetCombinationsA },
                new Algorithm { Name = "B", Func = Algorithms.GetCombinationsB },
            };
            char[][] myArray = { new char[] {'A', 'B'}, new char[] {'C', 'D'}, new char[] {'E', 'F'} };
            foreach (var algo in algorithms)
                algo.Func(myArray);
            var chars = Enumerable.Range('A', 'Z' - 'A' + 1).Select(c => (char)c).ToArray();
            for (int n = 2; n < 7; n++)
            {
                var input = Enumerable.Range(0, n).Select(_ => chars).ToArray();
                foreach (var algo in algorithms)
                    Test(algo, input);
                Console.WriteLine();
            }
            Console.WriteLine("Done.");
            Console.ReadLine();
        }

        static void Test(Algorithm algo, char[][] input)
        {
            GC.Collect(); GC.WaitForPendingFinalizers();
            GC.Collect(); GC.WaitForPendingFinalizers();
            var totalMem = GC.GetTotalMemory(false);
            var timer = Stopwatch.StartNew();
            long count = 0;
            foreach (var comb in algo.Func(input)) count++;
            timer.Stop();
            totalMem = GC.GetTotalMemory(false) - totalMem;
            Console.WriteLine($"{algo.Name}: N={input.Length} Count={count,12:n0} Time={timer.Elapsed} Memory={totalMem / 1024,7:n0}K");
        }
    }
}

Leave a Comment