Find the intersection of two integer arrays

Got this question today (3/15/2011) from the Amazon team. They weren't impressed with my rambling, but I still coded it correctly.

Back to Code Samples

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace CodeSamples.Libraries
{
public static class FindIntersection
{
//=============================================================================================
/// <summary>
/// Function to return the list of numbers that occur in both arrays using
/// an algorithm based on walking the sorted lists once each.
/// </summary>
/// <remarks>
/// Speed Index:
/// Small arrays: 1.00 (2 25 element arrays parsed 1,000,000 times)
/// Large Arrays: 1.00 (2 1,000,000 element arrays parsed once)
///
/// Contrary to discussions I've seen online, this one not only uses less memory but also is
/// faster both with small and large arrays.
/// </remarks>
/// <param name="list1">The first array of numbers.</param>
/// <param name="list2">The second array of numbers.</param>
/// <returns>The list of numbers that occur in both arrays.</returns>
//=============================================================================================
public static IEnumerable<int> UsingSortedLists(int[] list1, int[] list2)
{
List<int> results = new List<int>();

// Short circuit for untestable values.
if (list1 == null || list2 == null || list1.Length == 0 || list2.Length == 0)
{
return results;
}

Array.Sort(list1);
Array.Sort(list2);

int list2Offset = 0;
int i = 0;

while (i < list1.Length && list2Offset < list2.Length)
{
int currentValue = list1[i];

while (list2Offset < list2.Length && list2[list2Offset] < currentValue)
{
list2Offset++;
}

if (list2Offset < list2.Length)
{
if (list2[list2Offset] == currentValue)
{
results.Add(currentValue);
}

do {
i++;
} while (i < list1.Length && list1[i] == currentValue);
}
}

return results;
}

//=============================================================================================
/// <summary>
/// Function to return the list of numbers that occur in both arrays using
/// an algorithm based on fast lookups in the second list using a hash set.
/// </summary>
/// <remarks>
/// Speed Index:
/// Small arrays: 3.27950 (2 25 element arrays parsed 1,000,000 times)
/// Large Arrays: 1.29517 (2 1,000,000 element arrays parsed once)
///
/// This method uses a lot more memory and is slower both with small and large arrays. Don't
/// see a good reason to use it.
/// </remarks>
/// <param name="list1">The first array of numbers.</param>
/// <param name="list2">The second array of numbers.</param>
/// <returns>The list of numbers that occur in both arrays.</returns>
//=============================================================================================
public static IEnumerable<int> UsingHashtable(int[] list1, int[] list2)
{
HashSet<int> results = new HashSet<int>();

// Short circuit for untestable values.
if (list1 == null || list2 == null || list1.Length == 0 || list2.Length == 0)
{
return results;
}

HashSet<int> secondSet = new HashSet<int>(list2);

int i = 0;

while (i < list1.Length)
{
int currentValue = list1[i++];

if (secondSet.Contains(currentValue))
{
results.Add(currentValue);
}
}

return results.ToArray();
}
}
}

Back to Code Samples