Skip to content

Commit

Permalink
Simplify RandomSubsetImpl (#568)
Browse files Browse the repository at this point in the history
  • Loading branch information
viceroypenguin authored Nov 20, 2023
1 parent 79347f3 commit 8e22727
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 14 deletions.
17 changes: 8 additions & 9 deletions Source/SuperLinq/RandomSubset.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,43 +40,42 @@ public static IEnumerable<T> RandomSubset<T>(this IEnumerable<T> source, int sub
Guard.IsNotNull(source);
Guard.IsGreaterThanOrEqualTo(subsetSize, 0);

return RandomSubsetImpl(source, rand, seq => (seq.ToArray(), subsetSize));
return RandomSubsetImpl(source, rand, subsetSize);
}

// NOTE(review): this span looks like a rendered diff of RandomSubsetImpl, so several
// statements appear twice: the pre-change line (seeder/array) immediately followed by
// its replacement (subsetSize/list). Confirm against the repository before treating it
// as compilable code.
//
// Copies the source into a buffer, partially shuffles that buffer, and yields the
// first subsetSize elements of it.
//   source     - sequence to draw from; it is copied (ToArray/ToList) and only the
//                copy is reordered.
//   rand       - supplies the random swap indices through rand.Next.
//   seeder     - (array-based signature) returns the buffer and the subset size to use.
//   subsetSize - (list-based signature) number of elements to yield; null falls back
//                to list.Count, i.e. every element is shuffled and yielded.
// Calls ThrowHelper.ThrowArgumentOutOfRangeException when the buffer holds fewer than
// subsetSize elements.
// The body uses yield return, so it runs when the result is enumerated rather than
// when the method is called.
#pragma warning disable MA0050 // arguments validated in both callers
private static IEnumerable<T> RandomSubsetImpl<T>(IEnumerable<T> source, Random rand, Func<IEnumerable<T>, (T[], int)> seeder)
#pragma warning restore MA0050
private static IEnumerable<T> RandomSubsetImpl<T>(IEnumerable<T> source, Random rand, int? subsetSize)
{
// The simplest and most efficient way to return a random subset is to perform
// an in-place, partial Fisher-Yates shuffle of the sequence. While we could do
// a full shuffle, it would be wasteful in the cases where subsetSize is shorter
// than the length of the sequence.
// See: http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle

var (array, subsetSize) = seeder(source);
var list = source.ToList();
// a null subsetSize means "use every element of the buffer"
subsetSize ??= list.Count;

// a subset cannot contain more elements than the buffer holds
if (array.Length < subsetSize)
if (list.Count < subsetSize)
{
ThrowHelper.ThrowArgumentOutOfRangeException(
nameof(subsetSize),
"Subset size must be less than or equal to the source length.");
}

var m = 0; // number of items shuffled so far; also the next slot to fill
var w = array.Length; // upper bound of shrinking swap range
var w = list.Count; // upper bound of shrinking swap range
var g = w - 1; // used to compute the second swap index

// perform in-place, partial Fisher-Yates shuffle
while (m < subsetSize)
{
// rand.Next(w) is in [0, w) and w equals (buffer length - m), so k falls in
// [m, g]: one of the slots that has not been fixed yet (possibly m itself)
var k = g - rand.Next(w);
(array[m], array[k]) = (array[k], array[m]);
(list[m], list[k]) = (list[k], list[m]);
// slot m is now final; advance to the next slot and shrink the swap range
++m;
--w;
}

// yield the random subset as a new sequence
for (var i = 0; i < subsetSize; i++)
yield return array[i];
yield return list[i];
}
}
6 changes: 1 addition & 5 deletions Source/SuperLinq/Shuffle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ public static IEnumerable<T> Shuffle<T>(this IEnumerable<T> source, Random rand)
Guard.IsNotNull(source);
Guard.IsNotNull(rand);

return RandomSubsetImpl(source, rand, seq =>
{
var array = seq.ToArray();
return (array, array.Length);
});
return RandomSubsetImpl(source, rand, null);
}
}

0 comments on commit 8e22727

Please sign in to comment.