151 lines
4.3 KiB
C++
151 lines
4.3 KiB
C++
/*=========================================================================
|
||
|
||
Program: Visualization Toolkit
|
||
Module: vtkReservoirSampler.h
|
||
|
||
Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
|
||
All rights reserved.
|
||
See Copyright.txt or http://www.kitware.com/Copyright.htm for details.
|
||
|
||
This software is distributed WITHOUT ANY WARRANTY; without even
|
||
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||
PURPOSE. See the above copyright notice for more information.
|
||
|
||
=========================================================================*/
|
||
/**
|
||
* @class vtkReservoirSampler<IntegerType>
|
||
* @brief Generate a monotonic sequence of integers that randomly kk-sample
|
||
* a range without substitution.
|
||
*
|
||
* Given a sequence of size nn, we wish to choose kk random values from the array.
|
||
* This class returns kk (or fewer, if nn < kk) indices in the range [0, nn-1[ that
|
||
* are ordered from smallest to largest.
|
||
*
|
||
* The algorithm is an implementation of Kim-Hung Li's approach, known as
|
||
* "Algorithm L" and documented in the article "Reservoir-Sampling Algorithms of
|
||
* Time Complexity O(n(1+log(N/n)))". ACM Transactions on Mathematical Software.
|
||
* 20 (4): 481–493. doi:10.1145/198429.198435.
|
||
*/
|
||
|
||
#ifndef vtkReservoirSampler_h
|
||
#define vtkReservoirSampler_h
|
||
|
||
#include "vtkAbstractArray.h"
|
||
#include "vtkCommonMathModule.h"
|
||
|
||
#include <algorithm>
|
||
#include <cmath>
|
||
#include <limits>
|
||
#include <random>
|
||
#include <stdexcept>
|
||
|
||
class VTKCOMMONMATH_EXPORT vtkReservoirSamplerBase
|
||
{
|
||
protected:
|
||
using SeedType = typename std::random_device::result_type;
|
||
|
||
static SeedType RandomSeed();
|
||
};
|
||
|
||
template <typename Integer, bool Monotonic = true>
|
||
class vtkReservoirSampler : public vtkReservoirSamplerBase
|
||
{
|
||
public:
|
||
/// Choose kk items from a sequence of (0, nn - 1).
|
||
///
|
||
/// This will throw an exception if kk <= 0.
|
||
const std::vector<Integer>& operator()(Integer kk, Integer nn) const
|
||
{
|
||
VTK_THREAD_LOCAL static std::vector<Integer> data;
|
||
this->GenerateSample(kk, nn, data);
|
||
return data;
|
||
}
|
||
|
||
/// Choose kk items from a sequence of (0, \a array->GetNumberOfTuples() - 1).
|
||
///
|
||
/// This will throw an exception if kk <= 0.
|
||
const std::vector<Integer>& operator()(Integer kk, vtkAbstractArray* array) const
|
||
{
|
||
VTK_THREAD_LOCAL static std::vector<Integer> data;
|
||
if (!array)
|
||
{
|
||
throw std::invalid_argument("Null arrays are disallowed.");
|
||
}
|
||
if (array->GetNumberOfTuples() > std::numeric_limits<Integer>::max())
|
||
{
|
||
throw std::invalid_argument("Array size would overflow integer type.");
|
||
}
|
||
this->GenerateSample(kk, array->GetNumberOfTuples(), data);
|
||
return data;
|
||
}
|
||
|
||
protected:
|
||
void GenerateSample(Integer kk, Integer nn, std::vector<Integer>& data) const
|
||
{
|
||
if (nn < kk)
|
||
{
|
||
kk = nn;
|
||
}
|
||
if (kk < 0)
|
||
{
|
||
throw std::invalid_argument(
|
||
"You must choose a non-negative number of values from a proper sequence.");
|
||
}
|
||
data.resize(kk);
|
||
if (kk == 0)
|
||
{
|
||
return;
|
||
}
|
||
// I. Fill the output with the first kk values.
|
||
Integer ii;
|
||
for (ii = 0; ii < kk; ++ii)
|
||
{
|
||
data[ii] = ii;
|
||
}
|
||
if (kk == nn)
|
||
{
|
||
return;
|
||
}
|
||
|
||
std::mt19937 generator(vtkReservoirSampler::RandomSeed());
|
||
std::uniform_real_distribution<> unitUniform(0., 1.);
|
||
std::uniform_int_distribution<Integer> randomIndex(0, kk - 1);
|
||
double w = exp(log(unitUniform(generator)) / kk);
|
||
|
||
while (true)
|
||
{
|
||
Integer delta = static_cast<Integer>(floor(log(unitUniform(generator)) / log(1.0 - w)) + 1);
|
||
if (delta < 0)
|
||
{
|
||
// If delta overflows the size of the integer, we are done.
|
||
break;
|
||
}
|
||
// Be careful here since delta may be large and nn may be
|
||
// at or near numeric_limits<Integer>::max().
|
||
if (nn - ii > delta)
|
||
{
|
||
Integer jj = randomIndex(generator);
|
||
#if 0
|
||
std::cout << " i " << ii << " δ " << delta << " w " << w << " → j " << jj << "\n";
|
||
#endif
|
||
ii += delta;
|
||
data[jj] = ii;
|
||
w *= exp(log(unitUniform(generator)) / kk);
|
||
}
|
||
else
|
||
{
|
||
// Adding delta to ii would step beyond our sequence size,
|
||
// so we are done.
|
||
break;
|
||
}
|
||
}
|
||
if (Monotonic)
|
||
{
|
||
std::sort(data.begin(), data.end());
|
||
}
|
||
}
|
||
};
|
||
|
||
#endif // vtkReservoirSampler_h
|
||
// VTK-HeaderTest-Exclude: vtkReservoirSampler.h
|