Tuesday, December 2, 2014

The normal / traditional resolution for reservoir sampling proves the 1/n probability for picking a random number. Though, the random is largely biased in the sense much less variations since the only time the number changed is the probability matches the count. Given the constraint with only one number allowed for caching, I added line to toggle the cached values (improved the toggling rate for cached value). This logic applied after deciding the return value. It's a separate logic: I got a new value, do I keep it, or toggle with cached one?

Try the run and compare the result.


import java.util.ArrayList;
import java.util.List;
import java.util.Random;

public class ReservoirSampling {

    public static void main(String[] args) {
        List lst = new ArrayList();
        List lstToggled = new ArrayList();
        ReservoirSampling r = new ReservoirSampling();
        ReservoirSampling rt = new ReservoirSampling();
        for (int i = 1; i <= 100; i++) {
            lst.add(r.run(i));
            lstToggled.add(rt.runToggled(i));
        }
        System.out.println(lst);
        System.out.println(lstToggled);
    }

    public ReservoirSampling() {
        _rdm = new Random(System.currentTimeMillis());
    }

    public int run(int number) {
        _count++;

        if (_count == 1) {
            _lastNumber = number;
            return _lastNumber;
        }

        int n = _rdm.nextInt(_count + 1);
        if (n == _count) {
            _lastNumber = number;
        }

        return _lastNumber;
    }

    public int runToggled(int number) {
        _count++;

        if (_count == 1) {
            _lastNumber = number;
            return number;
        }

        int n = _rdm.nextInt(_count + 1);
        if (n == _count) {
            _lastNumber = number;
            return number;
        }

        int result = _lastNumber;
        if (n % 2 == 0) // toggled
            _lastNumber = number;

        return result;
    }

    private int _count;
    private int _lastNumber; // previous random number cached
    private Random _rdm;
}