/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ignite.ml.dataset.impl.bootstrapping;

import java.util.Arrays;
import java.util.Iterator;
import org.apache.commons.math3.distribution.PoissonDistribution;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.Well19937c;
import org.apache.ignite.ml.dataset.PartitionDataBuilder;
import org.apache.ignite.ml.dataset.UpstreamEntry;
import org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedDatasetPartition;
import org.apache.ignite.ml.dataset.impl.bootstrapping.BootstrappedVector;
import org.apache.ignite.ml.dataset.primitive.context.EmptyContext;
import org.apache.ignite.ml.environment.LearningEnvironment;
import org.apache.ignite.ml.math.primitives.vector.Vector;
import org.apache.ignite.ml.preprocessing.Preprocessor;
import org.apache.ignite.ml.structures.LabeledVector;

/**
 * Partition data builder that turns the upstream rows of one partition into a
 * {@link BootstrappedDatasetPartition}.
 *
 * <p>Every upstream row is passed through the configured preprocessor and wrapped into a
 * {@link BootstrappedVector} that carries, besides features and label, one repetition counter per
 * sample. Each counter is drawn from a Poisson distribution parameterized by
 * {@code subsampleSize}.
 *
 * @param <K> Type of a key in the upstream data.
 * @param <V> Type of a value in the upstream data.
 */
public class BootstrappedDatasetBuilder<K, V>
implements PartitionDataBuilder<K, V, EmptyContext, BootstrappedDatasetPartition> {
    /** Serial version uid. */
    private static final long serialVersionUID = 8146220902914010559L;

    /** Converts an upstream (key, value) pair into a labeled vector. */
    private final Preprocessor<K, V> preprocessor;

    /** Number of repetition counters generated for every row. */
    private final int samplesCnt;

    /** Parameter (mean) handed to the Poisson distribution the counters are drawn from. */
    private final double subsampleSize;

    /**
     * Creates a builder.
     *
     * @param preprocessor Preprocessor applied to every upstream entry.
     * @param samplesCnt Length of the repetition-counter array created for every row.
     * @param subsampleSize Parameter passed as the mean of the Poisson distribution.
     */
    public BootstrappedDatasetBuilder(Preprocessor<K, V> preprocessor, int samplesCnt, double subsampleSize) {
        this.preprocessor = preprocessor;
        this.samplesCnt = samplesCnt;
        this.subsampleSize = subsampleSize;
    }

    /**
     * Builds the bootstrapped partition from the upstream rows.
     *
     * <p>The result array is allocated with exactly {@code upstreamDataSize} slots and filled in
     * iteration order. If the iterator yields fewer rows, the trailing slots stay {@code null};
     * if it yields more, an {@link ArrayIndexOutOfBoundsException} is thrown.
     *
     * @param env Learning environment; its random number generator supplies the seed of the sampler.
     * @param upstreamData Iterator over the upstream entries of this partition.
     * @param upstreamDataSize Number of rows used to size the resulting array.
     * @param ctx Partition context (not used by this builder).
     * @return Partition wrapping one {@link BootstrappedVector} per upstream row.
     * @throws ArithmeticException If {@code upstreamDataSize} does not fit into an {@code int}.
     */
    @Override
    public BootstrappedDatasetPartition build(LearningEnvironment env, Iterator<UpstreamEntry<K, V>> upstreamData, long upstreamDataSize, EmptyContext ctx) {
        BootstrappedVector[] dataset = new BootstrappedVector[Math.toIntExact(upstreamDataSize)];

        // One sampler per build call, seeded from the environment's generator. The library's
        // named defaults replace the equivalent literals 1.0E-12 and 10000000.
        final PoissonDistribution poissonDistribution = new PoissonDistribution(
            new Well19937c(env.randomNumbersGenerator().nextLong()),
            this.subsampleSize,
            PoissonDistribution.DEFAULT_EPSILON,
            PoissonDistribution.DEFAULT_MAX_ITERATIONS);

        int cntr = 0;
        while (upstreamData.hasNext()) {
            UpstreamEntry<K, V> nextRow = upstreamData.next();

            // The label is consumed as a Double below, so the vector is typed accordingly; the
            // conversion is unchecked and a non-Double label still fails with ClassCastException.
            @SuppressWarnings("unchecked")
            LabeledVector<Double> vecAndLb =
                (LabeledVector<Double>)this.preprocessor.apply(nextRow.getKey(), nextRow.getValue());

            Vector features = vecAndLb.features();
            Double lb = vecAndLb.label();

            // Independent Poisson draw for each of the samplesCnt samples.
            int[] repetitionCounters = new int[this.samplesCnt];
            Arrays.setAll(repetitionCounters, i -> poissonDistribution.sample());

            dataset[cntr++] = new BootstrappedVector(features, lb, repetitionCounters);
        }

        return new BootstrappedDatasetPartition(dataset);
    }
}

