/*
 * Decompiled with CFR 0.152.
 */
package aima.learning.reinforcement;

import aima.learning.reinforcement.MDPAgent;
import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPTransition;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.Pair;
import java.util.Hashtable;
import java.util.List;

/**
 * Passive agent that follows a fixed policy and learns from what it observes.
 *
 * <p>On every perception the agent:
 * <ol>
 *   <li>records the perceived reward for a state it has no utility for yet,</li>
 *   <li>counts how often each (state, action) pair and each
 *       (state, action, next state) transition has been observed,</li>
 *   <li>re-estimates every observed transition probability as the ratio of
 *       those two counts and stores it in the agent's own MDP model,</li>
 *   <li>re-computes the utility of the previous state from the updated model.</li>
 * </ol>
 *
 * <p>{@code previousState}, {@code previousAction}, {@code currentState} and
 * {@code mdp} are inherited from {@link MDPAgent}; this class reads
 * {@code currentState} but never assigns it, so it must be updated elsewhere
 * (presumably by the base class before {@link #decideAction} is invoked —
 * verify against {@code MDPAgent}).
 *
 * @param <STATE_TYPE>  type used to represent states of the MDP
 * @param <ACTION_TYPE> type used to represent actions of the MDP
 */
public class PassiveADPAgent<STATE_TYPE, ACTION_TYPE>
extends MDPAgent<STATE_TYPE, ACTION_TYPE> {
    /** Discount factor used for every utility backup. */
    private static final double GAMMA = 1.0;

    /** Fixed policy the agent follows; it is never modified by this class. */
    private final MDPPolicy<STATE_TYPE, ACTION_TYPE> policy;
    /** Current utility estimates; replaced by a fresh copy after each backup. */
    private MDPUtilityFunction<STATE_TYPE> utilityFunction;
    /** Number of times each (state, action) pair has been observed. */
    private final Hashtable<Pair<STATE_TYPE, ACTION_TYPE>, Double> nsa;
    /** Number of times each (state, action, next state) transition has been observed. */
    private final Hashtable<MDPTransition<STATE_TYPE, ACTION_TYPE>, Double> nsasdash;

    /**
     * Creates an agent whose internal model starts as {@code mdp.emptyMdp()}
     * and whose utility function and observation counts start empty.
     *
     * @param mdp    MDP from which the agent's initial (empty) model is derived
     * @param policy fixed policy that selects the action for every state
     */
    public PassiveADPAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp, MDPPolicy<STATE_TYPE, ACTION_TYPE> policy) {
        super(mdp.emptyMdp());
        this.policy = policy;
        this.utilityFunction = new MDPUtilityFunction<STATE_TYPE>();
        this.nsa = new Hashtable<Pair<STATE_TYPE, ACTION_TYPE>, Double>();
        this.nsasdash = new Hashtable<MDPTransition<STATE_TYPE, ACTION_TYPE>, Double>();
    }

    /**
     * Incorporates one perception into the learned model and returns the
     * action the policy prescribes for the current state.
     *
     * @param perception the perceived state together with its reward
     * @return the policy's action for the current state, or {@code null} when
     *         the current state is terminal
     */
    @Override
    public ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception) {
        STATE_TYPE perceivedState = perception.getState();
        if (!this.utilityFunction.hasUtilityFor(perceivedState)) {
            // First visit: seed both the utility estimate and the model's reward.
            this.utilityFunction.setUtility(perceivedState, perception.getReward());
            this.mdp.setReward(perceivedState, perception.getReward());
        }
        if (this.previousState != null) {
            this.recordObservedTransition();
            this.refreshTransitionModel();
            List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions = this.mdp.getTransitionsWith(this.previousState, this.policy.getAction(this.previousState));
            this.utilityFunction = this.valueDetermination(validTransitions, GAMMA);
        }
        if (this.mdp.isTerminalState(this.currentState)) {
            // Clearing the previous step means the next perception is not
            // counted as a transition out of the terminal state.
            this.previousState = null;
            this.previousAction = null;
        } else {
            this.previousState = this.currentState;
            this.previousAction = this.policy.getAction(this.currentState);
        }
        return this.previousAction;
    }

    /** Increments the counts for the (previousState, previousAction) pair and its transition to currentState. */
    private void recordObservedTransition() {
        Pair<STATE_TYPE, ACTION_TYPE> stateAction = new Pair<STATE_TYPE, ACTION_TYPE>(this.previousState, this.previousAction);
        Double pairCount = this.nsa.get(stateAction);
        if (pairCount == null) {
            this.nsa.put(stateAction, 1.0);
        } else {
            this.nsa.put(stateAction, pairCount + 1.0);
        }

        MDPTransition<STATE_TYPE, ACTION_TYPE> observed = new MDPTransition<STATE_TYPE, ACTION_TYPE>(this.previousState, this.previousAction, this.currentState);
        Double transitionCount = this.nsasdash.get(observed);
        if (transitionCount == null) {
            this.nsasdash.put(observed, 1.0);
        } else {
            this.nsasdash.put(observed, transitionCount + 1.0);
        }
    }

    /** Sets every observed transition's probability to its count divided by the count of its (state, action) pair. */
    private void refreshTransitionModel() {
        for (MDPTransition<STATE_TYPE, ACTION_TYPE> transition : this.nsasdash.keySet()) {
            double transitionCount = this.nsasdash.get(transition);
            if (transitionCount == 0.0) {
                continue;
            }
            // Both tables are only ever incremented together, so every
            // transition key has a matching (state, action) count.
            double pairCount = this.nsa.get(new Pair<STATE_TYPE, ACTION_TYPE>(transition.getInitialState(), transition.getAction()));
            this.mdp.setTransitionProbability(transition, transitionCount / pairCount);
        }
    }

    /**
     * Computes an updated copy of the utility function in which the initial
     * state of {@code validTransitions} is set to
     * {@code reward + gamma * sum(P(transition) * U(destination))}.
     *
     * <p>Only that one state is updated, and the sum uses the utilities held
     * before this call. The initial state is taken from the first list
     * element. With an empty list the result is an unmodified copy.
     *
     * @param validTransitions transitions to sum over
     * @param gamma            discount factor applied to the expected utility
     * @return a copy of the current utility function with the one state updated
     */
    private MDPUtilityFunction<STATE_TYPE> valueDetermination(List<MDPTransition<STATE_TYPE, ACTION_TYPE>> validTransitions, double gamma) {
        MDPUtilityFunction<STATE_TYPE> updated = this.utilityFunction.copy();
        if (validTransitions.isEmpty()) {
            return updated;
        }
        STATE_TYPE initState = validTransitions.get(0).getInitialState();
        double reward = this.mdp.getRewardFor(initState);
        double expectedUtility = 0.0;
        for (MDPTransition<STATE_TYPE, ACTION_TYPE> transition : validTransitions) {
            expectedUtility += this.mdp.getTransitionProbability(transition) * this.utilityFunction.getUtility(transition.getDestinationState());
        }
        updated.setUtility(initState, reward + gamma * expectedUtility);
        return updated;
    }

    /**
     * Returns the agent's current utility estimates.
     *
     * @return the live utility function object held by this agent (not a copy)
     */
    public MDPUtilityFunction<STATE_TYPE> getUtilityFunction() {
        return this.utilityFunction;
    }
}

