# State labels.
S0 = "S0"
S1 = "S1"
S2 = "S2"

# Action labels.
BLUE = "blue"
RED = "red"

# Discount factor read by POLICY_EVALUATION; change it here to experiment.
gamma = 0.5

# Transition model P(s' | s, a), keyed as (destination, start, action).
# For every (start, action) pair the probabilities over the three
# destinations add up to 1; from S2 both actions stay in S2.
P_destination_start_action = {
    # Destination S0.
    (S0, S0, BLUE): 0.5,
    (S0, S0, RED): 0.9,
    (S0, S1, BLUE): 0.8,
    (S0, S1, RED): 0,
    (S0, S2, BLUE): 0,
    (S0, S2, RED): 0,
    # Destination S1.
    (S1, S0, BLUE): 0.5,
    (S1, S0, RED): 0,
    (S1, S1, BLUE): 0.2,
    (S1, S1, RED): 0.6,
    (S1, S2, BLUE): 0,
    (S1, S2, RED): 0,
    # Destination S2.
    (S2, S0, BLUE): 0,
    (S2, S0, RED): 0.1,
    (S2, S1, BLUE): 0,
    (S2, S1, RED): 0.4,
    (S2, S2, BLUE): 1,
    (S2, S2, RED): 1,
}
class MDP:
    """Container for a finite Markov decision process.

    Attributes:
        states: list of state labels.
        actions: list of action labels.
        P_dest_start_action: dict mapping ``(destination, start, action)``
            to the probability of reaching ``destination`` from ``start``
            under ``action``.
        rewards: dict mapping each state to its reward.
    """

    def __init__(self, states=None, actions=None, transitions=None, rewards=None):
        """Build the MDP, defaulting to the three-state problem of this file.

        Every argument is optional; an omitted argument (``None``) takes the
        value the constructor originally hard-coded, so ``MDP()`` behaves
        exactly as before.

        Args:
            states: iterable of state labels; defaults to ``[S0, S1, S2]``.
            actions: iterable of action labels; defaults to ``[BLUE, RED]``.
            transitions: transition table keyed by
                ``(destination, start, action)``; defaults to the
                module-level ``P_destination_start_action``.
            rewards: dict of per-state rewards; defaults to
                ``{S0: -2, S1: -5, S2: 0}``.
        """
        self.states = [S0, S1, S2] if states is None else list(states)
        self.actions = [BLUE, RED] if actions is None else list(actions)
        # The table is stored by reference (not copied), matching the
        # original behaviour of sharing the module-level dict.
        self.P_dest_start_action = (
            P_destination_start_action if transitions is None else transitions
        )
        self.rewards = {S0: -2, S1: -5, S2: 0} if rewards is None else rewards
def POLICY_EVALUATION(policy_vec, utility_vec, mdp, discount=None):
    """Apply one fixed-policy Bellman backup to every state.

    For each state ``s`` the new utility is
    ``R(s) + discount * sum_{s'} P(s' | s, policy(s)) * U(s')``.
    This is a single sweep over the states, not an exact solve: all new
    values are computed from the ``utility_vec`` that was passed in.

    Args:
        policy_vec: dict mapping each state to the action the policy takes.
        utility_vec: dict mapping each state to its current utility estimate.
            It is read only; the result is returned in a fresh dict.
        mdp: object exposing ``states``, ``rewards`` and
            ``P_dest_start_action`` (keyed by ``(destination, start, action)``).
        discount: discount factor to use. ``None`` (the default) falls back
            to the module-level ``gamma``; an explicit ``0`` is honoured.

    Returns:
        A new dict mapping each state to its updated utility.
    """
    if discount is None:
        discount = gamma

    new_utility_vector = {}
    for s in mdp.states:
        chosen_action = policy_vec[s]
        expected_next_utility = sum(
            mdp.P_dest_start_action[(s_tag, s, chosen_action)] * utility_vec[s_tag]
            for s_tag in mdp.states
        )
        new_utility_vector[s] = mdp.rewards[s] + discount * expected_next_utility
    return new_utility_vector
def _expected_utility(mdp, state, action, utility_vector):
    """Return sum over s' of P(s' | state, action) * U(s')."""
    return sum(
        mdp.P_dest_start_action[(s_tag, state, action)] * utility_vector[s_tag]
        for s_tag in mdp.states
    )


def POLICY_ITERATION(mdp, initial_policy=None):
    """Alternate policy evaluation and greedy improvement until stable.

    Each round calls ``POLICY_EVALUATION`` once on the current policy
    (warm-started from the previous utilities), then, for every state,
    switches to another action from ``mdp.actions`` only if its expected
    next-state utility is strictly greater than that of the current action.
    The loop ends after the first round in which no state changes action.

    Args:
        mdp: object exposing ``states``, ``actions``, ``rewards`` and
            ``P_dest_start_action`` (keyed by ``(destination, start, action)``).
        initial_policy: optional dict mapping each state to its starting
            action. It is copied, never mutated. When omitted, the original
            hard-coded start ``{S0: BLUE, S1: RED, S2: RED}`` is used.

    Returns:
        dict mapping each state to the action of the final policy.
    """
    utility_vector = {state: 0 for state in mdp.states}
    if initial_policy is None:
        policy_vector = {S0: BLUE, S1: RED, S2: RED}
    else:
        policy_vector = dict(initial_policy)

    unchanged = False
    while not unchanged:
        utility_vector = POLICY_EVALUATION(policy_vector, utility_vector, mdp)
        unchanged = True
        for s in mdp.states:
            best_action = policy_vector[s]
            best_value = _expected_utility(mdp, s, best_action, utility_vector)
            # Strict '>' keeps the current action on ties, so a tie can
            # never flip the policy and keep the loop running.
            for candidate in mdp.actions:
                value = _expected_utility(mdp, s, candidate, utility_vector)
                if value > best_value:
                    best_action, best_value = candidate, value
            if best_action != policy_vector[s]:
                policy_vector[s] = best_action
                unchanged = False

    return policy_vector
if __name__ == "__main__":
    # Solve the MDP defined in this file and report the actions chosen
    # for S0 and S1.
    solved_policy = POLICY_ITERATION(MDP())
    print("===========================END===============================")
    print(
        "S_O policy =",
        solved_policy[S0],
        " ,S_1 Policy =",
        solved_policy[S1],
    )
UzAgPSAiUzAiClMxID0gIlMxIgpTMiA9ICJTMiIKQkxVRSA9ICJibHVlIgpSRUQgPSAicmVkIgpnYW1tYSA9IDAuNSAgIyBUT0RPIE1PRElGWSBHQU1NQSBIRVJFCgojIFAocyd8cyxhKQpQX2Rlc3RpbmF0aW9uX3N0YXJ0X2FjdGlvbiA9IFwKeyhTMCxTMCwgQkxVRSk6MC41LChTMCxTMCxSRUQpOjAuOSwgKFMwLFMxLEJMVUUpOjAuOCwoUzAsUzEsUkVEKTowLCAgIChTMCxTMiwgQkxVRSk6MCwoUzAsUzIsUkVEKTowLAooUzEsUzAsIEJMVUUpOjAuNSwoUzEsUzAsUkVEKTowLCAgIChTMSxTMSxCTFVFKTowLjIsKFMxLFMxLFJFRCk6MC42LCAoUzEsUzIsIEJMVUUpOjAsKFMxLFMyLFJFRCk6MCwKKFMyLFMwLCBCTFVFKTowLCAgKFMyLFMwLFJFRCk6MC4xLCAoUzIsUzEsQkxVRSk6MCAgLChTMixTMSxSRUQpOjAuNCwgKFMyLFMyLCBCTFVFKToxLChTMixTMixSRUQpOjF9CgpjbGFzcyBNRFA6CiAgICBkZWYgX19pbml0X18oc2VsZik6CiAgICAgICAgc2VsZi5zdGF0ZXMgPSBbUzAsIFMxLCBTMl0KICAgICAgICBzZWxmLmFjdGlvbnMgPSBbQkxVRSwgUkVEXQoKCiAgICAgICAgc2VsZi5QX2Rlc3Rfc3RhcnRfYWN0aW9uID0gUF9kZXN0aW5hdGlvbl9zdGFydF9hY3Rpb24KICAgICAgICBzZWxmLnJld2FyZHMgPSB7UzA6IC0yLCBTMTogLTUsIFMyOiAwfQoKZGVmIFBPTElDWV9FVkFMVUFUSU9OKHBvbGljeV92ZWMsIHV0aWxpdHlfdmVjLCBtZHApOgogICAgbmV3X3V0aWxpdHlfdmVjdG9yID0ge30KICAgIGZvciBzIGluIG1kcC5zdGF0ZXM6CiAgICAgICAgdG9fc3VtID0gWyhtZHAuUF9kZXN0X3N0YXJ0X2FjdGlvblsoc190YWcsIHMsIHBvbGljeV92ZWNbc10pXSAqIHV0aWxpdHlfdmVjW3NfdGFnXSkKICAgICAgICAgICAgICAgICAgZm9yIHNfdGFnIGluIG1kcC5zdGF0ZXNdCiAgICAgICAgbmV3X3V0aWxpdHlfdmVjdG9yW3NdID0gbWRwLnJld2FyZHNbc10gKyBnYW1tYSAqIHN1bSh0b19zdW0pCiAgICByZXR1cm4gbmV3X3V0aWxpdHlfdmVjdG9yCgpkZWYgUE9MSUNZX0lURVJBVElPTihtZHApOgogICAgdXRpbGl0eV92ZWN0b3IgPSB7c3RhdGU6IDAgZm9yIHN0YXRlIGluIG1kcC5zdGF0ZXN9CiAgICBwb2xpY3lfdmVjdG9yID0ge1MwOiBCTFVFLCBTMTogUkVELCBTMjogUkVEfQogICAgdW5jaGFuZ2VkID0gRmFsc2UKCiAgICB3aGlsZSBub3QgdW5jaGFuZ2VkOgogICAgICAgIHV0aWxpdHlfdmVjdG9yID0gUE9MSUNZX0VWQUxVQVRJT04ocG9saWN5X3ZlY3RvciwgdXRpbGl0eV92ZWN0b3IsIG1kcCkKICAgICAgICB1bmNoYW5nZWQgPSBUcnVlCiAgICAgICAgZm9yIHMgaW4gbWRwLnN0YXRlczoKICAgICAgICAgICAgQkxVRV9zdW0gPSBzdW0oWyhtZHAuUF9kZXN0X3N0YXJ0X2FjdGlvblsoc190YWcsIHMsIEJMVUUpXSAqIHV0aWxpdHlfdmVjdG9yW3NfdGFnXSkKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGZvciBzX3RhZyBpbiBtZHAuc3RhdGVzXSkKICAgICAgICAgICAgUkVEX3N1bSA9IHN1bShbKG1kcC5QX2Rlc3Rf
c3RhcnRfYWN0aW9uWyhzX3RhZywgcywgUkVEKV0gKiB1dGlsaXR5X3ZlY3RvcltzX3RhZ10pCiAgICAgICAgICAgICAgICAgICAgICAgICAgIGZvciBzX3RhZyBpbiBtZHAuc3RhdGVzXSkKICAgICAgICAgICAgaWYgcG9saWN5X3ZlY3RvcltzXSA9PSBSRUQgYW5kIEJMVUVfc3VtID4gUkVEX3N1bToKICAgICAgICAgICAgICAgIHBvbGljeV92ZWN0b3Jbc10gPSBCTFVFCiAgICAgICAgICAgICAgICB1bmNoYW5nZWQgPSBGYWxzZQoKICAgICAgICAgICAgZWxpZiBwb2xpY3lfdmVjdG9yW3NdID09IEJMVUUgYW5kIFJFRF9zdW0gPiBCTFVFX3N1bToKICAgICAgICAgICAgICAgIHBvbGljeV92ZWN0b3Jbc10gPSBSRUQKICAgICAgICAgICAgICAgIHVuY2hhbmdlZCA9IEZhbHNlCgogICAgcmV0dXJuIHBvbGljeV92ZWN0b3IKCmlmIF9fbmFtZV9fID09ICJfX21haW5fXyI6CiAgICBRMl9tZHAgPSBNRFAoKQogICAgbmV3X3BvbGljeV92ZWMgPSBQT0xJQ1lfSVRFUkFUSU9OKFEyX21kcCkKICAgIHByaW50KCI9PT09PT09PT09PT09PT09PT09PT09PT09PT1FTkQ9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09IikKICAgIHByaW50KCJTX08gcG9saWN5ID0iLCBuZXdfcG9saWN5X3ZlY1tTMF0sICIgLFNfMSBQb2xpY3kgPSIsIG5ld19wb2xpY3lfdmVjW1MxXSkKCg==