This program has been disqualified.

| Field           | Value                      |
| --------------- | -------------------------- |
| Author          | jlubea                     |
| Submission date | 2019-11-13 21:03:00.685142 |
| Rating          | 6699                       |
| Matches played  | 23                         |
| Win rate        | 73.91%                     |
# Hybrid of two strong players with a first attempt at a Markov strategy switcher
#
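# Assumes an rpscontest-style harness: the judge exposes a global `input`
# holding the opponent's previous move ("" on the first round) and reads this
# bot's move back from the global `output` after each round's execution.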
if input == "":
import random
import operator
from operator import add
from operator import truediv
from collections import defaultdict
rps = ['R', 'P', 'S']
    # leftover tables from the source bots; these four are not referenced below
    fondness = {'R':[0,-1,0], 'P':[0,0,-1], 'S':[-1,0,0]}
    regrets = {'R':[0,0,1], 'P':[1,0,0], 'S':[0,1,0]}
    beats = {'R':'P', 'P':'S', 'S':'R'}
    values = { 'R': 0, 'P': 1, 'S': 2 }
output = random.choice(rps)
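    # MarkovPredictor: an ensemble of Markov models of orders 0..max_markov_order
    # over the joint (my move, opponent move) history. Each order contributes six
    # experts (predictions from my counts and from the opponent's, each with two
    # rotations); experts are scored with an exponentially decayed payoff and the
    # best-scoring one plays.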
class MarkovPredictor:
def __init__(self, max_markov_order):
self.debug = 0
self.USE_RANDOM = 1
self.USE_MARKOW = 1
self.LAST_ROUND = 1000
self.ROUND = 1
self.history = []
self.moves = ["R","P","S"]
self.beatedBy = {"R":"P", "P":"S", "S":"R"}
self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}
self.alpha = 0.01
self.M = 0
if self.USE_RANDOM == 1:
self.M += 1
if self.USE_MARKOW == 1:
self.markov_orders = []
for m in range(0, max_markov_order + 1):
self.markov_orders.append(m)
self.historyCount = {}
self.M += 6 * len(self.markov_orders)
self.weight = [1] * self.M
self.decay = [0.85] * self.M
self.score = [0] * self.M
self.selected = [0] * self.M
self.move = [random.choice(self.moves) for i in range(self.M)]
self.last = output
def selectBest(self, s):
return random.choice([i for i in range(len(s)) if max(s) == s[i]])
def selectBestDict(self, s):
            # expected payoff of playing i: count of the move i beats minus count of the move that beats i
            ew = {i: s[self.beatedBy[self.beatedBy[i]]] - s[self.beatedBy[i]] for i in s.keys()}
            return random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])
def learn(self, output, input):
if not input:
return
self.ROUND += 1
self.history += [(self.last,input)]
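            # decayed expert scores: reward or punish each expert's last
            # prediction, and nudge its weight by alpha in the same direction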
self.score = [ self.decay[i] * self.score[i] + self.weight[i] * self.result[self.move[i]][input] for i in range(self.M)]
self.weight = [ self.weight[i] + self.alpha * self.result[self.move[i]][input] for i in range(self.M)]
index = 0
# random optimal
if self.USE_RANDOM == 1:
self.move[index] = random.choice(self.moves)
# adjust random optimal score to zero
self.score[index] = 0
index += 1
first_meta_index = index
if self.USE_MARKOW == 1:
                # Markov models with meta-strategy rotations
for m in self.markov_orders:
if len(self.history) > m:
key = tuple(self.history[-m-1:-1])
if not (key in self.historyCount):
self.historyCount[key] = [{"R":0,"P":0,"S":0},{"R":0,"P":0,"S":0}]
self.historyCount[key][0][self.history[-1][0]] += 1
self.historyCount[key][1][self.history[-1][1]] += 1
for m in self.markov_orders:
if len(self.history) >= m:
                        key = tuple(self.history[len(self.history)-m:])  # avoids [-0:], which would slice the whole history when m == 0
if key in self.historyCount:
self.move[index] = self.selectBestDict(self.historyCount[key][0])
self.move[index+3] = self.selectBestDict(self.historyCount[key][1])
else:
self.move[index] = random.choice(self.moves)
self.move[index+3] = random.choice(self.moves)
else:
self.move[index] = random.choice(self.moves)
self.move[index+3] = random.choice(self.moves)
index += 6
            # meta strategies: each base prediction plus its one- and two-step rotations
for i in range(first_meta_index, self.M, 3):
self.move[i+1] = self.beatedBy[self.move[i]]
self.move[i+2] = self.beatedBy[self.move[i+1]]
def evaluate(self):
best = self.selectBest(self.score)
self.selected[best] += 1
output = self.move[best]
self.last = output
if self.debug == 1:
if self.ROUND == self.LAST_ROUND:
print "score =", self.score
print "selected =", self.selected
return output
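    # Bayes: a modified "bayes13"-style pattern matcher. It mines the joint,
    # opponent-only, and own-move histories for the longest matching suffixes,
    # multiplies the resulting conditional counts into a score for each hand,
    # and plays the counter with the highest expected payoff.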
class Bayes:
name = "Bayes13-Modified"
def __init__(self, lookback):
self.score = {'RR': 0, 'PP': 0, 'SS': 0, 'PR': 1, 'RS': 1, 'SP': 1,'RP': -1, 'SR': -1, 'PS': -1,}
self.cscore = {'RR': 'r', 'PP': 'r', 'SS': 'r', 'PR': 'b', 'RS': 'b', 'SP': 'b','RP': 'c', 'SR': 'c', 'PS': 'c',}
self.beat = {'P': 'S', 'S': 'R', 'R': 'P'}
self.cede = {'P': 'R', 'S': 'P', 'R': 'S'}
self.lookback = lookback
self.reset()
def reset(self):
self.played_probs = defaultdict(lambda: 1)
self.opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_opp_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_my_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_both_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singleopp_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_opp2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_my2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.singlemy_both2_probs = defaultdict(lambda: defaultdict(lambda: 1))
self.win_probs = defaultdict(lambda: 1)
self.lose_probs = defaultdict(lambda: 1)
self.tie_probs = defaultdict(lambda: 1)
self.opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.my_answers = {'c': 1, 'b': 1, 'r': 1}
self.opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_my_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singleopp_my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_opp_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_my_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_opp2_answers = {'c': 1, 'b': 1, 'r': 1}
self.singlemy_my2_answers = {'c': 1, 'b': 1, 'r': 1}
self.patterndict = defaultdict(str)
self.patterndict2 = defaultdict(str)
self.opppatterndict = defaultdict(str)
self.opppatterndict2 = defaultdict(str)
self.mypatterndict = defaultdict(str)
self.mypatterndict2 = defaultdict(str)
self.csu = [0] * 6 # consecutive strategy usage
self.csc = [] # consecutive strategy candidates
output = random.choice(["R", "P", "S"])
self.hist = ""
self.myhist = ""
self.opphist = ""
self.my = self.opp = self.my2 = self.opp2 = ""
self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""
self.sc = 0
self.opp_strats = []
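        # pick the hand maximizing expected score against the given
        # (unnormalized) distribution over the opponent's hands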
def counter_prob(self, probs):
weighted_list = []
for h in ['R', 'P', 'S']:
weighted = 0
for p in probs.keys():
points = self.score[h+p]
prob = probs[p]
weighted += points * prob
weighted_list.append((h, weighted))
return max(weighted_list, key=operator.itemgetter(1))[0]
def learn(self, output, input):
self.previous_opp_strats = self.opp_strats[:]
self.previous_sc = self.sc
self.sc = self.score[output + input]
for i, c in enumerate(self.csc):
if c == input:
self.csu[i] += 1
else:
self.csu[i] = 0
m = max(self.csu)
self.opp_strats = [i for i, c in enumerate(self.csc) if self.csu[i] == m]
if self.previous_sc == 1:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.win_probs[chr(s1)+chr(s2)] += 1
if self.previous_sc == 0:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.tie_probs[chr(s1)+chr(s2)] += 1
if self.previous_sc == -1:
for s1 in self.previous_opp_strats:
for s2 in self.opp_strats:
self.lose_probs[chr(s1)+chr(s2)] += 1
if self.my and self.opp:
self.opp_answers[self.cscore[input+self.opp]] += 1
self.my_answers[self.cscore[input+self.my]] += 1
if self.my2 and self.opp2:
self.opp2_answers[self.cscore[input+self.opp2]] += 1
self.my2_answers[self.cscore[input+self.my2]] += 1
if self.singleopp_my and self.singleopp_opp:
self.singleopp_opp_answers[self.cscore[input+self.singleopp_opp]] += 1
self.singleopp_my_answers[self.cscore[input+self.singleopp_my]] += 1
if self.singleopp_my2 and self.singleopp_opp2:
self.singleopp_opp2_answers[self.cscore[input+self.singleopp_opp2]] += 1
self.singleopp_my2_answers[self.cscore[input+self.singleopp_my2]] += 1
if self.singlemy_my and self.singlemy_opp:
self.singlemy_opp_answers[self.cscore[input+self.singlemy_opp]] += 1
self.singlemy_my_answers[self.cscore[input+self.singlemy_my]] += 1
if self.singlemy_my2 and self.singlemy_opp2:
self.singlemy_opp2_answers[self.cscore[input+self.singlemy_opp2]] += 1
self.singlemy_my2_answers[self.cscore[input+self.singlemy_my2]] += 1
for length in range(min(self.lookback, len(self.hist)), 0, -2):
pattern = self.patterndict[self.hist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.patterndict2[pattern[-length2:]] += output + input
self.patterndict[self.hist[-length:]] += output + input
# singleopp
for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
pattern = self.opppatterndict[self.opphist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.opppatterndict2[pattern[-length2:]] += output + input
self.opppatterndict[self.opphist[-length:]] += output + input
# singlemy
for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
pattern = self.mypatterndict[self.myhist[-length:]]
if pattern:
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
self.mypatterndict2[pattern[-length2:]] += output + input
self.mypatterndict[self.myhist[-length:]] += output + input
self.played_probs[input] += 1
self.opp_probs[self.opp][input] += 1
self.my_probs[self.my][input] += 1
self.both_probs[self.my+self.opp][input] += 1
self.opp2_probs[self.opp2][input] += 1
self.my2_probs[self.my2][input] += 1
self.both2_probs[self.my2+self.opp2][input] += 1
self.hist += output + input
self.myhist += output
self.opphist += input
self.my = self.opp = self.my2 = self.opp2 = ""
self.singleopp_my = self.singleopp_opp = self.singleopp_my2 = self.singleopp_opp2 = ""
self.singlemy_my = self.singlemy_opp = self.singlemy_my2 = self.singlemy_opp2 = ""
for length in range(min(self.lookback, len(self.hist)), 0, -2):
pattern = self.patterndict[self.hist[-length:]]
if pattern != "":
self.my = pattern[-2]
self.opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.patterndict2[pattern[-length2:]]
if pattern2 != "":
self.my2 = pattern2[-2]
self.opp2 = pattern2[-1]
break
break
# singleopp
for length in range(min(self.lookback / 2, len(self.opphist)), 0, -1):
pattern = self.opppatterndict[self.opphist[-length:]]
if pattern != "":
self.singleopp_my = pattern[-2]
self.singleopp_opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.opppatterndict2[pattern[-length2:]]
if pattern2 != "":
self.singleopp_my2 = pattern2[-2]
self.singleopp_opp2 = pattern2[-1]
break
break
# singlemy
for length in range(min(self.lookback / 2, len(self.myhist)), 0, -1):
pattern = self.mypatterndict[self.myhist[-length:]]
if pattern != "":
self.singlemy_my = pattern[-2]
self.singlemy_opp = pattern[-1]
for length2 in range(min(self.lookback, len(pattern)), 0, -2):
pattern2 = self.mypatterndict2[pattern[-length2:]]
if pattern2 != "":
self.singlemy_my2 = pattern2[-2]
self.singlemy_opp2 = pattern2[-1]
break
break
def evaluate(self):
probs = {}
for hand in rps:
probs[hand] = self.played_probs[hand]
if self.my and self.opp:
for hand in rps:
probs[hand] *= self.opp_probs[self.opp][hand] * self.my_probs[self.my][hand] * self.both_probs[self.my+self.opp][hand]
probs[hand] *= self.opp_answers[self.cscore[hand+self.opp]] * self.my_answers[self.cscore[hand+self.my]]
self.csc = [self.opp, self.beat[self.opp], self.cede[self.opp], self.my, self.cede[self.my], self.beat[self.my]]
strats_for_hand = {'R': [], 'P': [], 'S': []}
for i, c in enumerate(self.csc):
strats_for_hand[c].append(i)
if self.sc == 1:
pr = self.win_probs
if self.sc == 0:
pr = self.tie_probs
if self.sc == -1:
pr = self.lose_probs
for hand in rps:
for s1 in self.opp_strats:
for s2 in strats_for_hand[hand]:
probs[hand] *= pr[chr(s1)+chr(s2)]
else:
self.csc = []
if self.singleopp_my and self.singleopp_opp:
for hand in rps:
probs[hand] *= self.singleopp_opp_probs[self.singleopp_opp][hand] * \
self.singleopp_my_probs[self.singleopp_my][hand] * \
self.singleopp_both_probs[self.singleopp_my+self.singleopp_opp][hand]
probs[hand] *= self.singleopp_opp_answers[self.cscore[hand+self.singleopp_opp]] * self.singleopp_my_answers[self.cscore[hand+self.singleopp_my]]
if self.singlemy_my and self.singlemy_opp:
for hand in rps:
probs[hand] *= self.singlemy_opp_probs[self.singlemy_opp][hand] * \
self.singlemy_my_probs[self.singlemy_my][hand] * \
self.singlemy_both_probs[self.singlemy_my+self.singlemy_opp][hand]
probs[hand] *= self.singlemy_opp_answers[self.cscore[hand+self.singlemy_opp]] * self.singlemy_my_answers[self.cscore[hand+self.singlemy_my]]
if self.my2 and self.opp2:
for hand in rps:
probs[hand] *= self.opp2_probs[self.opp2][hand] * self.my2_probs[self.my2][hand] * self.both2_probs[self.my2+self.opp2][hand]
probs[hand] *= self.opp2_answers[self.cscore[hand+self.opp2]] * self.my2_answers[self.cscore[hand+self.my2]]
if self.singleopp_my2 and self.singleopp_opp2:
for hand in rps:
probs[hand] *= self.singleopp_opp2_probs[self.singleopp_opp2][hand] *\
self.singleopp_my2_probs[self.singleopp_my2][hand] *\
self.singleopp_both2_probs[self.singleopp_my2+self.singleopp_opp2][hand]
probs[hand] *= self.singleopp_opp2_answers[self.cscore[hand+self.singleopp_opp2]] * \
self.singleopp_my2_answers[self.cscore[hand+self.singleopp_my2]]
if self.singlemy_my2 and self.singlemy_opp2:
for hand in rps:
probs[hand] *= self.singlemy_opp2_probs[self.singlemy_opp2][hand] *\
self.singlemy_my2_probs[self.singlemy_my2][hand] *\
self.singlemy_both2_probs[self.singlemy_my2+self.singlemy_opp2][hand]
probs[hand] *= self.singlemy_opp2_answers[self.cscore[hand+self.singlemy_opp2]] * \
self.singlemy_my2_answers[self.cscore[hand+self.singlemy_my2]]
output = self.counter_prob(probs)
return output
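    # MarkovStrategySet: the strategy switcher. It instantiates a set of Bayes
    # and MarkovPredictor players, lets each propose a move every round, and
    # chooses among them with the same decayed-score expert framework as
    # MarkovPredictor, here keyed on which strategy was chosen in earlier rounds.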
class MarkovStrategySet:
def __init__(self, history_start, history_length, step, min_markov_order, max_markov_order, markov_step):
self.debug = 0
self.USE_RANDOM = 1
self.USE_MARKOW = 1
self.LAST_ROUND = 999
self.ROUND = 1
self.history = []
self.strategies = []
for hl in range(history_start, history_length, step):
basic_strat = Bayes(hl)
self.strategies.append(basic_strat)
for hl in range(min_markov_order, max_markov_order + 1, markov_step):
basic_strat = MarkovPredictor(hl)
self.strategies.append(basic_strat)
self.last_plays = [-1] * len(self.strategies)
self.wins = [0] * len(self.strategies)
self.losses = [0] * len(self.strategies)
self.ties = [0] * len(self.strategies)
self.indices = []
for i in range(0, len(self.strategies)):
self.indices.append(i)
self.beatedBy = {"R":"P", "P":"S", "S":"R"}
self.result = {"R":{"R":0, "P":-1, "S":1}, "P":{"R":1, "P":0, "S":-1}, "S":{"R":-1, "P":1, "S":0}}
self.alpha = 0.01
self.M = 0
self.M_tuple_size = 1
if self.USE_RANDOM == 1:
self.M += 1
if self.USE_MARKOW == 1:
self.markov_orders = []
for m in range(0, 4):
self.markov_orders.append(m)
self.historyCount = {}
self.M += len(self.strategies) * 2 * len(self.markov_orders)
self.weight = [1] * self.M
self.decay = [0.85] * self.M
self.score = [0] * self.M
self.selected = [0] * self.M
# self.strategy = [[random.choice(self.indices) for k in range(self.M_tuple_size)] for i in range(self.M)]
self.strategy = [[random.choice(self.indices)] for i in range(self.M)]
self.last = random.choice(self.indices)
self.last_chosen_strat = self.last
def selectBest(self, s):
return random.choice([i for i in range(len(s)) if max(s) == s[i]])
def selectBestDict(self, h, v):
# ew = {i:h[i][v][0][self.beatedBy[self.beatedBy[self.last_plays[random.choice(self.strategy[i])]]]] - h[i][v][0][self.beatedBy[self.last_plays[random.choice(self.strategy[i])]]] for i in h.keys()};
ew = {}
for i in h.keys():
rotated = self.rotate_strat(self.strategy[i])
rotated_2 = self.rotate_strat(rotated)
                ew[i] = h[i][v][0][random.choice(rotated_2)] - h[i][v][0][random.choice(rotated)]
return self.strategy[random.choice([i for i in ew.keys() if max(ew.values()) == ew[i]])]
def normalize(self, counts):
c = counts[:]
total = sum(counts)
if total == 0:
c = [1] * len(c)
total = sum(c)
c = list(map(truediv, c, [total * 1.0] * len(c)))
return c
def max_index(self, arr):
max_idx = 0
for i in range(len(arr)):
if (arr[i] > arr[max_idx]):
max_idx = i
return max_idx
def learn(self, me, opp):
if self.last_plays[0] < 0:
return
self.ROUND += 1
self.history.append(self.last)
# print "result = ", [self.result[self.last_plays[self.strategy[i]]][input] for i in range(self.M)]
# decay all alternative strategies
for i in range(self.M):
for s in range(len(self.strategy[i])):
                    self.score[i] = self.decay[i] * self.score[i] + self.weight[i] * self.result[self.last_plays[self.strategy[i][s]]][opp]  # opp, not the global input
                    self.weight[i] = self.weight[i] + self.alpha * self.result[self.last_plays[self.strategy[i][s]]][opp]
index = 0
# random optimal
if self.USE_RANDOM == 1:
# self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
self.strategy[index] = [random.choice(self.indices)]
# adjust random optimal score to zero
self.score[index] = 0
index += 1
first_meta_index = index
if self.USE_MARKOW == 1:
                # Markov model over past strategy choices, with meta rotations
for m in self.markov_orders:
if len(self.history) > m:
                        key = self.history[-m]  # caution: for m == 0, history[-0] is history[0], a near-constant key
if not (key in self.historyCount):
                            my_counts = [{i: 0 for i in self.indices} for _ in range(3)]
                            opp_counts = [{i: 0 for i in self.indices} for _ in range(3)]
self.historyCount[key] = [my_counts, opp_counts]
# update my counts with reality
for i in range(len(self.strategies)):
a = self.last_plays[self.last_chosen_strat]
if self.last_plays[i] == a:
self.historyCount[key][0][0][i] += 1
elif self.beatedBy[a] == self.last_plays[i]:
self.historyCount[key][0][1][i] += 1
else:
self.historyCount[key][0][2][i] += 1
# update opp counts with reality
for i in range(len(self.strategies)):
a = opp
if self.last_plays[i] == a:
self.historyCount[key][1][0][i] += 1
elif self.beatedBy[a] == self.last_plays[i]:
self.historyCount[key][1][1][i] += 1
else:
self.historyCount[key][1][2][i] += 1
for m in self.markov_orders:
if len(self.history) >= m:
                        key = self.history[-m]  # same -0 caveat as in the learning pass above; the two stay consistent
if key in self.historyCount:
# self.strategy[index] = self.selectBestDict(self.historyCount[key][0][0])
# self.strategy[index + len(self.strategies)] = self.selectBestDict(self.historyCount[key][1][0])
self.strategy[index] = self.selectBestDict(self.historyCount, 0)
self.strategy[index + len(self.strategies)] = self.selectBestDict(self.historyCount, 1)
else:
# self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
# self.strategy[index + len(self.strategies)] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
self.strategy[index] = [random.choice(self.indices)]
self.strategy[index + len(self.strategies)] = [random.choice(self.indices)]
else:
# self.strategy[index] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
# self.strategy[index + len(self.strategies)] = [random.choice(self.indices) for k in range(self.M_tuple_size)]
self.strategy[index] = [random.choice(self.indices)]
self.strategy[index + len(self.strategies)] = [random.choice(self.indices)]
index += len(self.strategies)
# set other meta strategies
for i in range(first_meta_index, self.M, len(self.strategies)):
# initial value
rotation_strats = self.strategy[i]
# compute the next sequence of rotations
for offset in range(1, len(self.strategies)):
rotation_strats = self.rotate_strat(rotation_strats)
self.strategy[i+offset] = rotation_strats
for si in range(0, len(self.strategies)):
# lp = self.last_plays[si]
self.strategies[si].learn(me, opp)
def rotate_strat(self, rotation_strats):
# get the strategy to pivot on
strat = random.choice(rotation_strats)
# print strat, len(self.last_plays)
# ask the pivot strategy what it did last round
rotation = self.beatedBy[self.last_plays[strat]]
# build a set of strategies that match the pivot's response
rotation_strats = []
for si in range(len(self.strategies)):
if self.last_plays[si] == rotation:
rotation_strats.append(si)
            # pad with random strategies (or trim at random) until rotation_strats has exactly M_tuple_size entries
while len(rotation_strats) < self.M_tuple_size:
rotation_strats.append(random.choice(self.indices))
while len(rotation_strats) > self.M_tuple_size:
del rotation_strats[random.choice(range(len(rotation_strats)))]
return rotation_strats
        def choice(self, arr, count, p):
            # sample `count` elements of arr according to the distribution p
            # (the original accumulated arr instead of p and returned raw indices)
            result = [0] * count
            accum = p[:]
            for i in range(1, len(accum)):
                accum[i] += accum[i - 1]
            for c in range(count):
                r = random.uniform(0, 1)
                for i in range(len(accum)):
                    if r < accum[i] or i == len(accum) - 1:
                        result[c] = arr[i]
                        break
            return result
def evaluate(self):
for si in range(0, len(self.strategies)):
self.last_plays[si] = self.strategies[si].evaluate()
best = self.selectBest(self.score)
self.selected[best] += 1
strat = None
# test random strat choice
strat = random.choice(self.strategy[best])
# elif sel == 1:
# # todo: test pd strat choice
# possible_strats = self.strategy[best]
# pd = [0] * len(possible_strats)
# for ps in range(len(possible_strats)):
# key = possible_strats[ps]
# if key in self.historyCount:
# strat_tuple = self.strategy[key]
# for i in strat_tuple:
# pd[ps] += self.historyCount[key][0][0][i] \
# - self.historyCount[key][0][1][i] \
# - self.historyCount[key][0][2][i] / 2.0
# # print "=======", pd
# pd = [x if x > 0 else 0 for x in pd]
# pd = self.normalize(pd)
# strat = self.choice(possible_strats, 1, pd)[0]
# # print "AAAAAAA", possible_strats, pd, strat
# # if strat in self.historyCount:
# # print self.historyCount[strat]
# elif sel == 2:
# # todo: test max strat choice
# possible_strats = self.strategy[best]
# pd = [0] * len(possible_strats)
# for ps in range(len(possible_strats)):
# key = possible_strats[ps]
# if key in self.historyCount:
# strat_tuple = self.strategy[key]
# for i in strat_tuple:
# pd[ps] += self.historyCount[key][0][0][i] \
# - self.historyCount[key][0][1][i] \
# - self.historyCount[key][0][2][i] / 2.0
# strat = possible_strats[self.max_index(pd)];
output = self.last_plays[strat]
self.last_chosen_strat = strat
self.last = best
if self.debug == 1:
# print self.ROUND, self.LAST_ROUND
if self.ROUND == self.LAST_ROUND:
print "strats =", self.strategy
print "score =", self.score
print "selected =", self.selected
return output
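    # With these arguments, range(16, 17, 5) yields a single Bayes(16) and
    # range(6, 7, 2) a single MarkovPredictor(6): the two strong players of the title.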
strategy_set = MarkovStrategySet(16, 17, 5, 6, 6, 2)
else:
strategy_set.learn(output, input)
output = strategy_set.evaluate()