zai_dfa_v3

Author: zdg
Submission date: 2012-02-18 23:15:22.780586
Rating: 7173
Matches played: 772
Win rate: 74.35%

Use rpsrunner.py to play unranked matches on your computer.
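
For quick local testing, a driver along the lines of the sketch below can pit two bot sources against each other. It only assumes the contest convention visible in the code: each bot is exec'd once per turn with the opponent's previous move in the global variable input (empty string on the first turn, otherwise 'R', 'P' or 'S') and must leave its own move in the global variable output. The function name play_match, the file names, and the scoring table are illustrative; rpsrunner.py's actual interface may differ.

def play_match(bot1_src, bot2_src, turns=1000):
    # each bot keeps its state between turns in its own globals dict
    g1, g2 = {'input': ''}, {'input': ''}
    score = 0
    for _ in range(turns):
        exec(bot1_src, g1)
        exec(bot2_src, g2)
        m1, m2 = g1['output'], g2['output']
        # each bot sees the other's move at the start of the next turn
        g1['input'], g2['input'] = m2, m1
        score += {('R', 'S'): 1, ('P', 'R'): 1, ('S', 'P'): 1,
                  ('S', 'R'): -1, ('R', 'P'): -1, ('P', 'S'): -1}.get((m1, m2), 0)
    # positive score: bot 1 won more turns than it lost
    return score

# example (file names illustrative):
# result = play_match(open('zai_dfa_v3.py').read(), open('opponent.py').read())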

Source code:

# uses a higher order markov chain to transition between the 6 ways of
# using the beat last strategy
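# the six variants are: beat / repeat / lose-to the opponent's last move,
# and beat / repeat / lose-to my own last move.  the chain is keyed on the
# recent history of which variant was followed, and its counts record which
# variants would have beaten the opponent after each such context.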

# --------------------- initialization -----------------------------
if not input:
    import random, math
    import itertools, operator, collections
    
    R, P, S = 0, 1, 2
    RPS = [R, P, S]
    T, W, L = R, P, S
    PAYOFFS = [T, W, L]
    scorep = [0, 1, -1]
    to_win = [W, T, L]
    convert = {'R':R, 'P':P, 'S':S, R:'R', P:'P', S:'S'}
    subh = [[T, L, W], [W, T, L], [L, W, T]]
    addh = [[R, P, S], [P, S, R], [S, R, P]]
    ties, beats, loses = addh[T], addh[W], addh[L]
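    # subh[my][op] gives the outcome of my hand against op's (T/W/L);
    # addh[T][h], addh[W][h], addh[L][h] give the hand that ties with,
    # beats, or loses to h, respectively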
    
    def pick_max(vec):
        # unused helper: returns the index of the last maximum entry in vec
        maxix = 0
        accmax = vec[0]
        for i in xrange(1, len(vec)):
            if vec[i] >= accmax:
                accmax = vec[i]
                maxix = i
        return maxix
    
    def normalize(u):
        # scale u in place so that its entries sum to 1
        factor = 1.0 / sum(u)
        for i in xrange(len(u)):
            u[i] *= factor

    def pick_weighted(v):
        # sample an index of v in proportion to its (normalized) weights
        u = random.random()
        acc = 0.0
        for i, p in enumerate(v):
            acc += p
            if u < acc:
                return (i, p)
        # guard against floating-point rounding leaving u above the running sum
        return (len(v) - 1, v[-1])
    
    class MarkovN(object):
        def __init__(self, order, op_moves, state_history, bot_list):
            self.order = order
            self.state_history = state_history
            self.op_moves = op_moves
            self.bot_list = bot_list
            self.dict = collections.defaultdict(lambda:[0,0,0,0,0,0])
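            # keys are tuples of the 0..order most recent modes; values count,
            # per bot, how often that bot's choice would have beaten the
            # opponent when the key preceded it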
        
        def update(self):
            # last turn's state
            last_state = self.state_history[-1]
            
            # op's last move
            last_op_move = self.op_moves[-1]
            
            # the highest order that can be updated
            highest_prev_order = min(len(self.state_history) - 1, self.order)
            
            # update the transitions for the bots that won last turn
            for bot_index, bot in enumerate(self.bot_list):
                if bot.next_hand == beats[last_op_move]:
                    # up to and including highest_prev_order
                    for order in xrange(highest_prev_order + 1):
                        # up to second last one
                        upper_bound = len(self.state_history) - 1
                        lower_bound = upper_bound - order
                        index = tuple(self.state_history[lower_bound:upper_bound])
                        
                        # update last time's info
                        self.dict[index][bot_index] += 1
            
            # now find highest order that's not all 0
            highest_curr_order = min(len(self.state_history), self.order)
            for order in xrange(highest_curr_order, -1, -1):
                lower_bound = len(self.state_history) - order
                index = tuple(self.state_history[lower_bound:])
                counts = self.dict[index]
                if sum(counts) > 0:
                    self.counts = counts
                    break
                
    
    class LastBot(object):
        # plays a fixed transformation (beat / tie / lose-to, chosen by offset)
        # of the last move in the sequence it watches
        def __init__(self, op_moves, offset):
            self.op_moves = op_moves
            self.offset = offset
        
        def update(self):
            self.next_hand = addh[self.offset][self.op_moves[-1]]
    
    op_hands = []      # opponent's moves so far
    my_hands = []      # my moves so far
    payoffs = []       # per-turn outcomes from my side (recorded but unused)
    op_payoffs = []    # per-turn outcomes from the opponent's side (recorded but unused)
    
    output = convert[random.choice(RPS)]
    
    
    bots = [
        LastBot(op_hands, W),   # beat the opponent's last move
        LastBot(my_hands, L),   # play what loses to my last move
        LastBot(op_hands, T),   # repeat the opponent's last move
        LastBot(my_hands, W),   # beat my own last move
        LastBot(op_hands, L),   # play what loses to the opponent's last move
        LastBot(my_hands, T)]   # repeat my own last move
    mode = 0                    # index of the bot currently being followed
    
    states = []                 # history of selected modes, one entry per turn
    
    markov = MarkovN(6, op_hands, states, bots)
# --------------------- turn -----------------------------
else:
    last_input, last_output = convert[input], convert[output]
    my_hands.append(last_output)
    op_hands.append(last_input)
    payoffs.append(subh[last_output][last_input])
    op_payoffs.append(subh[last_input][last_output])
    hands_played = len(op_hands)
    
    states.append(mode)
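    # the mode recorded here is the one that was in effect when the move just
    # revealed was chosen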
    
    if hands_played > 1:
        markov.update()
        
        for b in bots:
            b.update()
        
        nextstatep = markov.counts[:]
        normalize(nextstatep)
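        # choose the next mode in proportion to how often each bot has won
        # after the current context of recent modes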
        mode = pick_weighted(nextstatep)[0]
        next_hand = bots[mode].next_hand
    else:
        for b in bots:
            b.update()
        next_hand = bots[mode].next_hand
    
    output = convert[next_hand]
    
    # output = convert[random.choice(RPS)]
    
    # if hands_played == 999:
    # if hands_played < 5:
        # print markov.dict
        # print '\n'.join(str(lst) for lst in transitions)