from utils import *
import network


class Learner():
    """Base class for objects that train a Critter's network.

    Caches references to the critter's network and memory, the number of
    available actions, and the learning rate taken from the world's
    feature table.
    """

    def __init__(self, critter):
        """Bind this learner to ``critter`` and cache what training needs.

        critter -- object providing ``network``, ``memory``, ``acts`` and
                   ``world.features`` (a mapping read with ``.get``).
        """
        self.critter = critter
        self.network = self.critter.network
        self.memory = self.critter.memory
        self.n_acts = len(self.critter.acts)
        # Learning rate; falls back to .1 when the world defines no 'LR'.
        self.lr = self.critter.world.features.get('LR', .1)


class QLearner(Learner):
    """A Critter's QLearner is responsible for making changes to the
    long-term memory."""

    def __init__(self, critter):
        """Set up Q-learning state on top of the base Learner fields."""
        Learner.__init__(self, critter)
        # NOTE: -1 is the initial value; if learn() runs before a real action
        # index is stored here, make_target() indexes the LAST output unit.
        self.last_action = -1
        self.last_reinf = 0
        # All-zero state vector, one entry per unit of the first layer.
        self.last_state = [0.0] * self.network.layers[0].size
        # Discount factor; falls back to .7 when the world has no 'DISCOUNT'.
        self.discount = self.critter.world.features.get('DISCOUNT', .7)

    # REINFORCEMENT LEARNING

    def get_best_q(self, state, outputs=None):
        """Return the highest output value for ``state``.

        state   -- input passed to the critter's ``run_network``.
        outputs -- optional precomputed output values. When None or empty,
                   the network is run on ``state`` and the activations of
                   its last layer are used instead.
        """
        # Explicit None/length test instead of truthiness, so array-like
        # outputs are handled and an explicitly passed empty list still
        # triggers a forward pass as before.
        if outputs is None or len(outputs) == 0:
            self.critter.run_network(state, show=False)
            outputs = self.network.layers[-1].activations
        return max(outputs)

    def q_new(self, next_state, outputs=None):
        """Target for network; estimate of Q given next information.

        Computed as ``last_reinf + discount * get_best_q(next_state, outputs)``.
        """
        best_next = self.get_best_q(next_state, outputs)
        return self.last_reinf + self.discount * best_next

    def learn(self, next_state, outputs=None):
        """Generate a target from the Q rule, and run the network with it,
        adjusting the weights.

        next_state -- state used to estimate the best next Q value.
        outputs    -- optional precomputed outputs for ``next_state``
                      (see get_best_q).
        """
        target = self.make_target(next_state, outputs)
        # Run the network on the saved previous state and adjust the weights.
        self.critter.run_network(self.last_state, target, train=True,
                                 lr=self.lr, show=True)

    def make_target(self, next_state, outputs=None):
        """Create a target from the Q-learning rule, using saved last action.

        Only the unit corresponding to the action gets an actual target;
        every other unit is set to ``network.NO_TARGET``.

        Returns a list with ``self.critter.n_acts`` entries.
        """
        # NOTE(review): Learner.__init__ caches self.n_acts from
        # len(critter.acts); this reads critter.n_acts directly -- confirm
        # the two always agree before unifying them.
        targ = [network.NO_TARGET] * self.critter.n_acts
        targ[self.last_action] = self.q_new(next_state, outputs)
        return targ