qa

QX-CHEN · Jun 11, 2020 · 91b9fb3 · 91b9fb3
1 parent 0128050
commit 91b9fb3
Showing 1 changed file with 254 additions and 0 deletions.
diff --git a/qlearningAgents.py b/qlearningAgents.py
@@ -0,0 +1,254 @@
+# qlearningAgents.py
+# ------------------
+# Licensing Information:  You are free to use or extend these projects for
+# educational purposes provided that (1) you do not distribute or publish
+# solutions, (2) you retain this notice, and (3) you provide clear
+# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
+#
+# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
+# The core projects and autograders were primarily created by John DeNero
+# ([email protected]) and Dan Klein ([email protected]).
+# Student side autograding was added by Brad Miller, Nick Hay, and
+# Pieter Abbeel ([email protected]).
+
+
+from game import *
+from learningAgents import ReinforcementAgent
+from featureExtractors import *
+
+import random,util,math
+
+class QLearningAgent(ReinforcementAgent):
+    """
+      Q-Learning Agent
+
+      Functions you should fill in:
+        - computeValueFromQValues
+        - computeActionFromQValues
+        - getQValue
+        - getAction
+        - update
+
+      Instance variables you have access to
+        - self.epsilon (exploration prob)
+        - self.alpha (learning rate)
+        - self.discount (discount rate)
+
+      Functions you should use
+        - self.getLegalActions(state)
+          which returns legal actions for a state
+    """
+    def __init__(self, **args):
+        "You can initialize Q-values here..."
+        ReinforcementAgent.__init__(self, **args)
+
+        "*** YOUR CODE HERE ***"
+        self.qTable = {}
+        self.numTable = {}
+
+    def getQValue(self, state, action):
+        """
+          Returns Q(state,action)
+          Should return 0.0 if we have never seen a state
+          or the Q node value otherwise
+        """
+        "*** YOUR CODE HERE ***"
+        if state in self.qTable:
+            if action in self.qTable[state]:
+                return self.qTable[state][action]
+            else:
+                self.qTable[state][action] = 0.0
+                self.numTable[state][action] = 0
+                return 0.0
+        else:
+            self.qTable[state] = {}
+            self.numTable[state] = {}
+        return 0.0
+
+        util.raiseNotDefined()
+
+
+    def computeValueFromQValues(self, state):
+        """
+          Returns maxVal_action Q(state,action)
+          where the maxVal is over legal actions.  Note that if
+          there are no legal actions, which is the case at the
+          terminal state, you should return a value of 0.0.
+        """
+        "*** YOUR CODE HERE ***"
+        if len(self.getLegalActions(state)):
+            maxVal = self.getQValue(state, self.getLegalActions(state)[0])
+            for i in self.getLegalActions(state):
+                if self.getQValue(state, i) > maxVal:
+                    maxVal = self.getQValue(state, i)
+            return maxVal
+        else:
+            return 0.0
+
+        util.raiseNotDefined()
+
+    def computeActionFromQValues(self, state):
+        """
+          Compute the best action to take in a state.  Note that if there
+          are no legal actions, which is the case at the terminal state,
+          you should return None.
+        """
+        "*** YOUR CODE HERE ***"
+        legalActions = self.getLegalActions(state)
+
+        random.shuffle(legalActions)
+
+        if len(self.getLegalActions(state)):
+            maxVal = -400
+            maxValAction = -1
+        else:
+            return None
+        for i in legalActions:
+            if self.getQValue(state, i) > maxVal:
+                maxVal = self.getQValue(state, i)
+                maxValAction = i
+        if i != -1:
+            return maxValAction
+        else:
+            return None
+
+        util.raiseNotDefined()
+
+    def getAction(self, state):
+        """
+          Compute the action to take in the current state.  With
+          probability self.epsilon, we should take a random action and
+          take the best policy action otherwise.  Note that if there are
+          no legal actions, which is the case at the terminal state, you
+          should choose None as the action.
+
+          HINT: You might want to use util.flipCoin(prob)
+          HINT: To pick randomly from a list, use random.choice(list)
+        """
+        # Pick Action
+        legalActions = self.getLegalActions(state)
+        action = None
+        "*** YOUR CODE HERE ***"
+        bestAction = self.computeActionFromQValues(state)
+        if legalActions:
+            if util.flipCoin(self.epsilon):
+                action = random.choice(legalActions)
+            else:
+                action = self.computeActionFromQValues(state)
+
+        return action
+
+        util.raiseNotDefined()
+
+    def update(self, state, action, nextState, reward):
+        """
+          The parent class calls this to observe a
+          state = action => nextState and reward transition.
+          You should do your Q-Value update here
+
+          NOTE: You should never call this function,
+          it will be called on your behalf
+        """
+        "*** YOUR CODE HERE ***"
+        if state not in self.qTable:
+            self.qTable[state] = {}
+
+        currentQ = self.getQValue(state, action)
+        if (reward == 9):
+            reward = 18
+        if (reward == -501):
+            reward = -5000
+        if (reward < 1) and (reward > -5):
+            reward = 5
+        if reward == 509:
+            reward = 400
+        if (reward):
+            potentialQ = reward + self.discount * self.computeValueFromQValues(nextState)
+
+        if currentQ < potentialQ:
+            self.qTable[state][action] = potentialQ
+            self.numTable[state][action] += 1
+        return
+
+        util.raiseNotDefined()
+
+    def getPolicy(self, state):
+        return self.computeActionFromQValues(state)
+
+    def getValue(self, state):
+        return self.computeValueFromQValues(state)
+
+
+class PacmanQAgent(QLearningAgent):
+    "Exactly the same as QLearningAgent, but with different default parameters"
+
+    def __init__(self, epsilon=0.05,gamma=0.8,alpha=0.2, numTraining=0, **args):
+        """
+        These default parameters can be changed from the pacman.py command line.
+        For example, to change the exploration rate, try:
+            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1
+
+        alpha    - learning rate
+        epsilon  - exploration rate
+        gamma    - discount factor
+        numTraining - number of training episodes, i.e. no learning after these many episodes
+        """
+        args['epsilon'] = epsilon
+        args['gamma'] = gamma
+        args['alpha'] = alpha
+        args['numTraining'] = numTraining
+        self.index = 0  # This is always Pacman
+        QLearningAgent.__init__(self, **args)
+
+    def getAction(self, state):
+        """
+        Simply calls the getAction method of QLearningAgent and then
+        informs parent of action for Pacman.  Do not change or remove this
+        method.
+        """
+        action = QLearningAgent.getAction(self,state)
+        self.doAction(state,action)
+        return action
+
+
+class ApproximateQAgent(PacmanQAgent):
+    """
+       ApproximateQLearningAgent
+
+       You should only have to overwrite getQValue
+       and update.  All other QLearningAgent functions
+       should work as is.
+    """
+    def __init__(self, extractor='IdentityExtractor', **args):
+        self.featExtractor = util.lookup(extractor, globals())()
+        PacmanQAgent.__init__(self, **args)
+        self.weights = util.Counter()
+
+    def getWeights(self):
+        return self.weights
+
+    def getQValue(self, state, action):
+        """
+          Should return Q(state,action) = w * featureVector
+          where * is the dotProduct operator
+        """
+        "*** YOUR CODE HERE ***"
+        util.raiseNotDefined()
+
+    def update(self, state, action, nextState, reward):
+        """
+           Should update your weights based on transition
+        """
+        "*** YOUR CODE HERE ***"
+        util.raiseNotDefined()
+
+    def final(self, state):
+        "Called at the end of each game."
+        # call the super-class final method
+        PacmanQAgent.final(self, state)
+
+        # did we finish training?
+        if self.episodesSoFar == self.numTraining:
+            # you might want to print your weights here for debugging
+            "*** YOUR CODE HERE ***"
+            pass