
# qlearningAgents.py
# ------------------

from game import *
from learningAgents import ReinforcementAgent
from featureExtractors import *

import random
import util
import math


class QLearningAgent(ReinforcementAgent):
    """
      Q-Learning Agent

      Functions you should fill in:
        - computeValueFromQValues
        - computeActionFromQValues
        - getQValue
        - getAction
        - update

      Instance variables you have access to
        - self.epsilon (exploration prob)
        - self.alpha (learning rate)
        - self.discount (discount rate)

      Functions you should use
        - self.getLegalActions(state)
          which returns legal actions for a state
    """

    def __init__(self, **args):
        "You can initialize Q-values here..."
        ReinforcementAgent.__init__(self, **args)
        # Q-values live in a util.Counter keyed by (state, action);
        # unseen pairs default to 0.
        self.QState = util.Counter()

    def getQValue(self, state, action):
        """
          Returns Q(state,action)
          Should return 0.0 if we have never seen a state
          or the Q node value otherwise
        """
        return self.QState[(state, action)]

    def computeValueFromQValues(self, state):
        """
          Returns max_action Q(state,action)
          where the max is over legal actions.  Note that if
          there are no legal actions, which is the case at the
          terminal state, you should return a value of 0.0.
        """
        optimalQValue = None
        for action in self.getLegalActions(state):
            qValue = self.getQValue(state, action)
            if optimalQValue is None or qValue > optimalQValue:
                optimalQValue = qValue
        if optimalQValue is None:
            # No legal actions, i.e. a terminal state.
            return 0.0
        return optimalQValue

    def computeActionFromQValues(self, state):
        """
          Compute the best action to take in a state.  Note that if there
          are no legal actions, which is the case at the terminal state,
          you should return None.
        """
        optimalActions = []
        optimalQValue = None
        for action in self.getLegalActions(state):
            qValue = self.getQValue(state, action)
            if optimalQValue is None or qValue > optimalQValue:
                optimalQValue = qValue
                optimalActions = [action]
            elif qValue == optimalQValue:
                optimalActions.append(action)
        if not optimalActions:
            return None
        # Break ties between equally good actions at random.
        return random.choice(optimalActions)

    def getAction(self, state):
        """
          Compute the action to take in the current state.  With
          probability self.epsilon, we should take a random action and
          take the best policy action otherwise.  Note that if there are
          no legal actions, which is the case at the terminal state, you
          should choose None as the action.

          HINT: You might want to use util.flipCoin(prob)
          HINT: To pick randomly from a list, use random.choice(list)
        """
        legalActions = self.getLegalActions(state)
        if not legalActions:
            return None
        # Epsilon-greedy: explore with probability epsilon, otherwise follow
        # the current greedy policy.
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
        return self.computeActionFromQValues(state)

    def update(self, state, action, nextState, reward):
        """
          The parent class calls this to observe a
          state = action => nextState and reward transition.
          You should do your Q-Value update here

          NOTE: You should never call this function,
          it will be called on your behalf
        """
        # Standard Q-learning update:
        #   Q(s,a) <- (1 - alpha) * Q(s,a) + alpha * (reward + discount * max_a' Q(s',a'))
        self.QState[(state, action)] = (
            (1 - self.alpha) * self.getQValue(state, action)
            + self.alpha * (reward + self.discount * self.computeValueFromQValues(nextState)))

    def getPolicy(self, state):
        return self.computeActionFromQValues(state)

    def getValue(self, state):
        return self.computeValueFromQValues(state)
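
# A small worked example of the tabular update above (illustrative numbers only,
# not values taken from the project): with alpha = 0.5, discount = 0.9, a stored
# Q(s, a) of 0.0, an observed reward of 10, and max_a' Q(s', a') = 2.0, the new
# estimate is (1 - 0.5) * 0.0 + 0.5 * (10 + 0.9 * 2.0) = 5.9.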
		*			t   PacmanQAgentc           B   s,   e  Z d  Z d d d d d  Z d   Z RS(   sI   Exactly the same as QLearningAgent, but with different default parametersg?g?g?i    c         K   sE   | | d <| | d <| | d <| | d <d |  _  t j |  |  d S(   s  
        These default parameters can be changed from the pacman.py command line.
        For example, to change the exploration rate, try:
            python pacman.py -p PacmanQLearningAgent -a epsilon=0.1

        alpha    - learning rate
        epsilon  - exploration rate
        gamma    - discount factor
        numTraining - number of training episodes, i.e. no learning after these many episodes
        """
        args['epsilon'] = epsilon
        args['gamma'] = gamma
        args['alpha'] = alpha
        args['numTraining'] = numTraining
        self.index = 0  # This is always Pacman
        QLearningAgent.__init__(self, **args)

    def getAction(self, state):
        """
        Simply calls the getAction method of QLearningAgent and then
        informs parent of action for Pacman.  Do not change or remove this
        method.
        """
        action = QLearningAgent.getAction(self, state)
        self.doAction(state, action)
        return action


class ApproximateQAgent(PacmanQAgent):
    """
       ApproximateQLearningAgent

       You should only have to overwrite getQValue
       and update.  All other QLearningAgent functions
       should work as is.
    """

    def __init__(self, extractor='IdentityExtractor', **args):
        self.featExtractor = util.lookup(extractor, globals())()
        PacmanQAgent.__init__(self, **args)
        # One weight per feature name, defaulting to 0.
        self.weights = util.Counter()

    def getWeights(self):
        return self.weights

    def getQValue(self, state, action):
        """
          Should return Q(state,action) = w * featureVector
          where * is the dotProduct operator
        """
        features = self.featExtractor.getFeatures(state, action)
        w = self.weights
        # util.Counter implements * as a dot product over shared keys.
        return w * features

    def update(self, state, action, nextState, reward):
        """
           Should update your weights based on transition
        """
        difference = ((reward + self.discount * self.computeValueFromQValues(nextState))
                      - self.getQValue(state, action))
        features = self.featExtractor.getFeatures(state, action)
        for i in features.keys():
            self.weights[i] = self.getWeights()[i] + self.alpha * difference * features[i]

    def final(self, state):
        "Called at the end of each game."
        # Call the parent's final method first.
        PacmanQAgent.final(self, state)

        # Did we finish training?
        if self.episodesSoFar == self.numTraining:
            # You might want to print your weights here for debugging.
            pass
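
# A worked example of the weight update above (illustrative numbers only): if the
# current approximate Q(s, a) is 1.0, the observed reward is 2, the value of the
# next state is 3.0, alpha = 0.5 and discount = 0.9, then
# difference = (2 + 0.9 * 3.0) - 1.0 = 3.7, and a weight whose feature value is
# 0.2 grows by alpha * difference * 0.2 = 0.37.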