
iQc           @   s  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d e j f d     YZ d e j f d     YZ	 d d6 d     YZ
 d   Z d	   Z d
   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z e d k re   Z d  d l Z e e d e j  Z e   Z e j e j  e j e j  e j	 e  Z  d  d l! Z! e! j" e  Z# e j$ sd  d l% Z% e% j& e e j' e j(  Z# n  y e# j)   Wn e* k
 re j+ d  n Xd  d l, Z, d  d l- Z- d Z/ e j0 d k r'e, j1 e e j2 e j3  Z/ n e j0 d k re	 e  Z4 d   Z5 i e j2 d 6e j6 d 6e j7 d 6e5 d 6Z8 e- j9 e8   Z/ nb e j0 d k re j: d k rd e _: n  d d7 d      YZ; e;   Z/ n e j< sd! e j0  n  y e j< re j0 d k re j= rox] e> e j3  D]I Z? e, j1 e e j2 e?  Z@ e# jA e@ d" d# eB e?  d$ e# jC   qWn  e# jA e/ d" d# eB e j3  d$ e# jC   e# jD e/ d" d% eB e j3  d$ e# jC   n  Wn e* k
 re j+ d  n Xd&   ZE e jF sze j< r&e j0 d k r&d'   ZE qze j0 d k rAd(   ZE n  e j0 d k r\d)   ZE n  e j0 d k rzd*   ZE qzn  d+   ZG e jF rd,   ZG n  d-   ZH e jC rd.   ZH n  e j< rd/   ZI n	 e/ jJ ZI e j: d k rHd0 Ge j: Gd1 GHHn  d ZK xF e> d2 e j: d2  D]. ZL eK e e/ e  e j2 eI eE eG eH eL  7ZK qWe j: d k rvHd3 eB eK d4 e j:  GHHHn  e j0 d k re j< ry` e# jD e/ d" d% eB e j:  d5 e# jC   e# jA e/ d" d# eB e j:  d5 e# jC   Wqe* k
 re j+ d  qXqn  d S(8   iNt	   Gridworldc           B   sq   e  Z d  Z d   Z d   Z d   Z d   Z d   Z d   Z d   Z	 d   Z
 d	   Z d
   Z d   Z RS(   s   
      Gridworld
    c         C   sF   t  |  t  g   k r' t |  } n  | |  _ d |  _ d |  _ d  S(   Ng        g?(   t   typet   makeGridt   gridt   livingRewardt   noise(   t   selfR   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __init__   s
     		c         C   s   | |  _  d S(   s   
        The (negative) reward for exiting "normal" states.

        Note that in the R+N text, this reward is on entering
        a state and therefore is not clearly part of the state's
        future rewards.
        N(   R   (   R   t   reward(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   setLivingReward   s    c         C   s   | |  _  d S(   sG   
        The probability of moving in an unintended direction.
        N(   R   (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   setNoise)   s    c         C   sG   | |  j  j k r d S| \ } } t |  j  | |  t k rC d Sd S(	   s   
        Returns list of valid actions for 'state'.

        Note that you can request moves into walls and
        that "exit" states transition to the terminal
        state under the special action "done".
        t   exitt   northt   westt   southt   east(    (   s   exit(   s   norths   wests   souths   east(   R   t   terminalStateR   t   int(   R   t   statet   xt   y(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getPossibleActions0   s    c         C   s   |  j  j g } xj t |  j  j  D]V } xM t |  j  j  D]9 } |  j  | | d k r; | | f } | j |  q; q; Wq" W| S(   s,   
        Return list of all states.
        t   #(   R   R   t   ranget   widtht   heightt   append(   R   t   statesR   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt	   getStates?   s    c         C   sb   | |  j  j k r d S| \ } } |  j  | | } t |  t k sW t |  t k r[ | S|  j S(   s   
        Get reward for state, action, nextState transition.

        Note that the reward depends only on the state being
        departed (as in the R+N book examples, which more or
        less use this convention).
        g        (   R   R   R   R   t   floatR   (   R   R   t   actiont	   nextStateR   R   t   cell(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt	   getRewardL   s    $c         C   se   xX t  |  j j  D]D } x; t  |  j j  D]' } |  j | | d k r, | | f Sq, Wq Wd  d  S(   Nt   Ss   Grid has no start state(   R   R   R   R   (   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getStartState\   s
    c         C   s   | |  j  j k S(   s=  
        Only the TERMINAL_STATE state is *actually* a terminal state.
        The other "exit" states are technically non-terminals with
        a single action "exit" which leads to the true terminal state.
        This convention is to make the grids line up with the examples
        in the R+N textbook.
        (   R   R   (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt
   isTerminalc   s    c         C   s  | |  j  |  k r d  n  |  j |  r1 g  S| \ } } t |  j | |  t k sw t |  j | |  t k r |  j j } | d f g Sg  } |  j | d |  r | | d f p | } |  j | | d  r | d | f p | } |  j | d |  r| | d f p| }	 |  j | | d  r@| d | f pC| }
 | d k s^| d k r| d k r| j | d |  j	 f  n | j |	 d |  j	 f  |  j	 } | j | | d f  | j |
 | d f  n  | d k s| d k rp| d k r| j | d |  j	 f  n | j |
 d |  j	 f  |  j	 } | j | | d f  | j |	 | d f  n  |  j
 |  } | S(	   s   
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.
        s   Illegal action!g      ?i   R   R   g       @R   R   (   R   R$   R   R   R   R   R   t   _Gridworld__isAllowedR   R   t   _Gridworld__aggregate(   R   R   R   R   R   t	   termStatet
   successorst
   northStatet	   westStatet
   southStatet	   eastStatet   massLeft(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getTransitionStatesAndProbsn   s:    	:,,,,		c         C   sm   t  j   } x$ | D] \ } } | | c | 7<q Wg  } x- | j   D] \ } } | j | | f  qF W| S(   N(   t   utilt   Countert   itemsR   (   R   t   statesAndProbst   counterR   t   probt   newStatesAndProbs(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __aggregate   s    c         C   sY   | d k  s | |  j  j k r" t S| d k  s@ | |  j  j k rD t S|  j  | | d k S(   Ni    R   (   R   R   t   FalseR   (   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __isAllowed   s
      (   t   __name__t
   __module__t   __doc__R   R	   R
   R   R   R!   R#   R$   R.   R&   R%   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR       s   			
							3		t   GridworldEnvironmentc           B   sA   e  Z d    Z d   Z d   Z d   Z d d  Z d   Z RS(   c         C   s   | |  _  |  j   d  S(   N(   t	   gridWorldt   reset(   R   R=   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR      s    	c         C   s   |  j  S(   N(   R   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getCurrentState   s    c         C   s   |  j  j |  S(   N(   R=   R   (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR      s    c         C   s7   |  j    } |  j | |  \ } } | |  _ | | f S(   N(   R?   t   getRandomNextStateR   (   R   R   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   doAction   s    	c   
      C   s   d } | d  k r! t j   } n | j   } d } |  j j | |  } xa | D]Y \ } } | | 7} | d k rz d  n  | | k  rO |  j j | | |  }	 | |	 f SqO Wd  d  S(   Ng      g        g      ?s;   Total transition probability more than one; sample failure.s;   Total transition probability less than one; sample failure.(   t   Nonet   randomR=   R.   R!   (
   R   R   R   t   randObjt   randt   sumR(   R   R4   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR@      s    
	c         C   s   |  j  j   |  _ d  S(   N(   R=   R#   R   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR>      s    N(	   R9   R:   R   R?   R   RA   RB   R@   R>   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR<      s   				t   Gridc           B   sk   e  Z d  Z d d  Z d   Z d   Z d   Z d   Z d   Z d   Z	 d	   Z
 d
   Z d   Z RS(   s2  
    A 2-dimensional array of immutables backed by a list of lists.  Data is accessed
    via grid[x][y] where (x,y) are cartesian coordinates with x horizontal,
    y vertical and the origin (0,0) in the bottom left corner.

    The __str__ method constructs an output that is oriented appropriately.
    t    c         C   sZ   | |  _  | |  _ g  t |  D]% } g  t |  D] } | ^ q2 ^ q |  _ d |  _ d  S(   Nt   TERMINAL_STATE(   R   R   R   t   dataR   (   R   R   R   t   initialValueR   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR      s    		;c         C   s   |  j  | S(   N(   RJ   (   R   t   i(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __getitem__   s    c         C   s   | |  j  | <d  S(   N(   RJ   (   R   t   keyt   item(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __setitem__   s    c         C   s    | d  k r t S|  j | j k S(   N(   RB   R7   RJ   (   R   t   other(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __eq__   s     c         C   s   t  |  j  S(   N(   t   hashRJ   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __hash__   s    c         C   s9   t  |  j |  j  } g  |  j D] } | ^ q | _ | S(   N(   RG   R   R   RJ   (   R   t   gR   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   copy   s     c         C   s
   |  j    S(   N(   RV   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   deepCopy   s    c         C   s%   t  |  j |  j  } |  j | _ | S(   N(   RG   R   R   RJ   (   R   RU   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   shallowCopy   s    c         C   sW   g  t  |  j  D]3 } g  t  |  j  D] } |  j | | ^ q& ^ q } | j   | S(   N(   R   R   R   RJ   t   reverse(   R   R   R   t   t(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   _getLegacyText   s    I
c         C   s   t  |  j    S(   N(   t   strR[   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   __str__  s    (   R9   R:   R;   R   RM   RP   RR   RT   RV   RW   RX   R[   R]   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyRG      s   								c   	      C   s   t  |  d  t  |   } } t | |  } xS t |   D]E \ } } | | d } x( t |  D] \ } } | | | | <q` Wq9 W| S(   Ni    i   (   t   lenRG   t	   enumerate(	   t
   gridStringR   R   R   t   ybart   lineR   R   t   el(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c          C   sL   d d d d d g d d d d d g d d d d d g g }  t  t |    S(   NRH   R"   i
   i(   R    R   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getCliffGrid  s    c          C   sF   d d d d d g d d d d d g d d d d d g g }  t  |   S(   NRH   i   R"   i
   i(   R    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getCliffGrid2  s    c       	   C   sj   d d d d d g d d d d d g d d d d d g d d d d d g d d d d d g g }  t  |   S(   NRH   R   i   i
   R"   i(   R    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getDiscountGrid  s    c       	   C   sX   d d d d d d d g d d d d d d d g d d d d d d d g g }  t  |   S(   NR   ii   R"   RH   i
   (   R    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getBridgeGrid"  s    c          C   s>   d d d d 
g d d d d g d d d d g g }  t  |   S(   NRH   i   R   iR"   (   R    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getBookGrid(  s    c          C   s\   d d d d 
g d d d d g d d d d g d d d d g d d d d g g }  t  |   S(   NRH   i   R   R"   (   R    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getMazeGrid.  s    c         C   s   d d l  } d } x t r | j   } d | k r< d } n  d | k rQ d } n  d | k rf d } n  d	 | k r{ d
 } n  d | k r t j d  n  | d k r q n  Pq W| |   } | | k r | d } n  | S(   si   
    Get an action from the user (rather than the agent).

    Used for debugging and lecture demos.
    iNt   UpR   t   DownR   t   LeftR   t   RightR   t   qi    (   t   graphicsUtilsRB   t   Truet   wait_for_keyst   sysR   (   R   t   actionFunctionRo   R   t   keyst   actions(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getUserAction8  s*    	 	 	 	 	  c         C   s	   |  GHd  S(   N(    (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   printStringN  s    c         C   s  d } d }	 | j    d t |   k r5 |  j   n  | d t |  d  x-t r|| j   }
 | |
  |   | j |
  } t |  d k r | d t |  d t |  d  | S| |
  } | d  k r d  n  | j	 |  \ } } | d	 t |
  d
 t |  d t |  d t |  d  d t |   k ra|  j
 |
 | | |  n  | | |	 7} |	 | 9}	 qP Wd t |   k r|  j   n  d  S(   Ni    g      ?t   startEpisodes   BEGINNING EPISODE: s   
s   EPISODE s    COMPLETE: RETURN WAS s!   Error: Agent returned None actions   Started in state: s   
Took action: s   
Ended in state: s   
Got reward: t   observeTransitiont   stopEpisode(   R>   t   dirRx   R\   Rp   R?   R   R^   RB   RA   Ry   Rz   (   t   agentt   environmentt   discountt   decisiont   displayt   messaget   pauset   episodet   returnst   totalDiscountR   Ru   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt
   runEpisodeP  s4    
 	
&	?c          C   st  t  j   }  |  j d d d d d d d d d	 d
 d d |  j d d d d d d d d d	 d d d d d |  j d d d d d d d d d	 d d d d d d |  j d d d d d d d d d	 d d d d d  |  j d! d" d d d d d d# d	 d$ d d d d% |  j d& d' d d d d( d d) d	 d* d d+ d d, |  j d- d. d d d d( d d/ d	 d0 d d+ d d1 |  j d2 d3 d d d d4 d d5 d d6 d	 d7 d d8 |  j d9 d: d d; d d( d d< d	 d= d d> |  j d? d@ d d d dA d d5 d dB d	 dC d dD |  j dE dF d dG d dH d	 t d dI |  j dJ dK d dG d dL d	 t d dM |  j dN dO d dG d dP d	 t d dQ |  j dR dS d d d dT d t d dU d	 dV d dW |  j dX dY d dG d dZ d	 t d d[ |  j d\ d] d dG d	 t d d^ |  j   \ } } | j r=| j d_ k r=d` GHd  | _ n  | j	 sO| j
 r[t | _ n  | j rpt | _ n  | S(a   Ns   -ds
   --discountR   t   storeR   R   t   destR~   t   defaultg?t   helps%   Discount on future (default %default)s   -rs   --livingRewardR   g        t   metavart   Rs4   Reward for living for a time step (default %default)s   -ns   --noiseR   g?t   Ps   How often action results in s'   unintended direction (default %default)s   -es	   --epsilont   epsilong333333?t   EsA   Chance of taking a random action in q-learning (default %default)s   -ls   --learningRatet   learningRateg      ?s#   TD learning rate (default %default)s   -is   --iterationsR   t   itersi
   t   Ks6   Number of rounds of value iteration (default %default)s   -ks
   --episodest   episodesi   s7   Number of epsiodes of the MDP to run (default %default)s   -gs   --gridt   Gt   stringR   t   BookGridse   Grid to use (case sensitive; options are BookGrid, BridgeGrid, CliffGrid, MazeGrid, default %default)s   -ws   --windowSizet   Xt   gridSizei   sE   Request a window width of X pixels *per grid cell* (default %default)s   -as   --agentt   AR|   RC   sD   Agent type (options are 'random', 'value' and 'q', default %default)s   -ts   --textt
   store_truet   textDisplays   Use text-only ASCII displays   -ps   --pauseR   s3   Pause GUI after each time step when running the MDPs   -qs   --quiett   quiets%   Skip display of any learning episodess   -ss   --speedR"   t   speedg      ?sQ   Speed of animation, S > 1.0 is faster, 0.0 < S < 1.0 is slower (default %default)s   -ms   --manualt   manuals   Manually control agents   -vs   --valueStepss$   Display each step of value iterationRn   s*   ## Disabling Agents in Manual Mode (-m) ##(   t   optparset   OptionParsert
   add_optionR7   R   t
   parse_argsR   R|   RB   R   R   R   Rp   (   t	   optParsert   optst   args(    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   parseOptionsx  sr    	'!	t   __main__t   geti    t   valueRn   c         C   s   t  j |   S(   N(   t   mdpR   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   <lambda>  s    t   gammat   alphaR   t   actionFnRC   i
   t   RandomAgentc           B   s5   e  Z d    Z d   Z d   Z d   Z d   Z RS(   c         C   s   t  j t j |   S(   N(   RC   t   choiceR   R   (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt	   getAction  s    c         C   s   d S(   Ng        (    (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   getValue  s    c         C   s   d S(   Ng        (    (   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt	   getQValue  s    c         C   s   d S(   sD   NOTE: 'random' is a special policy value; don't use it in your code.RC   (    (   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt	   getPolicy  s    c         C   s   d  S(   N(    (   R   R   R   R   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   update  s    (   R9   R:   R   R   R   R   R   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s
   				s   Unknown agent type: R   s   VALUES AFTER s    ITERATIONSs   Q-VALUES AFTER c         C   s   d  S(   N(   RB   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c         C   s   t  j |   S(   N(   R   t   displayNullValues(   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c         C   s   t  j t |  d  S(   Ns   CURRENT VALUES(   R   t   displayValuest   a(   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c         C   s   t  j t |  d  S(   Ns   CURRENT VALUES(   R   R   R   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c         C   s   t  j t |  d  S(   Ns   CURRENT Q-VALUES(   R   t   displayQValuesR   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR     s    c         C   s
   t  |   S(   N(   Rw   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR      s    c         C   s   d  S(   N(   RB   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR   "  s    c           C   s   d  S(   N(   RB   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR   %  s    c           C   s
   t  j   S(   N(   R   R   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR   '  s    c         C   s   t  |  t j  S(   N(   Rv   R   R   (   R   (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyR   +  s    t   RUNNINGt   EPISODESi   s"   AVERAGE RETURNS FROM START STATE: g        s	    EPISODES(    (    (M   RC   Rr   R   R}   R/   R   t   MarkovDecisionProcessR    t   EnvironmentR<   RG   R   Rd   Re   Rf   Rg   Rh   Ri   Rv   Rw   R   R   R9   R   t	   gridworldt   getattrR   t   mdpFunctionR	   R   R
   R   t   envt   textGridworldDisplayt   TextGridworldDisplayR   R   t   graphicsGridworldDisplayt   GraphicsGridworldDisplayR   R   t   startt   KeyboardInterruptR   t   valueIterationAgentst   qlearningAgentsRB   R   R|   t   ValueIterationAgentR~   R   t   gridWorldEnvR   R   R   t
   qLearnOptst   QLearningAgentR   R   R   t
   valueStepsR   RL   t	   tempAgentR   R\   R   R   t   displayCallbackR   t   messageCallbackt   pauseCallbackt   decisionCallbackR   R   R   (    (    (    sS   C:\Users\Nicholas\Desktop\CS188X Berkeley\05 Projects\03 Reinforcement\gridworld.pyt   <module>   s   &0								
			(	D				


	 	!$
$		   						,$
$