ó
ÍÄiQc           @   sÈ  d  d l  Z  d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l Z d  d l	 Z	 d  d l
 Z
 d  d l m Z m Z m Z d  d l m Z d  d l m Z d  d l m Z e ƒ  Z e Z d  d l	 Z	 d Z d Z d e  j f d	 „  ƒ  YZ d
 e  j f d „  ƒ  YZ d e  j f d „  ƒ  YZ d e  j f d „  ƒ  YZ d e  j f d „  ƒ  YZ d e  j f d „  ƒ  YZ  d d „ Z! d „  Z" d „  Z# d e  j f d „  ƒ  YZ$ d S(   iÿÿÿÿN(   t   Countert   TimeoutFunctiont   FixedRandom(   t   defaultdict(   t   PrettyPrinter(   t   sha1gš™™™™™¹¿gš™™™™™É?t   ValueIterationTestc           B   s€   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d d „ Z d „  Z RS(   c         C   sø   t  t |  ƒ j | | ƒ t | d ƒ |  _ t j t | d ƒ ƒ |  _ t	 | d ƒ } d | k r |  j j
 t | d ƒ ƒ n  d | k rª |  j j t | d ƒ ƒ n  d } t t | | ƒ ƒ |  _ | d |  _ | | k  rô |  j j | ƒ n  d  S(   Nt   discountt   gridt   valueIterationst   noiset   livingRewardi
   t   test_out_file(   t   superR   t   __init__t   floatR   t	   gridworldt	   Gridworldt	   parseGridR   t   intt   setNoiset   setLivingRewardt   ranget   mint   numsIterationsForDisplayt   testOutFilet   append(   t   selft   questiont   testDictt
   iterationst   maxPreIterations(    (    s   reinforcementTestClasses.pyR      s      c         C   s,   t  |  j d ƒ  } | j | ƒ Wd  QXd  S(   Nt   w(   t   openR   t   write(   R   t   stringt   handle(    (    s   reinforcementTestClasses.pyt   writeFailureFile*   s    c         C   s,   t  j j |  j ƒ r( t  j |  j ƒ n  d  S(   N(   t   ost   patht   existsR   t   remove(   R   (    (    s   reinforcementTestClasses.pyt   removeFailureFileIfExists.   s    c         C   sÃ   d } d } x |  j  D]’ } | |  j  d k } |  j | | | | | ƒ \ } }	 }
 | |	 7} | |
 7} | s |  j | ƒ |  j d |  j ƒ |  j | ƒ |  j | ƒ Sq W|  j ƒ  |  j | ƒ S(   Nt    iÿÿÿÿs=   For more details to help you debug, see test output file %s

(   R   t   executeNIterationst
   addMessageR   R%   t   testFailR*   t   testPass(   R   t   gradest
   moduleDictt   solutionDictt   failureOutputFileStringt   failureOutputStdStringt   nt   checkPolicyR/   t   stdOutStringt   fileOutString(    (    s   reinforcementTestClasses.pyt   execute2   s    $


c         C   s2  t  } |  j | | ƒ \ } } }	 }
 d } d } d | } |  j | | | ƒ r{ | d | 7} | d |  j | | ƒ 7} n\ t } d | } | d |  j | | ƒ 7} | d |  j | | | ƒ 7} | | 7} | | 7} xÑ |	 D]É } d | | f } | | } |  j | | | ƒ rE| d	 | | f 7} | d |  j | | ƒ 7} qÞ t } d
 | | f } | d |  j | | ƒ 7} | d |  j | | | ƒ 7} | | 7} | | 7} qÞ W| r%|  j |
 | d ƒ s%t } d } | d |  j d |
 ƒ 7} | d |  j d | d ƒ 7} | | 7} | | 7} q%n  | | | f S(   NR+   s   values_k_%ds$   Values at iteration %d are correct.
s!      Student/correct solution:
 %s
s(   Values at iteration %d are NOT correct.
s      Student solution:
 %s
s      Correct solution:
 %s
s   q_values_k_%d_action_%ss4   Q-Values at iteration %d for action %s are correct.
s8   Q-Values at iteration %d for action %s are NOT correct.
t   policys   Policy is NOT correct.
(   t   Truet   runAgentt   comparePrettyValuest   prettyValueSolutionStringt   False(   R   R0   R1   R2   R5   R6   R/   t   valuesPrettyt   qValuesPrettyt   actionst   policyPrettyR7   R8   t	   valuesKeyt	   outStringt   actiont
   qValuesKeyt   qValues(    (    s   reinforcementTestClasses.pyR,   B   sF    






c   
      C   sî   t  | d ƒ Ù } d } g  } x„ |  j D]y } |  j | | ƒ \ } } } } | j |  j d | | ƒ ƒ x5 | D]- }	 | j |  j d | |	 f | |	 ƒ ƒ qp Wq( W| j |  j d | ƒ ƒ | j |  j d d j | ƒ d ƒ ƒ Wd  QXt S(   NR    R+   s   values_k_%ds   q_values_k_%d_action_%sR:   RB   s   
(   R!   R   R<   R"   R>   t   joinR;   (
   R   R1   t   filePathR$   RC   RB   R5   R@   RA   RF   (    (    s   reinforcementTestClasses.pyt   writeSolutioni   s    /,c         C   s†  | d j  |  j d |  j d | ƒ} |  j j ƒ  } t t d „  g  | D] } |  j j | ƒ ^ qG ƒ ƒ } i  } i  } i  }	 xª | D]¢ } | j | ƒ | | <| j | ƒ |	 | <|  j j | ƒ }
 xa | D]Y } | j	 | ƒ së i  | | <n  | |
 k r| j
 | | ƒ | | | <qÉ d  | | | <qÉ Wq„ W|  j | ƒ } |  j |	 ƒ } i  } x% | D] } |  j | | ƒ | | <qUW| | | | f S(   Nt   valueIterationAgentsR   R   c         S   s   t  |  ƒ j | ƒ S(   N(   t   sett   union(   t   at   b(    (    s   reinforcementTestClasses.pyt   <lambda>y   s    (   t   ValueIterationAgentR   R   t	   getStatest   listt   reducet   getPossibleActionst   getValuet   computeActionFromValuest   has_keyt   computeQValueFromValuest   Nonet   prettyValuest   prettyPolicy(   R   R1   t   numIterationst   agentt   statest   stateRB   t   valuesRH   R:   t   possibleActionsRF   R@   RC   RA   (    (    s   reinforcementTestClasses.pyR<   v   s,    %7c   
      C   s  d } |  j  j ƒ  } xâ t |  j  j  j ƒ D]Ë } |  j  j  j d | } g  } x‹ t |  j  j  j ƒ D]t } | | f | k rË | | | f }	 |	 d  k r¨ | j d ƒ qØ | j | j | | | f ƒ ƒ qd | j d	 ƒ qd W| d d j | ƒ f 7} q+ W| d 7} | S(
   NR+   i   s
      illegalt   _i
   s           %s
s      s   
t
   __________(	   R   RS   R   t   heightt   widthR[   R   t   formatRI   (
   R   t   elementst   formatStringt   prettyR`   t   ybart   yt   rowt   xt   value(    (    s   reinforcementTestClasses.pyt   prettyPrint   s    #
c         C   s   |  j  | d ƒ S(   Ns	   {0:10.4f}(   Rq   (   R   Rb   (    (    s   reinforcementTestClasses.pyR\   ¢   s    c         C   s   |  j  | d ƒ S(   Ns   {0:10s}(   Rq   (   R   R:   (    (    s   reinforcementTestClasses.pyR]   ¥   s    c         C   s   d | | j  ƒ  f S(   Ns   %s: """
%s
"""

(   t   rstrip(   R   t   nameRk   (    (    s   reinforcementTestClasses.pyR>   ¨   s    g{®Gáz„?c         C   sÊ   |  j  | ƒ } |  j  | ƒ } t | ƒ t | ƒ k r: t Sx‰ t | | ƒ D]x \ } } y< t | ƒ } t | ƒ }	 t | |	 ƒ }
 |
 | k r‘ t SWqJ t k
 rÁ | j ƒ  | j ƒ  k rÂ t SqJ XqJ Wt S(   N(	   t   parsePrettyValuest   lenR?   t   zipR   t   abst
   ValueErrort   stripR;   (   R   t   aPrettyt   bPrettyt	   tolerancet   aListt   bListRO   RP   t   aNumt   bNumt   error(    (    s   reinforcementTestClasses.pyR=   «   s    c         C   s   | j  ƒ  } | S(   N(   t   split(   R   Rk   Rb   (    (    s   reinforcementTestClasses.pyRt   ½   s    (   t   __name__t
   __module__R   R%   R*   R9   R,   RK   R<   Rq   R\   R]   R>   R=   Rt   (    (    (    s   reinforcementTestClasses.pyR      s   					'						t   ApproximateQLearningTestc           B   s€   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d d „ Z d „  Z RS(   c         C   sƒ  t  t |  ƒ j | | ƒ t | d ƒ |  _ t j t | d ƒ ƒ |  _ d | k rq |  j j	 t | d ƒ ƒ n  d | k rš |  j j
 t | d ƒ ƒ n  t j t | d ƒ ƒ |  _ t j |  j ƒ |  _ t | d ƒ |  _ t | d ƒ |  _ i |  j j d 6|  j d 6|  j d 6|  j d	 6|  _ t | d
 ƒ } d } t t | | ƒ ƒ |  _ | d |  _ | | k  r|  j j | ƒ n  d  S(   NR   R   R
   R   t   epsilont   learningRatet   actionFnt   gammat   alphat   numExperiencesi
   R   (   R   R…   R   R   R   R   R   R   R   R   R   t   GridworldEnvironmentt   envR†   R‡   RV   t   optsR   R   R   t   numsExperiencesForDisplayR   R   (   R   R   R   R‹   t   maxPreExperiences(    (    s   reinforcementTestClasses.pyR   Ä   s$      4c         C   s,   t  |  j d ƒ  } | j | ƒ Wd  QXd  S(   NR    (   R!   R   R"   (   R   R#   R$   (    (    s   reinforcementTestClasses.pyR%   Ö   s    c         C   s,   t  j j |  j ƒ r( t  j |  j ƒ n  d  S(   N(   R&   R'   R(   R   R)   (   R   (    (    s   reinforcementTestClasses.pyR*   Ú   s    c   
      C   s­   d } d } x‡ |  j  D]| } |  j | | | | ƒ \ } } }	 | | 7} | |	 7} | s |  j | ƒ |  j d |  j ƒ |  j | ƒ |  j | ƒ Sq W|  j ƒ  |  j | ƒ S(   NR+   s=   For more details to help you debug, see test output file %s

(   R   t   executeNExperiencesR-   R   R%   R.   R*   R/   (
   R   R0   R1   R2   R3   R4   R5   R/   R7   R8   (    (    s   reinforcementTestClasses.pyR9   Þ   s    !


c         C   sz  t  } |  j | | ƒ \ } } } }	 d }
 d | } |	 d  k	 rQ | d |	 7} n  d | } | t | | ƒ k r™ | d | 7} | d t j | ƒ 7} n  xÑ | D]É } d | | f } | | } |  j | | | ƒ r| d | | f 7} | d	 |  j | | ƒ 7} q  t } d
 | | f } | d |  j | | ƒ 7} | d |  j | | | ƒ 7} |
 | 7}
 | | 7} q  W| |
 | f S(   NR+   s7   ==================== Iteration %d ====================
sZ   Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)

s   weights_k_%ds$   Weights at iteration %d are correct.s"      Student/correct solution:

%s

s   q_values_k_%d_action_%ss5   Q-Values at iteration %d for action '%s' are correct.s       Student/correct solution:
	%ss9   Q-Values at iteration %d for action '%s' are NOT correct.s      Student solution:
	%ss      Correct solution:
	%s(	   R;   R<   R[   t   evalt   ppt   pformatR=   R>   R?   (   R   R0   R1   R2   R5   R/   RA   t   weightsRB   t   lastExperienceR7   R8   t
   weightsKeyRF   RG   RH   RE   (    (    s   reinforcementTestClasses.pyR‘   í   s.    



c   
      C   s¬   t  | d ƒ — } x |  j D]‚ } |  j | | ƒ \ } } } } | j |  j d | t j | ƒ ƒ ƒ x5 | D]- }	 | j |  j d | |	 f | |	 ƒ ƒ qm Wq WWd  QXt S(   NR    s   weights_k_%ds   q_values_k_%d_action_%s(   R!   R   R<   R"   R>   R“   R”   R;   (
   R   R1   RJ   R$   R5   RA   R•   RB   Rd   RF   (    (    s   reinforcementTestClasses.pyRK     s    &5c            sâ  | d j  ˆ  j   } t ‡  f d †  ˆ  j j ƒ  ƒ } | j ƒ  t ƒ  j } d  } x~ t	 | ƒ D]p } | j
 | ƒ } | j
 ˆ  j j | ƒ ƒ }	 ˆ  j j | |	 d | ƒ\ }
 } | |	 |
 | f } | j | Œ  q` Wt t d „  g  | D] } ˆ  j j | ƒ ^ qç ƒ ƒ } i  } | j ƒ  } x„ | D]| } ˆ  j j | ƒ } xa | D]Y }	 | j |	 ƒ sei  | |	 <n  |	 | k rŽ| j | |	 ƒ | |	 | <qCd  | |	 | <qCWq$Wi  } x% | D] }	 ˆ  j | |	 ƒ | |	 <q±W| | | | f S(   Nt   qlearningAgentsc            s   t  ˆ  j j |  ƒ ƒ d k S(   Ni    (   Ru   R   RV   (   Ra   (   R   (    s   reinforcementTestClasses.pyRQ     s    t   randObjc         S   s   t  |  ƒ j | ƒ S(   N(   RM   RN   (   RO   RP   (    (    s   reinforcementTestClasses.pyRQ     s    (   t   ApproximateQAgentRŽ   t   filterR   RS   t   sortR   t   randomR[   R   t   choiceRV   R   t   getRandomNextStatet   updateRT   RU   t
   getWeightsRY   t	   getQValueR\   (   R   R1   R‹   R_   R`   R™   R–   t   it
   startStateRF   t   endStatet   rewardRa   RB   RH   R•   Rc   RA   (    (   R   s   reinforcementTestClasses.pyR<     s4    !
!7c   
      C   s  d } |  j  j ƒ  } xâ t |  j  j  j ƒ D]Ë } |  j  j  j d | } g  } x‹ t |  j  j  j ƒ D]t } | | f | k rË | | | f }	 |	 d  k r¨ | j d ƒ qØ | j | j | | | f ƒ ƒ qd | j d	 ƒ qd W| d d j | ƒ f 7} q+ W| d 7} | S(
   NR+   i   s
      illegalRd   i
   s           %s
s      s   
Re   (	   R   RS   R   Rf   Rg   R[   R   Rh   RI   (
   R   Ri   Rj   Rk   R`   Rl   Rm   Rn   Ro   Rp   (    (    s   reinforcementTestClasses.pyRq   /  s    #
c         C   s   |  j  | d ƒ S(   Ns	   {0:10.4f}(   Rq   (   R   Rb   (    (    s   reinforcementTestClasses.pyR\   B  s    c         C   s   |  j  | d ƒ S(   Ns   {0:10s}(   Rq   (   R   R:   (    (    s   reinforcementTestClasses.pyR]   E  s    c         C   s   d | | j  ƒ  f S(   Ns   %s: """
%s
"""

(   Rr   (   R   Rs   Rk   (    (    s   reinforcementTestClasses.pyR>   H  s    g{®Gáz„?c         C   sÊ   |  j  | ƒ } |  j  | ƒ } t | ƒ t | ƒ k r: t Sx‰ t | | ƒ D]x \ } } y< t | ƒ } t | ƒ }	 t | |	 ƒ }
 |
 | k r‘ t SWqJ t k
 rÁ | j ƒ  | j ƒ  k rÂ t SqJ XqJ Wt S(   N(	   Rt   Ru   R?   Rv   R   Rw   Rx   Ry   R;   (   R   Rz   R{   R|   R}   R~   RO   RP   R   R€   R   (    (    s   reinforcementTestClasses.pyR=   K  s    c         C   s   | j  ƒ  } | S(   N(   R‚   (   R   Rk   Rb   (    (    s   reinforcementTestClasses.pyRt   ]  s    (   Rƒ   R„   R   R%   R*   R9   R‘   RK   R<   Rq   R\   R]   R>   R=   Rt   (    (    (    s   reinforcementTestClasses.pyR…   Â   s   												t   QLearningTestc           B   s€   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z d
 „  Z d d „ Z d „  Z RS(   c         C   sƒ  t  t |  ƒ j | | ƒ t | d ƒ |  _ t j t | d ƒ ƒ |  _ d | k rq |  j j	 t | d ƒ ƒ n  d | k rš |  j j
 t | d ƒ ƒ n  t j t | d ƒ ƒ |  _ t j |  j ƒ |  _ t | d ƒ |  _ t | d ƒ |  _ i |  j j d 6|  j d 6|  j d 6|  j d	 6|  _ t | d
 ƒ } d } t t | | ƒ ƒ |  _ | d |  _ | | k  r|  j j | ƒ n  d  S(   NR   R   R
   R   R†   R‡   Rˆ   R‰   RŠ   R‹   i
   R   (   R   R§   R   R   R   R   R   R   R   R   R   RŒ   R   R†   R‡   RV   RŽ   R   R   R   R   R   R   (   R   R   R   R‹   R   (    (    s   reinforcementTestClasses.pyR   d  s$      4c         C   s,   t  |  j d ƒ  } | j | ƒ Wd  QXd  S(   NR    (   R!   R   R"   (   R   R#   R$   (    (    s   reinforcementTestClasses.pyR%   v  s    c         C   s,   t  j j |  j ƒ r( t  j |  j ƒ n  d  S(   N(   R&   R'   R(   R   R)   (   R   (    (    s   reinforcementTestClasses.pyR*   z  s    c         C   sÃ   d } d } x |  j  D]’ } | |  j  d k } |  j | | | | | ƒ \ } }	 }
 | |	 7} | |
 7} | s |  j | ƒ |  j d |  j ƒ |  j | ƒ |  j | ƒ Sq W|  j ƒ  |  j | ƒ S(   NR+   iÿÿÿÿs=   For more details to help you debug, see test output file %s

(   R   R‘   R-   R   R%   R.   R*   R/   (   R   R0   R1   R2   R3   R4   R5   t   checkValuesAndPolicyR/   R7   R8   (    (    s   reinforcementTestClasses.pyR9   ~  s    $


c         C   s   t  } |  j | | ƒ \ } } }	 }
 } d } d | } | d  k	 rT | d | 7} n  xÑ |	 D]É } d | | f } | | } |  j | | | ƒ rÂ | d | | f 7} | d |  j | | ƒ 7} q[ t } d | | f } | d |  j | | ƒ 7} | d	 |  j | | | ƒ 7} | | 7} | | 7} q[ W| r|  j | | d
 ƒ sŸt } d } | d |  j d
 | ƒ 7} | d	 |  j d
 | d
 ƒ 7} | | 7} | | 7} n  |  j |
 | d ƒ st } d } | d |  j d |
 ƒ 7} | d	 |  j d | d ƒ 7} | | 7} | | 7} qn  | | | f S(   NR+   s7   ==================== Iteration %d ====================
s[   Agent observed the transition (startState = %s, action = %s, endState = %s, reward = %f)


s   q_values_k_%d_action_%ss5   Q-Values at iteration %d for action '%s' are correct.s       Student/correct solution:
	%ss9   Q-Values at iteration %d for action '%s' are NOT correct.s      Student solution:
	%ss      Correct solution:
	%sRb   s   Values are NOT correct.R:   s   Policy is NOT correct.(   R;   R<   R[   R=   R>   R?   (   R   R0   R1   R2   R5   R¨   R/   R@   RA   RB   RC   R–   R7   R8   RF   RG   RH   RE   (    (    s   reinforcementTestClasses.pyR‘   Ž  sD    !




c         C   sÇ   t  | d ƒ ² } d } d } xj |  j D]_ } |  j | | ƒ \ } } } } }	 x5 | D]- }
 | j |  j d | |
 f | |
 ƒ ƒ qV Wq( W| j |  j d | ƒ ƒ | j |  j d | ƒ ƒ Wd  QXt S(   NR    R+   s   q_values_k_%d_action_%sRb   R:   (   R!   R   R<   R"   R>   R;   (   R   R1   RJ   R$   R@   RC   R5   RA   RB   Rd   RF   (    (    s   reinforcementTestClasses.pyRK   ³  s    !/c            s)  | d j  ˆ  j   } t ‡  f d †  ˆ  j j ƒ  ƒ } | j ƒ  t ƒ  j } d  } x~ t	 | ƒ D]p } | j
 | ƒ } | j
 ˆ  j j | ƒ ƒ }	 ˆ  j j | |	 d | ƒ\ }
 } | |	 |
 | f } | j | Œ  q` Wt t d „  g  | D] } ˆ  j j | ƒ ^ qç ƒ ƒ } i  } i  } i  } xª | D]¢ } | j | ƒ | | <| j | ƒ | | <ˆ  j j | ƒ } xa | D]Y }	 | j |	 ƒ s‹i  | |	 <n  |	 | k r´| j | |	 ƒ | |	 | <qid  | |	 | <qiWq$Wˆ  j | ƒ } ˆ  j | ƒ } i  } x% | D] }	 ˆ  j | |	 ƒ | |	 <qõW| | | | | f S(   NR˜   c            s   t  ˆ  j j |  ƒ ƒ d k S(   Ni    (   Ru   R   RV   (   Ra   (   R   (    s   reinforcementTestClasses.pyRQ   Á  s    R™   c         S   s   t  |  ƒ j | ƒ S(   N(   RM   RN   (   RO   RP   (    (    s   reinforcementTestClasses.pyRQ   Í  s    (   t   QLearningAgentRŽ   R›   R   RS   Rœ   R   R   R[   R   Rž   RV   R   RŸ   R    RT   RU   t   computeValueFromQValuest   computeActionFromQValuesRY   R¢   R\   R]   (   R   R1   R‹   R_   R`   R™   R–   R£   R¤   RF   R¥   R¦   Ra   RB   Rb   RH   R:   Rc   R@   RC   RA   (    (   R   s   reinforcementTestClasses.pyR<   ¿  s>    !
!7c   
      C   s  d } |  j  j ƒ  } xâ t |  j  j  j ƒ D]Ë } |  j  j  j d | } g  } x‹ t |  j  j  j ƒ D]t } | | f | k rË | | | f }	 |	 d  k r¨ | j d ƒ qØ | j | j | | | f ƒ ƒ qd | j d	 ƒ qd W| d d j | ƒ f 7} q+ W| d 7} | S(
   NR+   i   s
      illegalRd   i
   s           %s
s      s   
Re   (	   R   RS   R   Rf   Rg   R[   R   Rh   RI   (
   R   Ri   Rj   Rk   R`   Rl   Rm   Rn   Ro   Rp   (    (    s   reinforcementTestClasses.pyRq   ã  s    #
c         C   s   |  j  | d ƒ S(   Ns	   {0:10.4f}(   Rq   (   R   Rb   (    (    s   reinforcementTestClasses.pyR\   ö  s    c         C   s   |  j  | d ƒ S(   Ns   {0:10s}(   Rq   (   R   R:   (    (    s   reinforcementTestClasses.pyR]   ù  s    c         C   s   d | | j  ƒ  f S(   Ns   %s: """
%s
"""

(   Rr   (   R   Rs   Rk   (    (    s   reinforcementTestClasses.pyR>   ü  s    g{®Gáz„?c         C   sÊ   |  j  | ƒ } |  j  | ƒ } t | ƒ t | ƒ k r: t Sx‰ t | | ƒ D]x \ } } y< t | ƒ } t | ƒ }	 t | |	 ƒ }
 |
 | k r‘ t SWqJ t k
 rÁ | j ƒ  | j ƒ  k rÂ t SqJ XqJ Wt S(   N(	   Rt   Ru   R?   Rv   R   Rw   Rx   Ry   R;   (   R   Rz   R{   R|   R}   R~   RO   RP   R   R€   R   (    (    s   reinforcementTestClasses.pyR=   ÿ  s    c         C   s   | j  ƒ  } | S(   N(   R‚   (   R   Rk   Rb   (    (    s   reinforcementTestClasses.pyRt     s    (   Rƒ   R„   R   R%   R*   R9   R‘   RK   R<   Rq   R\   R]   R>   R=   Rt   (    (    (    s   reinforcementTestClasses.pyR§   b  s   					%		$				t   EpsilonGreedyTestc           B   s8   e  Z d  „  Z d „  Z d „  Z d „  Z d d „ Z RS(   c         C   sO  t  t |  ƒ j | | ƒ t | d ƒ |  _ t j t | d ƒ ƒ |  _ d | k rq |  j j	 t | d ƒ ƒ n  d | k rš |  j j
 t | d ƒ ƒ n  t j t | d ƒ ƒ |  _ t j |  j ƒ |  _ t | d ƒ |  _ t | d ƒ |  _ t | d ƒ |  _ t | d ƒ |  _ i |  j j d	 6|  j d 6|  j d
 6|  j d 6|  _ d  S(   NR   R   R
   R   R†   R‡   R‹   R   Rˆ   R‰   RŠ   (   R   R¬   R   R   R   R   R   R   R   R   R   RŒ   R   R†   R‡   R   R‹   R^   RV   RŽ   (   R   R   R   (    (    s   reinforcementTestClasses.pyR     s      c         C   s-   |  j  | ƒ r |  j | ƒ S|  j | ƒ Sd  S(   N(   t   testEpsilonGreedyR/   R.   (   R   R0   R1   R2   (    (    s   reinforcementTestClasses.pyR9   '  s    c         C   s=   t  | d ƒ ( } | j d |  j ƒ | j d ƒ Wd  QXt S(   NR    s$   # This is the solution file for %s.
s   # File intentionally blank.
(   R!   R"   R'   R;   (   R   R1   RJ   R$   (    (    s   reinforcementTestClasses.pyRK   -  s    c   
         sÌ   | d j  ˆ  j   } t ‡  f d †  ˆ  j j ƒ  ƒ } | j ƒ  t ƒ  j } xx t ˆ  j	 ƒ D]g } | j
 | ƒ } | j
 ˆ  j j | ƒ ƒ } ˆ  j j | | d | ƒ\ } }	 | j | | | |	 ƒ q] W| S(   NR˜   c            s   t  ˆ  j j |  ƒ ƒ d k S(   Ni    (   Ru   R   RV   (   Ra   (   R   (    s   reinforcementTestClasses.pyRQ   5  s    R™   (   R©   RŽ   R›   R   RS   Rœ   R   R   R   R‹   Rž   RV   R   RŸ   R    (
   R   R1   R_   R`   R™   R£   R¤   RF   R¥   R¦   (    (   R   s   reinforcementTestClasses.pyR<   3  s    !
!gš™™™™™™?c         C   s&  |  j  | ƒ } x|  j j ƒ  D]ÿ } t | j | ƒ ƒ } | d k rL q n  d } | j | ƒ } x9 t |  j ƒ D]( } | j | ƒ | k rq | d 7} qq qq W|  j | }	 |  j |  j t	 | ƒ }
 |	 |
 } t
 | |  j ƒ } | | k r |  j d ƒ |  j d |  j | | | f ƒ t Sq Wt S(   Ni   i    s/   Epsilon-greedy action selection is not correct.sP   Actual epsilon = %f; student empirical epsilon = %f; error = %f > tolerance = %f(   R<   R   RS   Ru   t   getLegalActionsR«   R   R^   t	   getActionR   Rw   R†   R-   R?   R;   (   R   R1   R|   R_   Ra   t   numLegalActionst   numGreedyChoicest   optimalActiont	   iterationt   empiricalEpsilonNumeratort   empiricalEpsilonDenominatort   empiricalEpsilonR   (    (    s   reinforcementTestClasses.pyR­   A  s&    
 (   Rƒ   R„   R   R9   RK   R<   R­   (    (    (    s   reinforcementTestClasses.pyR¬     s
   				t   Question6Testc           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C   s   t  t |  ƒ j | | ƒ d  S(   N(   R   R·   R   (   R   R   R   (    (    s   reinforcementTestClasses.pyR   ]  s    c         C   s…   | d j  ƒ  } t | ƒ j ƒ  j ƒ  } t | ƒ j ƒ  } | d k rS |  j | ƒ S|  j d ƒ |  j d | f ƒ |  j | ƒ Sd  S(   Nt   analysist(   46729c96bb1e4081fdc81a8ff74b3e5db8fba415s   Solution is not correct.s      Student solution: %s(	   t	   question6t   strRy   t   lowerR   t	   hexdigestR/   R-   R.   (   R   R0   R1   R2   t   studentSolutiont   hashedSolution(    (    s   reinforcementTestClasses.pyR9   `  s    c         C   s>   t  | d ƒ } | j d |  j ƒ | j d ƒ | j ƒ  t S(   NR    s$   # This is the solution file for %s.
s   # File intentionally blank.
(   R!   R"   R'   t   closeR;   (   R   R1   RJ   R$   (    (    s   reinforcementTestClasses.pyRK   k  s
    
(   Rƒ   R„   R   R9   RK   (    (    (    s   reinforcementTestClasses.pyR·   [  s   		t   EvalAgentTestc           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C   so  t  t |  ƒ j | | ƒ | d |  _ d | k rB t | d ƒ n d  |  _ d | k rg t | d ƒ n d  |  _ d | k rŒ t | d ƒ n d  |  _ g  | j	 d d ƒ j
 ƒ  D] } t | ƒ ^ q® |  _ g  | j	 d d ƒ j
 ƒ  D] } t | ƒ ^ qâ |  _ g  | j	 d d ƒ j
 ƒ  D] } t | ƒ ^ q|  _ t g  |  j |  j |  j g D] } t | ƒ ^ qMƒ |  _ d  S(	   Nt   pacmanParamst   scoreMinimumt   nonTimeoutMinimumt   winsMinimumt   scoreThresholdsR+   t   nonTimeoutThresholdst   winsThresholds(   R   RÁ   R   RÂ   R   R[   RÃ   RÄ   RÅ   t   getR‚   RÆ   RÇ   RÈ   t   sumRu   t	   maxPoints(   R   R   R   t   st   t(    (    s   reinforcementTestClasses.pyR   y  s    %%%444c      	   C   s¸  |  j  d |  j f ƒ t j ƒ  } t j t j |  j j d ƒ ƒ   } t j ƒ  | } t | ƒ } i | d 6g  | D] } | j j	 ƒ  ^ qt j
 t ƒ d 6| d 6g  | D] } | j j ƒ  ^ q§ d 6g  | D] } | j ^ qÊ j
 t ƒ d 6g  | D] } | j ^ qð j
 t ƒ d 6}	 t |	 d ƒ t t |	 d ƒ ƒ }
 | |	 d } |	 d } d	 „  } | |
 |  j |  j d
 ƒ | | |  j |  j d ƒ | | |  j |  j d ƒ g } d } x¹| D]±\ } } } } } } | d  k rît | ƒ d k rîq²n  | | 7} | s-| d k st ‚ |  j  d | | | f ƒ n# |  j  d | | | t | ƒ f ƒ | d  k rñ|  j  d ƒ |  j  d | f ƒ t | ƒ d k sŸ| | d k r¶|  j  d | f ƒ n  xª t | ƒ D]' \ } } |  j  d | | d f ƒ qÃWq²t | ƒ d k r²|  j  d ƒ |  j  d | d f ƒ x8 t | ƒ D]' \ } } |  j  d | | d f ƒ q5Wq²q²Wt g  | D] \ } } } } } } | ^ qqƒ r¢d } n  |  j | | |  j ƒ S(   Ns1   Grading agent using command:  python pacman.py %st    t   timet   winst   gamest   scorest   timeoutst   crashesc         S   sg   d } | d  k p |  | k } | rQ x* | D] } |  | k r+ | d 7} q+ q+ Wn  | | |  | | | f S(   Ni    i   (   R[   (   Rp   t   minimumt
   thresholdsRs   t   pointst   passedRÍ   (    (    s   reinforcementTestClasses.pyt   gradeThreshold˜  s    s   average scores   games not timed outi    s$   %s %s (fail: below minimum value %s)s   %s %s (%s of %s points)s       Grading scheme:s        < %s:  fails       >= %s:  0 pointss       >= %s:  %s pointsi   s        < %s:  0 points(   R-   RÂ   RÏ   t   pacmant   runGamest   readCommandR‚   Ru   Ra   t   isWint   countR;   t   getScoret   agentTimeoutt   agentCrashedRÊ   R   RÃ   RÆ   RÄ   RÇ   RÅ   RÈ   R[   t   AssertionErrort	   enumeratet   anyt   testPartialRË   (   R   R0   R1   R2   t	   startTimeRÑ   t	   totalTimet   numGamest   gt   statst   averageScoret   nonTimeoutsRÐ   RÙ   t   resultst   totalPointsRØ   R×   Rp   RÕ   RÖ   Rs   t   idxt	   thresholdRd   (    (    s   reinforcementTestClasses.pyR9   ˆ  sN    $6*O$
		
#""&2	c         C   s=   t  | d ƒ ( } | j d |  j ƒ | j d ƒ Wd  QXt S(   NR    s$   # This is the solution file for %s.
s   # File intentionally blank.
(   R!   R"   R'   R;   (   R   R1   RJ   R$   (    (    s   reinforcementTestClasses.pyRK   Ä  s    (   Rƒ   R„   R   R9   RK   (    (    (    s   reinforcementTestClasses.pyRÁ   w  s   		<id   c         C   s  | } g  } x
t  | ƒ D]ü } | |  k r/ Pn  |  | } | j d | ƒ | d k rq | d | d d f } n  | d k r˜ | d | d d f } n  | d k r¿ | d d | d f } n  | d k ræ | d d | d f } n  | d k sþ | d  k r| j d	 ƒ Pn  | } q W| S(
   Ns   (%s,%s)t   northi    i   t   southt   eastt   westt   exitt   TERMINAL_STATE(   R   R   R[   (   R:   t   startt   numStepsRa   R'   R£   RF   t	   nextState(    (    s   reinforcementTestClasses.pyt
   followPathÑ  s(    
    
c         C   sµ   g  |  j  d ƒ D]+ } g  | j  ƒ  D] } | j ƒ  ^ q# ^ q } xd | D]\ } xS t | ƒ D]E \ } } y t | ƒ } Wn n X| d k r– d } n  | | | <q[ WqH Wt j | ƒ S(   Ns   
Rd   RÎ   (   R‚   Ry   Rã   R   R   t   makeGrid(   R#   t   linet   entryR   Rn   Ro   t   col(    (    s   reinforcementTestClasses.pyR   ä  s    A	c         C   sM   |  d j  | d | ƒ} i  } x' | j ƒ  D] } | j | ƒ | | <q, W| S(   NRL   R   (   RR   RS   RX   (   R1   R   R   t   valueIteratorR:   Ra   (    (    s   reinforcementTestClasses.pyt   computePolicyò  s
    t   GridPolicyTestc           B   s5   e  Z d  „  Z d „  Z d „  Z d „  Z d „  Z RS(   c         C   s¾   t  t |  ƒ j | | ƒ | d |  _ | j d d ƒ j ƒ  d k |  _ | d |  _ t j	 t
 | d ƒ ƒ |  _ | d |  _ t
 | d ƒ |  _ | j d d  ƒ |  _ | j d	 d  ƒ |  _ d  S(
   Nt   parameterFnt	   question2t   falset   trueR   t   gridNameR:   t
   pathVisitst   pathNotVisits(   R   R  R   R  RÉ   R¼   R  t   gridTextR   R   R   R   R  R:   R[   R  R  (   R   R   R   (    (    s   reinforcementTestClasses.pyR   ý  s    !c      	   C   sï  t  | d |  j ƒ s: |  j d |  j f ƒ |  j | ƒ St | d |  j ƒ ƒ  } t | ƒ t k r˜ | j ƒ  d d !d k r˜ |  j d ƒ |  j | ƒ S|  j r9d  } y( | \ } } t
 | ƒ } t
 | ƒ } Wn+ |  j d |  j | f ƒ |  j | ƒ SX| d k rž| d	 k rž|  j d
 | f ƒ |  j | ƒ Sne y7 | \ } } } t
 | ƒ } t
 | ƒ } t
 | ƒ } Wn+ |  j d |  j | f ƒ |  j | ƒ SX|  j j | ƒ | d  k rÍ|  j j | ƒ n  |  j j ƒ  } t | |  j | ƒ }	 i d d 6d d 6d d 6d d 6d d 6}
 |  j j |  j j } } t } x| t | ƒ D]n } xe t | ƒ D]W } |  j | | |
 k rY|	 | | f |
 |  j | | k rY| | f } t } qYqYWqFW| sœ|  j d ƒ |  j d | |	 | f ƒ |  j d | |
 |  j | d | d f ƒ |  j d ƒ |  j |	 t ƒ |  j d ƒ |  j d ƒ |  j d ƒ |  j |  j t ƒ |  j d ƒ |  j d ƒ |  j ƒ  |  j | ƒ St |	 |  j j ƒ  ƒ } |  j d  k rK|  j | k rK|  j d  |  j f ƒ |  j d! | f ƒ |  j d ƒ |  j |	 t ƒ |  j d ƒ |  j d" ƒ |  j ƒ  |  j | ƒ S|  j d  k râ|  j | k râ|  j d# |  j f ƒ |  j d! | f ƒ |  j d ƒ |  j |	 t ƒ |  j d ƒ |  j d" ƒ |  j ƒ  |  j | ƒ S|  j | ƒ S($   NR¸   s#   Method not implemented: analysis.%si    i   t   nots   Actually, it is possible!sI   Did not return a (discount, noise) pair; instead analysis.%s returned: %sgÍÌÌÌÌÌì?gš™™™™™É?sW   Must change either the discount or the noise, not both. Returned (discount, noise) = %ssZ   Did not return a (discount, noise, living reward) triple; instead analysis.%s returned: %sRñ   t   NRó   t   ERò   t   SRô   t   WRõ   t   Xs   Policy not correct.s       Student policy at %s: %ss       Correct policy at %s: %si   s       Student policy:sP           Legend:  N,S,E,W at states which move north etc, X at states which exit,sI                    . at states where the policy is not defined (e.g. walls)s!       Correct policy specification:sT           Legend:  N,S,E,W for states in which the student policy must move north etc,sS                    _ for states where it doesn't matter what the student policy does.s9   Policy does not visit state %s when moving without noise.s       States visited: %ss5                    . at states where policy not defineds1   Policy visits state %s when moving without noise.(   t   hasattrR  R-   R.   t   getattrt   typeR»   R¼   R  R[   R   R   R   R   t   getStartStateR   R:   Rg   Rf   R;   R   R?   t   printPolicyt   printGridworldRú   R  R  R/   (   R   R0   R1   R2   t   resultR   R   R
   R÷   R:   t	   actionMapRg   Rf   t   policyPassedRo   Rm   t   differPointR'   (    (    s   reinforcementTestClasses.pyR9     s”    +	)<.


c         C   sL   |  j  d ƒ x+ |  j j d ƒ D] } |  j  d | ƒ q  W|  j  d ƒ d  S(   Ns       Gridworld:s   
s        sS           Legend: # wall, _ empty, S start, numbers terminal states with that reward.(   R-   R	  R‚   (   R   Rü   (    (    s   reinforcementTestClasses.pyR  y  s    c      
   C   sM  | r2 i d d 6d d 6d d 6d d 6d d 6} n7 i d d 6d d 6d d	 6d d
 6d d 6d d 6d d 6} xÝ t  |  j j j ƒ D]Æ } |  j j j d | } | rð |  j d d j g  t  |  j j j ƒ D] } | | | | ^ qÇ ƒ f ƒ q |  j d d j g  t  |  j j j ƒ D]" } | | j | | f d ƒ ^ qƒ f ƒ q Wd  S(   NR  R  R  R  Rd   RÎ   Rñ   Ró   Rò   Rô   R  Rõ   t   .i   s
           %ss       (   R   R   Rf   R-   RI   Rg   RÉ   (   R   R:   t   policyTypeIsGridt   legendRl   Rm   Ro   (    (    s   reinforcementTestClasses.pyR    s    ,7Nc         C   s=   t  | d ƒ ( } | j d |  j ƒ | j d ƒ Wd  QXt S(   NR    s$   # This is the solution file for %s.
s   # File intentionally blank.
(   R!   R"   R'   R;   (   R   R1   RJ   R$   (    (    s   reinforcementTestClasses.pyRK     s    (   Rƒ   R„   R   R9   R  R  RK   (    (    (    s   reinforcementTestClasses.pyR  û  s
   	"	Z		(%   t   testClassesR   t   matht	   tracebackt   sysR&   t   layoutt   textDisplayRÚ   R   RÏ   t   utilR    R   R   t   collectionsR   t   pprintR   t   hashlibR   R“   R?   t   VERBOSEt   LIVINGREWARDt   NOISEt   TestCaseR   R…   R§   R¬   R·   RÁ   Rú   R   R   R  (    (    (    s   reinforcementTestClasses.pyt   <module>   s,   <0	§ ´EZ			