Skip to content

Commit b269d4d

Browse files
committed
deploy: 8a142a9
1 parent eccc135 commit b269d4d

File tree

3 files changed

+8
-10
lines changed

3 files changed

+8
-10
lines changed

_modules/lzero/mcts/buffer/game_buffer.html

+6-7
Original file line numberDiff line numberDiff line change
@@ -655,14 +655,13 @@ <h1>Source code for lzero.mcts.buffer.game_buffer</h1><div class="highlight"><pr
655655
<span class="c1"># print(f&#39;valid_len is {valid_len}&#39;)</span>
656656

657657
<span class="k">if</span> <span class="n">meta</span><span class="p">[</span><span class="s1">&#39;priorities&#39;</span><span class="p">]</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
658-
<span class="n">max_prio</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">game_segment_buffer</span> <span class="k">else</span> <span class="mi">1</span>
658+
<span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">game_segment_buffer</span><span class="p">:</span>
659+
<span class="n">max_prio</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span><span class="o">.</span><span class="n">max</span><span class="p">()</span> <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="k">else</span> <span class="mi">1</span>
660+
<span class="k">else</span><span class="p">:</span>
661+
<span class="n">max_prio</span> <span class="o">=</span> <span class="mi">1</span>
662+
659663
<span class="c1"># if no &#39;priorities&#39; provided, set the valid part of the new-added game history the max_prio</span>
660-
<span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">(</span>
661-
<span class="p">(</span>
662-
<span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span><span class="p">,</span> <span class="p">[</span><span class="n">max_prio</span>
663-
<span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">valid_len</span><span class="p">)]</span> <span class="o">+</span> <span class="p">[</span><span class="mf">0.</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">valid_len</span><span class="p">,</span> <span class="n">data_length</span><span class="p">)]</span>
664-
<span class="p">)</span>
665-
<span class="p">)</span>
664+
<span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">concatenate</span><span class="p">((</span><span class="bp">self</span><span class="o">.</span><span class="n">game_pos_priorities</span><span class="p">,</span> <span class="p">[</span><span class="n">max_prio</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">valid_len</span><span class="p">)]</span> <span class="o">+</span> <span class="p">[</span><span class="mf">0.</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="n">valid_len</span><span class="p">,</span> <span class="n">data_length</span><span class="p">)]))</span>
666665
<span class="k">else</span><span class="p">:</span>
667666
<span class="k">assert</span> <span class="n">data_length</span> <span class="o">==</span> <span class="nb">len</span><span class="p">(</span><span class="n">meta</span><span class="p">[</span><span class="s1">&#39;priorities&#39;</span><span class="p">]),</span> <span class="s2">&quot; priorities should be of same length as the game steps&quot;</span>
668667
<span class="n">priorities</span> <span class="o">=</span> <span class="n">meta</span><span class="p">[</span><span class="s1">&#39;priorities&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">copy</span><span class="p">()</span><span class="o">.</span><span class="n">reshape</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">)</span>

_modules/lzero/model/sampled_efficientzero_model.html

+1-2
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,7 @@ <h1>Source code for lzero.model.sampled_efficientzero_model</h1><div class="high
315315
<span class="c1"># (3,96,96), and frame_stack_num is 4. Due to downsample, the encoding of observation (latent_state) is</span>
316316
<span class="c1"># (64, 96/16, 96/16), where 64 is the number of channels, 96/16 is the size of the latent state. Thus,</span>
317317
<span class="c1"># self.projection_input_dim = 64 * 96/16 * 96/16 = 64*6*6 = 2304</span>
318-
<span class="bp">self</span><span class="o">.</span><span class="n">projection_input_dim</span> <span class="o">=</span> <span class="n">num_channels</span> <span class="o">*</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">observation_shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">/</span> <span class="mi">16</span>
319-
<span class="p">)</span> <span class="o">*</span> <span class="n">math</span><span class="o">.</span><span class="n">ceil</span><span class="p">(</span><span class="n">observation_shape</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">/</span> <span class="mi">16</span><span class="p">)</span>
318+
<span class="bp">self</span><span class="o">.</span><span class="n">projection_input_dim</span> <span class="o">=</span> <span class="n">num_channels</span> <span class="o">*</span> <span class="n">latent_size</span>
320319
<span class="k">else</span><span class="p">:</span>
321320
<span class="bp">self</span><span class="o">.</span><span class="n">projection_input_dim</span> <span class="o">=</span> <span class="n">num_channels</span> <span class="o">*</span> <span class="n">observation_shape</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">*</span> <span class="n">observation_shape</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span>
322321

_modules/lzero/policy/sampled_muzero.html

+1-1
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ <h1>Source code for lzero.policy.sampled_muzero</h1><div class="highlight"><pre>
619619
<span class="s1">&#39;total_loss&#39;</span><span class="p">:</span> <span class="n">loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
620620
<span class="s1">&#39;policy_loss&#39;</span><span class="p">:</span> <span class="n">policy_loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
621621
<span class="s1">&#39;policy_entropy&#39;</span><span class="p">:</span> <span class="n">policy_entropy</span><span class="o">.</span><span class="n">item</span><span class="p">()</span> <span class="o">/</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">num_unroll_steps</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span>
622-
<span class="s1">&#39;target_policy_entropy&#39;</span><span class="p">:</span> <span class="n">target_policy_entropy</span><span class="o">.</span><span class="n">item</span><span class="p">()</span> <span class="o">/</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">num_unroll_steps</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span>
622+
<span class="s1">&#39;target_policy_entropy&#39;</span><span class="p">:</span> <span class="n">target_policy_entropy</span> <span class="o">/</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">num_unroll_steps</span> <span class="o">+</span> <span class="mi">1</span><span class="p">),</span>
623623
<span class="s1">&#39;reward_loss&#39;</span><span class="p">:</span> <span class="n">reward_loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
624624
<span class="s1">&#39;value_loss&#39;</span><span class="p">:</span> <span class="n">value_loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">(),</span>
625625
<span class="s1">&#39;consistency_loss&#39;</span><span class="p">:</span> <span class="n">consistency_loss</span><span class="o">.</span><span class="n">mean</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">_cfg</span><span class="o">.</span><span class="n">num_unroll_steps</span><span class="p">,</span>

0 commit comments

Comments
 (0)