nice comments
aciddelgado committed Jan 17, 2025
1 parent 6efed8e commit 30bdcda
Showing 1 changed file with 32 additions and 29 deletions.
61 changes: 32 additions & 29 deletions test/sampling_tests.cpp
@@ -176,7 +176,6 @@ TEST(SamplingTests, RandomizedSamplingTopPCpu) {
   }
 }
 
-// TODO(aciddelgado): this is copy-pasted from softmax.h but I think that might be fine... not sure how we feel about that
 void SoftMax(std::span<float> scores, float temperature) {
   float const max_score = *std::max_element(scores.begin(), scores.end());
 
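Only the first line of the helper is visible in this hunk; the rest of SoftMax is elided by the diff. As a reference point for the distribution checks below, here is a self-contained sketch of a numerically stable softmax with temperature — the loop bodies are an assumption consistent with that visible max-subtraction line, not the file's verbatim code:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <span>
#include <vector>

void SoftMaxSketch(std::span<float> scores, float temperature) {
  const float max_score = *std::max_element(scores.begin(), scores.end());
  float sum = 0.0f;
  for (auto& s : scores) {
    // Shifting by the max leaves the result unchanged (it cancels in the
    // normalization) but keeps std::exp from overflowing on large logits.
    s = std::exp((s - max_score) / temperature);
    sum += s;
  }
  for (auto& s : scores)
    s /= sum;  // normalize to probabilities
}

int main() {
  std::vector<float> logits{1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
  SoftMaxSketch(logits, 1.0f);
  for (float p : logits)
    std::printf("%.4f ", p);  // ~0.0117 0.0317 0.0861 0.2341 0.6364
  std::printf("\n");
}

For logits 1 through k = 5 at temperature 1.0 this yields roughly {0.0117, 0.0317, 0.0861, 0.2341, 0.6364} — the expected_distributions values the top-k tests below compare against.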
@@ -192,11 +191,11 @@ void SoftMax(std::span<float> scores, float temperature) {

 TEST(SamplingTests, RandomizedSamplingTopKCpu) {
   auto model = Generators::CreateModel(Generators::GetOrtEnv(), MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32");
-  int batch_size = 5;
-  int k = 5;
+  const int batch_size = 5;
+  const int k = 5;
 
   Generators::Config config;
-  int vocab_size = 32000;  // vocab size of llama
+  const int vocab_size = 13;  // vocab size of llama
   config.model.vocab_size = vocab_size;  // vocab size of llama
 
   // Create a generator
@@ -213,17 +212,21 @@ TEST(SamplingTests, RandomizedSamplingTopKCpu) {
   std::mt19937 engine(rd());
   std::vector<int> indices(vocab_size);
   std::vector<float> logits_cpu(vocab_size * batch_size);
-  int num_iter = 100;
+  const int num_iter = 100;
+  std::map<float, int> logit_to_count;
 
+  // Run test
   for (int i = 0; i < num_iter; i++) {
     auto generator = Generators::CreateGenerator(*model, *params);
     logits_cpu = std::vector<float>(vocab_size * batch_size, 0.0f);
+    // Shuffle integers 1 to k randomly into logits_cpu
     for (int i = 0; i < batch_size; i++) {
       std::iota(indices.begin(), indices.end(), 0);
       std::shuffle(indices.begin(), indices.end(), engine);
       for (int j = 0; j < k; j++)
         logits_cpu[indices[j] + vocab_size * i] = float(k - j);
     }
+    // Set logits and get generated token
     auto logits_copy = logits_cpu;
     auto logits = params->p_device->WrapMemory<float>(logits_copy);
     generator->SetLogits(logits);
@@ -233,20 +236,19 @@ TEST(SamplingTests, RandomizedSamplingTopKCpu) {
     for (int b = 0; b < batch_size; b++) {
       auto next_token = next_tokens[b];
       auto next_token_score = logits_cpu[next_token + vocab_size * b];
-      if (logit_to_count.find(next_token_score) == logit_to_count.end())
-        logit_to_count[next_token_score] = 1;
-      else
-        logit_to_count[next_token_score]++;
+      logit_to_count[next_token_score]++;
       EXPECT_GT(next_token_score, 0.0f);
     }
   }
-  std::vector<float> expected_distributions(5);
+  // Calculate expected distribution of tokens by softmaxing given logits (integers 1 through k)
+  std::vector<float> expected_distributions(k);
   for (int i = 0; i < k; i++)
     expected_distributions[i] = float(i + 1);
   SoftMax(expected_distributions, 1.0f);
-  int total_count = batch_size * num_iter;
+  // Check that the distribution of tokens generated by the model is close to the expected distribution
+  const int total_count = batch_size * num_iter;
   for (auto& [logit, count] : logit_to_count) {
-    float expected_distribution = expected_distributions[int(logit) - 1];
+    const float expected_distribution = expected_distributions[int(logit) - 1];
     EXPECT_NEAR(count / float(total_count), expected_distribution, 0.1);
   }
 }
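Why the loose 0.1 tolerance in EXPECT_NEAR is safe: with batch_size = 5 and num_iter = 100 each test draws 500 tokens, and the observed frequency of each logit value is a binomial proportion whose standard error is at most about 0.022, so 0.1 is a margin of more than four standard deviations. A purely illustrative program that works through those numbers — it restates the test's arithmetic and uses nothing from the library:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int total_count = 5 * 100;  // batch_size * num_iter in the test
  std::vector<double> p(5);
  double sum = 0.0;
  for (int i = 0; i < 5; ++i) sum += std::exp(1.0 + i);  // logits 1..5
  for (int i = 0; i < 5; ++i) p[i] = std::exp(1.0 + i) / sum;
  for (int i = 0; i < 5; ++i) {
    // Binomial standard error of the observed frequency over 500 draws.
    const double se = std::sqrt(p[i] * (1.0 - p[i]) / total_count);
    std::printf("logit %d: p=%.4f  expected count=%5.1f  stderr=%.4f\n",
                i + 1, p[i], p[i] * total_count, se);
  }
  // The largest stderr here is ~0.022, so the 0.1 absolute tolerance in
  // EXPECT_NEAR leaves a 4-sigma-plus margin against flaky failures.
}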
@@ -432,11 +434,11 @@ TEST(SamplingTests, RandomizedSamplingTopPCuda) {

 TEST(SamplingTests, RandomizedSamplingTopKCuda) {
   auto model = Generators::CreateModel(Generators::GetOrtEnv(), MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32");
-  int batch_size = 5;
-  int k = 5;
+  const int batch_size = 5;
+  const int k = 5;
 
   Generators::Config config;
-  int vocab_size = 32000;  // vocab size of llama
+  const int vocab_size = 13;  // vocab size of llama
   config.model.vocab_size = vocab_size;  // vocab size of llama
 
   // Create a generator
@@ -453,42 +455,43 @@ TEST(SamplingTests, RandomizedSamplingTopKCuda) {
   std::mt19937 engine(rd());
   std::vector<int> indices(vocab_size);
   std::vector<float> logits_cpu(vocab_size * batch_size);
-  int num_iter = 100;
+  const int num_iter = 100;
+  std::map<float, int> logit_to_count;
 
+  // Run test
   for (int i = 0; i < num_iter; i++) {
     auto generator = Generators::CreateGenerator(*model, *params);
-    logits_cpu = std::vector<float>(vocab_size * batch_size, 0.0f);
+    Generators::DeviceSpan<float> logits_gpu = params->p_device->Allocate<float>(config.model.vocab_size * batch_size);
+    auto cpu_span = logits_gpu.CpuSpan();
+    // Shuffle integers 1 to k randomly into cpu_span
     for (int i = 0; i < batch_size; i++) {
       std::iota(indices.begin(), indices.end(), 0);
       std::shuffle(indices.begin(), indices.end(), engine);
       for (int j = 0; j < k; j++)
-        logits_cpu[indices[j] + vocab_size * i] = float(k - j);
+        cpu_span[indices[j] + vocab_size * i] = float(k - j);
     }
-    Generators::DeviceSpan<float> logits_gpu = params->p_device->Allocate<float>(config.model.vocab_size * batch_size);
-    auto cpu_span = logits_gpu.CpuSpan();
-    std::copy(logits_cpu.begin(), logits_cpu.end(), cpu_span.begin());
+    // Copy logits onto device, set logits, and get generated token
     logits_gpu.CopyCpuToDevice();
     generator->SetLogits(logits_gpu);
     generator->GenerateNextToken();
     auto next_tokens = generator->search_->GetNextTokens().CopyDeviceToCpu();
     // Verify outputs match expected outputs
     for (int b = 0; b < batch_size; b++) {
       auto next_token = next_tokens[b];
-      auto next_token_score = logits_cpu[next_token + vocab_size * b];
-      if (logit_to_count.find(next_token_score) == logit_to_count.end())
-        logit_to_count[next_token_score] = 1;
-      else
-        logit_to_count[next_token_score]++;
+      auto next_token_score = cpu_span[next_token + vocab_size * b];
+      logit_to_count[next_token_score]++;
       EXPECT_GT(next_token_score, 0.0f);
     }
   }
-  std::vector<float> expected_distributions(5);
+  // Calculate expected distribution of tokens by softmaxing given logits (integers 1 through k)
+  std::vector<float> expected_distributions(k);
   for (int i = 0; i < k; i++)
     expected_distributions[i] = float(i + 1);
   SoftMax(expected_distributions, 1.0f);
-  int total_count = batch_size * num_iter;
+  const int total_count = batch_size * num_iter;
+  // Check that the distribution of tokens generated by the model is close to the expected distribution
   for (auto& [logit, count] : logit_to_count) {
-    float expected_distribution = expected_distributions[int(logit) - 1];
+    const float expected_distribution = expected_distributions[int(logit) - 1];
     EXPECT_NEAR(count / float(total_count), expected_distribution, 0.1);
   }
 }
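The substantive change in the CUDA test is dropping the intermediate logits_cpu staging pass: logits are now written directly into the device span's CPU view and pushed with a single CopyCpuToDevice(), eliminating the extra std::copy. A toy, runnable stand-in for that round-trip; FakeDeviceSpan below is invented for illustration, since the diff only shows that Generators::DeviceSpan exposes CpuSpan() and CopyCpuToDevice():

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Invented stand-in for Generators::DeviceSpan<float>: a host buffer plus a
// pretend device buffer, mirroring the CpuSpan() + CopyCpuToDevice() pattern.
struct FakeDeviceSpan {
  std::vector<float> host, device;
  explicit FakeDeviceSpan(std::size_t n) : host(n), device(n) {}
  std::vector<float>& CpuSpan() { return host; }  // host-visible staging view
  void CopyCpuToDevice() { device = host; }       // one explicit H2D copy
};

int main() {
  FakeDeviceSpan logits(13 * 5);  // vocab_size * batch_size, as in the test
  auto& cpu_span = logits.CpuSpan();
  std::fill(cpu_span.begin(), cpu_span.end(), 0.0f);
  cpu_span[0] = 5.0f;        // write logits directly into the staging view
  logits.CopyCpuToDevice();  // then push them to the device in one shot
  assert(logits.device[0] == 5.0f);
}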
