nice comments
aciddelgado committed Jan 17, 2025
1 parent 6efed8e commit 30bdcda
Showing 1 changed file with 32 additions and 29 deletions.
61 changes: 32 additions & 29 deletions test/sampling_tests.cpp
@@ -176,7 +176,6 @@ TEST(SamplingTests, RandomizedSamplingTopPCpu) {
   }
 }
 
-// TODO(aciddelgado): this is copy-pasted from softmax.h but I think that might be fine... not sure how we feel about that
 void SoftMax(std::span<float> scores, float temperature) {
   float const max_score = *std::max_element(scores.begin(), scores.end());
 
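Only the first line of the helper is visible in this hunk; the rest of SoftMax is elided by the diff. As a reference point for the distribution checks below, here is a self-contained sketch of a numerically stable softmax with temperature — the loop bodies are an assumption consistent with that visible max-subtraction line, not the file's verbatim code:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <span>
#include <vector>

void SoftMaxSketch(std::span<float> scores, float temperature) {
  const float max_score = *std::max_element(scores.begin(), scores.end());
  float sum = 0.0f;
  for (auto& s : scores) {
    // Shifting by the max leaves the result unchanged (it cancels in the
    // normalization) but keeps std::exp from overflowing on large logits.
    s = std::exp((s - max_score) / temperature);
    sum += s;
  }
  for (auto& s : scores)
    s /= sum;  // normalize to probabilities
}

int main() {
  std::vector<float> logits{1.0f, 2.0f, 3.0f, 4.0f, 5.0f};
  SoftMaxSketch(logits, 1.0f);
  for (float p : logits)
    std::printf("%.4f ", p);  // ~0.0117 0.0317 0.0861 0.2341 0.6364
  std::printf("\n");
}

For logits 1 through k = 5 at temperature 1.0 this yields roughly {0.0117, 0.0317, 0.0861, 0.2341, 0.6364} — the expected_distributions values the top-k tests below compare against.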
@@ -192,11 +191,11 @@ void SoftMax(std::span<float> scores, float temperature) {

 TEST(SamplingTests, RandomizedSamplingTopKCpu) {
   auto model = Generators::CreateModel(Generators::GetOrtEnv(), MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32");
-  int batch_size = 5;
-  int k = 5;
+  const int batch_size = 5;
+  const int k = 5;
 
   Generators::Config config;
-  int vocab_size = 32000;  // vocab size of llama
+  const int vocab_size = 13;  // vocab size of llama
   config.model.vocab_size = vocab_size;  // vocab size of llama
 
   // Create a generator
@@ -213,17 +212,21 @@ TEST(SamplingTests, RandomizedSamplingTopKCpu) {
   std::mt19937 engine(rd());
   std::vector<int> indices(vocab_size);
   std::vector<float> logits_cpu(vocab_size * batch_size);
-  int num_iter = 100;
+  const int num_iter = 100;
+  std::map<float, int> logit_to_count;
 
+  // Run test
   for (int i = 0; i < num_iter; i++) {
     auto generator = Generators::CreateGenerator(*model, *params);
     logits_cpu = std::vector<float>(vocab_size * batch_size, 0.0f);
+    // Shuffle integers 1 to k randomly into logits_cpu
     for (int i = 0; i < batch_size; i++) {
       std::iota(indices.begin(), indices.end(), 0);
       std::shuffle(indices.begin(), indices.end(), engine);
       for (int j = 0; j < k; j++)
         logits_cpu[indices[j] + vocab_size * i] = float(k - j);
     }
+    // Set logits and get generated token
     auto logits_copy = logits_cpu;
     auto logits = params->p_device->WrapMemory<float>(logits_copy);
     generator->SetLogits(logits);
@@ -233,20 +236,19 @@ TEST(SamplingTests, RandomizedSamplingTopKCpu) {
     for (int b = 0; b < batch_size; b++) {
       auto next_token = next_tokens[b];
       auto next_token_score = logits_cpu[next_token + vocab_size * b];
-      if (logit_to_count.find(next_token_score) == logit_to_count.end())
-        logit_to_count[next_token_score] = 1;
-      else
-        logit_to_count[next_token_score]++;
+      logit_to_count[next_token_score]++;
       EXPECT_GT(next_token_score, 0.0f);
     }
   }
-  std::vector<float> expected_distributions(5);
+  // Calculate expected distribution of tokens by softmaxing given logits (integers 1 through k)
+  std::vector<float> expected_distributions(k);
   for (int i = 0; i < k; i++)
     expected_distributions[i] = float(i + 1);
   SoftMax(expected_distributions, 1.0f);
-  int total_count = batch_size * num_iter;
+  // Check that the distribution of tokens generated by the model is close to the expected distribution
+  const int total_count = batch_size * num_iter;
   for (auto& [logit, count] : logit_to_count) {
-    float expected_distribution = expected_distributions[int(logit) - 1];
+    const float expected_distribution = expected_distributions[int(logit) - 1];
     EXPECT_NEAR(count / float(total_count), expected_distribution, 0.1);
   }
 }
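Why the loose 0.1 tolerance in EXPECT_NEAR is safe: with batch_size = 5 and num_iter = 100 each test draws 500 tokens, and the observed frequency of each logit value is a binomial proportion whose standard error is at most about 0.022, so 0.1 is a margin of more than four standard deviations. A purely illustrative program that works through those numbers — it restates the test's arithmetic and uses nothing from the library:

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int total_count = 5 * 100;  // batch_size * num_iter in the test
  std::vector<double> p(5);
  double sum = 0.0;
  for (int i = 0; i < 5; ++i) sum += std::exp(1.0 + i);  // logits 1..5
  for (int i = 0; i < 5; ++i) p[i] = std::exp(1.0 + i) / sum;
  for (int i = 0; i < 5; ++i) {
    // Binomial standard error of the observed frequency over 500 draws.
    const double se = std::sqrt(p[i] * (1.0 - p[i]) / total_count);
    std::printf("logit %d: p=%.4f  expected count=%5.1f  stderr=%.4f\n",
                i + 1, p[i], p[i] * total_count, se);
  }
  // The largest stderr here is ~0.022, so the 0.1 absolute tolerance in
  // EXPECT_NEAR leaves a 4-sigma-plus margin against flaky failures.
}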
@@ -432,11 +434,11 @@ TEST(SamplingTests, RandomizedSamplingTopPCuda) {

 TEST(SamplingTests, RandomizedSamplingTopKCuda) {
   auto model = Generators::CreateModel(Generators::GetOrtEnv(), MODEL_PATH "hf-internal-testing/tiny-random-gpt2-fp32");
-  int batch_size = 5;
-  int k = 5;
+  const int batch_size = 5;
+  const int k = 5;
 
   Generators::Config config;
-  int vocab_size = 32000;  // vocab size of llama
+  const int vocab_size = 13;  // vocab size of llama
   config.model.vocab_size = vocab_size;  // vocab size of llama
 
   // Create a generator
@@ -453,42 +455,43 @@ TEST(SamplingTests, RandomizedSamplingTopKCuda) {
   std::mt19937 engine(rd());
   std::vector<int> indices(vocab_size);
   std::vector<float> logits_cpu(vocab_size * batch_size);
-  int num_iter = 100;
+  const int num_iter = 100;
+  std::map<float, int> logit_to_count;
 
+  // Run test
   for (int i = 0; i < num_iter; i++) {
     auto generator = Generators::CreateGenerator(*model, *params);
-    logits_cpu = std::vector<float>(vocab_size * batch_size, 0.0f);
+    Generators::DeviceSpan<float> logits_gpu = params->p_device->Allocate<float>(config.model.vocab_size * batch_size);
+    auto cpu_span = logits_gpu.CpuSpan();
+    // Shuffle integers 1 to k randomly into cpu_span
     for (int i = 0; i < batch_size; i++) {
       std::iota(indices.begin(), indices.end(), 0);
       std::shuffle(indices.begin(), indices.end(), engine);
       for (int j = 0; j < k; j++)
-        logits_cpu[indices[j] + vocab_size * i] = float(k - j);
+        cpu_span[indices[j] + vocab_size * i] = float(k - j);
     }
-    Generators::DeviceSpan<float> logits_gpu = params->p_device->Allocate<float>(config.model.vocab_size * batch_size);
-    auto cpu_span = logits_gpu.CpuSpan();
-    std::copy(logits_cpu.begin(), logits_cpu.end(), cpu_span.begin());
+    // Copy logits onto device, set logits, and get generated token
     logits_gpu.CopyCpuToDevice();
     generator->SetLogits(logits_gpu);
     generator->GenerateNextToken();
     auto next_tokens = generator->search_->GetNextTokens().CopyDeviceToCpu();
     // Verify outputs match expected outputs
     for (int b = 0; b < batch_size; b++) {
       auto next_token = next_tokens[b];
-      auto next_token_score = logits_cpu[next_token + vocab_size * b];
-      if (logit_to_count.find(next_token_score) == logit_to_count.end())
-        logit_to_count[next_token_score] = 1;
-      else
-        logit_to_count[next_token_score]++;
+      auto next_token_score = cpu_span[next_token + vocab_size * b];
+      logit_to_count[next_token_score]++;
       EXPECT_GT(next_token_score, 0.0f);
     }
   }
-  std::vector<float> expected_distributions(5);
+  // Calculate expected distribution of tokens by softmaxing given logits (integers 1 through k)
+  std::vector<float> expected_distributions(k);
   for (int i = 0; i < k; i++)
     expected_distributions[i] = float(i + 1);
   SoftMax(expected_distributions, 1.0f);
-  int total_count = batch_size * num_iter;
+  const int total_count = batch_size * num_iter;
+  // Check that the distribution of tokens generated by the model is close to the expected distribution
   for (auto& [logit, count] : logit_to_count) {
-    float expected_distribution = expected_distributions[int(logit) - 1];
+    const float expected_distribution = expected_distributions[int(logit) - 1];
     EXPECT_NEAR(count / float(total_count), expected_distribution, 0.1);
   }
 }
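The substantive change in the CUDA test is dropping the intermediate logits_cpu staging pass: logits are now written directly into the device span's CPU view and pushed with a single CopyCpuToDevice(), eliminating the extra std::copy. A toy, runnable stand-in for that round-trip; FakeDeviceSpan below is invented for illustration, since the diff only shows that Generators::DeviceSpan exposes CpuSpan() and CopyCpuToDevice():

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Invented stand-in for Generators::DeviceSpan<float>: a host buffer plus a
// pretend device buffer, mirroring the CpuSpan() + CopyCpuToDevice() pattern.
struct FakeDeviceSpan {
  std::vector<float> host, device;
  explicit FakeDeviceSpan(std::size_t n) : host(n), device(n) {}
  std::vector<float>& CpuSpan() { return host; }  // host-visible staging view
  void CopyCpuToDevice() { device = host; }       // one explicit H2D copy
};

int main() {
  FakeDeviceSpan logits(13 * 5);  // vocab_size * batch_size, as in the test
  auto& cpu_span = logits.CpuSpan();
  std::fill(cpu_span.begin(), cpu_span.end(), 0.0f);
  cpu_span[0] = 5.0f;        // write logits directly into the staging view
  logits.CopyCpuToDevice();  // then push them to the device in one shot
  assert(logits.device[0] == 5.0f);
}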
