File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -2470,17 +2470,21 @@ struct GGMLRunner {
24702470 *effective_budget_out = effective_budget;
24712471 }
24722472
2473- // When the model dwarfs the budget, cap the planner at a quarter so
2474- // it builds smaller merged segments and chunk-K can fit alongside.
2475- // Otherwise leave the planner free to merge into one large segment.
2476- size_t total_params_bytes = 0 ;
2477- for (const ggml_tensor* t : params_tensor_set_) {
2478- if (t != nullptr ) {
2479- total_params_bytes += ggml_nbytes (t);
2473+ // When streaming and the model dwarfs the budget, cap the planner at
2474+ // a quarter so it builds smaller merged segments and chunk-K can fit
2475+ // alongside. Without streaming the cap only adds dispatch overhead.
2476+ size_t planner_budget = effective_budget;
2477+ if (stream_layers_enabled) {
2478+ size_t total_params_bytes = 0 ;
2479+ for (const ggml_tensor* t : params_tensor_set_) {
2480+ if (t != nullptr ) {
2481+ total_params_bytes += ggml_nbytes (t);
2482+ }
2483+ }
2484+ if (total_params_bytes * 4 > effective_budget * 3 ) {
2485+ planner_budget = effective_budget / 4 ;
24802486 }
24812487 }
2482- const size_t planner_budget =
2483- (total_params_bytes * 4 > effective_budget * 3 ) ? effective_budget / 4 : effective_budget;
24842488
24852489 *plan_out = sd::ggml_graph_cut::resolve_plan (runtime_backend,
24862490 gf,
You can’t perform that action at this time.
0 commit comments