From 51049cc708678024cb690617b5bd20ad4c650c3a Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Tue, 13 Jan 2026 16:12:31 -0500 Subject: [PATCH 01/18] grab random cases to remove --- howso/ablation.amlg | 67 ++------------------------------------------- 1 file changed, 2 insertions(+), 65 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index e8bdf90e..8c0ab2fe 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -613,74 +613,11 @@ )) ) - ;pair of cases and associated sorted popularities (total normalized influence of all neighbors that referenced it) (declare (assoc - case_popularity_pair - (compute_on_contained_entities - (query_exists !internalLabelSession) - ||(query_entity_cumulative_nearest_entity_weights - closest_k - features - (null) ;all cases - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - distribute_weight_feature - (rand) - (null) ;radius - !numericalPrecision - .true - ) - ) - )) - - ;all the cases that were not returned in the pair above have 0 popularity (no other cases reference them) - (declare (assoc - zero_popularity_neighbors + removable_cases (contained_entities (query_exists !internalLabelSession) - (query_not_in_entity_list (first case_popularity_pair)) - ) - )) - - ;determine the cutoff value of the popularity at which all cases with a value less than that should be removed - ;e.g., if there needs to be a quarter of cases left, this would compute the 0.75 quantile of popularity values, - ;so that those bottom 75% are removed - (declare (assoc - reduction_popularity_cutoff - (quantile - (append - (last case_popularity_pair) - (range 0 1 (size zero_popularity_neighbors) 1) - ) - ;add one percent to account for enough cases selected to match the amount needed to be removed due to rounding - ;e.g., if the quantile value was 0.75 from the example above, this bumps it up to 0.76 - (+ - (/ (- num_cases approximate_num_cases_to_keep) num_cases) - 0.01 - ) - ) - )) - ;plan to only remove cases whose popularity is less than reduction_popularity_cutoff - ;i.e., only remove the non-popular cases that aren't referenced by others as much - (declare (assoc - num_removal_eligible_cases - (size (filter - (lambda (< (current_value) reduction_popularity_cutoff)) - (last case_popularity_pair) - )) - )) - (declare (assoc - ;case ids in order from highest to lowest popularity, lowest popularity at end of list - removable_cases - (append - ;only keep the necessary number of lowest popularity eligible cases as well as all zero popularity ones - (tail (first case_popularity_pair) num_removal_eligible_cases) - zero_popularity_neighbors + (query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) ) ) )) From 495f44a8602a7f964fa2f37eac60c5a69cb5549e Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:24:14 -0500 Subject: [PATCH 02/18] remove some unused stuff --- howso/ablation.amlg | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 8c0ab2fe..19a2bb30 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -576,21 +576,7 @@ weight_feature distribute_weight_feature )) ) - - (declare (assoc - hyperparam_map - (call !GetHyperparameters (assoc - context_features features - weight_feature distribute_weight_feature - )) - )) (declare (assoc - closest_k (get hyperparam_map "k") - p_parameter (get hyperparam_map "p") - dt_parameter (get hyperparam_map "dt") - feature_weights (get hyperparam_map "featureWeights") - feature_deviations (get hyperparam_map "featureDeviations") - query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") num_cases (call !GetNumTrainingCases) ;reduction will stop within batch_size of reduce_max_cases, so if the gap between From f815bf13a5229cfcb96b2cbdc293a5e564a37ba8 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:47:09 -0500 Subject: [PATCH 03/18] one last unused var --- howso/ablation.amlg | 1 - 1 file changed, 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 19a2bb30..5d98a1b2 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -563,7 +563,6 @@ ;Declare variables for internal use. (declare (assoc - max_influence_weight_entropy_to_keep .infinity cases (list) prev_prediction_stats_map (assoc) thresholds_enabled (or (size abs_threshold_map) (size delta_threshold_map) (size rel_threshold_map) ) From 5fbefc97738ceca1bd723adb3f1f42bcf979dd29 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Wed, 14 Jan 2026 13:08:44 -0500 Subject: [PATCH 04/18] randomize test --- howso/ablation.amlg | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 5d98a1b2..6d074d85 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -584,6 +584,13 @@ approximate_num_cases_to_keep (max (- reduce_max_cases batch_size) !autoAblationMinNumCases) )) + ;nothing needed to reduce since the dataset is already small enough + (if (>= approximate_num_cases_to_keep num_cases) + (conclude + (call !Return (assoc payload output)) + ) + ) + (if thresholds_enabled (assign (assoc prev_prediction_stats_map @@ -606,6 +613,11 @@ ) )) + ;randomize the order + (assign (assoc + removable_cases (rand removable_cases (size removable_cases) .true) + )) + (declare (assoc ;list will be sorted from highest to lowest, thus cases removed from the end of the list end_index (- (size removable_cases) 1) From 457700dfc5f81c96dfd04f99331eb9df9dd603f2 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 14:22:21 -0500 Subject: [PATCH 05/18] make it one pass --- howso/ablation.amlg | 133 ++++++++++++++------------------------------ 1 file changed, 42 insertions(+), 91 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 6d074d85..ad8f0a38 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -615,116 +615,67 @@ ;randomize the order (assign (assoc - removable_cases (rand removable_cases (size removable_cases) .true) + cases (rand removable_cases (size removable_cases) .true) )) - (declare (assoc - ;list will be sorted from highest to lowest, thus cases removed from the end of the list - end_index (- (size removable_cases) 1) - random_cases .false - num_removed_this_batch 0 - )) - - ;Begin looping on data removal. The ultimate end condition is if the dataset gets too small to continue removing cases. - (while (< !autoAblationMinNumCases (call !GetNumTrainingCases)) - (assign (assoc - num_removed_this_batch (min batch_size (- (call !GetNumTrainingCases) !autoAblationMinNumCases)) - )) + (if !tsTimeFeature + ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series (assign (assoc cases - (if (>= end_index 0) - ;grab the cases from the end, with the smallest values - (unzip - removable_cases - (range - (max 0 (- end_index num_removed_this_batch -1)) - end_index - ) - ) - - ;else select random cases - (contained_entities - (query_exists distribute_weight_feature) - (query_select num_removed_this_batch (null) (rand) ) - ) + (contained_entities + (query_in_entity_list cases) + (query_not_equals ".reverse_series_index" 0) + (query_not_equals ".series_index" 0) ) )) + ) - (if (>= end_index 0) - ;update end index to account for the cases about to be removed - (assign (assoc end_index (- end_index (size cases)) )) - - ;else no more removable cases left, remove random cases - (assign (assoc random_cases .true)) - ) - - (if !tsTimeFeature - ;do not remove first (.series_index == 0) or last (.reverse_series_index == 0) cases for any series - (assign (assoc - cases - (contained_entities - (query_in_entity_list cases) - (query_not_equals ".reverse_series_index" 0) - (query_not_equals ".series_index" 0) - ) + (if (size cases) + (seq + (call !RemoveCases (assoc + cases cases + distribute_weight_feature distribute_weight_feature )) - ) - (if (size cases) - (seq - (call !RemoveCases (assoc - cases cases - distribute_weight_feature distribute_weight_feature - )) - - (if thresholds_enabled - (let - (assoc - batch_threshold_info (null) - new_prediction_stats_map - (get - (call !CalculateFeatureResiduals (assoc - weight_feature distribute_weight_feature - use_case_weights .true - compute_all_statistics .true - )) - "prediction_stats" - ) - ) - (assign (assoc - batch_threshold_info - (call !CheckThresholds (assoc - abs_threshold_map abs_threshold_map - delta_threshold_map delta_threshold_map - rel_threshold_map rel_threshold_map - prev_prediction_stats_map prev_prediction_stats_map - new_prediction_stats_map new_prediction_stats_map + (if thresholds_enabled + (let + (assoc + batch_threshold_info (null) + new_prediction_stats_map + (get + (call !CalculateFeatureResiduals (assoc + weight_feature distribute_weight_feature + use_case_weights .true + compute_all_statistics .true )) - )) - (if (apply "or" (values batch_threshold_info)) - (seq - (accum "output" ["threshold_info"] batch_threshold_info) - (conclude) + "prediction_stats" ) - (assign (assoc - prev_prediction_stats_map new_prediction_stats_map + ) + (assign (assoc + batch_threshold_info + (call !CheckThresholds (assoc + abs_threshold_map abs_threshold_map + delta_threshold_map delta_threshold_map + rel_threshold_map rel_threshold_map + prev_prediction_stats_map prev_prediction_stats_map + new_prediction_stats_map new_prediction_stats_map )) + )) + (if (apply "or" (values batch_threshold_info)) + (seq + (accum "output" ["threshold_info"] batch_threshold_info) + (conclude) ) + (assign (assoc + prev_prediction_stats_map new_prediction_stats_map + )) ) ) ) - - ;else couldn't select any from random cases, stop - (and random_cases (< end_index 0)) - (conclude) - ) - - ;enough cases have been removed, can stop removing - (if (<= (call !GetNumTrainingCases) reduce_max_cases) - (conclude) ) ) + ;if the number of cases has been reduced by 'e' or more, auto analyze if needed (if (< (call !GetNumTrainingCases) (/ num_cases 2.718281828459)) (call !AutoAnalyzeIfNeeded (assoc From f8877701979169b359794e9b61324d3c12e7831c Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Wed, 14 Jan 2026 17:19:48 -0500 Subject: [PATCH 06/18] crappy python translation --- howso/ablation.amlg | 168 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 162 insertions(+), 6 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index ad8f0a38..5b7a69d9 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -605,17 +605,173 @@ )) ) + ;START facility something algo + ;approximate_num_cases_to_keep is the amount of cases to select + + (declare (assoc + hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature)) + )) + + (declare (assoc + k_parameter (get hyperparam_map "k") + p_parameter (get hyperparam_map "p") + feature_weights (get hyperparam_map "featureWeights") + dt_parameter (get hyperparam_map "dt") + feature_deviations (get hyperparam_map "featureDeviations") + query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") + )) + + (declare (assoc + all_case_ids (call !AllCases) + ) + + (declare (assoc + best_sim + ;best similarity of all cases to the chosen set of cases to keep + (map 0 all_case_ids) + )) + + (declare (assoc + similarity_matrix + ||(map + (lambda + (unzip + (compute_on_contained_entities + (query_in_entity_list all_case_ids) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_value) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + (if valid_weight_feature weight_feature (null)) + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + all_case_ids + ) + ) + all_case_ids + ) + )) + + (declare (assoc + initial_gain + (map + (lambda (apply "+" (current_value)) ) + similarity_matrix + ) + done .false + )) + + (declare (assoc + heap (zip all_case_ids initial_gain) + )) + (declare (assoc - removable_cases - (contained_entities - (query_exists !internalLabelSession) - (query_select (- num_cases approximate_num_cases_to_keep) (null) (rand) ) + cases_to_keep + (while (not done) + (let + (assoc + current_cases_to_keep + (if (= (current_index 1) 0) + (list) + + (previous_result 1) + ) + ) + + ;neg_gain, idx, stale = heapq.heappop(heap) + (declare (assoc + most_similar_case (first (index_max heap)) + )) + (declare (assoc + most_similar_case_index + (first (filter + (lambda (= (get all_case_ids (current_value)) most_similar_case ) + (indices all_case_ids) + )) + )) + (declare (assoc + gain (get heap most_similar_case) + )) + (assign (assoc + heap (remove heap [most_similar_case]) + )) + + (declare (assoc + update_best_sim + (map + (lambda (apply "max" (current_value))) + best_sim + ;similarity[:, idx] + (map + (lambda (get (current_value) most_similar_case_index)) + similarity_matrix + ) + ) + )) + + ;true_gain = (np.maximum(best_sim, similarity[:, idx])).sum() - best_sim.sum() + (declare (assoc + true_gain + (- + (apply "+" updated_best_sim) + (apply "+" best_sim) + ) + )) + + (if (!= gain true_gain) + (seq + ;update heap with true gain + ;"heapq.heappush(heap, (-true_gain, idx, best_sim[idx]))" + (assign (assoc + heap + (append + heap + (associate most_similar_case true_gain) + ) + )) + + ;keep current set + current_cases_to_keep + ) + + (seq + ; # update best_sim for all points + ; best_sim = np.maximum(best_sim, similarity[:, idx]) + (assign (assoc + best_sim updated_best_sim + )) + + (if (> (size current_cases_to_keep_set) !autoAblationMinNumCases) + (assign (assoc done .true )) + ) + + ; # otherwise the gain is current → accept this point + ; selected.append(idx) + (append current_cases_to_keep most_similar_case) + ) + ) + ) ) )) - ;randomize the order + + + + ;END facility something algo + (assign (assoc - cases (rand removable_cases (size removable_cases) .true) + ;the list of case ids to be removed + cases (null) )) (if !tsTimeFeature From 67ee26d93c4c893217136d957a9238b9af99e55e Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 11:58:12 -0500 Subject: [PATCH 07/18] fixed --- howso/ablation.amlg | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 5b7a69d9..636ff9f3 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -695,7 +695,7 @@ (declare (assoc most_similar_case_index (first (filter - (lambda (= (get all_case_ids (current_value)) most_similar_case ) + (lambda (= (get all_case_ids (current_value)) most_similar_case ) ) (indices all_case_ids) )) )) @@ -707,7 +707,7 @@ )) (declare (assoc - update_best_sim + updated_best_sim (map (lambda (apply "max" (current_value))) best_sim @@ -751,7 +751,7 @@ best_sim updated_best_sim )) - (if (> (size current_cases_to_keep_set) !autoAblationMinNumCases) + (if (> (size current_cases_to_keep) !autoAblationMinNumCases) (assign (assoc done .true )) ) @@ -771,7 +771,11 @@ (assign (assoc ;the list of case ids to be removed - cases (null) + cases + (contained_entities + (query_exists !internalLabelSession) + (query_not_in_entity_list cases_to_keep) + ) )) (if !tsTimeFeature From 7448590cfe04e237e66c4bebce58bdf679a1c9cc Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 12:05:47 -0500 Subject: [PATCH 08/18] msimatch paren --- howso/ablation.amlg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 636ff9f3..4970cc1a 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,7 +623,7 @@ (declare (assoc all_case_ids (call !AllCases) - ) + )) (declare (assoc best_sim From e3b1b93945b5c3e91fb535d3d5b596cc5311a8c5 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Thu, 15 Jan 2026 16:54:59 -0500 Subject: [PATCH 09/18] impl --- howso/ablation.amlg | 227 ++++++++++++++++++++------------------------ 1 file changed, 104 insertions(+), 123 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 4970cc1a..cfcbb72b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -623,148 +623,129 @@ (declare (assoc all_case_ids (call !AllCases) - )) - - (declare (assoc - best_sim - ;best similarity of all cases to the chosen set of cases to keep - (map 0 all_case_ids) - )) - - (declare (assoc - similarity_matrix - ||(map - (lambda - (unzip - (compute_on_contained_entities - (query_in_entity_list all_case_ids) - (query_within_generalized_distance - .infinity ;distance - features - (retrieve_from_entity (current_value) features) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - (if valid_weight_feature weight_feature (null)) - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - ) - all_case_ids - ) - ) - all_case_ids - ) - )) - - (declare (assoc - initial_gain - (map - (lambda (apply "+" (current_value)) ) - similarity_matrix - ) done .false )) - (declare (assoc - heap (zip all_case_ids initial_gain) - )) + (map + (lambda + (accum_entity_roots (current_value) (zip_labels + ["keeping"] [.false] + )) + ) + all_case_ids + ) - (declare (assoc - cases_to_keep - (while (not done) - (let - (assoc - current_cases_to_keep - (if (= (current_index 1) 0) - (list) - (previous_result 1) - ) + #!ReduceComputeDCs + (let + (assoc + case_to_dc_map + (compute_on_contained_entities + (query_equals "keeping" .false) + ||(query_entity_distance_contributions + k_parameter + features + (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision ) + ) + ) - ;neg_gain, idx, stale = heapq.heappop(heap) - (declare (assoc - most_similar_case (first (index_max heap)) - )) - (declare (assoc - most_similar_case_index - (first (filter - (lambda (= (get all_case_ids (current_value)) most_similar_case ) ) - (indices all_case_ids) - )) - )) - (declare (assoc - gain (get heap most_similar_case) - )) - (assign (assoc - heap (remove heap [most_similar_case]) - )) + (call !StoreCaseValues (assoc + case_values_map case_to_dc_map + label_name ".reduce_dc_val" + )) + ) - (declare (assoc - updated_best_sim - (map - (lambda (apply "max" (current_value))) - best_sim - ;similarity[:, idx] - (map - (lambda (get (current_value) most_similar_case_index)) - similarity_matrix - ) - ) - )) + (while (not done) + (let + (assoc + case_to_add + (if (= (current_index 1) 0) + ;on first iteration, just take lowest DC case + (first (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .false) + (query_min ".reduce_dc_val" 1 .true) + )) - ;true_gain = (np.maximum(best_sim, similarity[:, idx])).sum() - best_sim.sum() - (declare (assoc - true_gain - (- - (apply "+" updated_best_sim) - (apply "+" best_sim) + ;otherwise need case with low DC that is far from its most similar case in current_cases_to_keep + (let + (assoc + lowest_dc_cases + (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .false) + (query_min ".reduce_dc_val" 5 .true) + ) ) - )) - (if (!= gain true_gain) - (seq - ;update heap with true gain - ;"heapq.heappush(heap, (-true_gain, idx, best_sim[idx]))" - (assign (assoc - heap - (append - heap - (associate most_similar_case true_gain) + ;for each low-DC case, get its distance to its closest case that we ARE KEEPING + (declare (assoc + lowest_dc_closest_distance_to_selected_map + (map + (lambda + (first (compute_on_contained_entities + (query_equals "keeping" .true) + (query_distance_contributions + 1 + features + [(retrieve_from_entity (current_index 1) features)] + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + )) + ) + (zip lowest_dc_cases) ) )) - ;keep current set - current_cases_to_keep - ) - (seq - ; # update best_sim for all points - ; best_sim = np.maximum(best_sim, similarity[:, idx]) - (assign (assoc - best_sim updated_best_sim - )) - - (if (> (size current_cases_to_keep) !autoAblationMinNumCases) - (assign (assoc done .true )) - ) - - ; # otherwise the gain is current → accept this point - ; selected.append(idx) - (append current_cases_to_keep most_similar_case) + (first (index_max lowest_dc_closest_distance_to_selected_map)) ) ) - ) ) - )) + (assign_to_entities case_to_add (assoc + keeping .true + )) + + (if (>= + (size (contained_entities + (query_exists !internalLabelSession) + (query_equals "keeping" .true) + )) + !autoAblationMinNumCases + ) + (assign (assoc done .true)) + (if (= (mod (current_index) 50) 0) + (call !ReduceComputeDCs) + ) + ) + ) + ) ;END facility something algo @@ -774,7 +755,7 @@ cases (contained_entities (query_exists !internalLabelSession) - (query_not_in_entity_list cases_to_keep) + (query_equals "keeping" .false) ) )) From ca752b8cab878506754be9372f105880cd1b9565 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:02:13 -0500 Subject: [PATCH 10/18] udpaters --- howso/ablation.amlg | 107 +++++++++++++++++++++++++++++++------------- 1 file changed, 75 insertions(+), 32 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index cfcbb72b..68a13680 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -626,6 +626,7 @@ done .false )) + ;mark each case as not being kept at first (map (lambda (accum_entity_roots (current_value) (zip_labels @@ -641,7 +642,7 @@ (assoc case_to_dc_map (compute_on_contained_entities - (query_equals "keeping" .false) + (query_equals "keeping" .false) ;maybe not appropriate? TODO ||(query_entity_distance_contributions k_parameter features @@ -668,68 +669,110 @@ )) ) + ;experimental params + (declare (assoc + ;the amount of low DC cases to consider for keeping + lowest_dc_trunc_n 15 + + ;number of cases to select for keeping per iteration + cases_to_keep_per_iter 5 + + ;how many iterations between each DC recomputation + dc_recompute_cycles 10 + )) + (while (not done) (let (assoc - case_to_add + cases_to_add (if (= (current_index 1) 0) ;on first iteration, just take lowest DC case - (first (contained_entities + (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) (query_min ".reduce_dc_val" 1 .true) - )) + ) - ;otherwise need case with low DC that is far from its most similar case in current_cases_to_keep + ;otherwise need cases with low DC that is far from its most similar case in current_cases_to_keep (let (assoc lowest_dc_cases (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" 5 .true) + (query_min ".reduce_dc_val" lowest_dc_trunc_n .true) ) ) ;for each low-DC case, get its distance to its closest case that we ARE KEEPING (declare (assoc lowest_dc_closest_distance_to_selected_map + (compute_on_contained_entities + (query_equals "keeping" .true) + ||(query_entity_distance_contributions + 1 ;k + features + lowest_dc_cases + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) + distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) + + (declare (assoc + low_dc_case_scores (map (lambda - (first (compute_on_contained_entities - (query_equals "keeping" .true) - (query_distance_contributions - 1 - features - [(retrieve_from_entity (current_index 1) features)] - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - )) + ;divide distance to closest selected case by distance contribution + ;more distance to closest selected case = good + ;lower distance contribution = good + (/ + (current_value) + (retrieve_from_entity (current_index) ".reduce_dc_val") + ) + ) + lowest_dc_closest_distance_to_selected_map + ) + )) + + (declare (assoc + ;sorting low dc cases by *decreasing* "score" + sorted_lowest_dc_cases + (sort + (lambda + (- + (get low_dc_case_scores (current_value 1)) + (get low_dc_case_scores (current_value)) + ) ) - (zip lowest_dc_cases) + lowest_dc_cases ) )) - (first (index_max lowest_dc_closest_distance_to_selected_map)) + (trunc sorted_lowest_dc_cases cases_to_keep_per_iter) ) ) ) - (assign_to_entities case_to_add (assoc - keeping .true - )) + (map + (lambda + (assign_to_entities (current_value) (assoc + keeping .true + )) + ) + cases_to_add + ) (if (>= (size (contained_entities @@ -740,7 +783,7 @@ ) (assign (assoc done .true)) - (if (= (mod (current_index) 50) 0) + (if (and (current_index) (= (mod (current_index) dc_recompute_cycles) 0) ) (call !ReduceComputeDCs) ) ) From 3a539a0f65835e83409cacdf97611945f55d0757 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:46:29 -0500 Subject: [PATCH 11/18] now testing this version --- howso/ablation.amlg | 116 +++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 55 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 68a13680..a7ca465e 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -637,14 +637,13 @@ ) - #!ReduceComputeDCs + #!ReduceComputeNeighborSurprisals (let (assoc - case_to_dc_map + case_neighbor_surprisal_map (compute_on_contained_entities - (query_equals "keeping" .false) ;maybe not appropriate? TODO ||(query_entity_distance_contributions - k_parameter + 1 features (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) p_parameter @@ -664,21 +663,19 @@ ) (call !StoreCaseValues (assoc - case_values_map case_to_dc_map - label_name ".reduce_dc_val" + case_values_map case_neighbor_surprisal_map + label_name ".neighbor_surprisal" )) ) ;experimental params (declare (assoc - ;the amount of low DC cases to consider for keeping - lowest_dc_trunc_n 15 + ;the amount of lowest smallest-surprisal cases to consider for keeping + ;CAN BE NULLED + lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 5 - - ;how many iterations between each DC recomputation - dc_recompute_cycles 10 + cases_to_keep_per_iter 1 )) (while (not done) @@ -690,77 +687,88 @@ (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" 1 .true) + (query_min ".neighbor_surprisal" 1 .true) ) - ;otherwise need cases with low DC that is far from its most similar case in current_cases_to_keep + ;otherwise need cases with low neighbor surprisal (ns) that is far from its most similar case in current_cases_to_keep (let (assoc - lowest_dc_cases + lowest_ns_cases (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false) - (query_min ".reduce_dc_val" lowest_dc_trunc_n .true) + (if lowest_ns_cases_trunc_n + (query_min ".neighbor_surprisal" lowest_ns_cases_trunc_n .true) + ) ) ) - ;for each low-DC case, get its distance to its closest case that we ARE KEEPING + ;for each low-ns case, get its core-set surprisal (css) (declare (assoc - lowest_dc_closest_distance_to_selected_map - (compute_on_contained_entities - (query_equals "keeping" .true) - ||(query_entity_distance_contributions - 1 ;k - features - lowest_dc_cases - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= dt_parameter "surprisal_to_prob") "surprisal" dt_parameter ) - distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision + lowest_ns_css_map + (map + (lambda + ;find the max distance between each low-ns-case and any of the selected cases + (apply "max" (values + (compute_on_contained_entities + (query_equals "keeping" .true) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_index) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) + distribute_weight_feature + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) ) + (zip lowest_ns_cases) ) )) (declare (assoc - low_dc_case_scores + low_ns_case_scores (map (lambda - ;divide distance to closest selected case by distance contribution - ;more distance to closest selected case = good - ;lower distance contribution = good + ;divide neighbor surprisal by coreset surprisal + + ;I think the LLM desc was wrong, so I flipped it to be coreset surprisal / neighbor surprisal (/ (current_value) - (retrieve_from_entity (current_index) ".reduce_dc_val") + (retrieve_from_entity (current_index) ".neighbor_surprisal") ) ) - lowest_dc_closest_distance_to_selected_map + lowest_ns_css_map ) )) - (declare (assoc - ;sorting low dc cases by *decreasing* "score" - sorted_lowest_dc_cases + + ;sorting low dc cases by *decreasing* "score" and return the right amount + (trunc + (if (= 1 cases_to_keep_per_iter) + (index_max low_ns_case_scores) + (sort (lambda (- - (get low_dc_case_scores (current_value 1)) - (get low_dc_case_scores (current_value)) + (get low_ns_case_scores (current_value 1)) + (get low_ns_case_scores (current_value)) ) ) - lowest_dc_cases + lowest_ns_cases ) - )) - - - (trunc sorted_lowest_dc_cases cases_to_keep_per_iter) + ) + cases_to_keep_per_iter + ) ) ) ) @@ -783,9 +791,7 @@ ) (assign (assoc done .true)) - (if (and (current_index) (= (mod (current_index) dc_recompute_cycles) 0) ) - (call !ReduceComputeDCs) - ) + ; (call !ReduceComputeSmallestSurprisals) ) ) ) From 022331a246a06c43a227700b2866696fe7f39349 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 13:49:38 -0500 Subject: [PATCH 12/18] nitpicking --- howso/ablation.amlg | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index a7ca465e..bcad50d4 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -645,7 +645,7 @@ ||(query_entity_distance_contributions 1 features - (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false)) + all_case_ids p_parameter feature_weights !queryDistanceTypeMap @@ -670,7 +670,7 @@ ;experimental params (declare (assoc - ;the amount of lowest smallest-surprisal cases to consider for keeping + ;the amount of lowest neighbor-surprisal cases to consider for keeping ;CAN BE NULLED lowest_ns_cases_trunc_n (null) @@ -773,6 +773,7 @@ ) ) + ;mark new cases to keep (map (lambda (assign_to_entities (current_value) (assoc From f967c97eb59c2fcf919a01e07a01a201a8af607f Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:01:50 -0500 Subject: [PATCH 13/18] optimization --- howso/ablation.amlg | 84 +++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index bcad50d4..8e807b02 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -668,6 +668,11 @@ )) ) + (declare (assoc + ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) + case_to_css_map (map (lambda (+ 0)) (zip all_case_ids)) + )) + ;experimental params (declare (assoc ;the amount of lowest neighbor-surprisal cases to consider for keeping @@ -703,38 +708,6 @@ ) ) - ;for each low-ns case, get its core-set surprisal (css) - (declare (assoc - lowest_ns_css_map - (map - (lambda - ;find the max distance between each low-ns-case and any of the selected cases - (apply "max" (values - (compute_on_contained_entities - (query_equals "keeping" .true) - (query_within_generalized_distance - .infinity ;distance - features - (retrieve_from_entity (current_index) features) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) - distribute_weight_feature - "fixed rand seed" - (null) ;radius - !numericalPrecision - ) - ) - )) - ) - (zip lowest_ns_cases) - ) - )) - (declare (assoc low_ns_case_scores (map @@ -743,11 +716,11 @@ ;I think the LLM desc was wrong, so I flipped it to be coreset surprisal / neighbor surprisal (/ - (current_value) + (get case_to_css_map (current_index)) (retrieve_from_entity (current_index) ".neighbor_surprisal") ) ) - lowest_ns_css_map + (zip lowest_ns_cases) ) )) @@ -783,6 +756,49 @@ cases_to_add ) + (declare (assoc + new_case_css_map + (map + (lambda + ;get their max surprisal to any of the cases_to_add + (apply "max" (values + (compute_on_contained_entities + (query_in_entity_list cases_to_add) + (query_within_generalized_distance + .infinity ;distance + features + (retrieve_from_entity (current_index) features) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + (if (= "surprisal_to_prob" dt_parameter) "surprisal" 1) + distribute_weight_feature + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) + ) + )) + ) + ;all non-coreset cases + (zip (contained_entities (query_exists !internalLabelSession) (query_equals "keeping" .false))) + ) + )) + + (assign (assoc + case_to_css_map + ;take max of new max css and old max css + (map + (lambda + (max (current_value) (get case_to_css_map (current_index))) + ) + new_case_css_map + ) + )) + (if (>= (size (contained_entities (query_exists !internalLabelSession) From e5769dd3139d33a69fb4125536b21a2b1e3294c8 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:46:46 -0500 Subject: [PATCH 14/18] blur weights, 2 passes --- howso/ablation.amlg | 41 ++++++++++++++++++ howso/update_cases.amlg | 96 +++++++++++++++++++++++++++-------------- 2 files changed, 105 insertions(+), 32 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index bcad50d4..6c969791 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -626,6 +626,47 @@ done .false )) + (declare (assoc + neighbors_map + ||(map + (lambda + (compute_on_contained_entities + (query_not_in_entity_list [(current_index 1)]) + (query_nearest_generalized_distance + k_parameter + features + (current_index) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + distribute_weight_feature + (rand) + (null) ;radius + !numericalPrecision + ) + ) + ) + (zip all_case_ids) + ) + )) + + ;blur weights among neighbors + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) + ;second pass + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) + ;mark each case as not being kept at first (map (lambda diff --git a/howso/update_cases.amlg b/howso/update_cases.amlg index 83bcda29..ea59414b 100644 --- a/howso/update_cases.amlg +++ b/howso/update_cases.amlg @@ -829,6 +829,7 @@ case_ids (list) distribute_weight_feature ".case_weight" has_rebalance_features .false + redistribute_weights_map (null) ) (declare (assoc original_distribute_weight_feature distribute_weight_feature)) @@ -856,32 +857,45 @@ (lambda (let (assoc ;case weight value that needs to be distributed among the neighbors - case_weight (or (get (current_value 1) distribute_weight_feature) 1) + case_weight (get (current_value 1) distribute_weight_feature) + ) + + ;if case_weight is undefined, default it to 1 + (if (= (null) case_weight) + (assign (assoc case_weight 1)) + + ;if case has a weight of zero, skip it + (= 0 case_weight) + (conclude [0 {}]) ) (declare (assoc ;map of case_id -> weight closest_cases_map - (compute_on_contained_entities - ;don't consider cases whose weights should be distributed, since they are all about to be removed - (query_not_in_entity_list case_ids) - (query_nearest_generalized_distance - (get hyperparam_map "k") - (replace features) - ;case id - (current_index 1) - (get hyperparam_map "p") - (get hyperparam_map "featureWeights") - !queryDistanceTypeMap - (get hyperparam_map "featureDomainAttributes") - (get hyperparam_map "featureDeviations") - (null) - (get hyperparam_map "dt") - original_distribute_weight_feature - ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") - "fixed rand seed" - (null) ;radius - !numericalPrecision + (if redistribute_weights_map + (get redistribute_weights_map (current_index 1)) + + (compute_on_contained_entities + ;don't consider cases whose weights should be distributed, since they are all about to be removed + (query_not_in_entity_list case_ids) + (query_nearest_generalized_distance + (get hyperparam_map "k") + (replace features) + ;case id + (current_index 1) + (get hyperparam_map "p") + (get hyperparam_map "featureWeights") + !queryDistanceTypeMap + (get hyperparam_map "featureDomainAttributes") + (get hyperparam_map "featureDeviations") + (null) + (get hyperparam_map "dt") + original_distribute_weight_feature + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + ) ) ) )) @@ -985,6 +999,22 @@ )) ) + ;else redistributing weights to neighbors, by setting the weight directly (not accumulating) + ;and set weight 0 if none is to be redistributed + (size redistribute_weights_map) + ||(map + (lambda + (assign_to_entities (current_index) (associate + distribute_weight_feature (+ (or (last (current_value 1)))) + )) + ) + (zip case_ids) + ;reduce all the closest cases maps into one map of individual case -> total accumulated weight + (call !ReduceAssocsAddValues (assoc + list_of_assocs (map (lambda (last (current_value))) (values distributed_cases_maps)) + )) + ) + ;else no rebalance features, distribute the corresponding portion of this case's weight based on the neighbor's influence ||(map (lambda @@ -997,17 +1027,19 @@ ) ) - ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as - ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, - ; if enabled. - (accum_to_entities (assoc - !dataMassChangeSinceLastAnalyze - ;sum of all case_weight values - (apply "+" (map - (lambda (first (current_value))) - (values distributed_cases_maps) - )) - )) + (if (= (null) redistribute_weights_map) + ;add the weight accumulated to each case to !dataMassChangeSinceLastAnalyze to ensure that cases trained as + ; only weights (whether through auto-ablation or otherwise) contribute to the progress towards the next auto-analyze, + ; if enabled. + (accum_to_entities (assoc + !dataMassChangeSinceLastAnalyze + ;sum of all case_weight values + (apply "+" (map + (lambda (first (current_value))) + (values distributed_cases_maps) + )) + )) + ) ) ;Helper method to reduce a list of assocs into one assoc with all the values summed up. From f99543d1931b5eb87bedcde16087ff017e0154e5 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:48:27 -0500 Subject: [PATCH 15/18] added comment --- howso/ablation.amlg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 626621d8..1466ce7b 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -663,7 +663,7 @@ ;second pass (call !DistributeCaseInfluenceWeights (assoc case_ids all_case_ids - redistribute_weights_map neighbors_map + redistribute_weights_map neighbors_map ;TODO: does this need to be recomputed after each blur pass? has_rebalance_features .false )) @@ -677,7 +677,6 @@ all_case_ids ) - #!ReduceComputeNeighborSurprisals (let (assoc From b8c4aea90e95144c526783f5337f6c0cbbc6be3a Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 15:53:28 -0500 Subject: [PATCH 16/18] switch to min mode --- howso/ablation.amlg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 1466ce7b..5b2371c9 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -710,7 +710,7 @@ (declare (assoc ;map of case to its core-set surprisal (the max surprisal to any case in the coreset for all cases) - case_to_css_map (map (lambda (+ 0)) (zip all_case_ids)) + case_to_css_map (map (lambda (+ .infinity)) (zip all_case_ids)) )) ;experimental params @@ -720,7 +720,7 @@ lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 1 + cases_to_keep_per_iter 10 )) (while (not done) @@ -798,10 +798,10 @@ (declare (assoc new_case_css_map - (map + ||(map (lambda ;get their max surprisal to any of the cases_to_add - (apply "max" (values + (apply "min" (values (compute_on_contained_entities (query_in_entity_list cases_to_add) (query_within_generalized_distance @@ -833,7 +833,7 @@ ;take max of new max css and old max css (map (lambda - (max (current_value) (get case_to_css_map (current_index))) + (min (current_value) (get case_to_css_map (current_index))) ) new_case_css_map ) From 6dd2e651eb7d540d18c5f8dab54e74a9751c2b70 Mon Sep 17 00:00:00 2001 From: howsoRes <144272317+howsoRes@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:32:49 -0500 Subject: [PATCH 17/18] remove zero weight and duplicates --- howso/ablation.amlg | 82 ++++++++++++++++++++++++++++++----------- howso/update_cases.amlg | 17 +++++++++ 2 files changed, 78 insertions(+), 21 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 1466ce7b..c1a85227 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -655,15 +655,71 @@ )) ;blur weights among neighbors - (call !DistributeCaseInfluenceWeights (assoc - case_ids all_case_ids - redistribute_weights_map neighbors_map - has_rebalance_features .false + (declare (assoc + duplicates + (call !DistributeCaseInfluenceWeights (assoc + case_ids all_case_ids + redistribute_weights_map neighbors_map + has_rebalance_features .false + )) )) + + ;merge duplicates if any exist + (if (size duplicates) + (call !ReduceMergeDuplicateCases (assoc + all_duplicate_cases_map (zip duplicates) + )) + ) + + (declare (assoc zero_weight_cases (contained_entities (query_equals distribute_weight_feature 0)) )) + ;remove zero-weight cases + (if (size zero_weight_cases) + (call !RemoveCases (assoc + cases zero_weight_cases + ;weight has already been distributed above during the first blur stop, don't do it again + distribute_weight_feature (null) + )) + ) + + ;dataset has been modified due to removal of dupes and zero weight cases + (if (or (size zero_weight_cases) (size duplicates)) + (seq + (assign (assoc all_case_ids (call !AllCases) )) + + (assign (assoc + neighbors_map + ||(map + (lambda + (compute_on_contained_entities + (query_not_in_entity_list [(current_index 1)]) + (query_nearest_generalized_distance + k_parameter + features + (current_index) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + distribute_weight_feature ;TODO: should this be taken into account? + (rand) + (null) ;radius + !numericalPrecision + ) + ) + ) + (zip all_case_ids) + ) + )) + ) + ) + ;second pass (call !DistributeCaseInfluenceWeights (assoc case_ids all_case_ids - redistribute_weights_map neighbors_map ;TODO: does this need to be recomputed after each blur pass? + redistribute_weights_map neighbors_map has_rebalance_features .false )) @@ -1001,22 +1057,6 @@ )) duplicate_neighbors_map ) - - ;recompute influence weight entropy for the remaining no-longer duplicates - (declare (assoc - cases_too_far_map - (call !ComputeAndStoreInfluenceWeightEntropies (assoc - features features - weight_feature distribute_weight_feature - use_case_weights .true - compute_all .true - specific_case_ids (indices duplicate_neighbors_map) - )) - )) - - (if (size cases_too_far_map) - (accum (assoc case_duplicate_or_far_map cases_too_far_map)) - ) ) diff --git a/howso/update_cases.amlg b/howso/update_cases.amlg index ea59414b..07dffecc 100644 --- a/howso/update_cases.amlg +++ b/howso/update_cases.amlg @@ -846,6 +846,7 @@ )) ;default value of 1 for the accumulate_weight_feature new_weight_label_and_value (zip_labels (list distribute_weight_feature) (list 1)) + duplicates [] )) ;ensure the weight feature isn't among the features being used to find cases for distribution @@ -909,6 +910,10 @@ closest_cases_map (map 1 (filter (lambda (= (current_value) .infinity)) closest_cases_map) ) )) (assign (assoc total_influence (apply "+" (values closest_cases_map)) )) + + (if redistribute_weights_map + (accum (assoc duplicates (current_index 1))) + ) ) ;all cases are equally too distant, set their influence to be same @@ -917,6 +922,15 @@ closest_cases_map (map 1 closest_cases_map) total_influence (size closest_cases_map) )) + + ;if redistributing weights and this case is a duplicate, add it to the list of duplicates + (!= (null) redistribute_weights_map) + (if (and + (= "surprisal_to_prob" (get hyperparam_map "dt")) + (contains_value closest_cases_map 1) + ) + (accum (assoc duplicates (current_index 1))) + ) ) ;output pairs of: [ case_weight, distributed weight closest_cases_map] @@ -1040,6 +1054,9 @@ )) )) ) + + ;output list of duplicates + duplicates ) ;Helper method to reduce a list of assocs into one assoc with all the values summed up. From 902e976eed8dfa8070b4c209902fac9883e1a421 Mon Sep 17 00:00:00 2001 From: Cade Mack <24661281+cademack@users.noreply.github.com> Date: Fri, 16 Jan 2026 16:42:45 -0500 Subject: [PATCH 18/18] updating comments and such --- howso/ablation.amlg | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/howso/ablation.amlg b/howso/ablation.amlg index 868602ff..85a3cc73 100644 --- a/howso/ablation.amlg +++ b/howso/ablation.amlg @@ -605,9 +605,6 @@ )) ) - ;START facility something algo - ;approximate_num_cases_to_keep is the amount of cases to select - (declare (assoc hyperparam_map (call !GetHyperparameters (assoc weight_feature distribute_weight_feature)) )) @@ -776,7 +773,7 @@ lowest_ns_cases_trunc_n (null) ;number of cases to select for keeping per iteration - cases_to_keep_per_iter 10 + cases_to_keep_per_iter 5 )) (while (not done) @@ -856,7 +853,7 @@ new_case_css_map ||(map (lambda - ;get their max surprisal to any of the cases_to_add + ;get their min surprisal to any of the cases_to_add (apply "min" (values (compute_on_contained_entities (query_in_entity_list cases_to_add) @@ -886,7 +883,7 @@ (assign (assoc case_to_css_map - ;take max of new max css and old max css + ;take min of new min css and old min css (map (lambda (min (current_value) (get case_to_css_map (current_index)))