Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions howso.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
#!cyclicFeaturesMap (null)
#!numericNominalFeaturesMap (null)
#!editDistanceFeatureTypesMap (null)
#!codeFeatureRecursiveMatchingMap (null)
#!codeFeatureDomainAttributesMap (null)
#!stringNominalFeaturesSet (null)
#!userSpecifiedFeatureErrorsMap (null)
#!averageCaseEntropyAddition (null)
Expand Down Expand Up @@ -467,8 +467,10 @@
;assoc of all string continuous or any json or amalgam features for fast lookup, feature -> data_type
!editDistanceFeatureTypesMap (assoc)

;assoc of code (json/yaml/amalgam) feature -> boolean (whether they are recursive_matching or not)
!codeFeatureRecursiveMatchingMap (assoc)
;assoc of code (json/yaml/amalgam) feature -> assoc of feature domain properties with the following keys:
; "types_must_match", "nominal_numbers", "nominal_strings", "recursive_matching";
;string/string_mixable features will have only one key of "recursive_matching" defined
!codeFeatureDomainAttributesMap (assoc)

;assoc of nominal feature names whose values are all unique
!uniqueNominalsSet (assoc)
Expand Down
60 changes: 45 additions & 15 deletions howso/attribute_maps.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -571,20 +571,45 @@
)
)

#!ComposeCodeFeatureRecursiveMatchingMap
;helper method executed on any non-number continuous feature, i.e., code features, string and string_mixable
#!ComposeCodeFeatureDomainAttributesMap
(map
(lambda
;code features have an explicit feature domain assoc specified, for use in queries and opcodes
(if (contains_value ["json" "yaml" "amalgam"] (current_value))
;parameter wasn't specified, default to .false for json/yaml and .true for amalgam
(if (= (null) (get feature_attributes [(current_index 1) "recursive_matching"]))
(= "amalgam" (current_value))
{
"types_must_match"
;default to true
(if (= (null) (get feature_attributes [(current_index 1) "types_must_match"]))
.true
(get feature_attributes [(current_index 1) "types_must_match"])
)
"nominal_numbers"
;default to false
(if (= (null) (get feature_attributes [(current_index 1) "nominal_numbers"]))
.false
(get feature_attributes [(current_index 1) "nominal_numbers"])
)
"nominal_strings"
;default to true
(if (= (null) (get feature_attributes [(current_index 1) "nominal_strings"]))
.true
(get feature_attributes [(current_index 1) "nominal_strings"])
)
"recursive_matching"
;parameter wasn't specified, default to .false for json/yaml and .true for amalgam
(if (= (null) (get feature_attributes [(current_index 1) "recursive_matching"]))
(= "amalgam" (current_value))

;else use the explicitly specified value
(get feature_attributes [(current_index 1) "recursive_matching"])
)
;else use the explicitly specified value
(get feature_attributes [(current_index 1) "recursive_matching"])
)
}

;false for any non-code features (string or string_mixable)
.false
;else any non-code features (string or string_mixable) set recursive_matching to false
{
"recursive_matching" .false
}
)
)
code_features_map
Expand Down Expand Up @@ -792,7 +817,7 @@
)

;Helper method to create a map of feature -> limits, based on the type of feature it is and limits specified in boundaries map
#!ComposeFeatureLimitsMap
#!ComposeFeatureDomainAttributesMap
(map
(lambda (let
(assoc feature (current_index 1))
Expand All @@ -808,15 +833,20 @@
;max string length
(and
(= "continuous" (get (current_value) "type"))
(= "string" (get (current_value) "data_type"))
(or
(= "string" (get (current_value) "data_type"))
(= "string_mixable" (get (current_value) "data_type"))
)
)
(replace (get !featureBoundsMap (list feature "max")))

;max code size
(= "code" (get (current_value) "type"))
(replace (get !featureBoundsMap (list feature "max")))
;else "continuous":

;code features use an assoc of properties as their feature domain attributes/limits
(contains_value ["yaml" "json" "amalgam"] (get (current_value) "data_type"))
(get !codeFeatureDomainAttributesMap feature)

;else "continuous", and min or max are specified, provide as a delta of max - min
;if min or max are specified, provide as a delta of max - min
(if
(and
(!= (null) (get !featureBoundsMap (list feature "min")) )
Expand Down
20 changes: 15 additions & 5 deletions howso/attributes.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@
; matches without considering recursion, which will yield better and faster results if the schema of the
; semistructured data is not recursive.
;
; 'types_must_match': boolean, defaults to true, applicable to code features (when 'data_type' is one of json/yaml/amalgam).
; If true, only considers nodes common if their types match.
;
; 'nominal_numbers': boolean, defaults to false, applicable to code features (when 'data_type' is one of json/yaml/amalgam).
; If true, will assume that all numbers will match only if identical; if false, it will compare similarity of values.
;
; 'nominal_strings': boolean, defaults to true, applicable to code features (when 'data_type' is one of json/yaml/amalgam).
; If true, will assume that all strings will match only if identical;
; if false, uses string edit distance to compare similarity.
;
; 'id_feature': boolean, Set to true only for nominal features containing nominal IDs to specify that this
; feature should be used to compute case weights for id based privacy. For time series,
; this feature will be used as the id for each time series generation. Default is false
Expand Down Expand Up @@ -217,7 +227,7 @@
ordinal_string_to_ordinal_map (assoc)
ordinal_ordinal_to_string_map (assoc)
non_number_continuous_features_map (assoc)
code_feature_recursive_matching_map (assoc)
code_feature_domain_attributes_map (assoc)
numeric_nominal_features_map (assoc)
string_nominal_features_set (assoc)
feature_rounding_map (assoc)
Expand Down Expand Up @@ -311,8 +321,8 @@

(if (size non_number_continuous_features_map)
(assign (assoc
code_feature_recursive_matching_map
(call !ComposeCodeFeatureRecursiveMatchingMap (assoc code_features_map non_number_continuous_features_map))
code_feature_domain_attributes_map
(call !ComposeCodeFeatureDomainAttributesMap (assoc code_features_map non_number_continuous_features_map))
))
)

Expand Down Expand Up @@ -600,7 +610,7 @@
!ordinalStringToOrdinalMap ordinal_string_to_ordinal_map
!ordinalOrdinalToStringMap ordinal_ordinal_to_string_map
!editDistanceFeatureTypesMap non_number_continuous_features_map
!codeFeatureRecursiveMatchingMap code_feature_recursive_matching_map
!codeFeatureDomainAttributesMap code_feature_domain_attributes_map
!numericNominalFeaturesMap numeric_nominal_features_map
!stringNominalFeaturesSet string_nominal_features_set
!novelSubstitionFeatureSet novel_substition_feature_set
Expand All @@ -625,7 +635,7 @@
(call !SetNominalFeatures (assoc nominal_features nominals))
(call !SetCyclicFeatures (assoc feature_attributes cyclics_map))

(declare (assoc feature_limits_map (call !ComposeFeatureLimitsMap) ))
(declare (assoc feature_limits_map (call !ComposeFeatureDomainAttributesMap) ))
(declare (assoc
updated_hp_map (call !UpdateHyperparametersWithFeatureDomainAttributes (assoc hp_map !hyperparameterMetadataMap))
updated_default_hp_map (call !UpdateHyperparametersWithFeatureDomainAttributes (assoc hp_map !defaultHyperparameters))
Expand Down
17 changes: 10 additions & 7 deletions howso/contributions.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@
feature_contributions_map (assoc)
num_training_cases (call !GetNumTrainingCases)
edit_distance_action_feature (contains_index !editDistanceFeatureTypesMap action_feature)
is_string_mixable (= "string_mixable" (get !editDistanceFeatureTypesMap action_feature))
is_recursive_matching (get !codeFeatureRecursiveMatchingMap action_feature)
code_feature_attributes_map (get !codeFeatureDomainAttributesMap action_feature)
edit_distance_feature_attributes_map
(append
{ "use_string_edit_distance" (= "string_mixable" (get !editDistanceFeatureTypesMap action_feature)) }
(get !codeFeatureDomainAttributesMap action_feature)
)
;store an assoc of lag/rate/delta feature -> lag/order amount for time series flows
ts_feature_lag_amount_map (if !tsTimeFeature (call !BuildTSFeatureLagAmountMap))
max_lag_index_value (null)
Expand Down Expand Up @@ -455,17 +459,17 @@
(call !CombineCode (assoc
sources feature_reactions_with
weights weights_with
is_recursive_matching is_recursive_matching
code_feature_attributes_map code_feature_attributes_map
))
mixed_without
(call !CombineCode (assoc
sources feature_reactions_without
weights weights_without
is_recursive_matching is_recursive_matching
code_feature_attributes_map code_feature_attributes_map
))
)
(list
(edit_distance mixed_with mixed_without is_string_mixable is_recursive_matching)
(edit_distance mixed_with mixed_without edit_distance_feature_attributes_map)
(difference mixed_with mixed_without)
)
))
Expand Down Expand Up @@ -639,8 +643,7 @@
(edit_distance
(get reaction_with (list "action_values" 0))
(get reaction_without (list "action_values" 0))
is_string_mixable
is_recursive_matching
edit_distance_feature_attributes_map
)
;TODO: 17356, deal with averaging out differences
(difference
Expand Down
2 changes: 1 addition & 1 deletion howso/details.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@
(call !CombineCode (assoc
sources neighbor_feature_values
weights weights
is_recursive_matching (get !codeFeatureRecursiveMatchingMap (current_value 1))
code_feature_attributes_map (get !codeFeatureDomainAttributesMap (current_value 1))
))

))
Expand Down
13 changes: 9 additions & 4 deletions howso/influences.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -624,11 +624,16 @@
(edit_distance
(get expected_values index)
(get reaction_values ["action_values" index])
(or
(= "string" (get !editDistanceFeatureTypesMap action_feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap action_feature))
(append
{
"use_string_edit_distance"
(or
(= "string" (get !editDistanceFeatureTypesMap action_feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap action_feature))
)
}
(get !codeFeatureDomainAttributesMap action_feature)
)
(get !codeFeatureRecursiveMatchingMap action_feature)
)

(abs (-
Expand Down
11 changes: 8 additions & 3 deletions howso/react_discriminative.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@
(call !CombineCode (assoc
sources candidate_case_values
weights candidate_case_weights
is_recursive_matching (get !codeFeatureRecursiveMatchingMap action_feature)
code_feature_attributes_map (get !codeFeatureDomainAttributesMap action_feature)
))

;divide the dot product by the total weight
Expand Down Expand Up @@ -768,7 +768,7 @@
sources (list)
weights (list)
similar_mix_chance 0
is_recursive_matching .false
code_feature_attributes_map {}
)

;compute accumed_weights by adding up the total probability mass seen so far
Expand All @@ -791,6 +791,11 @@
)
weights
)
domain_attributes_map
(append
{"similar_mix_chance" similar_mix_chance}
code_feature_attributes_map
)
))

(reduce
Expand All @@ -809,7 +814,7 @@
frac_b (/ (get weights (current_index 1)) prob_mass)
))

(mix (previous_result) (current_value) frac_a frac_b similar_mix_chance is_recursive_matching)
(mix (previous_result) (current_value) frac_a frac_b domain_attributes_map)
))
sources
)
Expand Down
13 changes: 9 additions & 4 deletions howso/residuals.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -1338,11 +1338,16 @@
(edit_distance
case_feature_value
interpolated_value
(or
(= "string" (get !editDistanceFeatureTypesMap feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap feature))
(append
{
"use_string_edit_distance"
(or
(= "string" (get !editDistanceFeatureTypesMap feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap feature))
)
}
(get !codeFeatureDomainAttributesMap feature)
)
(get !codeFeatureRecursiveMatchingMap feature)
)

(abs (- case_feature_value interpolated_value))
Expand Down
17 changes: 14 additions & 3 deletions howso/synthesis_utilities.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,6 @@
case_index 1
original_regional_feature_values_map (assoc)
is_string_mixable .false
is_recursive_matching (get !codeFeatureRecursiveMatchingMap feature)
)

;explode all the strings to treat them as lists
Expand Down Expand Up @@ -975,11 +974,21 @@
)
)

(declare (assoc
string_feature_domain_attributes_map
(append
{"use_string_edit_distance" is_string_mixable}
(get !codeFeatureDomainAttributesMap feature)
)
))

;create assoc of case id -> edit distance
(declare (assoc
regional_edit_distances_map
(map
(lambda (edit_distance (current_value) intersected_regional_value is_string_mixable is_recursive_matching))
(lambda
(edit_distance (current_value) intersected_regional_value string_feature_domain_attributes_map)
)
regional_feature_values_map
)
))
Expand All @@ -997,7 +1006,9 @@
(assoc
local_edit_distances
(map
(lambda (edit_distance (current_value) intersected_local_value is_string_mixable is_recursive_matching))
(lambda
(edit_distance (current_value) intersected_local_value string_feature_domain_attributes_map)
)
(unzip regional_feature_values_map local_case_ids)
)
)
Expand Down
1 change: 1 addition & 0 deletions unit_tests/ut_h_clustering.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@
(commonality
cluster_1_expected_indices_map
cluster_1_clustered_indices_map
{ "recursive_matching" .false }
)
(+ 1 (max (size cluster_1_clustered_indices_map) (size cluster_1_expected_indices_map)) )
)
Expand Down
2 changes: 1 addition & 1 deletion unit_tests/ut_h_edit_dist_features.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

(print "Set 'recursive_matching' flag correctly for each feature: ")
(call assert_same (assoc
obs (call_entity "howso" "debug_label" (assoc label "!codeFeatureRecursiveMatchingMap"))
obs (call_entity "howso" "debug_label" (assoc label "!codeFeatureDomainAttributesMap"))
exp
{
amalgam .true
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"version": "0.0.0",
"dependencies": {
"amalgam": "71.1.0"
"amalgam": "72.0.0"
}
}
Loading