Skip to content

Commit b56158b

Browse files
author
ahmedgc
committed
docs
Updating inline docs.
1 parent d4888fa commit b56158b

File tree

1 file changed

+69
-103
lines changed

1 file changed

+69
-103
lines changed

pysf/generalisation.py

Lines changed: 69 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -84,20 +84,9 @@ def combine_evaluators(list_of_input_tupes):
8484

8585

8686
class Target(LoggingHandler):
87-
"""The summary line for a class docstring should fit on one line.
88-
89-
If the class has public attributes, they may be documented here
90-
in an ``Attributes`` section and follow the same formatting as a
91-
function's ``Args`` section. Alternatively, attributes may be documented
92-
inline with the attribute's declaration (see __init__ method below).
93-
94-
Properties created with the ``@property`` decorator should be documented
95-
in the property's getter method.
96-
97-
Attributes:
98-
attr1 (str): Description of `attr1`.
99-
attr2 (:obj:`int`, optional): Description of `attr2`.
100-
87+
"""Mapping between the features that are to be input into a particular prediction strategy, and the features that should be output (predicted) by that strategy.
88+
89+
Taken together, the input features, output features, and predictor template constitute a prediction strategy.
10190
"""
10291
def __init__(self, data, predictor_template, input_time_column, input_value_colnames, output_value_colnames, description = None):
10392
super(Target, self).__init__()
@@ -167,19 +156,9 @@ def __setstate__(self, state):
167156

168157

169158
class GeneralisationPerformance(LoggingHandler):
170-
"""The summary line for a class docstring should fit on one line.
171-
172-
If the class has public attributes, they may be documented here
173-
in an ``Attributes`` section and follow the same formatting as a
174-
function's ``Args`` section. Alternatively, attributes may be documented
175-
inline with the attribute's declaration (see __init__ method below).
159+
"""Holds the results of estimating a predictor's overall generalisation error across multiple folds.
176160
177-
Properties created with the ``@property`` decorator should be documented
178-
in the property's getter method.
179-
180-
Attributes:
181-
attr1 (str): Description of `attr1`.
182-
attr2 (:obj:`int`, optional): Description of `attr2`.
161+
Provides methods that expose the overall and per-timestamp generalisation error estimates.
183162
184163
"""
185164
def __init__(self, target, feature_name, error_curve):
@@ -198,14 +177,10 @@ def __init__(self, target, feature_name, error_curve):
198177

199178

200179
def get_overall_metrics_df(self):
201-
"""Class methods are similar to regular functions.
202-
203-
Args:
204-
param1: The first parameter.
205-
param2: The second parameter.
180+
"""Exposes the overall (i.e. aggregated across all timestamps) estimates of the generalisation error.
206181
207182
Returns:
208-
True if successful, False otherwise.
183+
A copy of the results as a pandas dataframe.
209184
210185
"""
211186
df = self.error_curve.get_overall_metrics_as_dataframe()
@@ -219,14 +194,14 @@ def get_overall_metrics_df(self):
219194

220195

221196
def get_per_timestamp_metrics_df(self):
222-
"""Class methods are similar to regular functions.
197+
"""Exposes the per-timestamp estimates of the generalisation error.
223198
224199
This method takes no arguments.
227202
228203
Returns:
229-
True if successful, False otherwise.
204+
A copy of the results as a pandas dataframe.
230205
231206
"""
232207
df = self.error_curve.get_per_timestamp_metrics_as_dataframe()
@@ -261,23 +236,19 @@ def __setstate__(self, state):
261236

262237

263238
class GeneralisationPerformanceEvaluator(LoggingHandler):
264-
"""The summary line for a class docstring should fit on one line.
265-
266-
If the class has public attributes, they may be documented here
267-
in an ``Attributes`` section and follow the same formatting as a
268-
function's ``Args`` section. Alternatively, attributes may be documented
269-
inline with the attribute's declaration (see __init__ method below).
270-
271-
Properties created with the ``@property`` decorator should be documented
272-
in the property's getter method.
273-
274-
Attributes:
275-
attr1 (str): Description of `attr1`.
276-
attr2 (:obj:`int`, optional): Description of `attr2`.
239+
"""Estimates the generalisation errors of multiple configured prediction strategies on the same dataset, for the same prediction timestamps, and on the exact same cross-validated splits.
277240
241+
The steps to take are: initialise this object with the common dataset and prediction times, then call `add_to_targets` to configure the prediction strategies to be used. Finally, call `evaluate`, which conducts the (long-running) evaluation procedure and returns the overall results upon completion. Results are also cached internally, and can be retrieved or charted using the helper functions.
278242
"""
279243
# The constructor takes in fields that are common to all targets: the data container + prediction times
280244
def __init__(self, data, prediction_times):
245+
"""Constructor.
246+
247+
Args:
248+
data: The common `MultiSeries` data container.
249+
prediction_times: The common collection of prediction times.
250+
251+
"""
281252
super(GeneralisationPerformanceEvaluator, self).__init__()
282253
self.data = data
283254
self.prediction_times = prediction_times
@@ -286,14 +257,17 @@ def __init__(self, data, prediction_times):
286257

287258
# You can call this method multiple times to build up a collection of targets, before evaluating them
288259
def add_to_targets(self, predictor_templates, combos_of_input_time_column, combos_of_input_value_colnames, combos_of_output_value_colnames):
289-
"""Class methods are similar to regular functions.
260+
"""Expand the arguments (using a Cartesian join) into a collection of `Target` objects, and cache those `Target` objects, ready to be used for the `evaluate` function.
261+
262+
The method can be called multiple times: when that happens, it will append to the collection of targets rather than overwrite it.
290263
264+
This is really just a shorthand way of initialising multiple `Target` objects and adding them to the `targets` attribute of this object.
265+
291266
Args:
292-
param1: The first parameter.
293-
param2: The second parameter.
294-
295-
Returns:
296-
True if successful, False otherwise.
267+
predictor_templates: A list of predictors that should be Cartesian-joined with the other lists of parameters to define multiple targets for evaluation.
268+
combos_of_input_time_column: A list of boolean values that should be Cartesian-joined with the other lists of parameters to define multiple targets for evaluation.
269+
combos_of_input_value_colnames: A list of string field names that should be Cartesian-joined with the other lists of parameters to define multiple targets for evaluation.
270+
combos_of_output_value_colnames: A list of string field names that should be Cartesian-joined with the other lists of parameters to define multiple targets for evaluation.
297271
298272
"""
299273
# Validation
@@ -314,14 +288,14 @@ def add_to_targets(self, predictor_templates, combos_of_input_time_column, combo
314288

315289

316290
def evaluate(self, series_splitter=None, chart_intermediate_results=False):
317-
"""Class methods are similar to regular functions.
291+
"""Run the evaluation. Unless a specific `series_splitter` override is provided, this will be a 5-fold cross validation. Depending on the size of the dataset, time to train predictors, number of targets and number of folds, this may take a long time!
318292
319293
Args:
320-
param1: The first parameter.
321-
param2: The second parameter.
294+
series_splitter: If supplied by the user, this object will be used instead of `sklearn.model_selection.KFold(n_splits=5)` to produce the folds used for the generalisation error estimation procedure.
295+
chart_intermediate_results: If set to True by the user, this method will produce charts after each fold.
322296
323297
Returns:
324-
True if successful, False otherwise.
298+
A pandas dataframe containing overall (i.e. aggregated over timestamps) estimated generalisation errors.
325299
326300
"""
327301
try:
@@ -396,15 +370,7 @@ def evaluate(self, series_splitter=None, chart_intermediate_results=False):
396370
raise ex1 # propagate
397371

398372
def calculate_second_pass(self):
399-
"""Class methods are similar to regular functions.
400-
401-
Args:
402-
param1: The first parameter.
403-
param2: The second parameter.
404-
405-
Returns:
406-
True if successful, False otherwise.
407-
373+
"""This is an internal method used by the `evaluate` function.
408374
"""
409375
generalisation_performances = []
410376
for key in self.dict_target_and_feature_name_to_list_of_intermediate_scoring_results:
@@ -425,14 +391,14 @@ def calculate_second_pass(self):
425391

426392

427393
def get_sorted_overall_results(self, feature_name, metric = 'rmse'):
428-
"""Class methods are similar to regular functions.
394+
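"""Returns a copy of the overall generalisation error results for the given output feature, sorted by the given metric.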
429395
430396
Args:
431-
param1: The first parameter.
432-
param2: The second parameter.
433-
397+
feature_name: Name of the output/prediction feature that we would like to see the performance on.
398+
metric: Type of error metric that we would like to chart; 'rmse' by default.
399+
434400
Returns:
435-
True if successful, False otherwise.
401+
A pandas dataframe containing a copy of the results.
436402
437403
"""
438404
df = self.generalisation_metrics_overall_df.copy()
@@ -444,14 +410,15 @@ def get_sorted_overall_results(self, feature_name, metric = 'rmse'):
444410

445411

446412
def get_best_n_overall_results(self, feature_name, best_n_results, metric = 'rmse'):
447-
"""Class methods are similar to regular functions.
413+
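"""Returns the overall generalisation error results of the `best_n_results` top-performing prediction strategies for the given output feature, ranked by the given metric.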
448414
449415
Args:
450-
param1: The first parameter.
451-
param2: The second parameter.
452-
416+
feature_name: Name of the output/prediction feature that we would like to see the performance on.
417+
metric: Type of error metric that we would like to chart; 'rmse' by default.
418+
best_n_results: Only the results of the `best_n_results` top-performing prediction strategies will be returned.
419+
453420
Returns:
454-
True if successful, False otherwise.
421+
A pandas dataframe containing a copy of the results.
455422
456423
"""
457424
df = self.get_sorted_overall_results(feature_name=feature_name, metric=metric)
@@ -461,15 +428,15 @@ def get_best_n_overall_results(self, feature_name, best_n_results, metric = 'rms
461428

462429

463430
def chart_overall_performance(self, feature_name, metric = 'rmse', best_n_results = None, stderr_bar_multiple = 1, figsize=None, func_update_description_strings=None, color_non_baseline='C0', color_baseline='C3', baseline_regular_expression='Baseline'):
464-
"""Class methods are similar to regular functions.
431+
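"""Charts the overall (i.e. aggregated across all timestamps) generalisation error estimate of each prediction strategy for the given output feature, with standard-error bars.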
465432
466433
Args:
467-
param1: The first parameter.
468-
param2: The second parameter.
469-
470-
Returns:
471-
True if successful, False otherwise.
472-
434+
feature_name: Name of the output/prediction feature that we would like to see the performance on.
435+
metric: Type of error metric that we would like to chart; 'rmse' by default.
436+
best_n_results: If an integer is supplied, only the results of the `best_n_results` top-performing prediction strategies will be charted; otherwise all will be charted (by default).
437+
stderr_bar_multiple: Error bars will be drawn for the given multiple of standard error (1 by default).
438+
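figsize: Size of the figure to draw; None by default.
color_non_baseline: Colour used for non-baseline prediction strategies ('C0' by default).
color_baseline: Colour used for baseline prediction strategies ('C3' by default).
baseline_regular_expression: Pattern that presumably identifies baseline strategies by their description ('Baseline' by default).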
439+
func_update_description_strings: If a function name is specified, this function will be called on each feature name and the results used in the display. By default, none is supplied.
473440
"""
474441
if best_n_results is None:
475442
title='Showing all results'
@@ -509,15 +476,15 @@ def chart_overall_performance(self, feature_name, metric = 'rmse', best_n_result
509476

510477

511478
def chart_per_timestamp_performance(self, feature_name, metric = 'rmse', best_n_overall_results = None, stderr_bar_multiple = 1, errorevery=1, func_update_description_strings=None):
512-
"""Class methods are similar to regular functions.
479+
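"""Charts the per-timestamp generalisation error estimates of each prediction strategy for the given output feature, with standard-error bars.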
513480
514481
Args:
515-
param1: The first parameter.
516-
param2: The second parameter.
517-
518-
Returns:
519-
True if successful, False otherwise.
520-
482+
feature_name: Name of the output/prediction feature that we would like to see the performance on.
483+
metric: Type of error metric that we would like to chart; 'rmse' by default.
484+
best_n_overall_results: If an integer is supplied, only the results of the `best_n_overall_results` top-performing prediction strategies will be charted; otherwise all will be charted (by default).
485+
stderr_bar_multiple: Error bars will be drawn for the given multiple of standard error (1 by default).
486+
errorevery: If a value is supplied, deterministically sample the error bars, displaying each `errorevery` error bar. This does not affect the number of points displayed, just the number of error bars.
487+
func_update_description_strings: If a function name is specified, this function will be called on each feature name and the results used in the display. By default, none is supplied.
521488
"""
522489
title='Showing all results'
523490
if best_n_overall_results is None:
@@ -558,15 +525,16 @@ def chart_per_timestamp_performance(self, feature_name, metric = 'rmse', best_n_
558525
ax.legend(bbox_to_anchor=(0, 0, 1.7, 1), loc='right', prop={'size':8})
559526
ax.set_ylabel(metric + ' +/- ' + str(stderr_bar_multiple) + ' S.E')
560527

528+
561529
def get_intermediate_scoring_results(self, tgt_friendly_key, scoring_feature_name):
562-
"""Class methods are similar to regular functions.
530+
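"""Retrieves the cached intermediate (per-fold) scoring results for the given target and scoring feature. Logs a warning if no matching results are found.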
563531
564532
Args:
565533
tgt_friendly_key: Friendly key of the target of interest, i.e. one of the keys returned by `get_friendly_tgts_keys`.
566534
scoring_feature_name: Name of the scored output feature whose intermediate results should be retrieved.
567535
568536
Returns:
569-
True if successful, False otherwise.
537+
A list (with the same length as the number of folds) containing fold-specific results.
570538
571539
"""
572540
scoring_tgt_features = list(self.dict_target_and_feature_name_to_list_of_intermediate_scoring_results.keys())
@@ -582,15 +550,16 @@ def get_intermediate_scoring_results(self, tgt_friendly_key, scoring_feature_nam
582550
self.warning('No matching set of intermediate scoring results found')
583551
return res
584552

553+
585554
def get_intermediate_tuning_metrics(self, tgt_friendly_key):
586-
"""Class methods are similar to regular functions.
555+
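"""Retrieves the cached intermediate (per-fold) tuning metrics for the given target. Logs a warning if no matching metrics are found.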
587556
588557
Args:
589558
tgt_friendly_key: Friendly key of the target of interest, i.e. one of the keys returned by `get_friendly_tgts_keys`.
591560
592561
Returns:
593-
True if successful, False otherwise.
562+
A list (with the same length as the number of folds) containing fold-specific results.
594563
595564
"""
596565
tuning_tgts = list(self.dict_target_to_list_of_tuning_metrics.keys())
@@ -605,15 +574,16 @@ def get_intermediate_tuning_metrics(self, tgt_friendly_key):
605574
self.warning('No matching set of intermediate tuning metrics found')
606575
return res
607576

577+
608578
def get_friendly_tgts_keys(self):
609-
"""Class methods are similar to regular functions.
579+
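"""Lists the friendly keys of all targets for which intermediate scoring results and intermediate tuning metrics are available.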
610580
611581
This method takes no arguments.
614584
615585
Returns:
616-
True if successful, False otherwise.
586+
A tuple of lists of available keys for intermediate scoring results and intermediate tuning metrics.
617587
618588
"""
619589
set_for_scoring_results = set([ tgt.get_friendly_key() for (tgt, scoring_field) in self.dict_target_and_feature_name_to_list_of_intermediate_scoring_results ])
@@ -622,16 +592,12 @@ def get_friendly_tgts_keys(self):
622592
sorted_list_for_tuning_metrics = sorted(set_for_tuning_metrics)
623593
return (sorted_list_for_scoring_results, sorted_list_for_tuning_metrics)
624594

595+
625596
def to_csv(self, parent_dirpath):
626-
"""Class methods are similar to regular functions.
597+
"""Write out top-level and intermediate results as CSV files, under a fixed directory structure.
627598
628599
Args:
629-
param1: The first parameter.
630-
param2: The second parameter.
631-
632-
Returns:
633-
True if successful, False otherwise.
634-
600+
parent_dirpath: Path to the directory in which multiple subdirectories and CSV files will be created. If this parent directory does not exist, it will be created.
635601
"""
636602
evaluator=self
637603

0 commit comments

Comments
 (0)