diff --git a/IRIS_metadata.txt b/EarthScope_metadata.txt similarity index 100% rename from IRIS_metadata.txt rename to EarthScope_metadata.txt diff --git a/QuARG.py b/QuARG.py index 2d60b1d..632fa81 100755 --- a/QuARG.py +++ b/QuARG.py @@ -21,7 +21,7 @@ """ -version = "1.1.1" +version = "1.2.0" print("QuARG version %s" % version) # TODO: Need to include MS Gothic.ttf when packaging the scripts @@ -68,7 +68,6 @@ import os import datetime -import time import shutil # used to remove directories import webbrowser import pandas as pd @@ -82,11 +81,15 @@ import urllib.request import urllib.error import requests # used for getting empty transfer_function returns - import reportUtils Config.set("input", "mouse", "mouse,disable_multitouch") +# Explicit adapters and converters for datetime +sqlite3.register_adapter(datetime.datetime, lambda dt: dt.isoformat(" ")) +sqlite3.register_converter( + "timestamp", lambda s: datetime.datetime.fromisoformat(s.decode()) +) # PREFERENCE FILE TODOS # @@ -258,7 +261,7 @@ def get_default_dates(self): if not MainScreen.start: self.start = str(lastMonthStart) - if not MainLScreen.end: + if not MainScreen.end: self.end = str(first) def set_default_start(self): @@ -441,8 +444,6 @@ def load_file(self, path, filename): self.find_file.text = os.path.basename(filename[0]) self.examine_file.text = os.path.basename(filename[0]) - # self.find_file.text = filename[0] - # self.examine_file.text = filename[0] ExamineIssuesScreen.issueFile = self.examine_file.text except Exception as e: self.warning_popup("WARNING: %s" % e) @@ -464,7 +465,6 @@ def load_csv(self, path, filename): self.generate_directory.text = file_directory self.ids.csv_id.text = os.path.basename(filename[0]) - # self.ids.csv_id.text = filename[0] except Exception as e: self.warning_popup("WARNING: %s" % e) self.dismiss_popup() @@ -521,14 +521,14 @@ def do_find(self): if not os.path.isfile(masterDict["metrics_file"]): self.warning_popup( - "WARNING: Could not find file of IRIS metrics: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" + "WARNING: Could not find file of EarthScope metrics: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" % masterDict["metrics_file"] ) return if not os.path.isfile(masterDict["metadata_file"]): self.warning_popup( - "WARNING: Could not find file of IRIS metadata fields: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" + "WARNING: Could not find file of EarthScope metadata fields: %s\nIf connected to the internet, this file can be generated by entering the Thresholds Editor" % masterDict["metadata_file"] ) return @@ -607,14 +607,6 @@ def remove_dir(self): print("Previous copy removed, generating new Report") self.do_generate() - #### REMOVE IF NO ISSUES ARISE OUT OF ITS ABSENCE ### - # def date_checked(self, option, value): - # if value is True: - # self.query_options.append(option) - # else: - # self.query_options = [v for v in self.query_options if v != option] - ##################################################### - def get_ticket_inputs(self, *kwargs): main_screen = screen_manager.get_screen("mainScreen") @@ -738,7 +730,6 @@ def grab_tickets(self, *kwargs): try: # convert any cases of BH[EHZ] (for example) to lists for ind, row in allTickets.iterrows(): - # network(s) networks = reportUtils.expandCodes(row["network"]) allTickets.at[ind, "networks"] = networks @@ -758,80 +749,90 @@ def grab_tickets(self, *kwargs): # Now start subsetting 
subsettedTickets = pd.DataFrame(columns=allTickets.columns) - tmpTickets = pd.DataFrame() + # Subset for networks + frames_to_concat = [] # list to hold all DataFrames to concatenate + for net in masterDict["query_nets"].split(","): - if net == "" or net == "*" or net == "%" or net == "???": - tmpTickets = tmpTickets.append(allTickets) + if net in ["", "*", "%", "???"]: + frames_to_concat.append(allTickets) else: - tmpTickets = tmpTickets.append( - allTickets[ - allTickets["networks"].str.contains( - ",%s," % net.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["networks"].str.match(",\*,")] - ) - subsettedTickets = tmpTickets.copy() + filtered_all = allTickets[ + allTickets["networks"].str.contains( + ",%s," % net.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_all) + + filtered_subset = subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] + frames_to_concat.append(filtered_subset) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for stations + frames_to_concat = [] - tmpTickets = pd.DataFrame() for sta in masterDict["query_stas"].split(","): - if sta == "" or sta == "*" or sta == "%" or sta == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if sta in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.contains( - ",%s," % sta.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["stations"].str.match(",\*,")] - ) - subsettedTickets = tmpTickets.copy() + filtered_stas = subsettedTickets[ + subsettedTickets["stations"].str.contains( + ",%s," % sta.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_stas) + + star_stas = subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] + frames_to_concat.append(star_stas) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for locations + frames_to_concat = [] - tmpTickets = pd.DataFrame() for loc in masterDict["query_locs"].split(","): - if loc == "" or loc == "*" or loc == "%" or loc == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if loc in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.contains( - ",%s," % loc.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.match(",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_locs = subsettedTickets[ + subsettedTickets["locations"].str.contains( + ",%s," % loc.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_locs) + + star_locs = subsettedTickets[ + subsettedTickets["locations"].str.match(r",\*,") + ] + frames_to_concat.append(star_locs) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + # Subset for channels + frames_to_concat = [] - tmpTickets = pd.DataFrame() for chan in masterDict["query_chans"].split(","): - if chan == "" or chan == "*" or chan == "%" or chan == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if chan in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - 
subsettedTickets["channels"].str.contains( - ",%s," % chan.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["channels"].str.match(",\*,")] - ) + filtered_chans = subsettedTickets[ + subsettedTickets["channels"].str.contains( + ",%s," % chan.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_chans) + + star_chans = subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] + frames_to_concat.append(star_chans) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) - subsettedTickets = tmpTickets.copy() subsettedTickets.drop_duplicates(inplace=True) try: @@ -846,7 +847,7 @@ def grab_tickets(self, *kwargs): except: masterDict["tickets"] = "" - except: + except Exception as e: masterDict["tickets"] = "" def go_To_NewTickets(self, *kwargs): @@ -866,7 +867,6 @@ def generate_csv(self): with open(self.preference) as f: local_dict = locals() exec(compile(f.read(), self.preference, "exec"), globals(), local_dict) - try: if not self.generate_start == "": datetime.datetime.strptime(self.generate_start, "%Y-%m-%d") @@ -980,80 +980,81 @@ def generate_csv(self): # Now start subsetting subsettedTickets = pd.DataFrame(columns=allTickets.columns) - tmpTickets = pd.DataFrame() + frames_to_concat = [] for net in self.generate_network.split(","): - if net == "" or net == "*" or net == "%" or net == "???": - tmpTickets = tmpTickets.append(allTickets) + if net in ["", "*", "%", "???"]: + frames_to_concat.append(allTickets) else: - tmpTickets = tmpTickets.append( - allTickets[ - allTickets["networks"].str.contains( - ",%s," % net.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["networks"].str.match(",\*,")] - ) - subsettedTickets = tmpTickets.copy() + filtered_all = allTickets[ + allTickets["networks"].str.contains( + ",%s," % net.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_all) + + filtered_subset = subsettedTickets[ + subsettedTickets["networks"].str.match(r",\*,") + ] + frames_to_concat.append(filtered_subset) - tmpTickets = pd.DataFrame() + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + frames_to_concat = [] for sta in self.generate_station.split(","): - if sta == "" or sta == "*" or sta == "%" or sta == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if sta in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["stations"].str.contains( - ",%s," % sta.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["stations"].str.match(",\*,")] - ) - subsettedTickets = tmpTickets.copy() + filtered_stas = subsettedTickets[ + subsettedTickets["stations"].str.contains( + ",%s," % sta.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_stas) - tmpTickets = pd.DataFrame() + star_stas = subsettedTickets[ + subsettedTickets["stations"].str.match(r",\*,") + ] + frames_to_concat.append(star_stas) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + frames_to_concat = [] for loc in self.generate_location.split(","): - if loc == "" or loc == "*" or loc == "%" or loc == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if loc in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = 
tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.contains( - ",%s," % loc.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["locations"].str.match(",\*,") - ] - ) - subsettedTickets = tmpTickets.copy() + filtered_locs = subsettedTickets[ + subsettedTickets["locations"].str.contains( + ",%s," % loc.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_locs) - tmpTickets = pd.DataFrame() + star_locs = subsettedTickets[ + subsettedTickets["locations"].str.match(r",\*,") + ] + frames_to_concat.append(star_locs) + + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) + + frames_to_concat = [] for chan in self.generate_channel.split(","): - if chan == "" or chan == "*" or chan == "%" or chan == "???": - tmpTickets = tmpTickets.append(subsettedTickets) + if chan in ["", "*", "%", "???"]: + frames_to_concat.append(subsettedTickets) else: - tmpTickets = tmpTickets.append( - subsettedTickets[ - subsettedTickets["channels"].str.contains( - ",%s," % chan.replace("?", ".?").replace("*", ".*") - ) - == True - ] - ) - tmpTickets = tmpTickets.append( - subsettedTickets[subsettedTickets["channels"].str.match(",\*,")] - ) + filtered_chans = subsettedTickets[ + subsettedTickets["channels"].str.contains( + ",%s," % chan.replace("?", ".?").replace("*", ".*") + ) + ] + frames_to_concat.append(filtered_chans) + + star_chans = subsettedTickets[ + subsettedTickets["channels"].str.match(r",\*,") + ] + frames_to_concat.append(star_chans) - subsettedTickets = tmpTickets.copy() + subsettedTickets = pd.concat(frames_to_concat, ignore_index=True) subsettedTickets.drop_duplicates(inplace=True) try: @@ -1197,24 +1198,21 @@ def generate_report(self): local_dict, ) YYYYmmdd = "".join(local_dict["startday"].split("-")) - # self.startDate.text = local_dict["startday"] except: self.warning_popup( "WARNING: Tried to get Start Date from Preference file(since it was left empty),\nbut failed to read Preference File" ) return + if not self.generate_network == "": network = self.generate_network else: network = local_dict["network"] # The network report should be put into the same directory as the csv file even if that differs from the preference)files - # dirToUse = os.path.dirname(self.csv) dirToUse = self.directory print(dirToUse) - # self.report_filename = dirToUse + '/' + local_dict['network'] +'_Netops_Report_' + month self.report_filename = network + "_Netops_Report_" + YYYYmmdd - # self.zipDir = local_dict["directory"] + self.report_filename self.zipDir = dirToUse + "/" + self.report_filename self.report_fullPath = self.zipDir + "/" + self.report_filename + ".html" @@ -1254,11 +1252,9 @@ def generate_report(self): return # The network report should be put into the same directory as the csv file even if that differs from the preference)files - # dirToUse = os.path.dirname(self.csv) dirToUse = self.directory self.report_filename = network + "_Netops_Report_" + YYYYmmdd - # self.zipDir = local_dict["directory"] + self.report_filename self.zipDir = dirToUse + "/" + self.report_filename self.report_fullPath = self.zipDir + "/" + self.report_filename + ".html" @@ -1608,11 +1604,6 @@ def help_text(self, whichOne): fields. [See detailed documentation for the format.] 
""" - # if whichOne == 12: - # helpText = ''' - # - # ''' - return helpText def open_detailed_documentation(self): @@ -1744,11 +1735,11 @@ def load_preference_file(self, preferenceFile): masterDict["preference_chanTypes"]["V"] ) - if masterDict["preference_metricSource"] == "IRIS": + if masterDict["preference_metricSource"] == "EarthScope": preferences_screen.metric_source_text.text = "" preferences_screen.metric_source_text.disabled = True preferences_screen.metric_browse_btn.disabled = True - preferences_screen.metric_source_btn.text = "IRIS" + preferences_screen.metric_source_btn.text = "EarthScope" else: preferences_screen.metric_source_text.text = masterDict[ "preference_metricSource" @@ -1759,11 +1750,11 @@ def load_preference_file(self, preferenceFile): preferences_screen.metric_source_text.disabled = False preferences_screen.metric_browse_btn.disabled = False - if masterDict["preference_metadataSource"] == "IRIS": + if masterDict["preference_metadataSource"] == "EarthScope": preferences_screen.metadata_source_text.text = "" preferences_screen.metadata_source_text.disabled = True preferences_screen.metadata_browse_btn.disabled = True - preferences_screen.metadata_source_btn.text = "IRIS" + preferences_screen.metadata_source_btn.text = "EarthScope" else: preferences_screen.metadata_source_text.text = masterDict[ "preference_metadataSource" @@ -1923,7 +1914,7 @@ def load_metric_file(self, path, filename): def deactivate_metric_source_text(self, *kwargs): preferences_screen = screen_manager.get_screen("preferencesScreen") - if preferences_screen.metric_source_btn.text == "IRIS": + if preferences_screen.metric_source_btn.text == "EarthScope": preferences_screen.metric_source_text.disabled = True preferences_screen.metric_browse_btn.disabled = True else: @@ -1932,7 +1923,7 @@ def deactivate_metric_source_text(self, *kwargs): def deactivate_metadata_source_text(self, *kwargs): preferences_screen = screen_manager.get_screen("preferencesScreen") - if preferences_screen.metadata_source_btn.text == "IRIS": + if preferences_screen.metadata_source_btn.text == "EarthScope": preferences_screen.metadata_source_text.disabled = True preferences_screen.metadata_browse_btn.disabled = True else: @@ -1940,12 +1931,6 @@ def deactivate_metadata_source_text(self, *kwargs): preferences_screen.metadata_browse_btn.disabled = False def go_to_thresholdGroups(self): - # if not masterDict['preference_file'] == "": - # try: - # masterDict['preference_groupsDict'] - # except: - # self.warning_popup("WARNING: Preference File has been selected but not loaded\n Either load the file") - ThresholdGroupsScreen.go_to_thresholdGroups(ThresholdGroupsScreen) def exit_confirmation(self, *kwargs): @@ -2049,7 +2034,6 @@ def save_preference_file(self): self.selected_instrumentGroups.append(masterDict["groupsDict"][x]) except: pass - # self.selected_instrumentGroups = list(set([masterDict['groupsDict'][x] for x in self.instrument_selectionIndices])) self.selected_thresholdGroups = list( set( @@ -2064,13 +2048,13 @@ def save_preference_file(self): "V": tuple(preferences_screen.pref_V.text.split(",")), } - if preferences_screen.metadata_source_btn.text == "IRIS": - self.metadataSource = "IRIS" + if preferences_screen.metadata_source_btn.text == "EarthScope": + self.metadataSource = "EarthScope" else: self.metadataSource = preferences_screen.metadata_source_text.text - if preferences_screen.metric_source_btn.text == "IRIS": - self.metricSource = "IRIS" + if preferences_screen.metric_source_btn.text == "EarthScope": + 
self.metricSource = "EarthScope" else: self.metricSource = preferences_screen.metric_source_text.text @@ -2238,7 +2222,7 @@ def do_writing(self, *kwargs): ) f.write( - "\n\n# Metric source: either 'IRIS' or the path to the local sqlite database file that ISPAQ generated\n" + "\n\n# Metric source: either 'EarthScope' or the path to the local sqlite database file that ISPAQ generated\n" ) f.write( "metricSource = '%s'\nmetadataSource = '%s'" @@ -2624,8 +2608,6 @@ def go_to_thresholdsLayout(self): my_thresholds = [{"text": x} for x in masterDict["threshold_names"]] thresholds_screen.threshold_list_rv.data = my_thresholds thresholds_screen.threshold_list_rv._layout_manager.select_node(0) - # selectable_nodes = thresholds_screen.threshold_list_rv.get_selectable_nodes() - # thresholds_screen.threshold_list_rv.select_node(selectable_nodes[0]) ## Threshold groups instrument_groups = list() @@ -2645,9 +2627,9 @@ def go_to_thresholdsLayout(self): thresholds_screen.threshold_group_rv._layout_manager.select_node(0) ## Metric names - # Try to get a list of metrics from service.iris.edu, but if fails + # Try to get a list of metrics from service.earthscope.org, but if fails # then just use the old list. - URL = "http://service.iris.edu/mustang/metrics/1/query?output=xml&nodata=404" + URL = "http://service.earthscope.org/mustang/metrics/1/query?output=xml&nodata=404" try: metrics = list() @@ -2662,7 +2644,7 @@ def go_to_thresholdsLayout(self): today = datetime.datetime.now() yesterday = today - datetime.timedelta(days=1) subURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=transfer_function&format=text&timewindow=%s,%s&nodata=404" + "http://service.earthscope.org/mustang/measurements/1/query?metric=transfer_function&format=text&timewindow=%s,%s&nodata=404" % ( yesterday.strftime("%Y-%m-%d"), today.strftime("%Y-%m-%d"), @@ -2701,7 +2683,7 @@ def go_to_thresholdsLayout(self): print("ERROR: %s" % e) ## Do the same for the metadata fields - URL = "http://service.iris.edu/fdsnws/station/1/query?net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text&includecomments=true&nodata=404" + URL = "http://service.earthscope.org/fdsnws/station/1/query?net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text&includecomments=true&nodata=404" try: metadata = pd.read_csv(URL, nrows=1, sep="|").columns @@ -2798,7 +2780,6 @@ def new_threshold_popup(self): additionContent.bind(minimum_height=additionContent.setter("height")) nameLabel = Label(text="Threshold Name: ", size_hint_x=0.66) - # self.thresholdTextInput = TextInput(id='thresholdNameID') self.thresholdTextInput = TextInput() self.selectExistingThreshold = DropDown() @@ -2877,16 +2858,13 @@ def all_thresholds_popup(self, *kwargs): thresholdsDict = sorted(masterDict["thresholdsDict"].keys()) displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -2953,7 +2931,6 @@ def new_group_popup(self): col1.add_widget(Label(text="Channels: ")) col1.add_widget(Label()) - # self.groupTextInput = TextInput(id='groupNameID') self.groupTextInput = TextInput() self.netTextInput = TextInput(write_tab=False) self.staTextInput = TextInput(write_tab=False) @@ -3066,7 +3043,6 @@ def 
new_threshold_group_popup(self): additionContent.bind(minimum_height=additionContent.setter("height")) nameLabel = Label(text="Group Name: ", size_hint_x=0.66) - # self.thresholdGroupTextInput = TextInput(id='thresholdGroupID') self.thresholdGroupTextInput = TextInput() self.selectExistingThresholdGroup = DropDown() @@ -3399,7 +3375,9 @@ def what_type_of_field(field): field_passes = metric in masterDict["metrics"] if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") return @@ -3462,7 +3440,9 @@ def what_type_of_field(field): field_passes = metric in masterDict["metrics"] if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") print("WARNING: Field must be a MUSTANG metric") @@ -3530,7 +3510,7 @@ def what_type_of_field(field): if not metric == "": if is_metadata: self.warning_popup( - "WARNING: Field must be an IRIS metadata field" + "WARNING: Field must be an EarthScope metadata field" ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") @@ -3554,7 +3534,6 @@ def what_type_of_field(field): except Exception as e: pass - # print("WARNING: %s" % e) ensure_threshold() prevDef = get_existing_defintion() @@ -3582,7 +3561,7 @@ def what_type_of_field(field): met2_type = what_type_of_field(met2.split("[")[0]) if met1_type != met2_type: self.warning_popup( - "WARNING: Cannot compare MUSTANG metric with IRIS Metadata field" + "WARNING: Cannot compare MUSTANG metric with EarthScope Metadata field" ) return newPart = "%s / %s " % (met1, met2) @@ -3719,7 +3698,7 @@ def what_type_of_field(field): if not metric == "": if is_metadata: self.warning_popup( - "WARNING: Field must be an IRIS metadata field" + "WARNING: Field must be an EarthScope metadata field" ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") @@ -3743,7 +3722,6 @@ def what_type_of_field(field): except Exception as e: pass - # print("WARNING: %s" % e) ensure_threshold() prevDef = get_existing_defintion() @@ -3771,7 +3749,7 @@ def what_type_of_field(field): met2_type = what_type_of_field(met2.split("[")[0]) if met1_type != met2_type: self.warning_popup( - "WARNING: Cannot compare MUSTANG metric with IRIS Metadata field" + "WARNING: Cannot compare MUSTANG metric with EarthScope Metadata field" ) return # newPart = "%s / %s " %(met1, met2) @@ -3805,9 +3783,6 @@ def what_type_of_field(field): ) return - # if chanToDo != "": - # metric = "%s[%s]" %(metric, chanToDo) - if chanToDo != "": if len(indices) == 0: metric = "%s[%s]" % (metric, chanToDo) @@ -3902,7 +3877,6 @@ def what_type_of_field(field): newPart = "abs(" + metric + ") :: compare" else: newPart = metric + " :: compare" - # newPart = metric + ' :: compare' # Everything else (ie, 'normal') else: @@ -3916,7 +3890,9 @@ def what_type_of_field(field): if not field_passes: if is_metadata: - self.warning_popup("WARNING: Field must be an IRIS metadata field") + self.warning_popup( + "WARNING: Field must be an EarthScope metadata field" + ) else: self.warning_popup("WARNING: Field must be a MUSTANG metric") return @@ -3960,7 +3936,6 @@ def what_type_of_field(field): newPart = "abs(" + metric + ") " else: newPart = metric + " " - # newPart = metric + " " if neq == 
"down": newPart = ( newPart + "!" @@ -4087,7 +4062,7 @@ def write_definition_to_file(self): .strip() .split("[")[0] ) - if ~field3.isnumeric(): + if not field3.isnumeric(): if field3 not in metricsInThresh: metricsInThresh.append(field3) @@ -4098,8 +4073,8 @@ def write_definition_to_file(self): print(metricThreshDict, file=f) self.confirmation_popup() - except: - self.warning_popup("Error while saving Thresholds") + except Exception as e: + self.warning_popup("Error while saving Thresholds: {e}") def confirmation_popup(self): popupContent = BoxLayout(orientation="vertical", spacing=10) @@ -4268,9 +4243,7 @@ def update_data(self): examine_screen.end_day.text = main_screen.endDate.text def get_examine_inputs(self): - # if self.ids.examine_start_id.text: self.startday = self.ids.examine_start_id.text - # if self.ids.examine_end_id.text: self.endday = self.ids.examine_end_id.text self.metrics = self.ids.metrics_id.text self.threshold = self.ids.threshold_id.text @@ -4305,9 +4278,6 @@ def exit_confirmation(self): ) masterDict["_popup"].open() - # def create_ticket(self): - # pass - def see_databrowser(self): webbrowser.open("http://www.iris.edu/mustang/databrowser/", new=2) @@ -4335,7 +4305,7 @@ def see_waveforms(self): os.mkdir(image_dir) # Grab all of the pngs and save in the directory - imageURL = "http://service.iris.edu/irisws/timeseries/1/query?" + imageURL = "http://service.earthscope.org/irisws/timeseries/1/query?" if len(self.startday.split("T")) == 1: starttime = self.startday + "T00:00:00" @@ -4363,7 +4333,6 @@ def see_waveforms(self): cha = cha.strip() imageURL_cha = imageURL_loc + "&cha=" + cha - # imageURL_complete = imageURL_cha + "&starttime=" + self.startday + "&endtime=" + self.endday + "&helicordermode=false&format=png" imageURL_complete = ( imageURL_cha + "&starttime=" @@ -4420,7 +4389,7 @@ def see_metrics(self): return metricURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=" + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + self.metrics ) @@ -4488,7 +4457,7 @@ def see_metric_timeseries(self): + ".png" ) metricURL = ( - "http://service.iris.edu/mustang/measurements/1/query?metric=" + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + metric ) @@ -4628,7 +4597,7 @@ def see_metric_timeseries(self): def see_pdfs(self): self.get_examine_inputs() - pdfURL = "http://service.iris.edu/mustang/noise-pdf-browser/1/gallery?" + pdfURL = "http://service.earthscope.org/mustang/noise-pdf-browser/1/gallery?" if self.network == "": self.warning_popup("WARNING: Network field required") @@ -4659,7 +4628,9 @@ def see_spectrograms(self): self.warning_popup("WARNING: Network field required") return - spectURL = "http://service.iris.edu/mustang/noise-pdf-browser/1/spectrogram?" + spectURL = ( + "http://service.earthscope.org/mustang/noise-pdf-browser/1/spectrogram?" 
+ ) if self.network: spectURL = spectURL + "&net=" + self.network @@ -4705,7 +4676,7 @@ def see_nmt(self): return nmtURL = ( - "http://service.iris.edu/mustang/noise-mode-timeseries/1/query?net=" + "http://service.earthscope.org/mustang/noise-mode-timeseries/1/query?net=" + self.network + "&sta=" + self.station @@ -4744,7 +4715,6 @@ def see_goat(self): self.warning_popup( "WARNING: Channel code required for GOAT (can be wildcarded)" ) - # print("Channel code required for GOAT (can be wildcarded)") return if not self.startday or not self.endday: self.warning_popup("WARNING: Start and End times required") @@ -4818,7 +4788,7 @@ def see_stations(self): self.warning_popup("WARNING: Network field required") return - stationURL = "http://service.iris.edu/fdsnws/station/1/query?" + stationURL = "http://service.earthscope.org/fdsnws/station/1/query?" if self.network: stationURL = stationURL + "net=" + self.network @@ -5168,8 +5138,8 @@ def add_notes(self): print("No issues loaded yet") return - self.df["NOTES"].ix[indToChange] = self.notes - ExamineIssuesScreen.currentDF["NOTES"].ix[indToChange] = self.notes + self.df.loc[indToChange, "NOTES"] = self.notes + ExamineIssuesScreen.currentDF.loc[indToChange, "NOTES"] = self.notes self.update_data() def see_notes(self): @@ -5177,7 +5147,7 @@ def see_notes(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - currentNotes = self.currentDF.ix[indToChange] + currentNotes = self.currentDF.loc[indToChange] except: print("No issues loaded yet") return @@ -5262,16 +5232,13 @@ def thresholds_popup_orig(self, *kwargs): displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -5325,16 +5292,13 @@ def thresholds_popup(self, *kwargs): displayList = [] for thresholdName in thresholdsDict: - # print(thresholdName) displayList.append(thresholdName) - # f.write("%s \t" % thresholdName); for instrumentGroup in masterDict["thresholdsDict"][thresholdName].keys(): defStr = " && ".join( masterDict["thresholdsDict"][thresholdName][instrumentGroup] ) - # print(" %s - %s" % (instrumentGroup,defStr)); displayList.append(" %s - %s" % (instrumentGroup, defStr)) displayList.append("") @@ -5433,8 +5397,8 @@ def mark_as_todo(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "TODO" - self.currentDF["STATE"].ix[indToChange] = "TODO" + self.df.loc[indToChange, "STATE"] = "TODO" + self.currentDF.loc[indToChange, "STATE"] = "TODO" self.update_data() except: print("No issues loaded yet") @@ -5445,8 +5409,8 @@ def mark_as_new(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "New" - self.currentDF["STATE"].ix[indToChange] = "New" + self.df.loc[indToChange, "STATE"] = "New" + self.currentDF.loc[indToChange, "STATE"] = "New" self.update_data() except: print("No issues loaded yet") @@ -5457,8 +5421,8 @@ def mark_as_closed(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Closed" - 
self.currentDF["STATE"].ix[indToChange] = "Closed" + self.df.loc[indToChange, "STATE"] = "Closed" + self.currentDF.loc[indToChange, "STATE"] = "Closed" self.update_data() except: print("No issues loaded yet") @@ -5469,8 +5433,8 @@ def mark_as_existing(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Existing" - self.currentDF["STATE"].ix[indToChange] = "Existing" + self.df.loc[indToChange, "STATE"] = "Existing" + self.currentDF.loc[indToChange, "STATE"] = "Existing" self.update_data() except: print("No issues loaded yet") @@ -5481,8 +5445,8 @@ def mark_as_support(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "Support" - self.currentDF["STATE"].ix[indToChange] = "Support" + self.df.loc[indToChange, "STATE"] = "Support" + self.currentDF.loc[indToChange, "STATE"] = "Support" self.update_data() except: print("No issues loaded yet") @@ -5493,8 +5457,8 @@ def mark_as_no_ticket(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "No Ticket" - self.currentDF["STATE"].ix[indToChange] = "No Ticket" + self.df.loc[indToChange, "STATE"] = "No Ticket" + self.currentDF.loc[indToChange, "STATE"] = "No Ticket" self.update_data() except: print("No issues loaded yet") @@ -5505,8 +5469,8 @@ def mark_as_false_positive(self): indToChange = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - self.df["STATE"].ix[indToChange] = "False Pos" - self.currentDF["STATE"].ix[indToChange] = "False Pos" + self.df.loc[indToChange, "STATE"] = "False Pos" + self.currentDF.loc[indToChange, "STATE"] = "False Pos" self.update_data() except: print("No issues loaded yet") @@ -5520,10 +5484,10 @@ def get_selected_values(self): selectedInd = list( set(self.currentDF.iloc[self.selectionIndices].index.values.tolist()) ) - NewTicketScreen.targets = self.df["SNCL"].ix[selectedInd].values.tolist() - NewTicketScreen.descriptions = ( - self.df["NOTES"].ix[selectedInd].values.tolist() - ) + NewTicketScreen.targets = self.df.loc[selectedInd, "SNCL"].values.tolist() + NewTicketScreen.descriptions = self.df.loc[ + selectedInd, "NOTES" + ].values.tolist() except: print("No issues loaded yet") NewTicketScreen.targets = [] @@ -5748,7 +5712,6 @@ def check_image(self, image, state): if image.text not in self.selectedImages: self.selectedImages.append(image.text) self.captionLabel.text = masterDict["imageList"][self.selectedImages[0]] - # self.captionInput.text = masterDict['imageList'][self.selectedImages[0]] else: self.selectedImages = [v for v in self.selectedImages if v != image.text] @@ -5761,15 +5724,12 @@ def open_image(self, *kwargs): except Exception as e: self.warning_popup("WARNING: Unable to open %s: %s" % (file, e)) - # print("WARNING: Unable to open %s: %s" %(file, e)) - def remove_images(self, *kwargs): for file in self.selectedImages: try: del masterDict["imageList"][file] except KeyError as e: self.warning_popup("WARNING: File not found in list - %s" % e) - # print("WARNING: File not found in list - %s" % e) self.selectedImages = [v for v in self.selectedImages if v != file] @@ -5830,8 +5790,6 @@ def link_popup(self, *kwargs): if len(masterDict["linkList"]) > 0: link_id = 0 for row in masterDict["linkList"]: - # b = ToggleButton(text = row, size_hint_y = None, halign = 'left', id=str(link_id), - # background_color = (.5,.5,.5,1), 
group='imageButtons') b = ToggleButton( text=row, size_hint_y=None, @@ -5868,7 +5826,6 @@ def link_popup(self, *kwargs): upperLayout.add_widget(actionButtons) captionBox = BoxLayout(orientation="horizontal", size_hint_y=0.25) - # self.linkInput = TextInput(text="", id='linkID') self.linkInput = TextInput(text="") self.linkInput.bind() captionBox.add_widget(self.linkInput) @@ -5896,7 +5853,7 @@ def check_link(self, link, state): if link.text not in self.selectedLinks: self.selectedLinks.append(link.text) else: - self.selectedLinks = [v for v in self.selectedLinks if v != linkn.text] + self.selectedLinks = [v for v in self.selectedLinks if v != link.text] def remove_link(self, *kwargs): for file in self.selectedLinks: @@ -6015,7 +5972,9 @@ def create_connection(self, db_file): :return: Connection object or None """ try: - conn = sqlite3.connect(db_file) + conn = sqlite3.connect( + db_file, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES + ) return conn except Error as e: print("WARNING: %s" % e) @@ -6039,14 +5998,14 @@ def create_table(self, conn): location text NOT NULL, channel text NOT NULL, description text, - start_date datetime, - end_date datetime, + start_date TIMESTAMP, + end_date TIMESTAMP, status text NOT NULL, thresholds text NOT NULL, images text, caption text, links text, - updated datetime + updated TIMESTAMP ); """ try: @@ -6417,10 +6376,6 @@ def load_ticket_information(self): self.selectedThresholds.append(threshold) def return_to_ticketList(self): - - # IF you want to return to the popup, then uncomment these (right now the popup does not update properly, so have it disabled) - # masterDict["ticket_instance"].disabled = False # reenables the button that had been clicked and disabled - # masterDict["ticketList_popup"].open() self.clear_ticket_fields() def exit_confirmation(self): @@ -6650,8 +6605,6 @@ def link_popup(self, *kwargs): if len(masterDict["linkList"]) > 0: link_id = 0 for row in masterDict["linkList"]: - # b = ToggleButton(text = row, size_hint_y = None, halign = 'left', id=str(link_id), - # background_color = (.5,.5,.5,1), group='imageButtons') b = ToggleButton( text=row, size_hint_y=None, @@ -6663,7 +6616,7 @@ def link_popup(self, *kwargs): image_layout.add_widget(b) link_id += 1 - # + # The notes (in a box layout) go into a ScrollView scrl = ScrollView(size_hint_y=4) scrl.add_widget(image_layout) @@ -6688,7 +6641,6 @@ def link_popup(self, *kwargs): upperLayout.add_widget(actionButtons) captionBox = BoxLayout(orientation="horizontal", size_hint_y=0.25) - # self.linkInput = TextInput(text="", id='linkID') self.linkInput = TextInput(text="") self.linkInput.bind() captionBox.add_widget(self.linkInput) @@ -6716,7 +6668,7 @@ def check_link(self, link, state): if link.text not in self.selectedLinks: self.selectedLinks.append(link.text) else: - self.selectedLinks = [v for v in self.selectedLinks if v != linkn.text] + self.selectedLinks = [v for v in self.selectedLinks if v != link.text] def remove_link(self, *kwargs): for file in self.selectedLinks: @@ -6987,41 +6939,44 @@ def go_to_selectedTickets(self): ) self.theseTickets = masterDict["tickets"] - self.theseTickets["target"] = ( - self.theseTickets["network"] - + "." - + self.theseTickets["station"] - + "." - + self.theseTickets["location"] - + "." 
- + self.theseTickets["channel"] - ) - self.theseTickets = self.theseTickets.sort_values( - by=[masterDict["ticket_order"]] - ).reset_index(drop=True) - - ticketList = list() - - for id, row in self.theseTickets.iterrows(): - row_sub = [ - str(row["id"]), - row["target"], - row["start_date"], - row["end_date"], - row["subject"], - row["status"], - row["tracker"], - row["updated"], - ] - row_sub = [ - row_sub[y].ljust(spacing_dict[y])[0 : spacing_dict[y]] - for y in range(len(row_sub)) - ] - label = " ".join(row_sub) - ticketList.append({"text": label}) + if type(self.theseTickets) == str: + tickets_screen.ticket_list_rv.data = "" + else: + self.theseTickets["target"] = ( + self.theseTickets["network"] + + "." + + self.theseTickets["station"] + + "." + + self.theseTickets["location"] + + "." + + self.theseTickets["channel"] + ) + + self.theseTickets = self.theseTickets.sort_values( + by=[masterDict["ticket_order"]] + ).reset_index(drop=True) + + ticketList = list() + for id, row in self.theseTickets.iterrows(): + row_sub = [ + str(row["id"]), + row["target"], + row["start_date"], + row["end_date"], + row["subject"], + row["status"], + row["tracker"], + row["updated"], + ] + row_sub = [ + row_sub[y].ljust(spacing_dict[y])[0 : spacing_dict[y]] + for y in range(len(row_sub)) + ] + label = " ".join(row_sub) + ticketList.append({"text": label}) - tickets_screen.ticket_list_rv.data = ticketList + tickets_screen.ticket_list_rv.data = ticketList except Exception as e: print("Warning: could not retrieve tickets - %s" % e) tickets_screen.ticket_list_rv.data = "" @@ -7772,7 +7727,7 @@ def apply_selection(self, rv, index, is_selected): masterDict["linkList"] = list() masterDict["thresholds_file"] = "./thresholds.txt" masterDict["metrics_file"] = "./MUSTANG_metrics.txt" -masterDict["metadata_file"] = "./IRIS_metadata.txt" +masterDict["metadata_file"] = "./EarthScope_metadata.txt" databaseDir = "./db/" databaseName = "quargTickets.db" @@ -7819,7 +7774,7 @@ def build(self): Window.clearcolor = (1, 1, 1, 1) Window.size = (1377, 700) - self.title = "IRIS Quality Assurance Report Generator" + self.title = "EarthScope Quality Assurance Report Generator" screen_manager.add_widget(MainScreen(name="mainScreen")) screen_manager.add_widget(PreferencesScreen(name="preferencesScreen")) screen_manager.add_widget(ThresholdGroupsScreen(name="thresholdGroupsScreen")) diff --git a/README.md b/README.md index 33a9402..e472798 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ For detailed documentation, check out [EarthScope.github.io/quarg/](https://EarthScope.github.io/quarg/DOCUMENTATION.html)
-**QuARG**, the Quality Assurance Report Generator, is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope’s database of [MUSTANG](http://service.iris.edu/mustang/) data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems. +**QuARG**, the Quality Assurance Report Generator, is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope’s database of [MUSTANG](http://service.earthscope.org/mustang/) data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems. Over the years that IRIS produced Quality Assurance Reports, we refined the process of generating a report into four primary steps: @@ -97,16 +97,18 @@ Instructions for Linux or macOS (Intel chip) ``` cd quarg conda update conda -conda create --name quarg -c conda-forge --file quarg-conda-install.txt +conda create --name quarg -c conda-forge python=3.12 conda activate quarg +conda install -c conda-forge --file quarg-conda-install.txt ``` Instructions for macOS (Apple M1 or M2 chip): ``` cd quarg conda update conda -CONDA_SUBDIR=osx-64 conda create --name quarg -c conda-forge --file quarg-conda-install.txt +CONDA_SUBDIR=osx-64 conda create --name quarg -c conda-forge python=3.12 conda activate quarg +CONDA_SUBDIR=osx-64 conda install -c conda-forge --file quarg-conda-install.txt ``` See what is installed in our (quarg) environment with: diff --git a/docs/DOCUMENTATION.html b/docs/DOCUMENTATION.html index d164386..bf00b6c 100644 --- a/docs/DOCUMENTATION.html +++ b/docs/DOCUMENTATION.html @@ -363,20 +363,20 @@

QuARG - Quality Assurance Report Generator

-

Laura Keyson, IRIS DMC

+

Laura Keyson, EarthScope

-
-

Questions or comments can be directed to the IRIS DMC Quality Assurance Group at dmc_qa@iris.washington.edu.

+
+

Questions or comments can be directed to the EarthScope Quality Assurance Group at qa-qc@earthscope.org.


-

QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize IRIS’s database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

+

QuARG is a Python client that allows network operators to generate quality assurance (QA) reports from start to finish. These reports utilize EarthScope's database of MUSTANG data quality metrics to find and highlight potential issues in the data, reducing the amount of time that analysts need to spend scanning the data for problems.

Users have the ability to customize QuARG to adapt to their particular network. Some features that can be personalized:

@@ -793,7 +793,7 @@

Directories and Filenames

Targets

-

This section defines which targets (network, station, channel, location) will be used when retrieving quality assurance (likely from MUSTANG, but could also be ISPAQ) metrics and metadata, and therefore which channels will be included in the issue list. It also defines the category of instrumentation used in the report.

+

This section defines which targets (network, station, channel, location) will be used when retrieving quality assurance (likely from MUSTANG, but could also be ISPAQ) metrics and metadata, and therefore which channels will be included in the issue list. It also defines the category of instrumentation used in the report.
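    As a hedged illustration, the target portion of a preference file might look like the sketch below. The preference file is executed as Python (findIssues.py, later in this diff, reads exactly these variable names), but the specific codes and the instrumentation entry are placeholders:

```python
# Hypothetical excerpt of a QuARG preference file; it is exec'd as Python by findIssues.py.
# Variable names mirror the ones read there; the codes themselves are placeholders.
network = "IU"                # network code(s); comma-separated lists and wildcards allowed
station = "ANMO,COLA"         # station code(s)
location = "00"               # location code(s)
channels = "BH?,HH?"          # channel code(s)
instruments = ["Broadband"]   # instrumentation categories, e.g. Broadband / Short Period / Strong Motion
```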

@@ -849,7 +849,7 @@

Report Header Information

@@ -886,7 +886,7 @@

Report Frequency

  • Directory: The directory for Weekly reports is set up like YYYYMMDD. Again, if run on August 13, 2020, the new subdirectory would be 20200803/.
  • -
  • Daily: Daily will run for just a single day. This option resolves to ‘Two days ago’ to give time for MUSTANG metrics to have calculated after IRIS recieves the data. +
  • Daily: Daily will run for just a single day. This option resolves to ‘Two days ago’ to give MUSTANG metrics time to be calculated after EarthScope receives the data (see the sketch below).
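    A rough sketch of how that ‘two days ago’ window could be resolved (QuARG's own logic lives in reportUtils.calculate_dates, which is not shown in this diff, so the names below are illustrative only):

```python
import datetime

# Illustrative only: resolve a Daily report window to the day two days ago,
# giving MUSTANG metrics time to be computed after EarthScope receives the data.
today = datetime.date.today()
target_day = today - datetime.timedelta(days=2)
start = target_day.strftime("%Y-%m-%d")                               # report start
end = (target_day + datetime.timedelta(days=1)).strftime("%Y-%m-%d")  # report end
print(start, end)
```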

    In More Detail:

    Metrics
    -At the top is a selectable list of all of the MUSTANG metrics. This list comes from the IRIS MUSTANG webservices and is refreshed whenever QuARG is connected to the internet so it should stay up to date as we add new metrics. When a metric is selected, it will fill in the text box labeled Field below. While you can simply type the metric you are interested in Field box directly, the list makes it easy to know what metrics are availble to use.

    +At the top is a selectable list of all of the MUSTANG metrics. This list comes from the EarthScope MUSTANG web services and is refreshed whenever QuARG is connected to the internet, so it should stay up to date as we add new metrics. When a metric is selected, it will fill in the text box labeled Field below. While you can simply type the metric you are interested in directly into the Field box, the list makes it easy to know what metrics are available to use.

    Channel Options
    The channel options allow you to specify whether a threshold, or part of a threshold, should apply to only the horizontal or vertical channels. In most cases, these will not be used since you will want to find issues associated with any and all of the channels. But there are some cases where you would want to limit things. For example, when looking for issues in the metadata you may want to find all cases where the horizontal channels have a Dip != 0. If you applied this threshold to all channels, then every vertical channel would get triggered since they ought to have a non-zero Dip. Another example would be rmsRatio, which compares the sample_rms of the vertical channel to an average of the horizontals.
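    The bracketed designator that QuARG.py builds (field[designator]) is how such a channel restriction ends up inside a threshold definition. The sketch below only illustrates that string assembly; the designator letter, the comparison, and the surrounding thresholds-file format are assumptions rather than documented syntax.

```python
# Illustrative sketch of assembling a channel-restricted threshold part, mirroring
# the "%s[%s]" and " && ".join() patterns visible in QuARG.py. The "H" designator
# (horizontal channels) and the "!= 0" comparison are assumptions for the Dip example.
field = "Dip"       # a channel-level metadata field from the station service
designator = "H"    # assumed: restrict the check to horizontal channels
parts = ["%s[%s] != 0" % (field, designator)]
definition = " && ".join(parts)   # multiple parts of one threshold are AND-ed together
print(definition)   # -> Dip[H] != 0
```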

    There are 4 buttons for Channel Options:

    @@ -1041,7 +1041,7 @@

    Threshold Definitions Form

  • Save
  • In More Detail:

    -

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the IRIS station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).

    +

    Metadata List This is a scrollable, selectable list of all metadata fields that can be used in QuARG. These are based on the EarthScope station service headers at the channel level in the text format. When a field is selected, it will turn blue and will automatically fill in the Field in column 3. The metadata list is disabled by default, and only becomes available when the Metadata toggle button is selected (see below).
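    For reference, QuARG builds this list by pulling one channel-level, text-format row from the station service and keeping only the header; the sketch below mirrors the URL and pandas call added in QuARG.py in this diff (the IU ANMO target is simply the example used there):

```python
import pandas as pd

# Fetch the channel-level, text-format station service header; its column names are
# the metadata fields available to thresholds. URL and call mirror QuARG.py in this diff.
URL = (
    "http://service.earthscope.org/fdsnws/station/1/query?"
    "net=IU&sta=ANMO&loc=00&cha=BHZ&level=channel&format=text"
    "&includecomments=true&nodata=404"
)
metadata_fields = pd.read_csv(URL, nrows=1, sep="|").columns
print(list(metadata_fields))
```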

    Threshold Options
    There are five options available:

    @@ -1404,11 +1404,11 @@

    Examine Issues Screen

    - - + + - + @@ -1416,15 +1416,15 @@

    Examine Issues Screen

    - + - + - + @@ -1436,8 +1436,8 @@

    Examine Issues Screen

    - - + +
    Databrowser is a tool that allows users to plot MUSTANG metrics. These include Metric Timeseries (plotting metric values over time), Gap Duration plots, Network and Station boxplots, as well as some other options. It can be useful in looking at a network’s overall health, or to quickly view patterns in metric values over long periods of time. The Databrowser button does not require any of the Input fields to be filled.
    WaveformsThis button will retreive and display waveform data from the IRIS timeseriesplot service. This requires all target fields to be specified, though it can accomodate a comma-separated list. Users must be careful with the requested Start and End times, as the service limits the length of time that can be plotted. Note: this returns a static image and is not recommended to be the primary way of viewing waveforms - we expect the analyst to use another more dynamic tool to view waveforms, this is simply for use as a quick view of the data.WaveformsThis button will retreive and display waveform data from the EarthScope timeseriesplot service. This requires all target fields to be specified, though it can accomodate a comma-separated list. Users must be careful with the requested Start and End times, as the service limits the length of time that can be plotted. Note: this returns a static image and is not recommended to be the primary way of viewing waveforms - we expect the analyst to use another more dynamic tool to view waveforms, this is simply for use as a quick view of the data.
    MetricsMetrics The Metrics button opens a web browser page that displays metric values from the MUSTANG Measurements web service. It uses input from all of the input fields except for Threshold. Start and End are used to limit the time range for the metrics retrieved; Metrics can be a comma-separated list of any desired metrics; Network, Station, Location, and Channel can all be wildcarded, lists, or left blank. Be careful of leaving fields blank, particularly Network, as that can create a very large query.
    The Metric Plot button uses the same inputs as the Metrics button, but rather than opening a web page with tabular data, it generates a simple timeseries plot of the requested values.
    PDFsPDFs Opens a webpage with monthly PDFs for the requested targets, beginning with the month of Start.
    SpectrogramsSpectrograms Opens a webpage with the spectrograms for the requested targets, for the time span of Start to End. If no dates are provided, it will cover the entire span of the targets (from the beginning of the earliest target until the end of the latest target).
    Noise ModesNoise Modes Opens a webpage to the Noise Mode Timeseries plot. All Network, Station, Location, and Channel fields must be filled, with only one target allowed (ie, no wildcarding or lists). Will use the Start and End dates.
    Opens a webpage of the USGS event service based on the Start and End dates specified. It will list all earthquakes M5.5 and larger, as MUSTANG event-based metrics do not calculate on smaller events.
    StationOpens a channel-level web page of the IRIS Station service, using provided target information. Any blank field will be wildcarded, and lists and wildcards are allowed; start and end times are ignored for this diagnosis tool.StationOpens a channel-level web page of the EarthScope Station service, using provided target information. Any blank field will be wildcarded, and lists and wildcards are allowed; start and end times are ignored for this diagnosis tool.
    @@ -1968,7 +1968,7 @@
    Ticket File
    id,tracker,target,start_date,category,subject,thresholds,images,caption,links,status,end_date,description

    The lines that come after that follow that pattern, with quotation marks (‘"’) around any fields that may have a comma in them. For example:

    id,tracker,target,start_date,category,subject,thresholds,images,caption,links,status,end_date,description
    -4,Support,UU BEI 01 EHZ,2019-12-01,Other,Example Ticket,"gapsRatioGt12, glitch",/Users/laura/QA_reports/testImage.jpg,"This is a figure caption, with a comma so it has quotation marks",http://service.iris.edu/mustang/measurements/1/query?metric=percent_availability&net=YO&cha=?XH&format=text&nodata=404&orderby=start_asc,In Progress,2019-12-03,"This one has a start and end date, and a link!"
    +4,Support,UU BEI 01 EHZ,2019-12-01,Other,Example Ticket,"gapsRatioGt12, glitch",/Users/laura/QA_reports/testImage.jpg,"This is a figure caption, with a comma so it has quotation marks",http://service.earthscope.org/mustang/measurements/1/query?metric=percent_availability&net=YO&cha=?XH&format=text&nodata=404&orderby=start_asc,In Progress,2019-12-03,"This one has a start and end date, and a link!"

    The most important thing is that the ticketing system used either has these fields, or has an equivalent, and that the tickets can be exported into a csv file of this format. Any missing fields can be left blank if necessary. For example, using a Redmine ticketing system, we are able to use the ‘Export to CSV’ function and choose what columns are exported. It may take an intermediate step to convert the CSV into the correct format, in which case it is probably worth setting up a workflow to do the conversion for you. Depending on the complexity, it might be worth delving into the code to change the required format - just be wary of doing that: it may create unintended consequences.
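    Because several of these fields can themselves contain commas, they are quoted in the export; any standard CSV reader copes with that, so a minimal sanity check of an exported ticket file could look like the sketch below (the filename is a placeholder):

```python
import pandas as pd

# Minimal sketch: read an exported ticket CSV with the column layout shown above.
# pandas honors the quotation marks around fields that contain commas.
expected_cols = [
    "id", "tracker", "target", "start_date", "category", "subject", "thresholds",
    "images", "caption", "links", "status", "end_date", "description",
]
tickets = pd.read_csv("exported_tickets.csv")   # placeholder filename
missing = [c for c in expected_cols if c not in tickets.columns]
if missing:
    print("Ticket file is missing columns:", missing)
```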

    @@ -2003,16 +2003,16 @@
    Final Report
  • Diagnostic Information: At the bottom of the report are a number of links that may be useful for QA or for better understanding the metrics. There are also definitions of the thresholds used so that there is a record
  • diff --git a/findIssues.py b/findIssues.py index a993666..eed3c66 100755 --- a/findIssues.py +++ b/findIssues.py @@ -27,15 +27,20 @@ import pandas as pd - # TODO: If ts_ metrics are used, must propagate through into the thresholds file # ============================# # LOAD INPUT ARGUMENTS -network = ''; station = ''; location = ''; channels = ''; start = '';end = ''; outfile = '' +network = "" +station = "" +location = "" +channels = "" +start = "" +end = "" +outfile = "" args = reportUtils.getArgs() -start= args.start +start = args.start end = args.end # month = args.month @@ -43,26 +48,57 @@ if not preferenceFile: # If no preference file included, run everything - thresholdGroups = ['Completeness','Amplitudes','Timing','State of Health','Metadata'] - groupsDict = {'Completeness':['avgGaps','gapsRatioGt12','noData'], - 'Amplitudes' : ['flat','lowRms','hiAmp','lowAmp','badResp', - 'avgSpikes','pegged','dead','noise1','noise2', - 'medianUnique','rmsRatio','xTalk', - 'gainRatio','nonCoher','polarity', - 'dcOffsets','nSpikes','rmsRatio'], - 'Timing' : ['poorTQual','suspectTime','noTime'], - 'State of Health' : ['ampSat','filtChg','clip', - 'spikes','glitch','padding','tSync'], - 'Metadata' : ['zDip','horDip','zeroZ','lowScale','nonMSUnits']} - + thresholdGroups = [ + "Completeness", + "Amplitudes", + "Timing", + "State of Health", + "Metadata", + ] + groupsDict = { + "Completeness": ["avgGaps", "gapsRatioGt12", "noData"], + "Amplitudes": [ + "flat", + "lowRms", + "hiAmp", + "lowAmp", + "badResp", + "avgSpikes", + "pegged", + "dead", + "noise1", + "noise2", + "medianUnique", + "rmsRatio", + "xTalk", + "gainRatio", + "nonCoher", + "polarity", + "dcOffsets", + "nSpikes", + "rmsRatio", + ], + "Timing": ["poorTQual", "suspectTime", "noTime"], + "State of Health": [ + "ampSat", + "filtChg", + "clip", + "spikes", + "glitch", + "padding", + "tSync", + ], + "Metadata": ["zDip", "horDip", "zeroZ", "lowScale", "nonMSUnits"], + } + else: try: with open(preferenceFile) as f: exec(compile(f.read(), preferenceFile, "exec")) except OSError: - print('Cannot open', preferenceFile) + print("Cannot open", preferenceFile) quit() - + # Commandline arguments override preference file values, if provided if args.network: network = args.network @@ -71,7 +107,7 @@ if args.locations: location = args.locations if args.channels: - channels= args.channels + channels = args.channels if args.outfile: outfile = args.outfile if args.metricsource: @@ -102,53 +138,64 @@ if os.path.isfile(outfile): resp1 = input("This file already exists - overwrite?[y/n]: ") - if (resp1.upper() == 'Y') or (resp1.upper() == 'YES'): - print('Removing existing file') + if (resp1.upper() == "Y") or (resp1.upper() == "YES"): + print("Removing existing file") os.remove(outfile) - - elif (resp1.upper() == 'N') or (resp1.upper()== 'NO'): - resp2= input('Should I append to the existing file?[y/n]: ') - if (not resp2.upper() == 'Y') and (not resp2.upper() == 'YES'): + + elif (resp1.upper() == "N") or (resp1.upper() == "NO"): + resp2 = input("Should I append to the existing file?[y/n]: ") + if (not resp2.upper() == "Y") and (not resp2.upper() == "YES"): quit("Exiting") else: - print('Input not recognized, cancelling') + print("Input not recognized, cancelling") quit() - + # Load up list of metrics and metadata, for reference later on if os.path.isfile(metrics_file): - with open(metrics_file,'r') as f: + with open(metrics_file, "r") as f: metricsList = f.read().splitlines() else: # This should not happen unless running outside of QuARG since QuARG.py 
has a check before running findIssues.py - print("WARNING: Could not find list of MUSTANG metrics in file %s - does it exist?" % metrics_file) - print(" You can create this list by entering the Thresholds Editor - it will automatically generate there") + print( + "WARNING: Could not find list of MUSTANG metrics in file %s - does it exist?" + % metrics_file + ) + print( + " You can create this list by entering the Thresholds Editor - it will automatically generate there" + ) quit() - + if os.path.isfile(metadata_file): - with open(metadata_file,'r') as f: + with open(metadata_file, "r") as f: metadataList = f.read().splitlines() else: # This should not happen unless running outside of QuARG since QuARG.py has a check before running findIssues.py - print("WARNING: Could not find list of IRIS metadata fields in file %s - does it exist?" % metadata_file) - print(" You can create this list by entering the Thresholds Editor - it will automatically generate there") + print( + "WARNING: Could not find list of EarthScope metadata fields in file %s - does it exist?" + % metadata_file + ) + print( + " You can create this list by entering the Thresholds Editor - it will automatically generate there" + ) quit() - + # ============================# # GO THROUGH THRESHOLDS # Add the header to the file -with open(outfile, 'w') as f: +with open(outfile, "w") as f: f.write("# Threshold|Target|Start|End|Ndays|Status|Value|Notes\n") -f.close() +f.close() # Get metadata dataframe at the beginning to use wherever necessary, since it is always the same -metadataDF = reportUtils.getMetadata(network, station, location, channels, start, end, metadataSource) +metadataDF = reportUtils.getMetadata( + network, station, location, channels, start, end, metadataSource +) failedMetricsAll = list() failedThresholdsAll = list() -# thresholdFile = './groupsTEST.txt' for thresholdGroup in thresholdGroups: print() print("Running %s Thresholds" % thresholdGroup) @@ -157,46 +204,65 @@ except: print(" Could not find any thresholds for %s" % thresholdGroup) continue - + thresholdsList.sort() - - allMetrics, failedThresholds = thresholds.get_threshold_metrics(thresholdsList, thresholdFile) + + allMetrics, failedThresholds = thresholds.get_threshold_metrics( + thresholdsList, thresholdFile + ) metadatas = [e for e in metadataList if e in allMetrics] metrics = [e for e in metricsList if e in allMetrics] -# hasMetadata = False; hasMetrics = False -# if len(metadatas) > 0: -# print("This thresholds Group contains some metadata fields") -# hasMetadata = True if len(metrics) > 0: hasMetrics = True - if hasMetrics: - metricDF, failedMetrics = reportUtils.mergeMetricDF(network, station, location, channels, start, end, metrics, metricSource) + metricDF, failedMetrics = reportUtils.mergeMetricDF( + network, station, location, channels, start, end, metrics, metricSource + ) else: - metricDF = pd.DataFrame(columns=['value','target','start','end','network','station','location','channel']) + metricDF = pd.DataFrame( + columns=[ + "value", + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + ] + ) failedMetrics = list() - + for failedThreshold in failedThresholds: if not failedThreshold in failedThresholdsAll: failedThresholdsAll.append(failedThreshold) - + for failedMetric in failedMetrics: if not failedMetric in failedMetricsAll: failedMetricsAll.append(failedMetric) - -# if hasMetrics == True and not metricDF.empty: + for threshold in thresholdsList: if not threshold in failedThresholds: - 
thresholds.do_threshold(threshold, thresholdFile, metricDF, metadataDF, outfile, instruments, start, end, hasMetrics, chanTypes) + thresholds.do_threshold( + threshold, + thresholdFile, + metricDF, + metadataDF, + outfile, + instruments, + start, + end, + hasMetrics, + chanTypes, + ) -with open('failedMetrics.txt','w') as f: +with open("failedMetrics.txt", "w") as f: for failedThreshold in failedThresholdsAll: - f.write('threshold: %s\n' % failedThreshold) + f.write("threshold: %s\n" % failedThreshold) for failedMetric in failedMetricsAll: - f.write('metric: %s\n' % failedMetric) + f.write("metric: %s\n" % failedMetric) print("INFO: Completed generating issue file") - diff --git a/generateHTML.py b/generateHTML.py index 22ed6fa..d067668 100644 --- a/generateHTML.py +++ b/generateHTML.py @@ -31,26 +31,23 @@ import reportUtils args = reportUtils.getArgs() -start= args.start +start = args.start end = args.end -# month = args.month zipDir = args.htmldir report_fullPath = args.html_file_path iShort = 0 iBroad = 0 iStrong = 0 -# global iFlag -# iFlag = 0 metricsFile = args.metrics_file thresholdFile = args.thresholds_file preferenceFile = args.preference_file -if not preferenceFile: +if not preferenceFile: quit("WARNING: Preference File required") - + else: try: with open(preferenceFile) as f: @@ -58,36 +55,42 @@ except: print("Cannot open ", preferenceFile) quit() - if ("short period" in map(str.lower, instruments)) or ("shortperiod" in map(str.lower, instruments)): + if ("short period" in map(str.lower, instruments)) or ( + "shortperiod" in map(str.lower, instruments) + ): iShort = 1 - if ("broad band" in map(str.lower, instruments)) or ("broadband" in map(str.lower, instruments)): + if ("broad band" in map(str.lower, instruments)) or ( + "broadband" in map(str.lower, instruments) + ): iBroad = 1 - if ("strong motion" in map(str.lower, instruments)) or ("strongmotion" in map(str.lower, instruments)): + if ("strong motion" in map(str.lower, instruments)) or ( + "strongmotion" in map(str.lower, instruments) + ): iStrong = 1 - -if start == '' or end == '': + +if start == "" or end == "": pref_start, pref_end, subdir = reportUtils.calculate_dates(reportFrequency) - if start == '': + if start == "": start = pref_start - if end == '': + if end == "": end = pref_end try: - startDate = datetime.datetime.strptime(start, '%Y-%m-%d').strftime('%B %d, %Y') - endDate = datetime.datetime.strptime(end, '%Y-%m-%d').strftime('%B %d, %Y') + startDate = datetime.datetime.strptime(start, "%Y-%m-%d").strftime("%B %d, %Y") + endDate = datetime.datetime.strptime(end, "%Y-%m-%d").strftime("%B %d, %Y") dates = [startDate, endDate] except: print("ERROR: Are the dates properly formatted? 
YYYY-mm-dd") - + quit("") - + try: with open(thresholdFile) as f: exec(compile(f.read(), thresholdFile, "exec")) - + except Exception as e: print("WARNING: Cannot open thresholds File - %s" % e) - + if args.network: network = args.network if args.ticketsfile: @@ -98,263 +101,338 @@ print("Creating new directory: %s" % zipDir) os.mkdir(zipDir) - infile = csvfile print(infile) -#infile = directory + 'issues.csv' if not os.path.isfile(infile): quit("Input csv file does not exist") -summaryFile = report_fullPath + '.summary' -detailFile = report_fullPath + '.detail' - - - - -# date = datetime.datetime.strptime(month, '%Y%m').strftime('%B %Y') -#author = "Laura Keyson" - - -#os.chdir(directory) +summaryFile = report_fullPath + ".summary" +detailFile = report_fullPath + ".detail" ######################### # Define useful utilities ######################### -def printPreamble(net,dates,authors,email,outfile): +def printPreamble(net, dates, authors, email, outfile): # This prints the header of the html - with open(outfile,'a+') as f: - #print("Writing Header") + with open(outfile, "a+") as f: + # print("Writing Header") f.write("\n\n") f.write(" \n") - f.write("\t\n"); - f.write("\tData Quality Report for Network " + str(net) + " " + str(' - '. join(dates)) + "\n"); - f.write(" \n\n"); - - f.write(" \n\n"); - f.write("\t

    Data Quality Report for " + str(', '.join(net.split(','))) + "

    "); - f.write("\t

    " + str(' - '. join(dates)) + "

    \n\n"); + f.write( + '\t\n' + ) + f.write( + "\tData Quality Report for Network " + + str(net) + + " " + + str(" - ".join(dates)) + + "\n" + ) + f.write(" \n\n") + + f.write(" \n\n") + f.write( + "\t

    Data Quality Report for " + str(", ".join(net.split(","))) + "

    " + ) + f.write("\t

    " + str(" - ".join(dates)) + "

    \n\n") + + f.write("\t " + str(authors) + "
    \n") + f.write("\t " + str(email) + "
    \n") + + today = datetime.datetime.today().strftime("%B %d, %Y") + f.write("\t Issued " + str(today) + "\n\n") - f.write("\t " + str(authors) + "
    \n"); - f.write("\t " + str(email) + "
    \n"); - - today = datetime.datetime.today().strftime('%B %d, %Y'); - f.write("\t Issued " + str(today) + "\n\n"); + f.close() - f.close(); def printFirstProject(project, summaryFile, detailFile): # Start the summary and detail files, which will be combined into one later - with open(summaryFile,'a+') as f: - f.write("\t

    Summary

    \n\n"); - - f.write("\t

    Clicking on each issue Summary link takes you to a more detailed description of \n"); - f.write("\t that issue, including the metrics used to identify the problem.\n"); - f.write("\t Sorted by category, then station.\n"); - f.write("\t

    \n"); - f.write("\t

    \n"); - f.write("\t "+ str(project) +"\n\n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.close(); - - with open(detailFile,'a+') as f: - f.write("\t

    Details

    \n\n"); - f.write("\t

    Detailed description of the issues. Sorted by station, with resolved issues at bottom

    \n"); + with open(summaryFile, "a+") as f: + f.write("\t

    Summary

    \n\n") + + f.write( + "\t

    Clicking on each issue Summary link takes you to a more detailed description of \n" + ) + f.write( + "\t that issue, including the metrics used to identify the problem.\n" + ) + f.write("\t Sorted by category, then station.\n") + f.write("\t

    \n") + f.write("\t

    \n") + f.write("\t " + str(project) + "\n\n") + f.write("\t

    CategoryChannel(s)StatusStart DateSummary
    \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.close() + + with open(detailFile, "a+") as f: + f.write("\t

    Details

    \n\n") + f.write( + "\t

    Detailed description of the issues. Sorted by station, with resolved issues at bottom

    \n" + ) f.close() + def PrintNextProject(): # Necessary only if there is more than one network in the report - with open(summaryFile,'a+') as f: - f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    \n"); - f.write("\t

    \n\n"); - f.write("\t

    \n"); - f.write("\t "+ str(project) +"\n\n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - - f.close(); - - with open(detailFile, 'a+'): - f.write("\t "+ str(project) +"\n\n"); - - f.close(); - - -def printTicketSummary(inum,category,sncl,status,start,summary,summaryFile): + with open(summaryFile, "a+") as f: + f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    \n") + f.write("\t

    \n\n") + f.write("\t

    \n") + f.write("\t " + str(project) + "\n\n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + f.write("\t \n") + + f.close() + + with open(detailFile, "a+"): + f.write("\t " + str(project) + "\n\n") + + f.close() + + +def printTicketSummary(inum, category, sncl, status, start, summary, summaryFile): # Create a summary for the top of the final report, initially created separately - if status == 'New': - status='Open' - with open(summaryFile, 'a+') as f: - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); + if status == "New": + status = "Open" + with open(summaryFile, "a+") as f: + f.write("\t \n") + f.write("\t \n") + f.write( + "\t \n" + ) + f.write("\t \n") + f.write("\t \n") + f.write( + '\t \n" + ) + f.write("\t \n") f.close() - + + def closeSummary(): # Wrap up the summary file - with open(summaryFile,'a+') as f: - f.write("\t
    CategoryChannel(s)StatusStart DateSummary
    " + str(category) + "" + str(sncl).replace(" ",".").replace('--',"") + "" + str(status) + "" + str(start) + "" + str(summary) + "
    " + str(category) + "" + + str(sncl).replace(" ", ".").replace("--", "") + + "" + str(status) + "" + str(start) + "' + + str(summary) + + "
    \n\n"); + with open(summaryFile, "a+") as f: + f.write("\t \n\n") f.close() - -def printTicketDetails(inum, snclq, start, subject, thresholds, description, imageurl, imagecaption, status, end, link, detailFile): + + +def printTicketDetails( + inum, + snclq, + start, + subject, + thresholds, + description, + imageurl, + imagecaption, + status, + end, + link, + detailFile, +): # Create the detailed report, the meat of the final report. Initially created separately -# global iFlag - with open(detailFile,'a+') as f: + # global iFlag + with open(detailFile, "a+") as f: if start == "": - start="(Start not identified)" - if status == 'New': - status='Open' - f.write("\t

    "+ str(snclq).replace(" ",".").replace('--',"") +" "+ str(subject) + " -- " + str(start) +"
    \n"); + start = "(Start not identified)" + if status == "New": + status = "Open" + f.write( + '\t

    ' + + str(snclq).replace(" ", ".").replace("--", "") + + " " + + str(subject) + + " -- " + + str(start) + + "
    \n" + ) else: - f.write("\t

    "+ str(snclq) +" "+ str(subject) + " -- " +str(start) +" to " + str(end) +"
    \n"); - f.write("\t STATUS: "+ str(status) +"
    \n"); - #f.write("\t Diagnostics: \n"); - #f.write("\t "+ str(diagnostics) +"\n"); - #f.write("\t (what is this?)
    \n"); - f.write("\t Thresholds: \n"); - f.write("\t "+ str(thresholds) +"\n"); - f.write("\t (what is this?)
    \n"); - f.write("\t "+ str(str(description).replace('\n','
    ')) +"\n"); - f.write("\t

    \n"); - - links = link.split(';;;;') - if not links == ['']: + f.write( + '\t

    ' + + str(snclq) + + " " + + str(subject) + + " -- " + + str(start) + + " to " + + str(end) + + "
    \n" + ) + f.write('\t STATUS: ' + str(status) + "
    \n") + f.write('\t Thresholds: \n') + f.write('\t ' + str(thresholds) + "\n") + f.write('\t (what is this?)
    \n') + f.write("\t " + str(str(description).replace("\n", "
    ")) + "\n") + f.write("\t

    \n") + + links = link.split(";;;;") + if not links == [""]: f.write("\t Links:
    ") for thisLink in links: - f.write("\t " + thisLink +"" ) - f.write("
    "); - + f.write( + '\t ' + + thisLink + + "" + ) + f.write("
    ") + if not imageurl == "": - images = imageurl.split(';;;;') - captions = imagecaption.split(';;;;') + images = imageurl.split(";;;;") + captions = imagecaption.split(";;;;") nImages = len(images) - + for image_number in range(nImages): thisImage = images[image_number] thisCaption = captions[image_number] - printTicketDetails.iFlag = 1; -# imgfile = str(inum) + ".png"; - imgfile = "%s_%s.png" % (inum,image_number) - + printTicketDetails.iFlag = 1 + imgfile = "%s_%s.png" % (inum, image_number) + try: - + try: - shutil.copyfile(thisImage, zipDir + '/' + imgfile) + shutil.copyfile(thisImage, zipDir + "/" + imgfile) except: - urllib.request.urlretrieve(thisImage, zipDir + '/' + imgfile) - - gotPhoto=1 + urllib.request.urlretrieve(thisImage, zipDir + "/" + imgfile) + + gotPhoto = 1 except: print("WARNING: Unable to retrieve image: %s" % thisImage) - gotPhoto=0 - - f.write("\t

    \n"); - - + gotPhoto = 0 + + f.write("\t

    \n") + if gotPhoto == 1: if not thisCaption == "": - f.write("\t "+ str(thisCaption) +":
    \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t \n"); - f.write("\t

    \n"); - - f.write("\t (Top)\n"); - f.write("\t

    \n\n"); - + f.write("\t " + str(thisCaption) + ":
    \n") + f.write('\t \n') + f.write( + '\t \n' + ) + f.write("\t \n") + f.write("\t

    \n") + + f.write('\t (Top)\n') + f.write("\t

    \n\n") + f.close() - - - + def closeHTML(): - with open(metricsFile,'r') as f: + with open(metricsFile, "r") as f: metricsList = f.read().splitlines() - -# nMetrics = len(metricsList) + nCol = 4 -# metsPerCol = int(nMetrics / nCol) -# print("Metrics: %s, Columns: %s, Metrics Per Column: %s" % (nMetrics, nCol, metsPerCol)) - - #Wrap up the final report - with open(report_fullPath,'a+') as f: - f.write("\t

    Diagnostics

    \n"); - f.write("\t

    The links below take you to the metrics and other data quality tools used to identify the data issues in this report.\n"); - f.write("\t

    \n\n"); - - f.write("\t

    MUSTANG measurement service metrics:\n"); - f.write("\t \n"); - f.write("\t \n"); - - # Use the metrics file (which is updated when connected to the internet) to write out most current list of metrics + + # Wrap up the final report + with open(report_fullPath, "a+") as f: + f.write('\t

    Diagnostics

    \n') + f.write( + "\t

    The links below take you to the metrics and other data quality tools used to identify the data issues in this report.\n" + ) + f.write("\t

    \n\n") + + f.write( + '\t

    MUSTANG measurement service metrics:\n' + ) + f.write("\t

    \n") + f.write("\t \n") + + # Use the metrics file (which is updated when connected to the internet) to write out most current list of metrics ii = 0 for metric in metricsList: - f.write("\t \n" % metric); - if (ii % nCol == 0): - f.write("\t \n"); - f.write("\t \n"); - ii+=1 - f.write("\t \n"); - f.write("\t
    %s
    \n"); - f.write("\t

    \n"); - - f.write("\t

    MUSTANG noise-psd service

    \n"); - f.write("\t

    MUSTANG noise-pdf service

    \n"); - f.write("\t

    MUSTANG noise-mode-timeseries service

    \n"); - f.write("\t

    GOAT/data_available

    \n"); - for net in network.split(','): + f.write("\t %s\n" % metric) + if ii % nCol == 0: + f.write("\t \n") + f.write("\t \n") + ii += 1 + f.write("\t \n") + f.write("\t \n") + f.write("\t

    \n") + + f.write( + '\t

    MUSTANG noise-psd service

    \n' + ) + f.write( + '\t

    MUSTANG noise-pdf service

    \n' + ) + f.write( + '\t

    MUSTANG noise-mode-timeseries service

    \n' + ) + f.write( + '\t

    GOAT/data_available

    \n' + ) + for net in network.split(","): net = net.strip() - f.write("\t

    Metadata Aggregator for %s

    \n" % (net, net)); - f.write("\t

    BUD stats

    \n"); - f.write("\t

    SeismiQuery

    \n"); - + f.write( + '\t

    Metadata Aggregator for %s

    \n' + % (net, net) + ) + f.write( + '\t

    BUD stats

    \n' + ) + f.write( + '\t

    SeismiQuery

    \n' + ) # Loop over the thresholds dictionary to print the definitions for instrument groups that are being used. - f.write("\t

    Thresholds

    \n"); - f.write("\t

    Thresholds used to identify potential data issues for this report were:\n"); - f.write("\t

    \n\n"); - -# f.write("\t \n\n") + + f.write("\t\n\n") + f.write("\n") + f.close() @@ -362,59 +440,72 @@ def closeHTML(): # Create the Report - try: - printPreamble(network,dates,author,email,report_fullPath) - - + printPreamble(network, dates, author, email, report_fullPath) + # lastProject is used in case more than one network is included in the same report - iFirst = 1; lastProject = "" - + iFirst = 1 + lastProject = "" + # Create an empty dataframe to be filled by the csv file - not loading directly # because of the complcated description section - issueDF = pd.read_csv(infile).fillna('') - + issueDF = pd.read_csv(infile).fillna("") + # The summary should be sorted by category - summaryDF = issueDF.copy().sort_values(by=['category','target']) - - printFirstProject(project, summaryFile, detailFile); + summaryDF = issueDF.copy().sort_values(by=["category", "target"]) + + printFirstProject(project, summaryFile, detailFile) for index, row in summaryDF.iterrows(): - - - printTicketSummary(row['id'], row['category'], row['target'], \ - row['status'], row['start_date'], \ - row['subject'], summaryFile) - + + printTicketSummary( + row["id"], + row["category"], + row["target"], + row["status"], + row["start_date"], + row["subject"], + summaryFile, + ) + # The detailed portion should be sorted by sncl detailDF = issueDF.copy() - detailDF['Status'] = pd.Categorical(detailDF['status'], ["New", "In Progress", "Closed", "Resolved","Rejected"]) - detailDF = detailDF.sort_values(by=["Status","target"]) - + detailDF["Status"] = pd.Categorical( + detailDF["status"], ["New", "In Progress", "Closed", "Resolved", "Rejected"] + ) + detailDF = detailDF.sort_values(by=["Status", "target"]) + for index, row in detailDF.iterrows(): - #print(row['thresholds']) - - printTicketDetails(row['id'], row['target'], row['start_date'], \ - row['subject'], row['thresholds'], \ - row['description'], row['images'], row['caption'], \ - row['Status'], row['end_date'], row['links'],detailFile) - + + printTicketDetails( + row["id"], + row["target"], + row["start_date"], + row["subject"], + row["thresholds"], + row["description"], + row["images"], + row["caption"], + row["Status"], + row["end_date"], + row["links"], + detailFile, + ) + closeSummary() - - + # Combine the summary and detail files into one filenames = [summaryFile, detailFile] - with open(report_fullPath, 'a+') as ofile: + with open(report_fullPath, "a+") as ofile: for fname in filenames: with open(fname) as infile: ofile.write(infile.read()) - + closeHTML() - + # Remove the temporary summary and detail files os.remove(summaryFile) - os.remove(detailFile) - - + os.remove(detailFile) + # If we have images, make a new directory with all images and files, and zip # print(printTicketDetails.iFlag) try: @@ -423,18 +514,15 @@ def closeHTML(): pass else: files = os.listdir(directory) - - - shutil.make_archive(zipDir, 'zip', zipDir) - - with open('generateHTML_status.txt','w') as f: - f.write('') - print("Completed HTML report") - -except Exception as e: - with open('generateHTML_status.txt','w') as f: - f.write('%s' % e) + shutil.make_archive(zipDir, "zip", zipDir) + + with open("generateHTML_status.txt", "w") as f: + f.write("") + print("Completed HTML report") + +except Exception as e: + with open("generateHTML_status.txt", "w") as f: + f.write("%s" % e) print("Error while generating HTML report") - diff --git a/quarg-conda-install.txt b/quarg-conda-install.txt index 76a3ef9..a26f17c 100644 --- a/quarg-conda-install.txt +++ b/quarg-conda-install.txt @@ -1,6 +1,5 @@ 
-pandas=0.23.4 -matplotlib=3.0.2 -kivy -Cython -python=3.6.15 -requests=2.21.0 +pandas=2.3.3 +matplotlib=3.10.7 +kivy=2.3.1 +Cython=3.1.6 +requests=2.32.5 \ No newline at end of file diff --git a/quarg.kv b/quarg.kv index 8f44393..366d143 100644 --- a/quarg.kv +++ b/quarg.kv @@ -55,8 +55,7 @@ : size: self.texture_size - allow_stretch: True - keep_ratio: True + fit_mode: 'contain' size_hint_y: None size_hint_x: None width: self.parent.width @@ -2064,13 +2063,13 @@ on_select: metric_source_btn.text = args[1] Button: - text: 'IRIS' + text: 'EarthScope' size_hint_y: None height: '35dp' #background_color: .5,.5,.5,1 #background_normal: '' on_release: - metric_source_dropdown.select('IRIS') + metric_source_dropdown.select('EarthScope') root.deactivate_metric_source_text() Button: @@ -2125,13 +2124,13 @@ on_select: metadata_source_btn.text = args[1] Button: - text: 'IRIS' + text: 'EarthScope' size_hint_y: None height: '35dp' #background_color: .5,.5,.5,1 #background_normal: '' on_release: - metadata_source_dropdown.select('IRIS') + metadata_source_dropdown.select('EarthScope') root.deactivate_metadata_source_text() Button: diff --git a/reportUtils.py b/reportUtils.py index 81733ae..3012e2b 100644 --- a/reportUtils.py +++ b/reportUtils.py @@ -26,188 +26,287 @@ import datetime import argparse import sys -import os import xml.etree.ElementTree as et + # from urllib.request import Request, urlopen import requests from io import StringIO -import re + # ============================# # PREFERENCE FILE - DETERMINE START AND END DATES def calculate_dates(reportFrequency): today = datetime.date.today() - if reportFrequency.lower() == 'daily': + if reportFrequency.lower() == "daily": # For daily reports, we do two days ago to allow for metrics to have calculated - endday = datetime.date.today() - datetime.timedelta(days=1) + endday = datetime.date.today() - datetime.timedelta(days=1) startday = endday - datetime.timedelta(days=1) - subdir = '%s' % startday.strftime('%Y%m%d') - - - elif reportFrequency.lower() == 'weekly': + subdir = "%s" % startday.strftime("%Y%m%d") + + elif reportFrequency.lower() == "weekly": weekday = today.weekday() start_delta = datetime.timedelta(days=weekday, weeks=1) startday = today - start_delta endday = startday + datetime.timedelta(days=7) - subdir = '%s' % startday.strftime('%Y%m%d') - - elif reportFrequency.lower() == 'monthly': - endday= today.replace(day=1) + subdir = "%s" % startday.strftime("%Y%m%d") + + elif reportFrequency.lower() == "monthly": + endday = today.replace(day=1) endLastMonth = endday - datetime.timedelta(days=1) startday = endLastMonth.replace(day=1) - subdir = '%s' % startday.strftime('%Y%m') - - elif reportFrequency.lower() == 'quarterly': + subdir = "%s" % startday.strftime("%Y%m") + + elif reportFrequency.lower() == "quarterly": thisMonth = today.month year = datetime.date.today().year - month_delta = (thisMonth-1) % 3 + month_delta = (thisMonth - 1) % 3 endMonth = thisMonth - month_delta startMonth = endMonth - 3 if endMonth < 1: endMonth += 12 year = year - 1 endday = datetime.date(year=year, month=endMonth, day=1) - + if startMonth < 1: startMonth += 12 year = year - 1 startday = datetime.date(year=year, month=startMonth, day=1) - subdir = '%s' % startday.strftime('%Y%m') + subdir = "%s" % startday.strftime("%Y%m") else: -# print('Report frequency not recognized') - return '', '', '' - - #month = '%s' % startday.strftime('%Y%m') + return "", "", "" + startday = startday.strftime("%Y-%m-%d") endday = endday.strftime("%Y-%m-%d") return startday, 
endday, subdir + # ============================# # UTILITY FOR PARSING COMMAND LINE ARGUMENTS + def getArgs(): - parser = argparse.ArgumentParser(description="Parse inputs to Find QA issues", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog,max_help_position=35)) + parser = argparse.ArgumentParser( + description="Parse inputs to Find QA issues", + formatter_class=lambda prog: argparse.RawTextHelpFormatter( + prog, max_help_position=35 + ), + ) parser._optionals.title = "single arguments" - inputs = parser.add_argument_group('arguments for running metrics') - inputs.add_argument('-P', '--preference_file', required=False, help='path to preference file, default=./preference_files/default.txt') - inputs.add_argument('-T', '--thresholds', required=False, - help='thresholds to be run, as defined in preference file or a list of threshold names, defaults to all') - inputs.add_argument('-N', '--network', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-S', '--stations', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-C', '--channels', required=False, - help='Required unless specified in preference file') - inputs.add_argument('-L', '--locations', required=False, - help='Required unless specified in preference file') - inputs.add_argument('--start', required=True, - help='start date in YYYY-MM-DD format, time defaults to 00:00:00, required') - inputs.add_argument('--end', required=True, - help='end date in YYYY-MM-DD format, timedefaults to 00:00:00, required') - inputs.add_argument('--outfile', required=False, - help='Location file will be written, directory included. Required if preference file not included') - inputs.add_argument('--ticketsfile', required=False, - help='File that contains ticketing information, directory included. 
Required if preference file not included') - inputs.add_argument('--htmldir', required=False, - help='Location to write the final HTML QA report to.') - inputs.add_argument('--html_file_path', required=False, - help='Full path and filename of final HTML QA report.') - inputs.add_argument('--metricsource', required=False, - help='Where metrics should be found - "IRIS" or the path the to ISPAQ-generated sqlite database file.') - inputs.add_argument('--metadatasource', required=False, - help='Location to find metadata - "IRIS" or the path to the XML file') - inputs.add_argument('--metrics_file', required=False, - help='Full path to file containing list of MUSTANG metrics') - inputs.add_argument('--metadata_file', required=False, - help='Full path to file containing list of IRIS station service metadata fields') - inputs.add_argument('--thresholds_file', required=False, - help='Full path to the file containing threshold definitions and groupings') + inputs = parser.add_argument_group("arguments for running metrics") + inputs.add_argument( + "-P", + "--preference_file", + required=False, + help="path to preference file, default=./preference_files/default.txt", + ) + inputs.add_argument( + "-T", + "--thresholds", + required=False, + help="thresholds to be run, as defined in preference file or a list of threshold names, defaults to all", + ) + inputs.add_argument( + "-N", + "--network", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-S", + "--stations", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-C", + "--channels", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "-L", + "--locations", + required=False, + help="Required unless specified in preference file", + ) + inputs.add_argument( + "--start", + required=True, + help="start date in YYYY-MM-DD format, time defaults to 00:00:00, required", + ) + inputs.add_argument( + "--end", + required=True, + help="end date in YYYY-MM-DD format, timedefaults to 00:00:00, required", + ) + inputs.add_argument( + "--outfile", + required=False, + help="Location file will be written, directory included. Required if preference file not included", + ) + inputs.add_argument( + "--ticketsfile", + required=False, + help="File that contains ticketing information, directory included. 
Required if preference file not included", + ) + inputs.add_argument( + "--htmldir", + required=False, + help="Location to write the final HTML QA report to.", + ) + inputs.add_argument( + "--html_file_path", + required=False, + help="Full path and filename of final HTML QA report.", + ) + inputs.add_argument( + "--metricsource", + required=False, + help='Where metrics should be found - "EarthScope" or the path the to ISPAQ-generated sqlite database file.', + ) + inputs.add_argument( + "--metadatasource", + required=False, + help='Location to find metadata - "EarthScope" or the path to the XML file', + ) + inputs.add_argument( + "--metrics_file", + required=False, + help="Full path to file containing list of MUSTANG metrics", + ) + inputs.add_argument( + "--metadata_file", + required=False, + help="Full path to file containing list of EarthScope station service metadata fields", + ) + inputs.add_argument( + "--thresholds_file", + required=False, + help="Full path to the file containing threshold definitions and groupings", + ) args = parser.parse_args(sys.argv[1:]) -# try: -# args.month = args.start.split('-')[0] + args.start.split('-')[1] -# except: -# args.month = '' - return args + return args + # ============================# # UTILITIES FOR GENERATING DATAFRAMES -def getMetrics(nets, stas, locs, chans, start, end, metric, metricSource, failedMetrics): + +def return_no_metrics(response_code: int, metric: str, failedMetrics: list): + print("Unable to get metrics for %s - %s" % (metric, response_code)) + if not metric in failedMetrics: + failedMetrics.append(metric) + DF = pd.DataFrame() + return DF, failedMetrics + + +def getMetrics( + nets, stas, locs, chans, start, end, metric, metricSource, failedMetrics +): # This will create a temporary dataframe with the columns: # $metric target start end # Where $metric is the current metric, and within it are the # values for that metric - - - if metricSource.upper() == 'IRIS': - - URL = "http://service.iris.edu/mustang/measurements/1/query?metric=" + metric + \ - "&net=" + nets +"&sta=" + stas + "&loc=" + locs + "&cha=" + chans + \ - "&format=text&timewindow="+start +"," + end +"&nodata=404" + + if metricSource.upper() == "EARTHSCOPE": + + URL = ( + "http://service.earthscope.org/mustang/measurements/1/query?metric=" + + metric + + "&net=" + + nets + + "&sta=" + + stas + + "&loc=" + + locs + + "&cha=" + + chans + + "&format=text&timewindow=" + + start + + "," + + end + + "&nodata=404" + ) print(URL) - + try: - response = requests.get(URL) - DF = pd.read_csv(StringIO(response.text), header=1) + response = requests.get(URL) + if response.status_code != 200: + DF, failedMetrics = return_no_metrics( + response_code=response.status_code, + metric=metric, + failedMetrics=failedMetrics, + ) + else: + DF = pd.read_csv(StringIO(response.text), header=1) - if not 'transfer_function' in metric: - DF.rename(columns = {'value': metric}, inplace=True) - DF[metric] = DF[metric].map(float) + if not "transfer_function" in metric: + DF.rename(columns={"value": metric}, inplace=True) + DF[metric] = DF[metric].map(float) - DF.drop('lddate', axis=1, inplace=True) + DF.drop("lddate", axis=1, inplace=True) except Exception as e: print("Unable to get metrics for %s - %s" % (metric, e)) if not metric in failedMetrics: failedMetrics.append(metric) DF = pd.DataFrame() - - + else: # then it must be a local database import sqlite3 - # ISPAQ does not calculate dc_offset: + + # ISPAQ does not calculate dc_offset: if metric == "dc_offset": print("ISPAQ does not run 
dc_offset, skipping.") DF = pd.DataFrame() return DF, failedMetrics - + # ISPAQ is based on targets, not individual net/sta/loc/chan so need to put them all together targetList = [] - for network in nets.split(','): - network = network.replace("?", "_").replace("*","%") - for station in stas.split(','): - station = station.replace("?", "_").replace("*","%") - for location in locs.split(','): - location = location.replace("?", "_").replace("*","%") - for channel in chans.split(','): - channel = channel.replace("?", "_").replace("*","%") - - # Include a wildcard for the quality code at this point -# thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) - targetList.append(network + '.' + station + '.%' + location + '%.%' + channel + '%.%') -# targetList.append("%s.%s.%s.%s._" % (network, station, location, channel)) - + for network in nets.split(","): + network = network.replace("?", "_").replace("*", "%") + for station in stas.split(","): + station = station.replace("?", "_").replace("*", "%") + for location in locs.split(","): + location = location.replace("?", "_").replace("*", "%") + for channel in chans.split(","): + channel = channel.replace("?", "_").replace("*", "%") + + # Include a wildcard for the quality code at this point + targetList.append( + network + + "." + + station + + ".%" + + location + + "%.%" + + channel + + "%.%" + ) + targets = "' or target like '".join(targetList) - - SQLcommand = "SELECT * FROM " + metric + \ - " WHERE start >= '" + start + "' " \ - "and start < '" + end + "' " \ - "and (target like '" + targets + "');" - + + SQLcommand = ( + "SELECT * FROM " + metric + " WHERE start >= '" + start + "' " + "and start < '" + end + "' " + "and (target like '" + targets + "');" + ) try: conn = sqlite3.connect(metricSource) - + DF = pd.read_sql_query(SQLcommand, conn) - if (not metric == 'transfer_function') and (not metric == 'orientation_check'): - DF.rename(columns = {'value': metric}, inplace=True) + if (not metric == "transfer_function") and ( + not metric == "orientation_check" + ): + DF.rename(columns={"value": metric}, inplace=True) DF[metric] = DF[metric].map(float) - DF.drop('lddate', axis=1, inplace=True) + DF.drop("lddate", axis=1, inplace=True) except: print("Error connecting to %s %s" % (metricSource, metric)) @@ -218,431 +317,569 @@ def getMetrics(nets, stas, locs, chans, start, end, metric, metricSource, failed finally: if conn: conn.close() - return DF, failedMetrics - - - + + def mergeMetricDF(nets, stas, locs, chans, start, end, metrics, metricSource): # This will create a dataframe that joins on any matching - # Target, Start, End pairs. If one or the other dataframes has a + # Target, Start, End pairs. 
If one or the other dataframes has a # target, start, end that isn't in the other, then tack it on # and fill the missing slots with NaN - + DF = pd.DataFrame() emptyMets = [] failedMetrics = list() - skipTransferFunction = False; skipOrientationCheck = False # since multiple 'metrics' can have these metrics, only do it once + skipTransferFunction = False + skipOrientationCheck = ( + False # since multiple 'metrics' can have these metrics, only do it once + ) for metric in metrics: metric_part = metric.split("::")[0] - - if metric_part == 'transfer_function': + + if metric_part == "transfer_function": if skipTransferFunction: continue else: skipTransferFunction = True - - if metric_part == 'orientation_check': + + if metric_part == "orientation_check": if skipOrientationCheck: continue else: skipOrientationCheck = True - - tempDF, failedMetrics = getMetrics(nets, stas, locs, chans, start, end, metric_part, metricSource, failedMetrics) + tempDF, failedMetrics = getMetrics( + nets, + stas, + locs, + chans, + start, + end, + metric_part, + metricSource, + failedMetrics, + ) if tempDF.empty: # add it to a list for later emptyMets.append(metric_part) if len(tempDF.columns) == 0: ## This is TRULY empty - there wasn't a table for the metric in the database continue - - + if DF.empty: DF = tempDF.copy() else: try: - DF = pd.merge(DF, tempDF, how='outer', left_on=['target', 'start', 'end'], right_on=['target', 'start', 'end']) + DF = pd.merge( + DF, + tempDF, + how="outer", + left_on=["target", "start", "end"], + right_on=["target", "start", "end"], + ) except: - print("ERROR: Something went wrong with the metric. You should try again.") + print( + "ERROR: Something went wrong with the metric. You should try again." + ) quit() - -# # If any metrics didn't return any results, add them to the DF as NaNs -# for metric_part in emptyMets: -# if not DF.empty: -# DF[metric_part] = np.nan - - + # Add a channel column so that it's easier to divide the thresholds if DF.empty: return DF, failedMetrics else: - DF['network'] = pd.DataFrame([ x.split('.')[0] for x in DF['target'].tolist() ]) - DF['station'] = pd.DataFrame([ x.split('.')[1] for x in DF['target'].tolist() ]) - DF['location'] = pd.DataFrame([ x.split('.')[2] for x in DF['target'].tolist() ]) - DF['channel'] = pd.DataFrame([ x.split('.')[3] for x in DF['target'].tolist() ]) - -# print(DF) + DF["network"] = pd.DataFrame([x.split(".")[0] for x in DF["target"].tolist()]) + DF["station"] = pd.DataFrame([x.split(".")[1] for x in DF["target"].tolist()]) + DF["location"] = pd.DataFrame([x.split(".")[2] for x in DF["target"].tolist()]) + DF["channel"] = pd.DataFrame([x.split(".")[3] for x in DF["target"].tolist()]) + return DF, failedMetrics - - - -def parse_XML(xml_file, df_cols): - """Parse the input XML file and store the result in a pandas - DataFrame with the given columns. - - The first element of df_cols is supposed to be the identifier - variable, which is an attribute of each node element in the - XML data; other features will be parsed from the text content - of each sub-element. + + +def parse_XML(xml_file, df_cols): + """Parse the input XML file and store the result in a pandas + DataFrame with the given columns. + + The first element of df_cols is supposed to be the identifier + variable, which is an attribute of each node element in the + XML data; other features will be parsed from the text content + of each sub-element. 
""" - + xtree = et.parse(xml_file) xroot = xtree.getroot() rows = [] - -# def get_namespace(element): -# m = re.match('\{.*\}', element.tag) -# return m.group(0) if m else '' -# -# namespace = get_namespace(xtree.getroot()) -# print(namespace) + # def get_namespace(element): + # m = re.match('\{.*\}', element.tag) + # return m.group(0) if m else '' + # + # namespace = get_namespace(xtree.getroot()) + # print(namespace) - for rootNode in xroot: + for rootNode in xroot: if "}" in rootNode.tag: - field = rootNode.tag.split('}')[1] + field = rootNode.tag.split("}")[1] else: field = rootNode.tag - - if field == 'Network': - thisNetwork = rootNode.attrib['code'] -# print(thisNetwork) - + + if field == "Network": + thisNetwork = rootNode.attrib["code"] + # print(thisNetwork) + for netNode in rootNode: if "}" in netNode.tag: - field = netNode.tag.split('}')[1] + field = netNode.tag.split("}")[1] else: field = netNode.tag - - if field == 'Station': - thisStation = netNode.attrib['code'] -# print(thisStation) - + + if field == "Station": + thisStation = netNode.attrib["code"] + # print(thisStation) + for staNode in netNode: if "}" in staNode.tag: - field = staNode.tag.split('}')[1] + field = staNode.tag.split("}")[1] else: field = staNode.tag - - if field == 'Channel': - thisChannel = staNode.attrib['code'] -# print(thisChannel) - thisLocation = staNode.attrib['locationCode'] -# print(thisLocation) - thisStart = staNode.attrib['startDate'] -# print(thisStart) + + if field == "Channel": + thisChannel = staNode.attrib["code"] + # print(thisChannel) + thisLocation = staNode.attrib["locationCode"] + # print(thisLocation) + thisStart = staNode.attrib["startDate"] + # print(thisStart) try: - thisEnd = staNode.attrib['endDate'] + thisEnd = staNode.attrib["endDate"] except: thisEnd = np.nan -# thisEnd = '' -# print(thisEnd) - - + # thisEnd = '' + # print(thisEnd) + for fieldNode in staNode: if "}" in fieldNode.tag: - field = fieldNode.tag.split('}')[1] + field = fieldNode.tag.split("}")[1] else: field = fieldNode.tag - + if field in df_cols: - if field == 'Latitude': + if field == "Latitude": thisLatitude = fieldNode.text -# print(thisLatitude) - if field == 'Longitude': + if field == "Longitude": thisLongitude = fieldNode.text -# print(thisLongitude) - if field == 'Elevation': + if field == "Elevation": thisElevation = fieldNode.text -# print(thisElevation) - if field == 'Depth': + if field == "Depth": thisDepth = fieldNode.text -# print(thisDepth) - if field == 'Azimuth': + if field == "Azimuth": thisAzimuth = fieldNode.text -# print(thisAzimuth) - if field == 'Dip': + if field == "Dip": thisDip = fieldNode.text -# print(thisDip) - if field == 'SampleRate': + if field == "SampleRate": thisSampleRate = fieldNode.text -# print(thisSampleRate) - + if field == "Response": for subFieldNode in fieldNode: if "}" in subFieldNode.tag: - field = subFieldNode.tag.split('}')[1] + field = subFieldNode.tag.split("}")[1] else: field = subFieldNode.tag - - if field == 'InstrumentSensitivity': - - + if field == "InstrumentSensitivity": + for subFieldNode2 in subFieldNode: if "}" in subFieldNode2.tag: - field = subFieldNode2.tag.split('}')[1] + field = subFieldNode2.tag.split( + "}" + )[1] else: field = subFieldNode2.tag - if field == 'Value': + if field == "Value": thisScale = subFieldNode2.text -# print(thisScale) - elif field == 'Frequency': + # print(thisScale) + elif field == "Frequency": thisScaleFreq = subFieldNode2.text -# print(thisScaleFreq) - elif field == 'InputUnits': + # print(thisScaleFreq) + elif field == 
"InputUnits": for unitNode in subFieldNode2: if "}" in unitNode.tag: - field = unitNode.tag.split('}')[1] + field = unitNode.tag.split( + "}" + )[1] else: field = unitNode.tag - - if field == 'Name': - thisScaleUnits = unitNode.text -# print(thisScaleUnits) - rows.append([thisNetwork, thisStation, thisLocation, thisChannel, thisLatitude, thisLongitude,thisElevation, thisDepth,thisAzimuth, thisDip, thisScale, thisScaleFreq, thisScaleUnits, thisSampleRate, thisStart, thisEnd]) + + if field == "Name": + thisScaleUnits = ( + unitNode.text + ) + rows.append( + [ + thisNetwork, + thisStation, + thisLocation, + thisChannel, + thisLatitude, + thisLongitude, + thisElevation, + thisDepth, + thisAzimuth, + thisDip, + thisScale, + thisScaleFreq, + thisScaleUnits, + thisSampleRate, + thisStart, + thisEnd, + ] + ) out_df = pd.DataFrame(rows, columns=df_cols) -# out_df['EndTime']= pd.to_datetime(out_df['EndTime']) -# out_df['StartTime']= pd.to_datetime(out_df['StartTime']) - for column in ['Latitude','Longitude','Elevation','Depth','Azimuth','Dip', 'Scale','ScaleFreq','SampleRate']: - out_df[column] = out_df[column].astype(float) + for column in [ + "Latitude", + "Longitude", + "Elevation", + "Depth", + "Azimuth", + "Dip", + "Scale", + "ScaleFreq", + "SampleRate", + ]: + out_df[column] = out_df[column].astype(float) return out_df - -def getMetadata(nets, stas, locs, chans, start, end, metadataSource): - # This goes to the IRIS station service and pulls back the metadata - # about all specified SNCLs - for all time. - - # TODO: change it so that it only looks for current metadata epochs? - if metadataSource.upper() == 'IRIS': +def getMetadata(nets, stas, locs, chans, start, end, metadataSource): + # This goes to the EarthScope station service and pulls back the metadata + # about all specified SNCLs - for all time. + df_cols = [ + "Network", + "Station", + "Location", + "Channel", + "Latitude", + "Longitude", + "Elevation", + "Depth", + "Azimuth", + "Dip", + "Scale", + "ScaleFreq", + "ScaleUnits", + "SampleRate", + "StartTime", + "EndTime", + ] - URL = 'http://service.iris.edu/fdsnws/station/1/query?net=' + nets + \ - '&sta=' + stas + '&loc=' + locs + '&cha=' + chans + '&starttime=' + start + \ - '&endtime=' + end + '&level=channel&format=text&includecomments=true&nodata=404' + # TODO: change it so that it only looks for current metadata epochs? 
+ if metadataSource.upper() == "EARTHSCOPE": + URL = ( + "http://service.earthscope.org/fdsnws/station/1/query?net=" + + nets + + "&sta=" + + stas + + "&loc=" + + locs + + "&cha=" + + chans + + "&starttime=" + + start + + "&endtime=" + + end + + "&level=channel&format=text&includecomments=true&nodata=404" + ) print(URL) - + try: - DF = pd.read_csv(URL, header=0, delimiter='|', dtype={' Location ': str,' Station ': str}) - + DF = pd.read_csv( + URL, + header=0, + delimiter="|", + dtype={" Location ": str, " Station ": str}, + ) + # Since station service returns headers with whitespace around them DF.rename(columns=lambda x: x.strip(), inplace=True) # And with a '#' in front of Network - DF.rename(columns = {'#Network': 'Network'}, inplace=True) - DF['Location'] = DF.Location.replace(np.nan, '', regex=True) - DF['Target'] = DF[['Network', 'Station', 'Location','Channel']].apply(lambda x: '.'.join(x.map(str)), axis=1) + DF.rename(columns={"#Network": "Network"}, inplace=True) + DF["Location"] = DF.Location.replace(np.nan, "", regex=True) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) DF.columns = DF.columns.str.lower() - + except Exception as e: - print("Unable to retrieve metadata from IRIS Station Service - %s" % e) - DF = pd.DataFrame() + print( + "Unable to retrieve metadata from EarthScope Station Service - %s" % e + ) + DF = pd.DataFrame(columns=df_cols) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) + DF.columns = DF.columns.str.lower() else: # Then use local response-level XML files that were used in ISPAQ if metadataSource is None: print("No local metadata XML file provided. Skipping.") return None else: - if metadataSource.endswith('.txt'): + if metadataSource.endswith(".txt"): print("Will parse text file using %s" % metadataSource) - DF = pd.read_csv(metadataSource, header=0, delimiter='|', dtype={' Location ': str,' Station ': str}) - + DF = pd.read_csv( + metadataSource, + header=0, + delimiter="|", + dtype={" Location ": str, " Station ": str}, + ) + # Since station service returns headers with whitespace around them DF.rename(columns=lambda x: x.strip(), inplace=True) # And with a '#' in front of Network - DF.rename(columns = {'#Network': 'Network'}, inplace=True) - + DF.rename(columns={"#Network": "Network"}, inplace=True) + else: print("Will parse XML using %s" % metadataSource) - df_cols = ['Network','Station','Location','Channel','Latitude','Longitude','Elevation','Depth','Azimuth','Dip', 'Scale','ScaleFreq','ScaleUnits','SampleRate','StartTime','EndTime'] DF = parse_XML(metadataSource, df_cols) - - DF['Location'] = DF.Location.replace(np.nan, '', regex=True) - DF['Target'] = DF[['Network', 'Station', 'Location','Channel']].apply(lambda x: '.'.join(x.map(str)), axis=1) + + DF["Location"] = DF.Location.replace(np.nan, "", regex=True) + DF["Target"] = DF[["Network", "Station", "Location", "Channel"]].apply( + lambda x: ".".join(x.map(str)), axis=1 + ) DF.columns = DF.columns.str.lower() return DF + # ============================# # UTILITIES FOR WRITING ISSUE FILES + def sortIssueFile(issueDF, threshold, itype): # Here we take the list of issues and make it more compact # Combining sequential days into a single line - #print " -> Combining days to make more compact" - - - printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Value', 'Status','Notes']) - if itype == "average" or itype == 'median': + # print " -> 
Combining days to make more compact" + + printDF = pd.DataFrame( + columns=[ + "#Threshold", + "Target", + "Start", + "End", + "Ndays", + "Value", + "Status", + "Notes", + ] + ) + if itype == "average" or itype == "median": for ind, row in issueDF.iterrows(): - nday = (row['end'] - row['start']).days - printDF.loc[len(printDF)] = [threshold, row['target'], row['start'], row['end'], nday, row['value'], 'TODO', ''] + nday = (row["end"] - row["start"]).days + printDF.loc[len(printDF)] = [ + threshold, + row["target"], + row["start"], + row["end"], + nday, + row["value"], + "TODO", + "", + ] else: - -# printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Status','Notes']) - + for sncl in sorted(issueDF.target.unique()): - tmpDF = issueDF[issueDF['target']==sncl].sort_values(['start']) - start = '' - end = '' - nday=0 + tmpDF = issueDF[issueDF["target"] == sncl].sort_values(["start"]) + start = "" + end = "" + nday = 0 for ind in tmpDF.index: - tmpStart = tmpDF['start'].loc[ind] - tmpEnd = tmpDF['end'].loc[ind] + tmpStart = tmpDF["start"].loc[ind] + tmpEnd = tmpDF["end"].loc[ind] if tmpEnd.time() == datetime.time(0, 0): tmpEnd = tmpEnd - datetime.timedelta(seconds=1) - + if start == "": start = tmpStart - + if end == "": end = tmpEnd - - - + else: if end == tmpStart - datetime.timedelta(seconds=1): end = tmpEnd nday += 1 - + else: nday += 1 - printDF.loc[len(printDF)] = [threshold,sncl, start.date(), end.date(), nday,'', 'TODO', ''] + printDF.loc[len(printDF)] = [ + threshold, + sncl, + start.date(), + end.date(), + nday, + "", + "TODO", + "", + ] nday = 0 - + start = tmpStart end = tmpEnd # When done with that sncl, need to add to list nday += 1 - printDF.loc[len(printDF)] = [threshold,sncl, start.date(), end.date(), nday, '', 'TODO', ''] + printDF.loc[len(printDF)] = [ + threshold, + sncl, + start.date(), + end.date(), + nday, + "", + "TODO", + "", + ] return printDF - + def sortMetaFile(issueDF, threshold): # Here we take the list of issues and make it more compact # Combining sequential days into a single line - #print " -> Combining days to make more compact" - issueDF['target'] = issueDF['network'] +'.'+ issueDF['station'] +'.'+ issueDF['location'].map(str) +'.'+ issueDF['channel'] - printDF = pd.DataFrame(columns=['#Threshold','Target','Start','End','Ndays','Value', 'Status','Notes']) + # print " -> Combining days to make more compact" + issueDF["target"] = ( + issueDF["network"] + + "." + + issueDF["station"] + + "." + + issueDF["location"].map(str) + + "." 
+ + issueDF["channel"] + ) + printDF = pd.DataFrame( + columns=[ + "#Threshold", + "Target", + "Start", + "End", + "Ndays", + "Value", + "Status", + "Notes", + ] + ) - if len(issueDF) > 0: for ind, row in issueDF.iterrows(): - start = datetime.datetime.strptime(row['starttime'], '%Y-%m-%dT%H:%M:%S.%f').date() - if pd.isnull(row['endtime']): + start = datetime.datetime.strptime( + row["starttime"], "%Y-%m-%dT%H:%M:%S.%f" + ).date() + if pd.isnull(row["endtime"]): end = datetime.datetime.now().date() else: - end = datetime.datetime.strptime(row['endtime'], '%Y-%m-%dT%H:%M:%S.%f').date() - - Ndays = len(pd.period_range(start, end, freq='D')) - target = row['target'].strip() - - - printDF.loc[len(printDF)] = [threshold,target, start, end, Ndays,'', 'TODO', ''] + end = datetime.datetime.strptime( + row["endtime"], "%Y-%m-%dT%H:%M:%S.%f" + ).date() + + Ndays = len(pd.period_range(start, end, freq="D")) + target = row["target"].strip() + + printDF.loc[len(printDF)] = [ + threshold, + target, + start, + end, + Ndays, + "", + "TODO", + "", + ] return printDF - def writeToOutfile(issueDF, filename): - - with open(filename, 'a') as f: - issueDF.to_csv(f, sep='|', index=False, header=False) + + with open(filename, "a") as f: + issueDF.to_csv(f, sep="|", index=False, header=False) f.close() - + def expandCodes(s): - + codes = list() - codeList = s.split(',') + codeList = s.split(",") for code in codeList: - codeSplit = code.split('[') + codeSplit = code.split("[") lcodeSplit = len(codeSplit) - + if lcodeSplit == 1: codes.append(codeSplit[0].strip()) - + if lcodeSplit == 2: first = codeSplit[0].strip() second = codeSplit[1].strip() - + if first == "": - first = '%s]' % second.split(']')[0] - second = second.split(']')[1] - - if first.endswith(']'): - for f in first.strip(']'): - if second.endswith(']'): - for s in second.strip(']'): - codes.append('%s%s' % (f,s)) + first = "%s]" % second.split("]")[0] + second = second.split("]")[1] + + if first.endswith("]"): + for f in first.strip("]"): + if second.endswith("]"): + for s in second.strip("]"): + codes.append("%s%s" % (f, s)) else: - codes.append('%s%s' % (f,second)) + codes.append("%s%s" % (f, second)) else: - if second.endswith(']'): - for s in second.strip(']'): - codes.append('%s%s' % (first,s)) + if second.endswith("]"): + for s in second.strip("]"): + codes.append("%s%s" % (first, s)) else: - codes.append('%s%s' % (first,second)) + codes.append("%s%s" % (first, second)) - - - if lcodeSplit == 3: first = codeSplit[0].strip() second = codeSplit[1].strip() third = codeSplit[2].strip() - + if first == "": - first = '%s]' % second.split(']')[0] - second = second.split(']')[1] - - if first.endswith(']'): - for f in first.strip(']'): - if second.endswith(']'): - for s in second.strip(']'): - if third.endswith(']'): + first = "%s]" % second.split("]")[0] + second = second.split("]")[1] + + if first.endswith("]"): + for f in first.strip("]"): + if second.endswith("]"): + for s in second.strip("]"): + if third.endswith("]"): for t in third: - codes.append('%s%s%s' % (f,s,t)) + codes.append("%s%s%s" % (f, s, t)) else: - codes.append('%s%s%s' % (f,s,third)) + codes.append("%s%s%s" % (f, s, third)) else: - if third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (f,second,t)) + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (f, second, t)) else: - codes.append('%s%s%s' % (f,second,third)) + codes.append("%s%s%s" % (f, second, third)) else: - if second.endswith(']'): - for s in second.strip(']'): - if 
third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (first,s,t)) + if second.endswith("]"): + for s in second.strip("]"): + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (first, s, t)) else: - codes.append('%s%s%s' % (first,s,third)) + codes.append("%s%s%s" % (first, s, third)) else: - if third.endswith(']'): - for t in third.strip(']'): - codes.append('%s%s%s' % (first,second,t)) + if third.endswith("]"): + for t in third.strip("]"): + codes.append("%s%s%s" % (first, second, t)) else: - codes.append('%s%s%s' % (first,second,third)) - - codes = ",%s," % (','.join(codes)) - return codes \ No newline at end of file + codes.append("%s%s%s" % (first, second, third)) + + codes = ",%s," % (",".join(codes)) + return codes diff --git a/thresholds.py b/thresholds.py index 31fff39..eb1a4e1 100644 --- a/thresholds.py +++ b/thresholds.py @@ -21,37 +21,41 @@ """ import pandas as pd + +pd.set_option("future.no_silent_downcasting", True) import reportUtils -import numpy as np import datetime import os -from matplotlib.dates import epoch2num def load_thresholdDicts(thresholdFile): - -# FIRST, Read in the file and genrate two Dictionaries -# One will be the thresholdDict, which is used when initially grabbing metrics from webservices -# The other will provide defitinions of the thresholds + + # FIRST, Read in the file and genrate two Dictionaries + # One will be the thresholdDict, which is used when initially grabbing metrics from webservices + # The other will provide defitinions of the thresholds thresholdDefDict = {} thresholdDict = {} - - + with open(thresholdFile) as f: - local_dict = locals() - exec(compile(f.read(), thresholdFile, "exec"),globals(), local_dict) - + local_dict = locals() + exec(compile(f.read(), thresholdFile, "exec"), globals(), local_dict) - return local_dict['thresholdsDict'], local_dict['thresholdsMetricsDict'], local_dict['instrumentGroupsDict'] + return ( + local_dict["thresholdsDict"], + local_dict["thresholdsMetricsDict"], + local_dict["instrumentGroupsDict"], + ) def get_threshold_metrics(thresholds, thresholdFile): metrics = list() failedThresholds = list() - - thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts(thresholdFile) - + + thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts( + thresholdFile + ) + for threshold in thresholds: try: for metric in thresholdMetDict[threshold]: @@ -59,188 +63,244 @@ def get_threshold_metrics(thresholds, thresholdFile): except: if threshold not in failedThresholds: failedThresholds.append(threshold) - print("WARNING: Unable to understand threshold %s: the threshold has likely been deleted from the Edit Thresholds form, but not removed from this Preference File" % threshold) - + print( + "WARNING: Unable to understand threshold %s: the threshold has likely been deleted from the Edit Thresholds form, but not removed from this Preference File" + % threshold + ) + metrics = list(set(metrics)) return metrics, failedThresholds def load_metric_and_metadata(): metrics_file = "./MUSTANG_metrics.txt" - metadata_file = "./IRIS_metadata.txt" - + metadata_file = "./EarthScope_metadata.txt" + try: - with open(metrics_file,'r') as f: + with open(metrics_file, "r") as f: metricList = f.read().splitlines() except Exception as e: print("Warning: %s" % e) metricList = list() - + try: - with open(metadata_file,'r') as f: + with open(metadata_file, "r") as f: metadataList = f.read().splitlines() except Exception as e: print("Warning: %s" % e) 
metadataList = list() - + return metricList, metadataList - -def do_threshold(threshold, thresholdFile, metricDF, metaDF, outfile, instruments, specified_start, specified_end, hasMetrics, chanTypes): + +def do_threshold( + threshold, + thresholdFile, + metricDF, + metaDF, + outfile, + instruments, + specified_start, + specified_end, + hasMetrics, + chanTypes, +): print("Running %s" % threshold) - thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts(thresholdFile) + thresholdDefDict, thresholdMetDict, instrumentGroupsDict = load_thresholdDicts( + thresholdFile + ) metricList, metadataList = load_metric_and_metadata() -# doRatio = 0 -# doAverage = 0 - + pd.options.mode.chained_assignment = None def get_channel_lists(CH1, CH2): - ch1 = '' - ch2 = '' - if not CH1 == '': + ch1 = "" + ch2 = "" + if not CH1 == "": ch1 = chanTypes[CH1] - if not CH2 == '': + if not CH2 == "": ch2 = chanTypes[CH2] return ch1, ch2 - - def do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2): - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl', 'new_target'] - metricsInDF = [x for x in dfToUse.columns if x not in columnsToNotChange] - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) # use snl instead of station to do merging, in case multiple location codes + + def do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ): + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "new_target", + ] + metricsInDF = [x for x in dfToUse.columns if x not in columnsToNotChange] + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) # use snl instead of station to do merging, in case multiple location codes #### CASES WITH AVG ### - if chType1 == '' and chType2 == 'avg': + if chType1 == "" and chType2 == "avg": # CH2 must be H, CH1 can be V or H for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType1}, inplace = True) - - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch2)] - -# horzAvg = tmpDF.groupby(['station','start']).mean() - horzAvg = tmpDF.groupby(['snl','start'],as_index=False).mean().reset_index() + dfToUse.rename(columns={col: col + "_" + chType1}, inplace=True) + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch2)] + + numeric_cols = tmpDF.select_dtypes(include="number").columns + horzAvg = ( + tmpDF.groupby(["snl", "start"], as_index=False)[numeric_cols] + .mean() + .reset_index() + ) for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs2: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + chType2}, inplace = True) + horzAvg.rename(columns={col: col + chType2}, inplace=True) -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) - dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) - -# if doAbs1: -# for col in dfToUse.columns[dfToUse.columns.str.endswith("_%s" % chType1)]: -# dfToUse[col] = dfToUse[col].abs() + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch2ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch2]) -# ch2Channels = 
unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s/[%s]' % (splitTarget[3], ch2ThisSNL) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch2ThisSNL = "".join( + sorted( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch2 + ] + ) + ) + newChannel = "%s/[%s]" % (splitTarget[3], ch2ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) - newTargets.append(newTarget) - dfToUse['new_target'] = newTargets + newTarget = ".".join(splitTarget) + newTargets.append(newTarget) + dfToUse["new_target"] = newTargets - if chType1 == 'avg' and chType2 == '': + if chType1 == "avg" and chType2 == "": # CH1 must be H, CH2 can be H or V for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType2}, inplace = True) - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch1)] - - horzAvg = tmpDF.groupby(['snl','start']).mean().reset_index() -# horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() + dfToUse.rename(columns={col: col + "_" + chType2}, inplace=True) + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] + + numeric_cols = tmpDF.select_dtypes(include="number").columns + horzAvg = tmpDF.groupby(["snl", "start"])[numeric_cols].reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs1: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + chType1}, inplace = True) - -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) - dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) - + horzAvg.rename(columns={col: col + chType1}, inplace=True) + + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) + newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch1ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch1]) -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s[%s]/%s' % (splitTarget[3][0:2], ch1ThisSNL, splitTarget[3][-1]) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch1ThisSNL = "".join( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch1 + ] + ) + newChannel = "%s[%s]/%s" % ( + splitTarget[3][0:2], + ch1ThisSNL, + splitTarget[3][-1], + ) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) - newTargets.append(newTarget) - dfToUse['new_target'] = newTargets + newTarget = ".".join(splitTarget) + newTargets.append(newTarget) + dfToUse["new_target"] = newTargets - if chType1 == 'avg' and chType2 == 'avg': + if chType1 == "avg" and chType2 == "avg": # This case can only happen if we are comparing two different metrics # Create dataframe average of horizontals for metric 1 - - tmpDF = dfToUse[dfToUse['channel'].str.endswith(ch1)] - -# horzAvg = tmpDF.groupby(['station','start']).mean().reset_index() - horzAvg = tmpDF.groupby(['snl','start']).mean().reset_index() + + tmpDF = dfToUse[dfToUse["channel"].str.endswith(ch1)] + + numeric_cols = tmpDF.select_dtypes(include="number").columns + horzAvg = tmpDF.groupby(["snl", "start"])[numeric_cols].mean().reset_index() for col in horzAvg.columns: if col in columnsToNotChange: continue -# if doAbs2: -# horzAvg[col] = horzAvg[col].abs() - horzAvg.rename(columns={col : col + '_' + chType2}, inplace = True) - - 
dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['snl','start']) -# dfToUse = pd.merge(dfToUse, horzAvg, how='inner', on=['station','start']) + horzAvg.rename(columns={col: col + "_" + chType2}, inplace=True) + + dfToUse = pd.merge(dfToUse, horzAvg, how="inner", on=["snl", "start"]) newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - thisSNL = row['snl'] - ch1ThisSNL = ''.join([i for i in list(set(dfToUse[dfToUse['snl'] == thisSNL].channel.str.strip().str[-1])) if i in ch1]) -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s[%s]' % (splitTarget[3][0:2], ch1ThisSNL) + splitTarget = row["target"].split(".") + thisSNL = row["snl"] + ch1ThisSNL = "".join( + [ + i + for i in list( + set( + dfToUse[dfToUse["snl"] == thisSNL] + .channel.str.strip() + .str[-1] + ) + ) + if i in ch1 + ] + ) + newChannel = "%s[%s]" % (splitTarget[3][0:2], ch1ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) + + newTarget = ".".join(splitTarget) newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - - + dfToUse["new_target"] = newTargets - - #### CASES WITH VS #### - if (chType1 == '' and chType2 == 'vs') or (chType1 == 'vs' and chType2 == ''): + #### CASES WITH VS #### + if (chType1 == "" and chType2 == "vs") or (chType1 == "vs" and chType2 == ""): print("INFO: comparing 'all' with a 'vs' - this shouldn't happen") - - if (chType1 == 'avg' and chType2 == 'vs') or (chType1 == 'vs' and chType2 == 'avg'): + + if (chType1 == "avg" and chType2 == "vs") or ( + chType1 == "vs" and chType2 == "avg" + ): print("INFO: comparing 'avg' with 'vs' - this shouldn't happen") - - if chType1 == 'vs' and chType2 == 'vs': + + if chType1 == "vs" and chType2 == "vs": # CH1 and CH2 must be H - dfToUse = dfToUse[~dfToUse['channel'].str.endswith(chanTypes['V'])] - + dfToUse = dfToUse[~dfToUse["channel"].str.endswith(chanTypes["V"])] + for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_' + chType1}, inplace = True) + dfToUse.rename(columns={col: col + "_" + chType1}, inplace=True) # Horizontal vs horizontal: need to copy the value of both horizontals for each NSL, # such that both E/N and N/E can be computed # Since it is H-vs v H-vs, both ch1 and ch2 should be exaclty the same - + # create a column for snl, to use as a join later: - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) dtToStore = dfToUse.copy() colList = list() @@ -248,297 +308,321 @@ def do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, d for tmpChan in ch1: # get all values for each channel, then create a new column with those values, associated with the snl tmpValues = dtToStore[dtToStore.channel.str.endswith(tmpChan)] - tmpValues.drop(['station', 'location','channel','end','target','network'], axis = 1, inplace = True) + tmpValues.drop( + ["station", "location", "channel", "end", "target", "network"], + axis=1, + inplace=True, + ) for col in tmpValues.columns: -# if col in columnsToNotChange or col == 'snl': + # if col in columnsToNotChange or col == 'snl': if col in columnsToNotChange: continue - newcol = col + '_' + tmpChan + newcol = col + "_" + tmpChan if newcol not in colList: colList.append(newcol) - tmpValues.rename(columns={col : newcol}, inplace = True) - for snl in set(tmpValues['snl']): + tmpValues.rename(columns={col: 
newcol}, inplace=True) + for snl in set(tmpValues["snl"]): try: chanDict[snl] = chanDict[snl] + tmpChan except: chanDict[snl] = tmpChan - dfToUse.dropna(subset = ["channel"], inplace=True) - mergedDF = pd.merge(dfToUse[~dfToUse['channel'].str.endswith(tmpChan)], tmpValues, how='outer', on=['snl','start']) - dfToUse = pd.merge(dfToUse, mergedDF, how='outer') - + dfToUse.dropna(subset=["channel"], inplace=True) + mergedDF = pd.merge( + dfToUse[~dfToUse["channel"].str.endswith(tmpChan)], + tmpValues, + how="outer", + on=["snl", "start"], + ) + dfToUse = pd.merge(dfToUse, mergedDF, how="outer") + for metric in metricsInDF: theseCols = [x for x in colList if x.startswith(metric)] - sncl2 = metric + '_sncl2' + sncl2 = metric + "_sncl2" dfToUse[sncl2] = dfToUse[theseCols[0]] - + for col in theseCols: - dfToUse[sncl2] = dfToUse[sncl2].fillna(dfToUse[col]) - dfToUse.drop([col], axis = 1, inplace = True) - - dfToUse.dropna(subset = ["target"], inplace=True) + dfToUse[sncl2] = ( + dfToUse[sncl2].fillna(dfToUse[col]).infer_objects(copy=False) + ) + dfToUse.drop([col], axis=1, inplace=True) + + dfToUse.dropna(subset=["target"], inplace=True) newTargets = list() for idx, row in dfToUse.iterrows(): try: - splitTarget = row['target'].split('.') + splitTarget = row["target"].split(".") except: - newTargets.append(row['target']) + newTargets.append(row["target"]) continue - thisSNL = row['snl'] + thisSNL = row["snl"] thisChan = splitTarget[3][-1] - + try: - ch1ThisSNL = chanDict[thisSNL].replace(thisChan,'') + ch1ThisSNL = chanDict[thisSNL].replace(thisChan, "") except: - print("INFO: unable to process %s - maybe it has H[orizontal] channels not included in the preference file?" % thisSNL) - newTargets.append('') + print( + "INFO: unable to process %s - maybe it has H[orizontal] channels not included in the preference file?" 
+ % thisSNL + ) + newTargets.append("") continue -# ch2Channels = unique(tmpDF['channel'].str.strip().str[-1]) - newChannel = '%s/%s' % (splitTarget[3], ch1ThisSNL) + newChannel = "%s/%s" % (splitTarget[3], ch1ThisSNL) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) - newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - -# mergedDF.update(mergedDF[colList].merge(df2, 'left')) + newTarget = ".".join(splitTarget) + newTargets.append(newTarget) + dfToUse["new_target"] = newTargets - #### CASES WITHOUT VS OR AVG #### - if chType1 == '' and chType2 == '': + #### CASES WITHOUT VS OR AVG #### + if chType1 == "" and chType2 == "": # Can be any combination of H and V (H-V, V-H, H-H, V-V) # CH1 == CH2 is handled directly in the dp_ method, since we already have a dataframe with the two metrics joined on target-day - + #### V vs H, or H vs V #### if CH1 != CH2: - # Can be same or different metrics, either way we need to get the different channels into a single row - + # Can be same or different metrics, either way we need to get the different channels into a single row + for col in dfToUse.columns: if col in columnsToNotChange: continue - dfToUse.rename(columns={col : col + '_'}, inplace = True) - - dfToUse['snl'] = dfToUse['target'].apply(lambda x: os.path.splitext(os.path.splitext(x)[0])[0]) - - dtToStore = dfToUse.copy() # copy all values before subsetting for only ch1, so that all are availble as sncl2 - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] # now there will only be ch1 channels in the main slot - newChanDF = dfToUse[['channel','target','start']] + dfToUse.rename(columns={col: col + "_"}, inplace=True) + + dfToUse["snl"] = dfToUse["target"].apply( + lambda x: os.path.splitext(os.path.splitext(x)[0])[0] + ) + + dtToStore = ( + dfToUse.copy() + ) # copy all values before subsetting for only ch1, so that all are availble as sncl2 + dfToUse = dfToUse[ + dfToUse["channel"].str.endswith(ch1) + ] # now there will only be ch1 channels in the main slot + newChanDF = dfToUse[["channel", "target", "start"]] newChanList = list() - oldChanList = list() - - - - for tmpChanA in dfToUse['channel']: + oldChanList = list() + + for tmpChanA in dfToUse["channel"]: for tmpChanB in ch2: newChanList.append("%s%s" % (tmpChanA[0:2], tmpChanB)) oldChanList.append(tmpChanA) - ncDF = pd.DataFrame(newChanList, columns=['second_channel']) - ncDF['channel'] = oldChanList - -# newChanDF = pd.concat([ncDF,pd.concat([newChanDF]*len(ch2)).set_index(ncDF.index)]).sort_index().ffill() - - newChanDF = pd.merge(newChanDF, ncDF).drop_duplicates().reset_index(drop=True) + ncDF = pd.DataFrame(newChanList, columns=["second_channel"]) + ncDF["channel"] = oldChanList + + newChanDF = ( + pd.merge(newChanDF, ncDF).drop_duplicates().reset_index(drop=True) + ) dfToUse = pd.merge(dfToUse, newChanDF) colList = list() for tmpChan in ch2: # get all values for each channel, then create a new column with those values, associated with the snl tmpValues = dtToStore[dtToStore.channel.str.endswith(tmpChan)] - - tmpValues.drop(['station', 'location','end','network','target'], axis = 1, inplace = True) + + tmpValues.drop( + ["station", "location", "end", "network", "target"], + axis=1, + inplace=True, + ) for col in tmpValues.columns: - if col in columnsToNotChange or col == 'second_channel' or col == 'snl': + if ( + col in columnsToNotChange + or col == "second_channel" + or col == "snl" + ): continue - newcol = col + tmpChan + newcol = col + tmpChan if newcol not in colList: colList.append(newcol) - 
tmpValues.rename(columns={col : newcol}, inplace = True) - tmpValues.rename(columns={'channel' : 'second_channel'}, inplace = True) - mergedDF = pd.merge(dfToUse, tmpValues, on=['snl','start','second_channel']) + tmpValues.rename(columns={col: newcol}, inplace=True) + tmpValues.rename( + columns={"channel": "second_channel"}, inplace=True + ) + mergedDF = pd.merge( + dfToUse, tmpValues, on=["snl", "start", "second_channel"] + ) + + dfToUse = pd.merge(dfToUse, mergedDF, how="outer") - - dfToUse = pd.merge(dfToUse, mergedDF, how='outer') - newTargets = list() for idx, row in dfToUse.iterrows(): - splitTarget = row['target'].split('.') - newChannel = '%s/%s' % (splitTarget[3], row['second_channel'][-1]) + splitTarget = row["target"].split(".") + newChannel = "%s/%s" % (splitTarget[3], row["second_channel"][-1]) splitTarget[3] = newChannel - - newTarget = '.'.join(splitTarget) + + newTarget = ".".join(splitTarget) newTargets.append(newTarget) - dfToUse['new_target'] = newTargets - + dfToUse["new_target"] = newTargets + for metric in metricsInDF: theseCols = [x for x in colList if x.startswith(metric)] - sncl2 = metric + '_sncl2' + sncl2 = metric + "_sncl2" dfToUse[sncl2] = dfToUse[theseCols[0]] - + for col in theseCols: dfToUse[sncl2] = dfToUse[sncl2].fillna(dfToUse[col]) - dfToUse.drop([col], axis = 1, inplace = True) - - - if chType1 == '' and chType2 == 'V': + dfToUse.drop([col], axis=1, inplace=True) + + if chType1 == "" and chType2 == "V": pass - if chType1 == '' and chType2 == 'V': + if chType1 == "" and chType2 == "V": pass - if chType1 == '' and chType2 == 'V': + if chType1 == "" and chType2 == "V": pass - + return dfToUse - + def do_comparison(dfToUse, field1, operator, field2, doAbs1, doAbs2): - - if operator == '>=': + + if operator == ">=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() >= field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() >= field2] elif doAbs2: dfToUse = dfToUse[field1 >= field2.abs()] else: dfToUse = dfToUse[field1 >= field2] - if operator == '!>=': + if operator == "!>=": if doAbs1 and doAbs2: - dfToUse = dfToUse[ field1.abs() < field2.abs()] - elif doAbs1: - dfToUse = dfToUse[ field1.abs() < field2] + dfToUse = dfToUse[field1.abs() < field2.abs()] + elif doAbs1: + dfToUse = dfToUse[field1.abs() < field2] elif doAbs2: - dfToUse = dfToUse[ field1 < field2.abs()] + dfToUse = dfToUse[field1 < field2.abs()] else: - dfToUse = dfToUse[ field1 < field2] - if operator == '>': + dfToUse = dfToUse[field1 < field2] + if operator == ">": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() > field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() > field2] elif doAbs2: dfToUse = dfToUse[field1 > field2.abs()] else: dfToUse = dfToUse[field1 > field2] - if operator == '=': + if operator == "=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() == field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() == field2] elif doAbs2: dfToUse = dfToUse[field1 == field2.abs()] else: dfToUse = dfToUse[field1 == field2] - if operator == '!=': + if operator == "!=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() != field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() != field2] elif doAbs2: dfToUse = dfToUse[field1 != field2.abs()] else: dfToUse = dfToUse[field1 != field2] - if operator == '<=': + if operator == "<=": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() <= field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() <= field2] elif doAbs2: dfToUse = 
dfToUse[field1 <= field2.abs()] else: dfToUse = dfToUse[field1 <= field2] - if operator == '!<=': + if operator == "!<=": if doAbs1 and doAbs2: - dfToUse = dfToUse[ field1.abs() > field2.abs()] - elif doAbs1: - dfToUse = dfToUse[ field1.abs() > field2] + dfToUse = dfToUse[field1.abs() > field2.abs()] + elif doAbs1: + dfToUse = dfToUse[field1.abs() > field2] elif doAbs2: - dfToUse = dfToUse[ field1 > field2.abs()] + dfToUse = dfToUse[field1 > field2.abs()] else: - dfToUse = dfToUse[ field1 > field2] - if operator == '<': + dfToUse = dfToUse[field1 > field2] + if operator == "<": if doAbs1 and doAbs2: dfToUse = dfToUse[field1.abs() < field2.abs()] - elif doAbs1: + elif doAbs1: dfToUse = dfToUse[field1.abs() < field2] elif doAbs2: dfToUse = dfToUse[field1 < field2.abs()] else: dfToUse = dfToUse[field1 < field2] - + return dfToUse - def simple_threshold(chanMetricDF, chanMetaDF, subDef): # Whether we use chanMetricDF or chanMetaDF depends on whether this definition has metrics or metadata... doAbs1 = 0 doAbs2 = 0 - CH1 = '' + CH1 = "" - - #Get the definition + # Get the definition threshDefs = thresholdDefDict[threshold] try: - - field = subDef.split()[0].split('[')[0] + + field = subDef.split()[0].split("[")[0] try: -# ch1 = subDef.split()[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs - CH1 = subDef.split()[0].split('[')[1].replace(']','') - ch1, ch2 = get_channel_lists(CH1, '') + CH1 = subDef.split()[0].split("[")[1].replace("]", "") + ch1, ch2 = get_channel_lists(CH1, "") except: - ch1 = '' - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" field = field.lower() dfToUse = chanMetaDF else: print("WARNING unknown field type") return chanMetricDF, chanMetaDF, "simple" - + try: field = field.split("::")[1] except: pass - - + operator = subDef.split()[1] - + try: # it's numeric value = float(subDef.split()[2]) except: # it's not numeric, so the fielf better be a metadata field - if fieldType != 'metadata': - print("Warning, only metadata fields can have non-numeric cutoff values") + if fieldType != "metadata": + print( + "Warning, only metadata fields can have non-numeric cutoff values" + ) return chanMetricDF, chanMetaDF, "simple" else: value = subDef.split()[2] # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] + except Exception as e: print("Warning: could not calculate threshold %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "simple" - - - dfToUse = do_comparison(dfToUse, dfToUse[field], operator, value, doAbs1, doAbs2) - - if fieldType == 'metric': + + dfToUse = do_comparison( + dfToUse, dfToUse[field], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "simple" + # ============================# # COMPLETENESS THRESHOLDS @@ -547,33 +631,33 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): doAbs2 = 0 # second metric doAbs3 = 0 # "ratio" - unused currently, placeholder doAbs4 = 0 # cutoff value - unused 
currently, placeholder - chType1 = '' - chType2 = '' - + chType1 = "" + chType2 = "" + try: - met1 = subDef.split('/')[0].split()[-1].split('[')[0] - met2 = subDef.split('/')[1].split()[0].split('[')[0] + met1 = subDef.split("/")[0].split()[-1].split("[")[0] + met2 = subDef.split("/")[1].split()[0].split("[")[0] except Exception as e: print("Warning: Could not parse ratio threshold %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "ratio" - - if 'abs' in met1: + + if "abs" in met1: doAbs1 = 1 - met1 = met1.replace('abs(','').replace(')','') - if 'abs' in met2: + met1 = met1.replace("abs(", "").replace(")", "") + if "abs" in met2: doAbs2 = 1 - met2 = met2.replace('abs(','').replace(')','') - + met2 = met2.replace("abs(", "").replace(")", "") + if met1 in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif met1 in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") return chanMetricDF, chanMetaDF, "ratio" - + try: met1 = met1.split("::")[1] except: @@ -581,110 +665,136 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): try: met2 = met2.split("::")[1] except: - pass - + pass + # figure out what's going on with H/V, if anything try: - CH1 = subDef.split('/')[0].split()[-1].split('[')[1].replace(']','').replace(')','') + CH1 = ( + subDef.split("/")[0] + .split()[-1] + .split("[")[1] + .replace("]", "") + .replace(")", "") + ) try: - chType1 = CH1.split(':')[1] - CH1 = CH1.split(':')[0] + chType1 = CH1.split(":")[1] + CH1 = CH1.split(":")[0] except: pass - + except: - CH1 = '' - + CH1 = "" + try: - CH2 = subDef.split('/')[1].split()[0].split('[')[1].replace(']','').replace(')','') + CH2 = ( + subDef.split("/")[1] + .split()[0] + .split("[")[1] + .replace("]", "") + .replace(")", "") + ) try: - chType2 = CH2.split(':')[1] - CH2 = CH2.split(':')[0] + chType2 = CH2.split(":")[1] + CH2 = CH2.split(":")[0] except: pass except: - CH2 = '' - - - ## Only in the ratio threshold do we have to handle the absolute values outside of the do_comparison function + CH2 = "" + + ## Only in the ratio threshold do we have to handle the absolute values outside of the do_comparison function ch1, ch2 = get_channel_lists(CH1, CH2) - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl','ratio','new_target'] - - + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "ratio", + "new_target", + ] + if CH1 == CH2 and chType1 == chType2 == "": if doAbs1: dfToUse[met1] = dfToUse[met1].abs() if doAbs2: dfToUse[met2] = dfToUse[met2].abs() - dfToUse['ratio'] = dfToUse[met1] / dfToUse[met2] # Later we will whittle down to just the V or just the H, if necessary - -# dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD + dfToUse["ratio"] = ( + dfToUse[met1] / dfToUse[met2] + ) # Later we will whittle down to just the V or just the H, if necessary else: # Do the figuring on what needs to happen to the dataframe based on chType1 and chyType2 - dfToUse = do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2) + dfToUse = do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ) - - # Subset based on the channel indicated by ch1: -# dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - # create the ratio column: - if chType1 == 'vs' or chType2 == 'vs': + if chType1 == "vs" or chType2 == "vs": if doAbs1: - dfToUse[met1+ "_" + 
chType1] = dfToUse[met1+ "_" + chType1].abs() + dfToUse[met1 + "_" + chType1] = dfToUse[met1 + "_" + chType1].abs() if doAbs2: dfToUse[met2 + "_sncl2"] = dfToUse[met2 + "_sncl2"].abs() - dfToUse['ratio'] = dfToUse[met1+ "_" + chType1] / dfToUse[met2 + "_sncl2"] - + dfToUse["ratio"] = ( + dfToUse[met1 + "_" + chType1] / dfToUse[met2 + "_sncl2"] + ) + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: - if chType1 == chType2 == '': + if chType1 == chType2 == "": if doAbs1: - dfToUse[met1+ "_"] = dfToUse[met1+ "_"].abs() + dfToUse[met1 + "_"] = dfToUse[met1 + "_"].abs() if doAbs2: dfToUse[met2 + "_sncl2"] = dfToUse[met2 + "_sncl2"].abs() - dfToUse['ratio'] = dfToUse[met1+ "_"] / dfToUse[met2 + "_sncl2"] - + dfToUse["ratio"] = dfToUse[met1 + "_"] / dfToUse[met2 + "_sncl2"] + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: -# dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: -# if chType1 == chType2 == 'avg': if doAbs1: - dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() + dfToUse[met1 + "_" + chType1] = dfToUse[ + met1 + "_" + chType1 + ].abs() if doAbs2: - dfToUse[met2+ "_" + chType2] = dfToUse[met2+ "_" + chType2].abs() + dfToUse[met2 + "_" + chType2] = dfToUse[ + met2 + "_" + chType2 + ].abs() + + dfToUse["ratio"] = ( + dfToUse[met1 + "_" + chType1] / dfToUse[met2 + "_" + chType2] + ) - dfToUse['ratio'] = dfToUse[met1+ "_" + chType1] / dfToUse[met2 + "_" + chType2] - # delete extra columns, revert names of main metrics for col in dfToUse.columns: if col.endswith("_" + chType2): - dfToUse.drop([col], axis = 1, inplace = True) + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: -# dfToUse.rename(columns={col : '_'.join(col.split("_")[:-1])}) - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) -# dfToUse['ratio'] = dfToUse['ratio'].apply(lambda x: x*100) # OLD + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - -# dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - ##### + ##### try: fields = subDef.split() @@ -693,187 +803,191 @@ def ratio_threshold(chanMetricDF, chanMetaDF, subDef): except Exception as e: print("Warning: could not calculate threshold %s - %s" % (subDef, e)) return - dfToUse = do_comparison(dfToUse, dfToUse['ratio'], operator, value, doAbs3, doAbs4) + dfToUse = do_comparison( + dfToUse, dfToUse["ratio"], operator, value, doAbs3, doAbs4 + ) - if fieldType == 'metric': + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - - + return chanMetricDF, chanMetaDF, "ratio" -# return dfToUse, fieldType, "ratio" - + 
def average_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - + CH1 = "" + CH2 = "" + try: fields = subDef.split("::")[1].split() - - field = fields[0].split('[')[0] + + field = fields[0].split("[")[0] operator = fields[1] value = float(fields[2]) - + try: -# ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # only Ratio and Comparison can have H: avg/vs - CH1 = fields[0].split('[')[1].replace(']','') + CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) -# ch1 = chanTypes[CH1] -# if ch1 == 'V': -# ch1 = Vchans -# elif ch1 == 'H': -# ch1 = Hchans except: - ch1 = '' - - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") - return - + return + try: field = field.split("::")[1] except: pass - - dfToUse = dfToUse.groupby('target', as_index=False)[field].mean().round(1) - dfToUse.rename(columns={field : 'value'}, inplace = True) - dfToUse['channel'] = [t.split('.')[3] for t in dfToUse['target']] - dfToUse['start'] = datetime.datetime.strptime(specified_start, '%Y-%m-%d') - dfToUse['end'] = datetime.datetime.strptime(specified_end, '%Y-%m-%d') - + + dfToUse = dfToUse.groupby("target", as_index=False)[field].mean().round(1) + dfToUse.rename(columns={field: "value"}, inplace=True) + dfToUse["channel"] = [t.split(".")[3] for t in dfToUse["target"]] + dfToUse["start"] = datetime.datetime.strptime(specified_start, "%Y-%m-%d") + dfToUse["end"] = datetime.datetime.strptime(specified_end, "%Y-%m-%d") + # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] + except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return dfToUse, fieldType, "average" - - dfToUse = do_comparison(dfToUse, dfToUse['value'], operator, value, doAbs1, doAbs2) - if fieldType == 'metric': + dfToUse = do_comparison( + dfToUse, dfToUse["value"], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "average" -# return dfToUse, fieldType, "average" def median_threshold(chanMetricDF, chanMetaDF, subDef): # Shouldn't have metadata in here, but keeping it open for future-proofing doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - + CH1 = "" + CH2 = "" + try: fields = subDef.split("::")[1].split() - - field = fields[0].split('[')[0] + + field = fields[0].split("[")[0] operator = fields[1] value = float(fields[2]) - + try: -# ch1 = fields[0].split('[')[1].replace(']','').split(':')[0] # Only Ratio and Comparison can have H: avg/vs - CH1 = fields[0].split('[')[1].replace(']','') + CH1 = fields[0].split("[")[1].replace("]", "") ch1, ch2 = get_channel_lists(CH1, CH2) -# ch1 = chanTypes[CH1] except: - ch1 = '' - - if 'abs' in field: + ch1 = "" + + if "abs" in field: doAbs1 = 1 - field = field.replace('abs(','').replace(')','') - + field = field.replace("abs(", "").replace(")", "") + if 
field in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif field in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") - return chanMetricDF, chanMetaDF, "median" - - + return chanMetricDF, chanMetaDF, "median" + try: field = field.split("::")[1] except: pass - - dfToUse = dfToUse.groupby('target', as_index=False)[field].median().round(1) - dfToUse.rename(columns={field : 'value'}, inplace = True) - dfToUse['channel'] = [t.split('.')[3] for t in dfToUse['target']] - dfToUse['start'] = datetime.datetime.strptime(specified_start, '%Y-%m-%d') - dfToUse['end'] = datetime.datetime.strptime(specified_end, '%Y-%m-%d') - + + dfToUse = dfToUse.groupby("target", as_index=False)[field].median().round(1) + dfToUse.rename(columns={field: "value"}, inplace=True) + dfToUse["channel"] = [t.split(".")[3] for t in dfToUse["target"]] + dfToUse["start"] = datetime.datetime.strptime(specified_start, "%Y-%m-%d") + dfToUse["end"] = datetime.datetime.strptime(specified_end, "%Y-%m-%d") + # If the threshold is only for horixontal or verticals, then subset it now: - if ch1 != '': - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + if ch1 != "": + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "median" - - dfToUse = do_comparison(dfToUse, dfToUse['value'], operator, value, doAbs1, doAbs2) - if fieldType == 'metric': + dfToUse = do_comparison( + dfToUse, dfToUse["value"], operator, value, doAbs1, doAbs2 + ) + + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "median" - + def compare_threshold(chanMetricDF, chanMetaDF, subDF): doAbs1 = 0 doAbs2 = 0 - CH1 = '' - CH2 = '' - chType1 = '' - chType2 = '' - columnsToNotChange = ['target', 'start', 'end', 'network', 'station', 'location', 'channel','snl','ratio', 'new_target'] - + CH1 = "" + CH2 = "" + chType1 = "" + chType2 = "" + columnsToNotChange = [ + "target", + "start", + "end", + "network", + "station", + "location", + "channel", + "snl", + "ratio", + "new_target", + ] + try: fields = subDef.split() - met1 = fields[0].split('[')[0] + met1 = fields[0].split("[")[0] operator = fields[1] - met2 = fields[2].split('[')[0] - + met2 = fields[2].split("[")[0] + except Exception as e: print("WARNING: Unable to calculate %s - %s" % (subDef, e)) return chanMetricDF, chanMetaDF, "comparison" - - if 'abs' in met1: + + if "abs" in met1: doAbs1 = 1 - met1 = met1.replace('abs(','').replace(')','') - if 'abs' in met2: + met1 = met1.replace("abs(", "").replace(")", "") + if "abs" in met2: doAbs2 = 1 - met2 = met2.replace('abs(','').replace(')','') - + met2 = met2.replace("abs(", "").replace(")", "") + if met1 in metricList: - fieldType = 'metric' + fieldType = "metric" dfToUse = chanMetricDF elif met1 in metadataList: - fieldType = 'metadata' + fieldType = "metadata" dfToUse = chanMetaDF else: print("WARNING: unknown field type") return chanMetricDF, chanMetaDF, "comparison" - + try: met1 = met1.split("::")[1] except: @@ -882,284 +996,370 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): met2 = met2.split("::")[1] except: pass - + # figure out what's going on with H/V, if anything try: - CH1 = fields[0].split('[')[1].replace(']','').replace(')','') + CH1 = fields[0].split("[")[1].replace("]", "").replace(")", "") 
try: - chType1 = CH1.split(':')[1] - CH1 = CH1.split(':')[0] + chType1 = CH1.split(":")[1] + CH1 = CH1.split(":")[0] except: pass - + except: - CH1 = '' + CH1 = "" try: - CH2 = fields[2].split('[')[1].replace(']','').replace(')','') + CH2 = fields[2].split("[")[1].replace("]", "").replace(")", "") try: - chType2 = CH2.split(':')[1] - CH2 = CH2.split(':')[0] + chType2 = CH2.split(":")[1] + CH2 = CH2.split(":")[0] except: pass except: - CH2 = '' + CH2 = "" ch1, ch2 = get_channel_lists(CH1, CH2) # Simplest case: ch1 and ch2 are both empty, or we are doing V-V or H-H and we just run everything like normal if CH1 == CH2 and chType1 == chType2 == "": - dfToUse = do_comparison(dfToUse, dfToUse[met1], operator, dfToUse[met2], doAbs1, doAbs2) + dfToUse = do_comparison( + dfToUse, dfToUse[met1], operator, dfToUse[met2], doAbs1, doAbs2 + ) # No extra columns to figure out here, since this case doesn't need do_channel_figuring() - + else: # Do the figuring on what needs to happen to the dataframe based on chType1 and chyType2 - dfToUse = do_channel_figuring(dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2) + dfToUse = do_channel_figuring( + dfToUse, CH1, CH2, ch1, ch2, chType1, chType2, doAbs1, doAbs2 + ) # Subset based on the channel indicated by ch1: - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - if chType1 == 'vs' or chType2 == 'vs': - df1 = dfToUse[met1+ "_" + chType1] + if chType1 == "vs" or chType2 == "vs": + df1 = dfToUse[met1 + "_" + chType1] df2 = dfToUse[met2 + "_sncl2"] - + # each one of these cases has it's own do_comparison so that it is easier to remove the extra columns afterward dfToUse = do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: - if chType1 == chType2 == '': - df1 = dfToUse[met1+ "_"] + if chType1 == chType2 == "": + df1 = dfToUse[met1 + "_"] df2 = dfToUse[met2 + "_sncl2"] - + dfToUse = do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_sncl2'): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_sncl2"): + dfToUse.drop([col], axis=1, inplace=True) elif col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 1)[0]}, inplace = True) - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + else: -# if chType1 == chType2 == 'avg': -# if doAbs1: -# dfToUse[met1+ "_" + chType1] = dfToUse[met1+ "_" + chType1].abs() -# if doAbs2: -# dfToUse[met1+ "_" + chType2] = dfToUse[met1+ "_" + chType2].abs() - df1 = dfToUse[met1+ "_" + chType1] + df1 = dfToUse[met1 + "_" + chType1] df2 = dfToUse[met2 + "_" + chType2] - + dfToUse = do_comparison(dfToUse, df1, operator, df2, doAbs1, doAbs2) - + # delete extra columns, revert names of main metrics for col in dfToUse.columns: - if col.endswith('_' + chType2): - dfToUse.drop([col], axis = 1, inplace = True) + if col.endswith("_" + chType2): + dfToUse.drop([col], axis=1, inplace=True) if col not in columnsToNotChange: - dfToUse.rename(columns={col : col.rsplit('_', 
1)[0]}, inplace = True) - - dfToUse = dfToUse[dfToUse['channel'].str.endswith(ch1)] - + dfToUse.rename( + columns={col: col.rsplit("_", 1)[0]}, inplace=True + ) + + dfToUse = dfToUse[dfToUse["channel"].str.endswith(ch1)] - if fieldType == 'metric': + if fieldType == "metric": chanMetricDF = dfToUse - elif fieldType == 'metadata': + elif fieldType == "metadata": chanMetaDF = dfToUse - + return chanMetricDF, chanMetaDF, "comparison" -# return dfToUse, fieldType, "comparison" - - - + # Within a single threshold, there can be multiple instrument groups, so need to loop over each of those # But before we do, we need to do some organization to figure out what stations are specifically spelled # out, so they that they can be withheld from any potential "*" so that it's not doubled up threshDefs = thresholdDefDict[threshold] - + if metricDF.empty: if hasMetrics: - return + return for group in threshDefs.keys(): - # loop over each group in the threshold, and run them if we have included them in the preference file if group in instruments: instDef = threshDefs[group] - + # For every group, regenerate specificSNCLs specificSNCLs = [] for instGroup in threshDefs.keys(): if instGroup in instruments: specificSNCLs.append(instrumentGroupsDict[instGroup]) - - # remove this group from specificSNCLS, so that it doesn't compare against itself thisIdx = specificSNCLs.index(instrumentGroupsDict[group]) del specificSNCLs[thisIdx] - + if (len(instDef) > 1) and any("average :: " in s for s in instDef): - print("WARNING: thresholds with 'ratio' cannot have multiple parts, skipping") + print( + "WARNING: thresholds with 'ratio' cannot have multiple parts, skipping" + ) continue - + thisMetricDF = metricDF.copy() thisMetaDF = metaDF.copy() - - for net in instrumentGroupsDict[group]['network']: + + for net in instrumentGroupsDict[group]["network"]: if net == "*" or net == "%" or net == "": netMetricDF = thisMetricDF netMetaDF = thisMetaDF # If it can be any net, look at all other groups and make sure to remove any that might be specified for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['network'] == ['*']): + if not specificSNCL["network"] == ["*"]: # Then a network has been specified - work down the NSLC chain to remove specific targets - for net2 in specificSNCL['network']: - for sta2 in specificSNCL['station']: - if sta2 == "*" or sta2=="%" or sta2=="": + for net2 in specificSNCL["network"]: + for sta2 in specificSNCL["station"]: + if sta2 == "*" or sta2 == "%" or sta2 == "": sta2 = ".*" - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = "%s\.%s\..*%s.*\..*%s.*\..*" % (net2, sta2, loc2, cha2) - - netMetricDF = netMetricDF[~netMetricDF['target'].str.contains(thisTarget,regex= True)] - netMetaDF = netMetaDF[~netMetaDF['target'].str.contains(thisTarget,regex= True)] - del specificSNCLs[idx] + + thisTarget = ( + r"%s\.%s\..*%s.*\..*%s.*\..*" + % (net2, sta2, loc2, cha2) + ) + + netMetricDF = netMetricDF[ + ~netMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + netMetaDF = netMetaDF[ + ~netMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] else: - netMetricDF = thisMetricDF[thisMetricDF['network'] == net] - netMetaDF = 
thisMetaDF[thisMetaDF['network'] == net] - - for sta in instrumentGroupsDict[group]['station']: + netMetricDF = thisMetricDF[thisMetricDF["network"] == net] + netMetaDF = thisMetaDF[thisMetaDF["network"] == net] + + for sta in instrumentGroupsDict[group]["station"]: if sta == "*" or sta == "%" or sta == "": staMetricDF = netMetricDF staMetaDF = netMetaDF for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['station'] == ['*']): - for sta2 in specificSNCL['station']: - if sta2 == "*" or sta2=="%" or sta2=="": + if not specificSNCL["station"] == ["*"]: + for sta2 in specificSNCL["station"]: + if sta2 == "*" or sta2 == "%" or sta2 == "": sta2 = ".*" - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = ".*\.%s\..*%s.*\..*%s.*\..*" % (sta2, loc2, cha2) - staMetricDF = staMetricDF[~staMetricDF['target'].str.contains(thisTarget,regex= True)] - staMetaDF = staMetaDF[~staMetaDF['target'].str.contains(thisTarget,regex= True)] - del specificSNCLs[idx] + + thisTarget = ( + r".*\.%s\..*%s.*\..*%s.*\..*" + % (sta2, loc2, cha2) + ) + staMetricDF = staMetricDF[ + ~staMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + staMetaDF = staMetaDF[ + ~staMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] else: - staMetricDF = netMetricDF[netMetricDF['station'] == sta] - staMetaDF = netMetaDF[netMetaDF['station'] == sta] - - for loc in instrumentGroupsDict[group]['location']: + staMetricDF = netMetricDF[netMetricDF["station"] == sta] + staMetaDF = netMetaDF[netMetaDF["station"] == sta] + + for loc in instrumentGroupsDict[group]["location"]: if loc == "*" or loc == "%" or loc == "": locMetricDF = staMetricDF locMetaDF = staMetaDF - + for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['location'] == ['*']): + if not specificSNCL["location"] == ["*"]: - for loc2 in specificSNCL['location']: - if loc2 == "*" or loc2=="%" or loc2=="": + for loc2 in specificSNCL["location"]: + if loc2 == "*" or loc2 == "%" or loc2 == "": loc2 = ".*" - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - - thisTarget = ".*\..*\..*%s.*\..*%s.*\..*" % (loc2, cha2)# - - locMetricDF = locMetricDF[~locMetricDF['target'].str.contains(thisTarget,regex= True)] - locMetaDF = locMetaDF[~locMetaDF['target'].str.contains(thisTarget,regex= True)] + + thisTarget = ( + r".*\..*\..*%s.*\..*%s.*\..*" + % (loc2, cha2) + ) # + + locMetricDF = locMetricDF[ + ~locMetricDF["target"].str.contains( + thisTarget, regex=True + ) + ] + locMetaDF = locMetaDF[ + ~locMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] del specificSNCLs[idx] - - - + else: # some metrics compare two loc - locMetricDF = staMetricDF[staMetricDF['location'].str.contains(loc)] - locMetaDF = staMetaDF[staMetaDF['location'].str.contains(loc)] - - - - for chan in instrumentGroupsDict[group]['channel']: - if chan == "*" or chan =="%" or chan == "": + locMetricDF = staMetricDF[ + staMetricDF["location"].str.contains(loc) + ] + locMetaDF = staMetaDF[ + staMetaDF["location"].str.contains(loc) + ] + + for chan in 
instrumentGroupsDict[group]["channel"]: + if chan == "*" or chan == "%" or chan == "": chanMetricDF = locMetricDF chanMetaDF = locMetaDF - + for idx, specificSNCL in enumerate(specificSNCLs): - if (not specificSNCL['channel'] == ['*']): + if not specificSNCL["channel"] == ["*"]: - for cha2 in specificSNCL['channel']: - if cha2 == "*" or cha2=="%" or cha2=="": + for cha2 in specificSNCL["channel"]: + if cha2 == "*" or cha2 == "%" or cha2 == "": cha2 = ".*" - thisTarget = ".*\..*\..*\..*%s.*\..*" % (cha2) + thisTarget = r".*\..*\..*\..*%s.*\..*" % ( + cha2 + ) if hasMetrics: - chanMetricDF = chanMetricDF[~chanMetricDF['target'].str.contains(thisTarget,regex=True)] - chanMetaDF = chanMetaDF[~chanMetaDF['target'].str.contains(thisTarget,regex=True)] - del specificSNCLs[idx] - + chanMetricDF = chanMetricDF[ + ~chanMetricDF[ + "target" + ].str.contains( + thisTarget, regex=True + ) + ] + chanMetaDF = chanMetaDF[ + ~chanMetaDF["target"].str.contains( + thisTarget, regex=True + ) + ] + del specificSNCLs[idx] + else: # Note the .startswith() rather than .contains() - this is because HN? brought up BHN channels - # Checks indicate that this change is ok, but be aware that this MAY have other effects - chanMetricDF = locMetricDF[locMetricDF['channel'].str.startswith(chan)] - chanMetaDF = locMetaDF[locMetaDF['channel'].str.startswith(chan)] - + # Checks indicate that this change is ok, but be aware that this MAY have other effects + chanMetricDF = locMetricDF[ + locMetricDF["channel"].str.startswith(chan) + ] + chanMetaDF = locMetaDF[ + locMetaDF["channel"].str.startswith(chan) + ] # each definition may have multiple entries that need to be met. For each, check on what kind - # of definition it is, send it to the right place, then get the return to pass on to the next - # part of the definition. This way we may have different 'types' of definitions within a + # of definition it is, send it to the right place, then get the return to pass on to the next + # part of the definition. This way we may have different 'types' of definitions within a # single definition. 
for subDef in instDef: - itype = "" # assign a dummy itype, mostly for the metadata-only thresholds + itype = "" # assign a dummy itype, mostly for the metadata-only thresholds if "average ::" in subDef: try: - chanMetricDF, chanMetaDF, itype = average_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + average_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) elif "median ::" in subDef: try: - chanMetricDF, chanMetaDF, itype = median_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + median_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) elif "/" in subDef.split(): try: - chanMetricDF, chanMetaDF, itype = ratio_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + ratio_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) else: - # Could be 3 situations: + # Could be 3 situations: # metric operator value - simple # metadata operator string - simple # metric/metadata operator/metadata metric - comparison - # + # try: try: fields = subDef.split() - pos1 = fields[0].replace('abs(','').replace(')','').split('[')[0] - pos2 = fields[1].replace('abs(','').replace(')','').split('[')[0] - pos3 = fields[2].replace('abs(','').replace(')','').split('[')[0] + pos1 = ( + fields[0] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) + pos2 = ( + fields[1] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) + pos3 = ( + fields[2] + .replace("abs(", "") + .replace(")", "") + .split("[")[0] + ) except Exception as e: - print("WARNING: could not split definition - %s" % e) - - if (pos3 in metricList) or (pos3 in metadataList): - chanMetricDF, chanMetaDF, itype = compare_threshold(chanMetricDF, chanMetaDF, subDef) -# quit("Stopping here to make sure it's working") + print( + "WARNING: could not split definition - %s" + % e + ) + + if (pos3 in metricList) or ( + pos3 in metadataList + ): + chanMetricDF, chanMetaDF, itype = ( + compare_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) else: - chanMetricDF, chanMetaDF, itype = simple_threshold(chanMetricDF, chanMetaDF, subDef) + chanMetricDF, chanMetaDF, itype = ( + simple_threshold( + chanMetricDF, chanMetaDF, subDef + ) + ) except Exception as e: print("WARNING: Did not run because of %s" % e) @@ -1167,60 +1367,79 @@ def compare_threshold(chanMetricDF, chanMetaDF, subDF): # and move onto the next threshold return - # At this point, we have two different dataframes that have been subsetted (or not, depending on the specifics) - # If within a single threshold, there are both mustang metrics AND metadata, then we need to make sure that the + # If within a single threshold, there are both mustang metrics AND metadata, then we need to make sure that the # two are in alignment # Since any given threshold is an AND statement, any targets that have been eliminated from one must also be - # eliminated from the other. Metadata has no real day values, so it's just the targets that can be used. + # eliminated from the other. Metadata has no real day values, so it's just the targets that can be used. 
# If metrics are flagged for 3 days but the metadata for none, then none should be in the issue list # If metrics are flagged for 3 days and the metadata is also flagged, then all three should be in the list - - #starting in a probably inefficient way and then can make it more efficient later + + # starting in a probably inefficient way and then can make it more efficient later # First check to see if either are empty... if one is empty, then the end result should be empty! - + if hasMetrics == True: - if not itype == 'average': - chanMetricDF['start'] = pd.to_datetime(chanMetricDF['start']) - chanMetricDF['end'] = pd.to_datetime(chanMetricDF['end']) - + if not itype == "average": + chanMetricDF["start"] = pd.to_datetime( + chanMetricDF["start"] + ) + chanMetricDF["end"] = pd.to_datetime( + chanMetricDF["end"] + ) + cols = chanMetricDF.columns - finalDF = pd.DataFrame(columns=cols) - + frames = [] + if chanMetricDF.empty or chanMetaDF.empty: continue - else: # both of them have stuff in them + else: # both of them have stuff in them # the metadata dataframe is probably going to be shorter (of course, maybe not) for index, row in chanMetaDF.iterrows(): # The metadata dataframe will never have complex targets in it, so I need to allow for those - complexTarget = "%s\.%s\..*%s.*\..*%s.*\.." % (row['network'], row['station'], row['location'], row['channel']) - starttime = datetime.datetime.strptime(row['starttime'], '%Y-%m-%dT%H:%M:%S.%f') - if pd.isnull(row['endtime']): + complexTarget = r"%s\.%s\..*%s.*\..*%s.*\.." % ( + row["network"], + row["station"], + row["location"], + row["channel"], + ) + starttime = datetime.datetime.strptime( + row["starttime"], "%Y-%m-%dT%H:%M:%S.%f" + ) + if pd.isnull(row["endtime"]): endtime = datetime.datetime.now() else: - endtime = datetime.datetime.strptime(row['endtime'], '%Y-%m-%dT%H:%M:%S.%f') - thisSet = chanMetricDF[chanMetricDF['target'].str.contains(complexTarget,regex=True)] - - if 'new_target' in thisSet.columns: - thisSet['target'] = thisSet['new_target'] -# thisSet.drop('new_target', axis = 1, inplace = True) - - if not itype == 'average': - thisSet = thisSet[thisSet['start'] >= starttime] - thisSet = thisSet[thisSet['end'] <= endtime] + endtime = datetime.datetime.strptime( + row["endtime"], "%Y-%m-%dT%H:%M:%S.%f" + ) + thisSet = chanMetricDF[ + chanMetricDF["target"].str.contains( + complexTarget, regex=True + ) + ] + + if "new_target" in thisSet.columns: + thisSet["target"] = thisSet["new_target"] + + if not itype == "average": + thisSet = thisSet[ + thisSet["start"] >= starttime + ] + thisSet = thisSet[thisSet["end"] <= endtime] ## GET DATES FROM ROW AND SUBSET THISSET TO ONLY THOSE BETWEEN THOSE DATES! ## ALSO HANDLE THE CASE WHERE IT IS ONLY METADATA AND NO METRICS ARE EXPECTED... ADD IN AN IF CLAUSE? - finalDF = pd.concat([finalDF, thisSet]) - finalDF = finalDF.drop_duplicates(subset=['target','start','end']) - - issues = reportUtils.sortIssueFile(finalDF, threshold, itype) + frames.append(thisSet) + finalDF = pd.concat(frames, ignore_index=True) + finalDF = finalDF.drop_duplicates( + subset=["target", "start", "end"] + ) + + issues = reportUtils.sortIssueFile( + finalDF, threshold, itype + ) else: # If this threshold doesn't have any metrics anyway, then just convert the metadata dataframe into the finalDF format issues = reportUtils.sortMetaFile(chanMetaDF, threshold) - - + reportUtils.writeToOutfile(issues, outfile) - + return - - \ No newline at end of file
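For reference, a minimal standalone sketch of the pandas >= 2.x idioms adopted in the thresholds.py hunks above: restricting groupby().mean() to numeric columns, opting into "future.no_silent_downcasting" before chaining fillna().infer_objects(copy=False), and writing regex patterns for the .str accessor as raw strings. The DataFrame contents and column names below are illustrative only and are not taken from QuARG data.

```python
import pandas as pd

# Opt in to the future (non-silent) downcasting behaviour, as done at the top of thresholds.py.
pd.set_option("future.no_silent_downcasting", True)

# Illustrative frame: one metric value per target-day (names are made up for this sketch).
df = pd.DataFrame(
    {
        "snl": ["N1.STA1.00", "N1.STA1.00", "N1.STA2.00"],
        "start": ["2024-01-01", "2024-01-01", "2024-01-01"],
        "channel": ["BHN", "BHE", "BHZ"],
        "sample_rms": [10.0, 12.0, None],
    }
)

# pandas 2.x no longer silently drops non-numeric columns in groupby().mean(),
# so select the numeric columns explicitly before averaging.
numeric_cols = df.select_dtypes(include="number").columns
horz_avg = df.groupby(["snl", "start"], as_index=False)[numeric_cols].mean()

# fillna() can silently downcast the result; chaining infer_objects(copy=False)
# makes the dtype handling explicit and avoids the FutureWarning.
df["sample_rms"] = df["sample_rms"].fillna(0.0).infer_objects(copy=False)

# Regex patterns passed to .str.contains()/.str.match() are raw strings
# (e.g. r",\*,") so backslashes are not interpreted as string escapes.
mask = df["snl"].str.contains(r"N1\.STA1\..*", regex=True)

print(horz_avg)
print(df[mask])
```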
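Similarly, a short sketch of the pattern used to assemble finalDF at the end of do_threshold now that DataFrame.append() has been removed: matching frames are collected in a plain Python list inside the loop and concatenated once afterwards. The subset frames here are hypothetical stand-ins for the per-row thisSet frames.

```python
import pandas as pd

# Hypothetical per-row subsets standing in for the thisSet frames built inside the loop.
subsets = [
    pd.DataFrame({"target": ["N1.STA1.00.BHZ"], "start": ["2024-01-01"], "end": ["2024-01-02"]}),
    pd.DataFrame({"target": ["N1.STA1.00.BHZ"], "start": ["2024-01-01"], "end": ["2024-01-02"]}),
    pd.DataFrame({"target": ["N1.STA2.00.BHZ"], "start": ["2024-01-03"], "end": ["2024-01-04"]}),
]

frames = []
for this_set in subsets:
    frames.append(this_set)  # cheap list append inside the loop

final_df = pd.concat(frames, ignore_index=True)  # single concat afterwards
final_df = final_df.drop_duplicates(subset=["target", "start", "end"])
print(final_df)
```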