From 9aa49cf38f6b1a5d9622fdbe562e626d13e51b1f Mon Sep 17 00:00:00 2001
From: Mathew Biddle <8480023+MathewBiddle@users.noreply.github.com>
Date: Fri, 8 Jul 2022 16:34:29 -0400
Subject: [PATCH] Automate BTN metric generation and clean up GTS metrics

---
 .github/workflows/metrics.yml     |   7 +-
 btn_metrics.py                    | 213 ++++++++++++++++++++++++++++++
 ioos_metrics.py => gts_metrics.py |   0
 3 files changed, 217 insertions(+), 3 deletions(-)
 create mode 100644 btn_metrics.py
 rename ioos_metrics.py => gts_metrics.py (100%)

diff --git a/.github/workflows/metrics.yml b/.github/workflows/metrics.yml
index 79b816b..b10cc11 100644
--- a/.github/workflows/metrics.yml
+++ b/.github/workflows/metrics.yml
@@ -5,7 +5,7 @@ on:
     branches:
       - main
     paths:
-      - 'ioos_metrics.py'
+      - 'gts_metrics.py'
   schedule:
     - cron: "0 12 5 1,4,7,10 *"
 
@@ -28,10 +28,11 @@ jobs:
       run: |
         conda env create -f environment.yml
 
-    - name: Collect quarterly metrics
+    - name: Collect quarterly GTS and BTN metrics
       run: |
         source activate ioos-btn
-        python ioos_metrics.py
+        python gts_metrics.py
+        python btn_metrics.py
 
     - name: Commit and push if it changed
       run: |

diff --git a/btn_metrics.py b/btn_metrics.py
new file mode 100644
index 0000000..15943e4
--- /dev/null
+++ b/btn_metrics.py
@@ -0,0 +1,213 @@
+import pandas as pd
+import requests
+import re
+from bs4 import BeautifulSoup
+import json
+
+
+ioos_btn_df = pd.read_csv('https://github.com/MathewBiddle/ioos_by_the_numbers/raw/main/ioos_btn_metrics.csv')
+
+today = pd.Timestamp.strftime(pd.Timestamp.today(tz='UTC'), '%Y-%m-%d')
+
+# only update numbers if it's a new day
+if today not in ioos_btn_df['date_UTC'].to_list():
+    ioos_btn_df = pd.concat([ioos_btn_df, pd.DataFrame([{'date_UTC': today}])], ignore_index=True)
+
+fed_partners = 17
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['Federal Partners']] = fed_partners
+
+regional_associations = 11
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['Regional Associations']] = regional_associations
+
+comt = 5
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['COMT Projects']] = comt
+
+hfr_installations = 165
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['HF Radar Stations']] = hfr_installations
+
+df_glider = pd.read_csv('https://gliders.ioos.us/erddap/tabledap/allDatasets.csvp?minTime%2CmaxTime%2CdatasetID')
+df_glider.dropna(
+    axis=0,
+    inplace=True,
+)
+
+# drop delayed-mode datasets
+df_glider = df_glider[~df_glider['datasetID'].str.contains('delayed')]
+
+df_glider[['minTime (UTC)', 'maxTime (UTC)']] = df_glider[
+    ['minTime (UTC)', 'maxTime (UTC)']
+].apply(pd.to_datetime)
+
+df_glider['glider_days'] = (df_glider['maxTime (UTC)'] - df_glider['minTime (UTC)']).dt.days
+
+glider_days = df_glider['glider_days'].sum()
+
+print('Cumulative glider days:', glider_days)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['NGDAC Glider Days']] = glider_days
+
+# National Platforms
+
+## CO-OPS
+xml = requests.get('https://opendap.co-ops.nos.noaa.gov/stations/stationsXML.jsp').text
+COOPS = sum(1 for _ in re.finditer(r'\b%s\b' % re.escape('station name'), xml))  # count station entries
+
+## PORTS (computed but not currently included in the national total below)
+url = 'https://tidesandcurrents.noaa.gov/cdata/StationListFormat?type=Current+Data&filter=active&format=csv'
+df_coops = pd.read_csv(url)
+ports = df_coops[df_coops[' Project'].astype(str).str.contains('PORTS')].shape[0]
+
+## NDBC
+url = 'https://www.ndbc.noaa.gov/wstat.shtml'
+
+html = requests.get(url).text
+
+soup = BeautifulSoup(html, 'html.parser')
+
+string_to_find = ['Total Base Funded Buoys:', 'Total Other Buoys:',
+                  'Total Moored Buoys:', 'Total Base Funded Stations:',
+                  'Total Stations:']
+
+ndbc = dict()
+for string in string_to_find:
+    for tag in soup.find_all('td', string=string):
+        ndbc[string] = int(tag.next_sibling.string)
+
+NDBC = ndbc['Total Moored Buoys:'] + ndbc['Total Base Funded Stations:']
+
+## NERRS
+url = 'https://coast.noaa.gov/nerrs/about/'
+
+html = requests.get(url).text
+
+soup = BeautifulSoup(html, 'html.parser')
+
+string_to_find = ['The National Estuarine Research Reserve System is a network of ']
+
+for string in string_to_find:
+    for tag in soup.find_all('meta', attrs={'content': re.compile(string)}, limit=1):
+        res = [int(i) for i in tag['content'].split() if i.isdigit()]  # extract number
+        NERRS = int(res[0])
+
+# override the scraped value with a hardcoded count
+NERRS = 140
+
+## CBIBS
+base_url = 'https://mw.buoybay.noaa.gov/api/v1'
+apikey = 'f159959c117f473477edbdf3245cc2a4831ac61f'
+start = '2021-12-08T01:00:00z'
+end = '2021-12-09T23:59:59z'
+var = 'Position'
+
+query_url = '{}/json/query?key={}&sd={}&ed={}&var={}'.format(base_url, apikey, start, end, var)
+
+# named cbibs_json so the json module is not shadowed
+cbibs_json = json.loads(requests.get(query_url).text)
+
+CBIBS = len(cbibs_json['stations'])
+
+## OAP
+url = 'https://oceanacidification.noaa.gov/WhatWeDo/Data.aspx'
+
+html = requests.get(url).text
+
+soup = BeautifulSoup(html, 'html.parser')
+
+text = soup.find_all(attrs={'id': 'dnn_ctr14711_ContentPane'})[0].find_all(attrs={'class': 'lead'})[0].text
+
+res = [int(i) for i in text.split() if i.isdigit()]  # extract number
+OAP = int(res[0])
+
+## CDIP
+url = 'https://cdip.ucsd.edu/themes/?d2=p1:m:mobile&regions=all&units=standard&zoom=auto&pub_set=public&tz=UTC&ll_fmt=dm&numcolorbands=10&palette=cdip_classic&high=6.096'
+table_list = pd.read_html(url, match='Stn')
+
+df = table_list[0]
+
+CDIP = df['Stn'].unique().size
+
+## Calculating National Platforms
+national_platforms = COOPS + NDBC + NERRS + CBIBS + OAP + CDIP
+print('National Platforms:', national_platforms)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['National Platforms']] = national_platforms
+
+# Regional Platforms
+url = 'http://erddap.ioos.us/erddap/tabledap/processed_asset_inventory.csvp'
+
+df_regional_platforms = pd.read_csv(url)
+
+regional_platforms = df_regional_platforms['station_long_name'].unique().size
+
+print('Regional platforms:', regional_platforms)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['Regional Platforms']] = regional_platforms
+
+# ATN Deployments
+atn_deployments = 4444
+
+print('ATN Deployments:', atn_deployments)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['ATN Deployments']] = atn_deployments
+
+# MBON Projects
+mbon_projects = 6
+
+print('MBON Projects:', mbon_projects)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['MBON Projects']] = mbon_projects
+
+# OTT Projects
+ott_projects = 8
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['OTT Projects']] = ott_projects
+
+# NHABON Pilot Projects
+nhabon_projects = 9
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['HAB Pilot Projects']] = nhabon_projects
+
+# QARTOD Manuals
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['QARTOD Manuals']] = 13
+
+# IOOS Core Variables
+headers = {'Accept-Encoding': 'identity'}
+
+url = 'https://www.iooc.us/task-teams/core-ioos-variables/'
+
+soup = BeautifulSoup(requests.get(url, headers=headers).text, 'html.parser')
+
+text = soup.find(style='color: #808080;').get_text()  # grab the sentence with the number
+
+core_vars = [int(i) for i in text.split() if i.isdigit()]  # extract number
+
+print('IOOS Core Variables:', core_vars[0])
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['IOOS Core Variables']] = core_vars[0]
+
+# Metadata records
+url = 'https://data.ioos.us/api/3/action/package_list'
+
+mdf = pd.read_json(url)
+
+metadata_records = len(mdf.result.unique())
+
+print('Found {} records from {}.'.format(metadata_records, url))
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['Metadata Records']] = metadata_records
+
+# IOOS
+ioos = 1
+
+print('IOOS:', ioos)
+
+ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, ['IOOS']] = ioos
+
+# Final table
+ioos_btn_df['date_UTC'] = pd.to_datetime(ioos_btn_df['date_UTC'])
+
+ioos_btn_df.to_csv('ioos_btn_metrics.csv', index=False)
\ No newline at end of file
diff --git a/ioos_metrics.py b/gts_metrics.py
similarity index 100%
rename from ioos_metrics.py
rename to gts_metrics.py
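
For reference, a minimal self-contained sketch of the row-update pattern btn_metrics.py is built around: append a row for today only when the date is not already present, then write each metric into that row through a boolean mask on the date column. The DataFrame below is a hypothetical placeholder standing in for ioos_btn_metrics.csv, not data from the patch.

import pandas as pd

# hypothetical history standing in for ioos_btn_metrics.csv
ioos_btn_df = pd.DataFrame({
    'date_UTC': ['2022-04-05'],
    'Federal Partners': [17],
})

today = pd.Timestamp.strftime(pd.Timestamp.today(tz='UTC'), '%Y-%m-%d')

# add a row for today only if one does not already exist;
# pd.concat is the non-deprecated equivalent of DataFrame.append
if today not in ioos_btn_df['date_UTC'].to_list():
    new_row = pd.DataFrame([{'date_UTC': today}])
    ioos_btn_df = pd.concat([ioos_btn_df, new_row], ignore_index=True)

# each metric lands in today's row via a boolean mask on the date column
ioos_btn_df.loc[ioos_btn_df['date_UTC'] == today, 'Federal Partners'] = 17

print(ioos_btn_df)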