Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds regionalized eutrophication factors for TRACI 2.2 #123

Open
wants to merge 30 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
369d268
test
juliechenerg Feb 2, 2024
5892c65
test 2/2/24
juliechenerg Feb 2, 2024
938ca8d
test 2/8, adding notes
juliechenerg Feb 8, 2024
ff191e3
test
juliechenerg Feb 8, 2024
166a795
1) Adding Eutrophication update. 2) change "from .util import..." to …
juliechenerg Feb 26, 2024
cfee635
Add files via upload
juliechenerg Feb 26, 2024
e7400dd
add eutro url to methods.json, read and cache file
bl-young Apr 16, 2024
44781b6
add function to extract geocoordinates from ecoinvent source (used by…
bl-young Apr 17, 2024
eec0d1c
add temporary function to map state names
bl-young Apr 17, 2024
7d54f1d
write location objects to jsonld
bl-young Apr 17, 2024
658b2f7
integrate TRACI2.2 as new method
bl-young Apr 18, 2024
465639f
drop raw dataset, grabbing remote
bl-young Apr 18, 2024
fc3559b
Merge branch 'develop' into traci_eutr
bl-young Apr 19, 2024
5658248
add location to duplicate assessment
bl-young Apr 19, 2024
9645124
use only ref for assigning location to factor
bl-young Apr 19, 2024
74d2392
fix error in maintaining factors with no location from US national
bl-young Apr 19, 2024
13cdd47
move more objects to location.py
bl-young Apr 23, 2024
f12c1f7
add traci2.2 test
bl-young Apr 23, 2024
01f07a2
Merge branch 'develop' into traci_eutr
bl-young Apr 26, 2024
c540f76
fix indicator units
bl-young Apr 27, 2024
5bf6b4a
incorporate data for countries
bl-young Apr 27, 2024
ca18836
pass region to Writer.write()
bl-young Apr 28, 2024
bf15535
resolve issue of duplicate locations
bl-young May 2, 2024
fbecefc
improve clarity of the logic for processing factors, make "World" the…
bl-young Aug 30, 2024
2cf5c76
create jsons for US and for international data
bl-young Aug 30, 2024
d1945c6
move location objects to esupy https://github.com/USEPA/esupy/commit/…
bl-young Oct 1, 2024
f594b2a
allow lists of multiple regions for location writing
bl-young Oct 10, 2024
5974953
Merge branch 'develop' into traci_eutr
bl-young Oct 10, 2024
a9294c6
add example script for traci 2.2
bl-young Nov 11, 2024
4209f43
Merge branch 'develop' into traci_eutr
bl-young Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/traci2_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pandas as pd
import lciafmt
from lciafmt.util import store_method, save_json, log
import esupy.location


method = lciafmt.Method.TRACI2_2
# Region groups whose geocoordinates are written as JSON-LD Location objects
regions = ['states', 'countries']

def main():
    """Generate the TRACI 2.2 method, map flows to the FEDEFL, and store
    the results as parquet and as a JSON-LD-ready JSON file with
    regionalized locations."""

    df = lciafmt.get_method(method)
    mapping = method.get_metadata()['mapping']
    mapped_df = lciafmt.map_flows(df, system=mapping)

    # write the result to parquet, includes states and counties as FIPS,
    # and all countries
    store_method(mapped_df, method)

    # Assigns codes to states e.g., "US-AL", leaves counties as FIPS
    state_df = esupy.location.assign_state_abbrev(mapped_df)

    # Convert country names to ISO Country codes, not all will map
    country_codes = (esupy.location.read_iso_3166()
                     .filter(['Name', 'ISO-2d'])
                     .set_index('Name')['ISO-2d'].to_dict())
    # prevents dropping of the factors without locations
    country_codes[''] = ''
    all_df = state_df.copy()
    # map country names to codes; names with no code fall back to their
    # original value via fillna
    all_df['Location'] = (all_df['Location']
                          .map(country_codes)
                          .fillna(all_df['Location']))
    # keep only recognized country codes (incl. the '' no-location entries)
    # plus US state/county locations
    all_df = all_df.query('Location.isin(@country_codes.values()) |'
                          'Location.str.startswith("US")')

    save_json(method, all_df, name='TRACI2.2', regions=regions)

if __name__ == "__main__":
    main()
10 changes: 6 additions & 4 deletions lciafmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Method(Enum):
"""LCIAFormatter Method object with available metadata."""

TRACI = "TRACI 2.1"
TRACI2_2 = "TRACI 2.2"
RECIPE_2016 = "ReCiPe 2016"
FEDEFL_INV = "FEDEFL Inventory"
ImpactWorld = "ImpactWorld"
Expand Down Expand Up @@ -110,8 +111,8 @@ def get_method(method_id, add_factors_for_missing_contexts=True,
return custom.get_custom_method(file=file)
else:
method_id = util.check_as_class(method_id)
if method_id == Method.TRACI:
return traci.get(add_factors_for_missing_contexts, file=file, url=None)
if method_id == Method.TRACI or method_id == Method.TRACI2_2:
return traci.get(method_id, add_factors_for_missing_contexts, file=file, url=None)
if method_id == Method.RECIPE_2016:
return recipe.get(add_factors_for_missing_contexts, endpoint, summary,
file=file, url=url)
Expand All @@ -133,8 +134,9 @@ def to_jsonld(df: pd.DataFrame, zip_file: str, write_flows=False, **kwargs):
"""Generate a JSONLD file of the methods passed as DataFrame."""
util.log.info(f"write JSON-LD package to {zip_file}")
with jsonld.Writer(zip_file) as w:
w.write(df, write_flows,
w.write(df, write_flows=write_flows,
preferred_only=kwargs.get('preferred_only', False),
regions=kwargs.get('regions'),
)


Expand All @@ -153,7 +155,7 @@ def map_flows(df: pd.DataFrame, system=None, mapping=None,
preserve_unmapped=preserve_unmapped,
case_insensitive=case_insensitive)
mapped = mapper.run()
x = mapped[mapped[['Method', 'Indicator', 'Flowable', 'Flow UUID']
x = mapped[mapped[['Method', 'Indicator', 'Flowable', 'Flow UUID', 'Location']
].duplicated(keep=False)]
duplicates = list(set(zip(x.Indicator, x.Flowable)))
if len(duplicates) > 0:
Expand Down
15 changes: 15 additions & 0 deletions lciafmt/data/methods.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,26 @@
"path": "traci",
"mapping": "TRACI2.1",
"case_insensitivity": "False",
"file": "traci_2.1.xlsx",
"url": "https://www.epa.gov/sites/default/files/2015-12/traci_2_1_2014_dec_10_0.xlsx",
"bib_id": "bare_traci_2011",
"citation": "Bare 2012",
"source_type": "Excel file"
},
{
"id": "TRACI2_2",
"name": "TRACI 2.2",
"detail_note": "TRACI 2.2 replicates TRACI 2.1 with the exception of the Eutrophication indicators which are described in the attached reference. As in TRACI 2.1, additional flowable mappings were included for general metals (e.g. Copper, Zinc, and Selenium) in the FEDEFL to the most common ions where available (e.g. COPPER(II), ZINC(II), and SELENIUM(IV)). This ensures the application of TRACI 2.2 characterization factors when the general name of the metal is reported, as is the case in EPA datasets such as NEI and TRI. Duplicate names for flowables exist in TRACI 2.2 which have different characterization factors and different CAS. These occur due to duplicate entries in the source files for USEtox. In these cases, the entries with the currently recognized CAS are accepted and the other flowables are ignored. FEDEFL contexts are mapped to TRACI 2.2 to enable impact assessment across most of the possible contexts available within FEDEFL. Mappings that occur in the primary contexts (e.g. air) are assigned to all possible sub-contexts which apply. Where a context is non-specific in FEDEFL relative to the available TRACI 2.2 contexts (e.g. rural/urban), the LCIA Formatter applies the average of the relevant characterization factors from TRACI 2.2. Normalization and weighting factors are not provided in the TRACI 2.2 source file and are not added here.",
"path": "traci",
"mapping": "TRACI2.2",
"case_insensitivity": "False",
"file": "TRACI_2.2.xlsx",
"url": "https://github.com/USEPA/TRACI/raw/master/TraciTool/TRACI_2_2.xlsx",
"eutro_url": "https://pasteur.epa.gov/uploads/10.23719/1520443/TRACI%20Spatial%20Eutrophication%20Characterization%20Factors_2020-10.xlsx",
"eutro_file": "TRACI Spatial Eutrophication Characterization Factors_2020-10.xlsx",
"citation": "Henderson et al. 2021",
"source_type": "Excel file"
},
{
"id": "RECIPE_2016",
"name": "ReCiPe 2016",
Expand Down
42 changes: 41 additions & 1 deletion lciafmt/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from esupy.util import make_uuid
from esupy.bibtex import generate_sources
from esupy.location import extract_coordinates, olca_location_meta
import fedelemflowlist
from .util import is_non_empty_str, generate_method_description,\
log, pkg_version_number, datapath, check_as_class
Expand All @@ -30,6 +31,9 @@ def __init__(self, zip_file: str):
self.__methods = {}
self.__indicators = {}
self.__flows = {}
self.__coordinates = {}
self.__locations = {}
self.__location_meta = olca_location_meta().fillna('')
self.__sources = {}
self.__sources_to_write = {}
self.__bibids = {}
Expand All @@ -41,7 +45,14 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
self.__writer.close()

def write(self, df: pd.DataFrame, write_flows=False, preferred_only=False):
def write(self, df: pd.DataFrame,
write_flows=False,
preferred_only=False,
regions=None # list, options include: 'states', 'countries'
):
if any(df['Location'] != '') and regions is not None:
coord = [extract_coordinates(group=r) for r in regions]
self.__coordinates = {k: v for d in coord for k, v in d.items()}
if 'source_method' not in df:
df['source_method'] = df['Method']
if 'source_indicator' not in df:
Expand Down Expand Up @@ -78,12 +89,16 @@ def write(self, df: pd.DataFrame, write_flows=False, preferred_only=False):
factor.flow_property = units.property_ref(unit)
factor.unit = units.unit_ref(unit)
factor.value = row['Characterization Factor']
if self.__coordinates != {}:
location = self.__location(row)
factor.location = location.to_ref() if location else None
indicator.impact_factors.append(factor)

log.debug("write entities")
dicts = [
self.__indicators,
self.__methods,
self.__locations,
self.__sources_to_write
]
if write_flows:
Expand Down Expand Up @@ -191,6 +206,31 @@ def __flow(self, row):
self.__flows[uid] = flow
return flow

    def __location(self, row):
        """Return an openLCA Location for the row's 'Location' code, or None.

        Looks the code up in the location metadata table and caches the
        built Location in self.__locations so each location is generated
        only once per writer.
        """
        if row['Location'] == '':
            # no location specified
            return None
        # select the metadata row whose Code matches; squeeze() collapses a
        # single-row result to a Series so fields are attribute-accessible
        meta = (self.__location_meta.loc[
            self.__location_meta['Code'] == row['Location']].squeeze())
        if len(meta) == 0:
            # not an available location
            return None
        location = self.__locations.get(meta.ID)
        if location is not None:
            # location found, no need to regenerate
            return location
        # geometry is taken from the coordinates extracted in write();
        # codes outside the requested region groups get geometry=None
        location = o.Location(
            id=meta.ID,
            name=meta.Name,
            description=meta.Description,
            category=meta.Category,
            code=meta.Code,
            geometry=self.__coordinates.get(row['Location']),
            latitude=meta.Latitude,
            longitude=meta.Longitude)
        self.__locations[meta.ID] = location
        return location

def _return_source(self, name):
for uid, s in self.__sources.items():
if s.name == name or name.startswith(s.name):
Expand Down
156 changes: 150 additions & 6 deletions lciafmt/traci.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
import lciafmt.df as dfutil
import lciafmt.xls as xls

from .util import log, aggregate_factors_for_primary_contexts, format_cas,\
from lciafmt.util import log, aggregate_factors_for_primary_contexts, format_cas,\
datapath


flowables_replace = pd.read_csv(datapath / 'TRACI_2.1_replacement.csv')
flowables_split = pd.read_csv(datapath / 'TRACI_2.1_split.csv')


def get(add_factors_for_missing_contexts=True, file=None,
def get(method, add_factors_for_missing_contexts=True, file=None,
url=None) -> pd.DataFrame:
"""Generate a method for TRACI in standard format.

Expand All @@ -34,8 +34,8 @@ def get(add_factors_for_missing_contexts=True, file=None,
:param url: str, alternate url for method, defaults to url in method config
:return: DataFrame of method in standard format
"""
log.info("getting method Traci 2.1")
method_meta = lciafmt.Method.TRACI.get_metadata()
log.info("getting method TRACI")
method_meta = method.get_metadata()
f = file
if f is None:
f = _get_file(method_meta, url)
Expand Down Expand Up @@ -66,6 +66,19 @@ def get(add_factors_for_missing_contexts=True, file=None,
df.drop_duplicates(keep='first', inplace=True)
length = length - len(df)
log.info(f"{length} duplicate entries removed")

"""add eutrophication updates
the function _read_eutro is a function to read the raw data from the new
eutrophication updates
"""
if 'eutro_url' in method_meta:
log.info("getting Eutrophication updates")
f = cache.get_or_download(file=method_meta['eutro_file'],
url=method_meta['eutro_url'])
df_eutro = _read_eutro(f)
frames = [df.query('Indicator != "Eutrophication"'), df_eutro]
df = pd.concat(frames)
df['Method'] = method_meta.get('name')

return df

Expand All @@ -78,7 +91,7 @@ def _get_file(method_meta, url=None):

def _read(xls_file: str) -> pd.DataFrame:
"""Read the data from Excel with given path into a DataFrame."""
log.info(f"read Traci 2.1 from file {xls_file}")
log.info(f"read TRACI from file {xls_file}")
wb = openpyxl.load_workbook(xls_file, read_only=True, data_only=True)
sheet = wb["Substances"]
categories = {}
Expand All @@ -105,7 +118,6 @@ def _read(xls_file: str) -> pd.DataFrame:
if factor == 0.0:
continue
dfutil.record(records,
method="TRACI 2.1",
indicator=cat_info[0],
indicator_unit=cat_info[1],
flow=flow,
Expand All @@ -127,6 +139,9 @@ def _category_info(c: str):
if c == "Global Warming Air (kg CO2 eq / kg substance)":
return "Global warming", "kg CO2 eq", "air", "kg"

if c == "Global Climate Air (kg CO2 eq / kg substance)":
return "Global warming", "kg CO2 eq", "air", "kg"

if c == "Acidification Air (kg SO2 eq / kg substance)":
return "Acidification", "kg SO2 eq", "air", "kg"

Expand Down Expand Up @@ -199,3 +214,132 @@ def _category_info(c: str):

if c == "Human health CF [CTUnoncancer/kg], Emission to cont. agric. Soil, non-canc.":
return "Human health - non-cancer", "CTUnoncancer", "soil/agricultural", "kg"

def _read_eutro(xls_file: str) -> pd.DataFrame:
    """Read the TRACI 2.2 spatial eutrophication factors into a DataFrame.

    Parses the "S5. Raw Data" sheet of the source workbook and converts the
    selected rows into standard-format characterization factor records with
    regionalized location codes (5-digit FIPS for US regions, country names
    for global regions).

    Logic used for selecting US data (max 15 per region):
    | | Comp_Air | Comp_Fw | Comp_Soil | Comp_LME |
    |---------------|----------|---------|-----------|----------|
    | Flow_N | n/a | NonAg | Agric | NonAg |
    | Flow_NH3 as N | All | n/a | n/a | n/a |
    | Flow_NOx as N | All | n/a | n/a | n/a |
    | Flow_P | n/a | All | All | n/a |
    * skip Agric & NonAg

    Logic used for selecting global data (max 15 per region):
    | | Comp_Air | Comp_Fw | Comp_Soil | Comp_LME |
    |---------------|----------|----------------|-----------|----------|
    | Flow_N | n/a | Genrl == NonAg | Agric | NonAg |
    | Flow_NH3 as N | All | n/a | n/a | n/a |
    | Flow_NOx as N | All | n/a | n/a | n/a |
    | Flow_P | n/a | All | All | n/a |

    :param xls_file: str, path to the downloaded eutrophication workbook
    :return: DataFrame of eutrophication factors in standard format
    """
    # emission compartment -> primary flow context
    context_dict = {'Comp_Fw': 'freshwater',
                    'Comp_Air': 'air',
                    'Comp_Soil': 'soil',
                    'Comp_LME': 'marine'}
    # sector -> sub-compartment suffix for non-nitrogen flows
    # NOTE(review): original code had 'urbran' for 'NonAg'; corrected to
    # 'urban' -- confirm against the TRACI2.2 flow mapping context names
    compartment_dict = {'Genrl': 'unspecified',
                        'Agric': 'rural',
                        'NonAg': 'urban'}
    log.info(f"read Eutrophication category from file {xls_file}")
    source_df = pd.read_excel(xls_file, sheet_name="S5. Raw Data")
    records = []
    for _, row in source_df.iterrows():
        sector = row['Sector']
        flow = row['Flowable']
        compartment = row['Emit Compartment']
        flow_category = context_dict.get(compartment, "n/a")
        if flow_category == "soil" and flow == "Flow_P":
            # required to enable distinct mappings to ground for these flows
            flow_category = f'{flow_category} (P)'
        aggregation = row['Aggregation Target']
        region_id = str(row['Target ID'])
        if aggregation in ("US_Nation", "US_States", "US_Counties"):
            # US regions are encoded as 5-digit FIPS strings
            if aggregation == "US_Nation":
                region = "00000"
            elif len(region_id) < 3:
                # state ID: pad on the right to FIPS-5; assumes state IDs
                # retain their leading zeros (e.g. '01' -> '01000') -- TODO
                # confirm against the source sheet
                region = region_id.ljust(5, '0')
            else:
                # county ID: pad on the left to FIPS-5
                region = region_id.rjust(5, '0')

        elif aggregation in ("World", "Countries"):
            region = row['Name']
            if region == "United States":
                ## Skip US as country in favor of aggregation == "US_Nation"
                continue
            if region == "Russian Federation" and region_id == "254":
                ## Two entries for Russian Federation, 254 is a very small island
                continue
            if sector == "Genrl" and flow == "Flow_N":
                ## Drops duplicate factors for Flow_N Comp_Fw
                continue
        else:
            # Ignore aggregation == "Continents"
            continue
        if flow != "Flow_N":
            # refine the context with the sector sub-compartment
            flow_category = f'{flow_category}/{compartment_dict.get(sector)}'

        factor = row['Average Target Value']
        # P flows characterize freshwater eutrophication, N flows marine
        indicator = ("Eutrophication (Freshwater)" if flow == "Flow_P"
                     else "Eutrophication (Marine)")
        unit = ("kg P eq" if indicator == "Eutrophication (Freshwater)"
                else "kg N eq")

        dfutil.record(records,
                      indicator=indicator,
                      indicator_unit=unit,
                      flow=flow,
                      flow_category=flow_category,
                      flow_unit="kg",
                      factor=factor,
                      location=region)

        if aggregation == "World":
            # openLCA requires a factor without location for use by default
            dfutil.record(records,
                          indicator=indicator,
                          indicator_unit=unit,
                          flow=flow,
                          flow_category=flow_category,
                          flow_unit="kg",
                          factor=factor,
                          location="")

    df = dfutil.data_frame(records)

    # Resolve duplicate factors for a single location.
    # Bug fix: the original used `c not in ('Characterization Factor')`,
    # which is a substring test on a plain string, not tuple membership.
    cols_to_keep = [c for c in df.columns if c != 'Characterization Factor']
    duplicates = df[df.duplicated(subset=cols_to_keep, keep=False)]
    ## United States Minor Outlying Islands and Jan Mayen
    # are inexplicably shown multiple times with different location IDs.
    # Average those factors together.
    df2 = (df.groupby(cols_to_keep, as_index=False)
           .agg({'Characterization Factor': 'mean'}))
    log.debug(f'{len(duplicates)} duplicate locations consolidated to '
              f'{(len(duplicates)-(len(df)-len(df2)))}')

    return df2


#%%
if __name__ == "__main__":
    # Ad-hoc development driver: build the TRACI 2.2 method, map flows,
    # store the parquet output, and write a country-level JSON.
    # The `#%%` markers delimit cells for interactive, stepwise execution.
    # NOTE(review): esupy.location appears unused in this script
    import esupy.location
    from lciafmt.util import store_method, save_json
    method = lciafmt.Method.TRACI2_2
    # method_meta = method.get_metadata()
    # f = cache.get_or_download(file=method_meta['eutro_file'], url=method_meta['eutro_url'])
    # df_eutro = _read_eutro(f)
    df = get(method)
    #%%
    mapping = method.get_metadata()['mapping']
    mapped_df = lciafmt.map_flows(df, system=mapping)
    store_method(mapped_df, method)
    #%% create JSON separately for US and for countries
    # non-numeric Locations are countries; "00000" is the US national code
    # and is relabeled as a country entry
    country_df = pd.concat([mapped_df.query('~Location.str.isnumeric()'),
                            (mapped_df.query('Location == "00000"')
                             .assign(Location = "United States"))],
                           ignore_index=True)
    save_json(method, country_df, name='TRACI2.2_countries', regions=['countries'])
Loading
Loading