Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds regionalized eutrophication factors for TRACI 2.2 #123

Open
wants to merge 30 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
369d268
test
juliechenerg Feb 2, 2024
5892c65
test 2/2/24
juliechenerg Feb 2, 2024
938ca8d
test 2/8, adding notes
juliechenerg Feb 8, 2024
ff191e3
test
juliechenerg Feb 8, 2024
166a795
1) Adding Eutrophication update. 2) change "from .util import..." to …
juliechenerg Feb 26, 2024
cfee635
Add files via upload
juliechenerg Feb 26, 2024
e7400dd
add eutro url to methods.json, read and cache file
bl-young Apr 16, 2024
44781b6
add function to extract geocoordinates from ecoinvent source (used by…
bl-young Apr 17, 2024
eec0d1c
add temporary function to map state names
bl-young Apr 17, 2024
7d54f1d
write location objects to jsonld
bl-young Apr 17, 2024
658b2f7
integrate TRACI2.2 as new method
bl-young Apr 18, 2024
465639f
drop raw dataset, grabbing remote
bl-young Apr 18, 2024
fc3559b
Merge branch 'develop' into traci_eutr
bl-young Apr 19, 2024
5658248
add location to duplicate assessment
bl-young Apr 19, 2024
9645124
use only ref for assigning location to factor
bl-young Apr 19, 2024
74d2392
fix error in maintaining factors with no location from US national
bl-young Apr 19, 2024
13cdd47
move more objects to location.py
bl-young Apr 23, 2024
f12c1f7
add traci2.2 test
bl-young Apr 23, 2024
01f07a2
Merge branch 'develop' into traci_eutr
bl-young Apr 26, 2024
c540f76
fix indicator units
bl-young Apr 27, 2024
5bf6b4a
incorporate data for countries
bl-young Apr 27, 2024
ca18836
pass region to Writer.write()
bl-young Apr 28, 2024
bf15535
resolve issue of duplicate locations
bl-young May 2, 2024
fbecefc
improve clarity of the logic for processing factors, make "World" the…
bl-young Aug 30, 2024
2cf5c76
create jsons for US and for international data
bl-young Aug 30, 2024
d1945c6
move location objects to esupy https://github.com/USEPA/esupy/commit/…
bl-young Oct 1, 2024
f594b2a
allow lists of multiple regions for location writing
bl-young Oct 10, 2024
5974953
Merge branch 'develop' into traci_eutr
bl-young Oct 10, 2024
a9294c6
add example script for traci 2.2
bl-young Nov 11, 2024
4209f43
Merge branch 'develop' into traci_eutr
bl-young Nov 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions examples/traci2_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import pandas as pd
import lciafmt
from lciafmt.util import store_method, save_json, log
import esupy.location


method = lciafmt.Method.TRACI2_2
# Region groups whose geocoordinates are written as JSON-LD Location objects
regions = ['states', 'countries']

def main():
    """Generate the TRACI 2.2 method, map flows to the FEDEFL, and store
    the results as parquet and as a JSON-LD-ready JSON file with
    regionalized locations."""

    df = lciafmt.get_method(method)
    mapping = method.get_metadata()['mapping']
    mapped_df = lciafmt.map_flows(df, system=mapping)

    # write the result to parquet, includes states and counties as FIPS,
    # and all countries
    store_method(mapped_df, method)

    # Assigns codes to states e.g., "US-AL", leaves counties as FIPS
    state_df = esupy.location.assign_state_abbrev(mapped_df)

    # Convert country names to ISO Country codes, not all will map
    country_codes = (esupy.location.read_iso_3166()
                     .filter(['Name', 'ISO-2d'])
                     .set_index('Name')['ISO-2d'].to_dict())
    # prevents dropping of the factors without locations
    country_codes[''] = ''
    all_df = state_df.copy()
    # map country names to codes; names with no code fall back to their
    # original value via fillna
    all_df['Location'] = (all_df['Location']
                          .map(country_codes)
                          .fillna(all_df['Location']))
    # keep only recognized country codes (incl. the '' no-location entries)
    # plus US state/county locations
    all_df = all_df.query('Location.isin(@country_codes.values()) |'
                          'Location.str.startswith("US")')

    save_json(method, all_df, name='TRACI2.2', regions=regions)

if __name__ == "__main__":
    main()
10 changes: 6 additions & 4 deletions lciafmt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Method(Enum):
"""LCIAFormatter Method object with available metadata."""

TRACI = "TRACI 2.1"
TRACI2_2 = "TRACI 2.2"
RECIPE_2016 = "ReCiPe 2016"
FEDEFL_INV = "FEDEFL Inventory"
ImpactWorld = "ImpactWorld"
Expand Down Expand Up @@ -110,8 +111,8 @@ def get_method(method_id, add_factors_for_missing_contexts=True,
return custom.get_custom_method(file=file)
else:
method_id = util.check_as_class(method_id)
if method_id == Method.TRACI:
return traci.get(add_factors_for_missing_contexts, file=file, url=None)
if method_id == Method.TRACI or method_id == Method.TRACI2_2:
return traci.get(method_id, add_factors_for_missing_contexts, file=file, url=None)
if method_id == Method.RECIPE_2016:
return recipe.get(add_factors_for_missing_contexts, endpoint, summary,
file=file, url=url)
Expand All @@ -133,8 +134,9 @@ def to_jsonld(df: pd.DataFrame, zip_file: str, write_flows=False, **kwargs):
"""Generate a JSONLD file of the methods passed as DataFrame."""
util.log.info(f"write JSON-LD package to {zip_file}")
with jsonld.Writer(zip_file) as w:
w.write(df, write_flows,
w.write(df, write_flows=write_flows,
preferred_only=kwargs.get('preferred_only', False),
regions=kwargs.get('regions'),
)


Expand All @@ -153,7 +155,7 @@ def map_flows(df: pd.DataFrame, system=None, mapping=None,
preserve_unmapped=preserve_unmapped,
case_insensitive=case_insensitive)
mapped = mapper.run()
x = mapped[mapped[['Method', 'Indicator', 'Flowable', 'Flow UUID']
x = mapped[mapped[['Method', 'Indicator', 'Flowable', 'Flow UUID', 'Location']
].duplicated(keep=False)]
duplicates = list(set(zip(x.Indicator, x.Flowable)))
if len(duplicates) > 0:
Expand Down
15 changes: 15 additions & 0 deletions lciafmt/data/methods.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,26 @@
"path": "traci",
"mapping": "TRACI2.1",
"case_insensitivity": "False",
"file": "traci_2.1.xlsx",
"url": "https://www.epa.gov/sites/default/files/2015-12/traci_2_1_2014_dec_10_0.xlsx",
"bib_id": "bare_traci_2011",
"citation": "Bare 2012",
"source_type": "Excel file"
},
{
"id": "TRACI2_2",
"name": "TRACI 2.2",
"detail_note": "TRACI 2.2 replicates TRACI 2.1 with the exception of the Eutrophication indicators which are described in the attached reference. As in TRACI 2.1, additional flowable mappings were included for general metals (e.g. Copper, Zinc, and Selenium) in the FEDEFL to the most common ions where available (e.g. COPPER(II), ZINC(II), and SELENIUM(IV)). This ensures the application of TRACI 2.2 characterization factors when the general name of the metal is reported, as is the case in EPA datasets such as NEI and TRI. Duplicate names for flowables exist in TRACI 2.2 which have different characterization factors and different CAS. These occur due to duplicate entries in the source files for USEtox. In these cases, the entries with the currently recognized CAS are accepted and the other flowables are ignored. FEDEFL contexts are mapped to TRACI 2.2 to enable impact assessment across most of the possible contexts available within FEDEFL. Mappings that occur in the primary contexts (e.g. air) are assigned to all possible sub-contexts which apply. Where a context is non-specific in FEDEFL relative to the available TRACI 2.2 contexts (e.g. rural/urban), the LCIA Formatter applies the average of the relevant characterization factors from TRACI 2.2. Normalization and weighting factors are not provided in the TRACI 2.2 source file and are not added here.",
"path": "traci",
"mapping": "TRACI2.2",
"case_insensitivity": "False",
"file": "TRACI_2.2.xlsx",
"url": "https://github.com/USEPA/TRACI/raw/master/TraciTool/TRACI_2_2.xlsx",
"eutro_url": "https://pasteur.epa.gov/uploads/10.23719/1520443/TRACI%20Spatial%20Eutrophication%20Characterization%20Factors_2020-10.xlsx",
"eutro_file": "TRACI Spatial Eutrophication Characterization Factors_2020-10.xlsx",
"citation": "Henderson et al. 2021",
"source_type": "Excel file"
},
{
"id": "RECIPE_2016",
"name": "ReCiPe 2016",
Expand Down
42 changes: 41 additions & 1 deletion lciafmt/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from esupy.util import make_uuid
from esupy.bibtex import generate_sources
from esupy.location import extract_coordinates, olca_location_meta
import fedelemflowlist
from .util import is_non_empty_str, generate_method_description,\
log, pkg_version_number, datapath, check_as_class
Expand All @@ -30,6 +31,9 @@ def __init__(self, zip_file: str):
self.__methods = {}
self.__indicators = {}
self.__flows = {}
self.__coordinates = {}
self.__locations = {}
self.__location_meta = olca_location_meta().fillna('')
self.__sources = {}
self.__sources_to_write = {}
self.__bibids = {}
Expand All @@ -41,7 +45,14 @@ def __enter__(self):
def __exit__(self, exc_type, exc_val, exc_tb):
self.__writer.close()

def write(self, df: pd.DataFrame, write_flows=False, preferred_only=False):
def write(self, df: pd.DataFrame,
write_flows=False,
preferred_only=False,
regions=None # list, options include: 'states', 'countries'
):
if any(df['Location'] != '') and regions is not None:
coord = [extract_coordinates(group=r) for r in regions]
self.__coordinates = {k: v for d in coord for k, v in d.items()}
if 'source_method' not in df:
df['source_method'] = df['Method']
if 'source_indicator' not in df:
Expand Down Expand Up @@ -78,12 +89,16 @@ def write(self, df: pd.DataFrame, write_flows=False, preferred_only=False):
factor.flow_property = units.property_ref(unit)
factor.unit = units.unit_ref(unit)
factor.value = row['Characterization Factor']
if self.__coordinates != {}:
location = self.__location(row)
factor.location = location.to_ref() if location else None
indicator.impact_factors.append(factor)

log.debug("write entities")
dicts = [
self.__indicators,
self.__methods,
self.__locations,
self.__sources_to_write
]
if write_flows:
Expand Down Expand Up @@ -191,6 +206,31 @@ def __flow(self, row):
self.__flows[uid] = flow
return flow

    def __location(self, row):
        """Return an openLCA Location for the row's 'Location' code, or None.

        Looks the code up in the location metadata table and caches the
        built Location in self.__locations so each location is generated
        only once per writer.
        """
        if row['Location'] == '':
            # no location specified
            return None
        # select the metadata row whose Code matches; squeeze() collapses a
        # single-row result to a Series so fields are attribute-accessible
        meta = (self.__location_meta.loc[
            self.__location_meta['Code'] == row['Location']].squeeze())
        if len(meta) == 0:
            # not an available location
            return None
        location = self.__locations.get(meta.ID)
        if location is not None:
            # location found, no need to regenerate
            return location
        # geometry is taken from the coordinates extracted in write();
        # codes outside the requested region groups get geometry=None
        location = o.Location(
            id=meta.ID,
            name=meta.Name,
            description=meta.Description,
            category=meta.Category,
            code=meta.Code,
            geometry=self.__coordinates.get(row['Location']),
            latitude=meta.Latitude,
            longitude=meta.Longitude)
        self.__locations[meta.ID] = location
        return location

def _return_source(self, name):
for uid, s in self.__sources.items():
if s.name == name or name.startswith(s.name):
Expand Down
156 changes: 150 additions & 6 deletions lciafmt/traci.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@
import lciafmt.df as dfutil
import lciafmt.xls as xls

from .util import log, aggregate_factors_for_primary_contexts, format_cas,\
from lciafmt.util import log, aggregate_factors_for_primary_contexts, format_cas,\
datapath


flowables_replace = pd.read_csv(datapath / 'TRACI_2.1_replacement.csv')
flowables_split = pd.read_csv(datapath / 'TRACI_2.1_split.csv')


def get(add_factors_for_missing_contexts=True, file=None,
def get(method, add_factors_for_missing_contexts=True, file=None,
url=None) -> pd.DataFrame:
"""Generate a method for TRACI in standard format.

Expand All @@ -34,8 +34,8 @@ def get(add_factors_for_missing_contexts=True, file=None,
:param url: str, alternate url for method, defaults to url in method config
:return: DataFrame of method in standard format
"""
log.info("getting method Traci 2.1")
method_meta = lciafmt.Method.TRACI.get_metadata()
log.info("getting method TRACI")
method_meta = method.get_metadata()
f = file
if f is None:
f = _get_file(method_meta, url)
Expand Down Expand Up @@ -66,6 +66,19 @@ def get(add_factors_for_missing_contexts=True, file=None,
df.drop_duplicates(keep='first', inplace=True)
length = length - len(df)
log.info(f"{length} duplicate entries removed")

"""add eutrophication updates
the function _read_eutro is a function to read the raw data from the new
eutrophication updates
"""
if 'eutro_url' in method_meta:
log.info("getting Eutrophication updates")
f = cache.get_or_download(file=method_meta['eutro_file'],
url=method_meta['eutro_url'])
df_eutro = _read_eutro(f)
frames = [df.query('Indicator != "Eutrophication"'), df_eutro]
df = pd.concat(frames)
df['Method'] = method_meta.get('name')

return df

Expand All @@ -78,7 +91,7 @@ def _get_file(method_meta, url=None):

def _read(xls_file: str) -> pd.DataFrame:
"""Read the data from Excel with given path into a DataFrame."""
log.info(f"read Traci 2.1 from file {xls_file}")
log.info(f"read TRACI from file {xls_file}")
wb = openpyxl.load_workbook(xls_file, read_only=True, data_only=True)
sheet = wb["Substances"]
categories = {}
Expand All @@ -105,7 +118,6 @@ def _read(xls_file: str) -> pd.DataFrame:
if factor == 0.0:
continue
dfutil.record(records,
method="TRACI 2.1",
indicator=cat_info[0],
indicator_unit=cat_info[1],
flow=flow,
Expand All @@ -127,6 +139,9 @@ def _category_info(c: str):
if c == "Global Warming Air (kg CO2 eq / kg substance)":
return "Global warming", "kg CO2 eq", "air", "kg"

if c == "Global Climate Air (kg CO2 eq / kg substance)":
return "Global warming", "kg CO2 eq", "air", "kg"

if c == "Acidification Air (kg SO2 eq / kg substance)":
return "Acidification", "kg SO2 eq", "air", "kg"

Expand Down Expand Up @@ -199,3 +214,132 @@ def _category_info(c: str):

if c == "Human health CF [CTUnoncancer/kg], Emission to cont. agric. Soil, non-canc.":
return "Human health - non-cancer", "CTUnoncancer", "soil/agricultural", "kg"

def _read_eutro(xls_file: str) -> pd.DataFrame:
    """Read the TRACI 2.2 spatial eutrophication factors into a DataFrame.

    Parses the "S5. Raw Data" sheet of the source workbook and converts the
    selected rows into standard-format characterization factor records with
    regionalized location codes (5-digit FIPS for US regions, country names
    for global regions).

    Logic used for selecting US data (max 15 per region):
    | | Comp_Air | Comp_Fw | Comp_Soil | Comp_LME |
    |---------------|----------|---------|-----------|----------|
    | Flow_N | n/a | NonAg | Agric | NonAg |
    | Flow_NH3 as N | All | n/a | n/a | n/a |
    | Flow_NOx as N | All | n/a | n/a | n/a |
    | Flow_P | n/a | All | All | n/a |
    * skip Agric & NonAg

    Logic used for selecting global data (max 15 per region):
    | | Comp_Air | Comp_Fw | Comp_Soil | Comp_LME |
    |---------------|----------|----------------|-----------|----------|
    | Flow_N | n/a | Genrl == NonAg | Agric | NonAg |
    | Flow_NH3 as N | All | n/a | n/a | n/a |
    | Flow_NOx as N | All | n/a | n/a | n/a |
    | Flow_P | n/a | All | All | n/a |

    :param xls_file: str, path to the downloaded eutrophication workbook
    :return: DataFrame of eutrophication factors in standard format
    """
    # emission compartment -> primary flow context
    context_dict = {'Comp_Fw': 'freshwater',
                    'Comp_Air': 'air',
                    'Comp_Soil': 'soil',
                    'Comp_LME': 'marine'}
    # sector -> sub-compartment suffix for non-nitrogen flows
    # NOTE(review): original code had 'urbran' for 'NonAg'; corrected to
    # 'urban' -- confirm against the TRACI2.2 flow mapping context names
    compartment_dict = {'Genrl': 'unspecified',
                        'Agric': 'rural',
                        'NonAg': 'urban'}
    log.info(f"read Eutrophication category from file {xls_file}")
    source_df = pd.read_excel(xls_file, sheet_name="S5. Raw Data")
    records = []
    for _, row in source_df.iterrows():
        sector = row['Sector']
        flow = row['Flowable']
        compartment = row['Emit Compartment']
        flow_category = context_dict.get(compartment, "n/a")
        if flow_category == "soil" and flow == "Flow_P":
            # required to enable distinct mappings to ground for these flows
            flow_category = f'{flow_category} (P)'
        aggregation = row['Aggregation Target']
        region_id = str(row['Target ID'])
        if aggregation in ("US_Nation", "US_States", "US_Counties"):
            # US regions are encoded as 5-digit FIPS strings
            if aggregation == "US_Nation":
                region = "00000"
            elif len(region_id) < 3:
                # state ID: pad on the right to FIPS-5; assumes state IDs
                # retain their leading zeros (e.g. '01' -> '01000') -- TODO
                # confirm against the source sheet
                region = region_id.ljust(5, '0')
            else:
                # county ID: pad on the left to FIPS-5
                region = region_id.rjust(5, '0')

        elif aggregation in ("World", "Countries"):
            region = row['Name']
            if region == "United States":
                ## Skip US as country in favor of aggregation == "US_Nation"
                continue
            if region == "Russian Federation" and region_id == "254":
                ## Two entries for Russian Federation, 254 is a very small island
                continue
            if sector == "Genrl" and flow == "Flow_N":
                ## Drops duplicate factors for Flow_N Comp_Fw
                continue
        else:
            # Ignore aggregation == "Continents"
            continue
        if flow != "Flow_N":
            # refine the context with the sector sub-compartment
            flow_category = f'{flow_category}/{compartment_dict.get(sector)}'

        factor = row['Average Target Value']
        # P flows characterize freshwater eutrophication, N flows marine
        indicator = ("Eutrophication (Freshwater)" if flow == "Flow_P"
                     else "Eutrophication (Marine)")
        unit = ("kg P eq" if indicator == "Eutrophication (Freshwater)"
                else "kg N eq")

        dfutil.record(records,
                      indicator=indicator,
                      indicator_unit=unit,
                      flow=flow,
                      flow_category=flow_category,
                      flow_unit="kg",
                      factor=factor,
                      location=region)

        if aggregation == "World":
            # openLCA requires a factor without location for use by default
            dfutil.record(records,
                          indicator=indicator,
                          indicator_unit=unit,
                          flow=flow,
                          flow_category=flow_category,
                          flow_unit="kg",
                          factor=factor,
                          location="")

    df = dfutil.data_frame(records)

    # Resolve duplicate factors for a single location.
    # Bug fix: the original used `c not in ('Characterization Factor')`,
    # which is a substring test on a plain string, not tuple membership.
    cols_to_keep = [c for c in df.columns if c != 'Characterization Factor']
    duplicates = df[df.duplicated(subset=cols_to_keep, keep=False)]
    ## United States Minor Outlying Islands and Jan Mayen
    # are inexplicably shown multiple times with different location IDs.
    # Average those factors together.
    df2 = (df.groupby(cols_to_keep, as_index=False)
           .agg({'Characterization Factor': 'mean'}))
    log.debug(f'{len(duplicates)} duplicate locations consolidated to '
              f'{(len(duplicates)-(len(df)-len(df2)))}')

    return df2


#%%
if __name__ == "__main__":
    # Ad-hoc development driver: build the TRACI 2.2 method, map flows,
    # store the parquet output, and write a country-level JSON.
    # The `#%%` markers delimit cells for interactive, stepwise execution.
    # NOTE(review): esupy.location appears unused in this script
    import esupy.location
    from lciafmt.util import store_method, save_json
    method = lciafmt.Method.TRACI2_2
    # method_meta = method.get_metadata()
    # f = cache.get_or_download(file=method_meta['eutro_file'], url=method_meta['eutro_url'])
    # df_eutro = _read_eutro(f)
    df = get(method)
    #%%
    mapping = method.get_metadata()['mapping']
    mapped_df = lciafmt.map_flows(df, system=mapping)
    store_method(mapped_df, method)
    #%% create JSON separately for US and for countries
    # non-numeric Locations are countries; "00000" is the US national code
    # and is relabeled as a country entry
    country_df = pd.concat([mapped_df.query('~Location.str.isnumeric()'),
                            (mapped_df.query('Location == "00000"')
                             .assign(Location = "United States"))],
                           ignore_index=True)
    save_json(method, country_df, name='TRACI2.2_countries', regions=['countries'])
Loading
Loading