Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion energyplus_regressions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
NAME = 'energyplus_regressions'
VERSION = '2.1.3'
VERSION = '2.1.4'
150 changes: 100 additions & 50 deletions energyplus_regressions/diffs/table_diff.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't need this with Python 3.


"""Takes two E+ html output files and compares them
usage:
Expand Down Expand Up @@ -36,6 +34,7 @@
__copyright__ = "Copyright (c) 2009 Santosh Philip and Amir Roth 2013"
__license__ = "GNU General Public License Version 3"

from pathlib import Path
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Start modernizing slightly using Path instances. Could be done lots more places, but I didn't want to grow scope toooo much.

import sys
import getopt
import os.path
Expand All @@ -45,8 +44,8 @@

help_message = __doc__

path = os.path.dirname(__file__)
script_dir = os.path.abspath(path)
this_file = Path(__file__).resolve()
script_dir = this_file.parent

title_css = """<!DOCTYPE html PUBLIC "-
//W3C//DTD XHTML 1.0 Strict//EN"
Expand Down Expand Up @@ -95,38 +94,26 @@
"""


def thresh_abs_rel_diff(abs_thresh, rel_thresh, x, y):
def thresh_abs_rel_diff(abs_thresh: float, rel_thresh: float, x: str, y: str) -> tuple[float | str, float | str, str]:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little Python type hinting never hurt anyone.

if x == y:
return 0, 0, 'equal'
# noinspection PyBroadException
try:
diff = 'equal'

fx = float(x)
fy = float(y)

abs_diff = abs(fx - fy)

# the following two lines were here, but I don't see how they could be hit, I'm commenting for now
# if abs_diff == 0.0:
# return 0, 0, 'equal'

rel_diff = abs((fx - fy) / fx) if abs(fx) > abs(fy) else abs((fy - fx) / fy)

diff = 'equal'
if abs_diff > abs_thresh and rel_diff > rel_thresh:
diff = 'big'
elif (0 < abs_diff <= abs_thresh) or (0 < rel_diff <= rel_thresh):
diff = 'small'
# the following else clause was here, but again, I don't see how it could be hit, commenting
# else:
# diff = 'equal'
return abs_diff, rel_diff, diff
except ValueError:
# if we couldn't get a float out of it, we are doing string comparison, check case-insensitively before leaving
if x.lower().strip() == y.lower().strip():
return 0, 0, 'equal'
else:
return '%s vs %s' % (x, y), '%s vs %s' % (x, y), 'stringdiff'
return f'{x} vs {y}', f'{x} vs {y}', 'stringdiff'
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And more f strings instead of format specifiers



def prev_sib(entity):
Expand Down Expand Up @@ -235,7 +222,8 @@ def hdict2soup(soup, heading, num, hdict, tdict, horder):


# Convert html table to heading dictionary (and header list) in single step
def table2hdict_horder(table):
def table2hdict_horder(table, table_a=None):
# If table_a_hdict is passed in, we can try to match the row order to avoid diffs just due to row order
hdict = {}
horder = []
trows = table('tr')
Expand All @@ -250,7 +238,56 @@ def table2hdict_horder(table):
hdict[hcontents] = []
horder.append(hcontents)

for trow in trows[1:]:
# Assume we are going to just loop over the rows and compare the data
search_rows = trows[1:]

# But we can handle it specially if we passed in table_a and it's just a valid reorder
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, this comment should capture the intent. But basically, if we read table a, then pass it in when we read table b, this code will try to identify matching lines in the table, even if they are out of order. If anything goes awry, diffs are detected, etc., it will revert back to the normal row-by-row comparison on both table a and table b.

# There are some weird things to consider here though. For example, some tables have multiple entirely blank
# rows, just there for visual spacing. Also there are tables where the far left entry is not unique.
# Consider the End Uses by Subcategory table. One row starts with "Heating" and then "General".
# The next row then has nothing in the first column, but the second column is "Boiler".
# This implies that "Heating" was a grouping, and "General" or "Boiler" is the actual subcategory.
# I think the only way to handle this robustly would be to use the entire
# row as the key, which is annoying, but should work well.
if table_a:
# process the rows of the "base" table_a that was provided into a list of search keys
trows_a = table_a('tr')
table_a_row_order = []
for trow in trows_a[1:]:
search_key = []
for tcol in trow('td'):
if tcol.contents:
search_key.append(tcol.contents[0])
else: # pragma: no cover
# I really don't think we can make it here while searching, but I don't want to accidentally crash
search_key.append("")
table_a_row_order.append(search_key)
# process the rows of the "mod" table that was provided into a list of search keys
found_table_b_row_order = []
for trow in trows[1:]:
search_key = []
for tcol in trow('td'):
if tcol.contents:
search_key.append(tcol.contents[0])
else: # pragma: no cover
# I really don't think we can make it here while searching, but I don't want to accidentally crash
search_key.append("")
found_table_b_row_order.append(search_key)
# it's the same order exactly, skip any searching and just run with search_rows as-is
if table_a_row_order == found_table_b_row_order:
pass
# if not exactly the same but overall the same stuff, it's reordered and we can match things up
elif sorted(table_a_row_order) == sorted(found_table_b_row_order):
# now just build the list of trows to search by index based on table a order
search_rows = []
for to_find_val in table_a_row_order:
for search_row_index, trow in enumerate(trows[1:]):
if found_table_b_row_order[search_row_index] == to_find_val:
search_rows.append(trow)
break

# whether it was reordered or just using the literal order, build out the hdict instance to pass back
for trow in search_rows:
for htd, td in zip(trows[0]('td'), trow('td')):
try:
hcontents = htd.contents[0]
Expand Down Expand Up @@ -317,7 +354,10 @@ def make_err_table_row(err_soup, tabletag, uheading, count_of_tables, abs_diff_f
'size mismatch' if size_error > 0 else 'not in 1' if not_in_1 > 0 else 'not in 2' if not_in_2 > 0 else '')


def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file, err_file, summary_file):
def table_diff(
thresh_dict: ThreshDict, input_file_1: str, input_file_2: str, abs_diff_file: str,
rel_diff_file: str, err_file: str, summary_file: str
):
"""
Compares two xxxTable.html files returning
(
Expand All @@ -326,32 +366,33 @@ def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file
<#size_diff>, <#not_in_file1>, <#not_in_file2>
)
"""
file_1 = Path(input_file_1)
file_2 = Path(input_file_2)

case_name = inputfile1.split(os.sep)[-2]
case_name = file_1.parent.name

# Test for existence of input files
if not os.path.exists(inputfile1):
return 'unable to open file <%s>' % inputfile1, 0, 0, 0, 0, 0, 0, 0, 0
if not os.path.exists(inputfile2):
return 'unable to open file <%s>' % inputfile2, 0, 0, 0, 0, 0, 0, 0, 0
if not file_1.exists():
return 'unable to open file <%s>' % input_file_1, 0, 0, 0, 0, 0, 0, 0, 0
if not file_2.exists():
return 'unable to open file <%s>' % input_file_2, 0, 0, 0, 0, 0, 0, 0, 0

with open(inputfile1, 'rb') as f_1:
with open(file_1, 'rb') as f_1:
txt1 = f_1.read().decode('utf-8', errors='ignore')
with open(inputfile2, 'rb') as f_2:
with open(file_2, 'rb') as f_2:
txt2 = f_2.read().decode('utf-8', errors='ignore')

pagetitle = '%s vs %s' % (os.path.basename(inputfile1), os.path.basename(inputfile2))
page_title = f'{file_1.name} vs {file_2.name}'

# Error soup
err_soup = BeautifulSoup(title_css % (pagetitle + ' -- summary', the_css,),
features='html.parser')
err_soup = BeautifulSoup(title_css % (page_title + ' -- summary', the_css,), features='html.parser')

# Abs diff soup
abs_diff_soup = BeautifulSoup(title_css % (pagetitle + ' -- absolute differences', the_css,),
abs_diff_soup = BeautifulSoup(title_css % (page_title + ' -- absolute differences', the_css,),
features='html.parser')

# Rel diff soup
rel_diff_soup = BeautifulSoup(title_css % (pagetitle + ' -- relative differences', the_css,),
rel_diff_soup = BeautifulSoup(title_css % (page_title + ' -- relative differences', the_css,),
features='html.parser')

# Make error table
Expand Down Expand Up @@ -381,9 +422,9 @@ def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file
uheadings2.append(get_table_unique_heading(table))

if any([x is None for x in uheadings1]):
return 'malformed comment/table structure in <%s>' % inputfile1, 0, 0, 0, 0, 0, 0, 0, 0
return 'malformed comment/table structure in <%s>' % input_file_1, 0, 0, 0, 0, 0, 0, 0, 0
if any([x is None for x in uheadings2]):
return 'malformed comment/table structure in <%s>' % inputfile2, 0, 0, 0, 0, 0, 0, 0, 0
return 'malformed comment/table structure in <%s>' % input_file_2, 0, 0, 0, 0, 0, 0, 0, 0

uhset1 = set(uheadings1)
uhset2 = set(uheadings2)
Expand Down Expand Up @@ -414,6 +455,14 @@ def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file

uheading1 = uheadings1[i1]

# There are some (for now one) tables that we will want to skip entirely because they are not useful for
# throwing regressions, add search keys to this list to skip them
completely_skippable_table_keys = [
'Object Count Summary_Entire Facility_Input Fields'
]
if any([x in uheading1 for x in completely_skippable_table_keys]):
continue
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First off, if a table name is listed here, it can be completely skipped. It won't be included in the equal value count or anything. This should only be used for very specific table names. As of now it is only skipping the input fields table that lists how many autosizable fields and such.


# Table missing in second input file
if uheading1 not in uhset_match:
table_not_in_2 = 1
Expand All @@ -435,8 +484,19 @@ def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file
table_not_in_1, table_not_in_2)
continue

# create a list of order-dependent table uheading keys, tables that include these keys in the name
# these will use strict row order enforcement
row_order_dependent_table_keys = ['Monthly', 'Topology']
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we identify any tables that should enforce row order when doing diffs. Things like topology or time-based should be added here. This can be adjusted as more are identified.


# always process the first table into a base hdict
hdict1, horder1 = table2hdict_horder(table1)
hdict2, horder2 = table2hdict_horder(table2)

# if we are in a row order dependent table, don't pass table1 as a baseline, just use the literal in-place order
if any(k in uheading1 for k in row_order_dependent_table_keys):
hdict2, horder2 = table2hdict_horder(table2)
# but for all other tables, we can use the first table as a baseline to carefully match up the rows
else:
hdict2, horder2 = table2hdict_horder(table2, table1)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this block will either pass in table2 alone, in which case order dependence will be respected, or pass table1 as an optional second parameter. In that case, table1 order will be used to try to detect row matching, and allow the tables to pass if it's purely a reorder.


# honestly, if the column headings have changed, this should be an indicator to all reviewers that this needs
# up close investigation. As such, we are going to trigger the following things:
Expand Down Expand Up @@ -556,7 +616,7 @@ def table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file
count_of_size_error, count_of_not_in_1, count_of_not_in_2)


def main(argv=None): # pragma: no cover
def main(argv=None) -> int: # pragma: no cover
if argv is None:
argv = sys.argv
try:
Expand All @@ -565,19 +625,9 @@ def main(argv=None): # pragma: no cover
print(sys.argv[0].split("/")[-1] + ": " + str(msg) + "\n\t for help use --help")
return -1

# Test for correct number of arguments
# prog_name = os.path.basename(sys.argv[0])
# if len(args) == 5:
[inputfile1, inputfile2, abs_diff_file, rel_diff_file, err_file, summary_file] = args
# else:
# info('%s: incorrect operands: Try %s -h for more info' % (prog_name, prog_name))
# return -1

# Load diffing threshold dictionary
[input_file_1, input_file_2, abs_diff_file, rel_diff_file, err_file, summary_file] = args
thresh_dict = ThreshDict(os.path.join(script_dir, 'math_diff.config'))

# run the main program.
table_diff(thresh_dict, inputfile1, inputfile2, abs_diff_file, rel_diff_file, err_file, summary_file)
table_diff(thresh_dict, input_file_1, input_file_2, abs_diff_file, rel_diff_file, err_file, summary_file)
return 0


Expand Down
Loading