1- """Generate docs/audit.md from tests/audit_report.json."""
1+ """Generate docs/audit.md by running audit() on all RDF fixtures directly.
22
3- import json
3+ This runs as part of `make docs` so the audit report always reflects the
4+ current code and templates, with no stale intermediate JSON.
5+ """
6+
7+ import statistics
8+ import subprocess
9+ import sys
10+ import time
11+ from datetime import datetime , timezone
412from pathlib import Path
513
14+ from rdflib import Graph
15+
16+ from openbasement import audit , load_template
17+
# Repository root: this script sits one directory below it (e.g. scripts/),
# so climb two levels from this file's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# RDF fixture files that get audited (one graph per *.rdf file).
FIXTURE_DIR = ROOT / "tests" / "fixtures" / "procedures"
# Markdown report written by main().
OUTPUT_PATH = ROOT / "docs" / "audit.md"
# Template name handed to load_template() / audit().
TEMPLATE_NAME = "eu_procedure"
922
1023# Known URI prefixes for shortening
1124PREFIXES = [
@@ -24,7 +37,6 @@ def shorten_uri(uri: str) -> str:
2437 for full , short in PREFIXES :
2538 if uri .startswith (full ):
2639 return short + uri [len (full ):]
27- # Fallback: fragment after #
2840 if "#" in uri :
2941 return uri .rsplit ("#" , 1 )[1 ]
3042 return uri
@@ -35,15 +47,27 @@ def fmt_pct(value: float) -> str:
3547 return f"{ value * 100 :.1f} %"
3648
3749
50+ def get_git_short_hash () -> str :
51+ """Return the short git commit hash, or 'unknown' if not in a repo."""
52+ try :
53+ return subprocess .check_output (
54+ ["git" , "rev-parse" , "--short" , "HEAD" ],
55+ cwd = ROOT ,
56+ text = True ,
57+ ).strip ()
58+ except Exception :
59+ return "unknown"
60+
61+
def write_predicate_table(lines: list[str], predicates: dict, header: str) -> None:
    """Append a markdown table of predicate counts to *lines* (in place).

    *predicates* maps predicate URI -> {"fixtures": int, "triples": int};
    rows are ordered by descending fixture count. When the mapping is
    empty, a single placeholder note is appended instead of a table.
    """
    if not predicates:
        lines.append(f"*No {header.lower()} predicates.*\n")
        return

    by_fixtures = sorted(
        predicates.items(),
        key=lambda item: item[1]["fixtures"],
        reverse=True,
    )
    rows = [
        "| Predicate | Fixtures | Triples |",
        "|:----------|-------:|-------:|",
    ]
    rows.extend(
        f"| `{shorten_uri(uri)}` | {data['fixtures']} | {data['triples']} |"
        for uri, data in by_fixtures
    )
    rows.append("")
    lines.extend(rows)
@@ -63,20 +87,137 @@ def write_missing_table(lines: list[str], missing_freq: dict) -> None:
6387 lines .append ("" )
6488
6589
66- def main () -> None :
67- with open (REPORT_PATH , "r" , encoding = "utf-8" ) as f :
68- report = json .load (f )
def run_audit() -> dict:
    """Run audit() on all fixtures and return an aggregated report dict.

    Uses the same accumulation logic as tests/run_audit.py.

    Returns a dict with keys "template", "fixture_count", "errors",
    "coverage" (mean/median/min/max/stdev over per-fixture coverage) and
    "entities" (per-entity predicate tables). Exits the process with
    status 1 when FIXTURE_DIR contains no .rdf files.
    """
    template = load_template(TEMPLATE_NAME)
    rdf_files = sorted(FIXTURE_DIR.glob("*.rdf"))

    if not rdf_files:
        print(f"No .rdf files found in {FIXTURE_DIR}", file=sys.stderr)
        sys.exit(1)

    print(f"Auditing {len(rdf_files)} fixtures with template '{TEMPLATE_NAME}'...")

    # Accumulators keyed by entity name, then predicate URI.
    # "fixtures" holds a set of fixture ids so each fixture counts once
    # per predicate, while "triples" sums raw occurrence counts.
    uncovered_counts: dict[str, dict[str, dict]] = {}
    covered_counts: dict[str, dict[str, dict]] = {}
    missing_counts: dict[str, dict[str, int]] = {}
    # NOTE(review): populated below but never read when assembling the
    # report — looks like leftover from tests/run_audit.py; confirm
    # before removing.
    template_predicates: dict[str, set[str]] = {}

    coverages: list[float] = []
    errors = 0
    t0 = time.time()

    for i, rdf_file in enumerate(rdf_files, 1):
        # Progress heartbeat every 100 fixtures, plus one on the last.
        if i % 100 == 0 or i == len(rdf_files):
            elapsed = time.time() - t0
            print(f"  [{i}/{len(rdf_files)}] {elapsed:.1f}s elapsed")

        try:
            g = Graph()
            g.parse(rdf_file, format="xml")
            result = audit(g, template)
        except Exception as e:
            # Best-effort: a bad fixture is logged and skipped, never fatal.
            print(f"  ERROR parsing {rdf_file.name}: {e}", file=sys.stderr)
            errors += 1
            continue

        coverages.append(result["summary"]["coverage"])
        fixture_id = rdf_file.stem  # filename without the .rdf suffix

        for entity_name, entity_report in result["entities"].items():
            # First sighting of this entity: initialise every accumulator
            # together so their key sets stay in lockstep.
            if entity_name not in uncovered_counts:
                uncovered_counts[entity_name] = {}
                covered_counts[entity_name] = {}
                missing_counts[entity_name] = {}
                template_predicates[entity_name] = set()

            for pred, count in entity_report["uncovered"].items():
                if pred not in uncovered_counts[entity_name]:
                    uncovered_counts[entity_name][pred] = {"fixtures": set(), "triples": 0}
                uncovered_counts[entity_name][pred]["fixtures"].add(fixture_id)
                uncovered_counts[entity_name][pred]["triples"] += count

            for pred, count in entity_report["covered"].items():
                if pred not in covered_counts[entity_name]:
                    covered_counts[entity_name][pred] = {"fixtures": set(), "triples": 0}
                covered_counts[entity_name][pred]["fixtures"].add(fixture_id)
                covered_counts[entity_name][pred]["triples"] += count

            # "missing" counts how many fixtures lack each template
            # predicate entirely (a per-fixture tally, not triples).
            for pred in entity_report["missing"]:
                template_predicates[entity_name].add(pred)
                missing_counts[entity_name][pred] = missing_counts[entity_name].get(pred, 0) + 1

            for pred in entity_report["covered"]:
                template_predicates[entity_name].add(pred)

    # Only fixtures that parsed and audited successfully are counted.
    fixture_count = len(rdf_files) - errors

    report = {
        "template": TEMPLATE_NAME,
        "fixture_count": fixture_count,
        "errors": errors,
        "coverage": {
            # Guard every statistic against an all-errors run (empty list);
            # stdev additionally needs at least two samples.
            "mean": round(statistics.mean(coverages), 4) if coverages else 0,
            "median": round(statistics.median(coverages), 4) if coverages else 0,
            "min": round(min(coverages), 4) if coverages else 0,
            "max": round(max(coverages), 4) if coverages else 0,
            "stdev": round(statistics.stdev(coverages), 4) if len(coverages) > 1 else 0,
        },
        "entities": {},
    }

    for entity_name in sorted(uncovered_counts.keys()):
        # Dicts preserve insertion order, so sorting at build time bakes
        # the ranking (most fixtures, then most triples) into the report.
        uncovered = {
            pred: {"fixtures": len(data["fixtures"]), "triples": data["triples"]}
            for pred, data in sorted(
                uncovered_counts[entity_name].items(),
                key=lambda x: (-len(x[1]["fixtures"]), -x[1]["triples"]),
            )
        }
        covered = {
            pred: {"fixtures": len(data["fixtures"]), "triples": data["triples"]}
            for pred, data in sorted(
                covered_counts[entity_name].items(),
                key=lambda x: (-len(x[1]["fixtures"]), -x[1]["triples"]),
            )
        }
        report["entities"][entity_name] = {
            "uncovered": uncovered,
            "covered": covered,
            "missing_frequency": {
                pred: count
                for pred, count in sorted(
                    missing_counts[entity_name].items(),
                    key=lambda x: -x[1],
                )
            },
        }

    elapsed = time.time() - t0
    print(f"Audit complete in {elapsed:.1f}s ({fixture_count} fixtures, {errors} errors)")

    return report
203+
204+
205+ def generate_markdown (report : dict ) -> str :
206+ """Convert an aggregated report dict into markdown."""
70207 template_name = report ["template" ]
71208 fixture_count = report ["fixture_count" ]
72209 errors = report ["errors" ]
73210 cov = report ["coverage" ]
74211
212+ commit_hash = get_git_short_hash ()
213+ timestamp = datetime .now (timezone .utc ).strftime ("%Y-%m-%d %H:%M UTC" )
214+
75215 lines : list [str ] = []
76216
77- # Header
78217 lines .append ("# Audit Coverage Report" )
79218 lines .append ("" )
219+ lines .append (f"Generated from commit `{ commit_hash } ` on { timestamp } ." )
220+ lines .append ("" )
80221 lines .append (
81222 f"Template: **{ template_name } ** | "
82223 f"Fixtures: **{ fixture_count } ** | "
@@ -137,7 +278,13 @@ def main() -> None:
137278 lines .append ("" )
138279 write_missing_table (lines , missing_freq )
139280
140- OUTPUT_PATH .write_text ("\n " .join (lines ), encoding = "utf-8" )
281+ return "\n " .join (lines )
282+
283+
def main() -> None:
    """Run the full audit and write the markdown report to docs/audit.md."""
    markdown = generate_markdown(run_audit())
    OUTPUT_PATH.write_text(markdown, encoding="utf-8")
    print(f"Wrote {OUTPUT_PATH}")
142289
143290
0 commit comments