From e7615153c1cf64a3feeae59f837a8da090a144d0 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 21 Jul 2025 18:47:45 +0100 Subject: [PATCH 01/21] refactor --- backend/apps/ifc_validation/tasks.py | 1452 +++++++++----------------- backend/core/settings.py | 1 + 2 files changed, 509 insertions(+), 944 deletions(-) diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index 3af912f5..139cffab 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -5,6 +5,7 @@ import functools import json import ifcopenshell +import typing from celery import shared_task, chain, chord, group from celery.utils.log import get_task_logger @@ -39,13 +40,57 @@ assert sum(PROGRESS_INCREMENTS.values()) == 100 +class ValidationSubprocessError(Exception): pass +class ValidationTimeoutError(ValidationSubprocessError): pass +class ValidationOpenShellError(ValidationSubprocessError): pass +class ValidationIntegrityError(ValidationSubprocessError): pass + +def run_task( + task: ValidationTask, + check_program: typing.List[str], + task_name: str +) -> subprocess.CompletedProcess[str]: + task.set_process_details(None, check_program) + try: + proc = subprocess.run( + check_program, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + timeout=TASK_TIMEOUT_LIMIT, + env= os.environ.copy() + ) + return proc + + except subprocess.TimeoutExpired as err: + logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task_name}") + task.mark_as_failed(err) + raise ValidationTimeoutError(f"Task {task_name} timed out") from err + + except ifcopenshell.Error as err: + logger.exception(f"Ifcopenshell error in task {task.id} : {task_name}") + task.mark_as_failed(err) + raise ValidationOpenShellError(f"IFC parsing or validation failed during task {task_name}") from err + + except IntegrityError as err: + logger.exception(f"Database integrity error in task {task.id} : 
{task_name}") + task.mark_as_failed(err) + raise ValidationIntegrityError(f"Database error during task {task_name}") from err + + except Exception as err: + logger.exception(f"Unexpected error in task {task.id} : {task_name}") + task.mark_as_failed(err) + raise ValidationSubprocessError(f"Unknown error during validation task {task.id}: {task_name}") from err + +def log_program(taskname, check_program): + logger.debug(f'Command for {taskname}: {" ".join(check_program)}') def update_progress(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): return_value = func(self, *args, **kwargs) try: - request_id = args[1] + request_id = kwargs.get("id") # @nb not the most efficient because we fetch the ValidationRequest anew, but # assuming django will cache this efficiently enough for us to keep the code clean request = ValidationRequest.objects.get(pk=request_id) @@ -105,7 +150,7 @@ def chord_error_handler(self, request, exc, traceback, *args, **kwargs): def on_workflow_started(self, *args, **kwargs): # update status - id = self.request.args[0] + id = kwargs.get('id') reason = f"args={args} kwargs={kwargs}" request = ValidationRequest.objects.get(pk=id) request.mark_as_initiated(reason) @@ -123,10 +168,12 @@ def on_workflow_started(self, *args, **kwargs): @shared_task(bind=True) @log_execution @requires_django_user_context -def on_workflow_completed(self, *args, **kwargs): +def on_workflow_completed(self, result, **kwargs): # update status - id = args[1] + id = kwargs.get('id') + if not isinstance(id, int): + raise ValueError(f"Invalid id: {id!r}") reason = "Processing completed" request = ValidationRequest.objects.get(pk=id) request.mark_as_completed(reason) @@ -181,6 +228,34 @@ def get_or_create_ifc_model(request_id): else: return request.model +def validation_task_runner(task_type): + def decorator(func): + @shared_task(bind=True) + @log_execution + @requires_django_user_context + @update_progress + @functools.wraps(func) + def wrapper(self, prev_result, id, 
file_name, *args, **kwargs): + if args and isinstance(args[0], dict) and "is_valid" in args[0]: + prev_result = args[0] + else: + prev_result = {"is_valid": True} + + + request = ValidationRequest.objects.get(pk=id) + file_path = get_absolute_file_path(request.file.name) + task = ValidationTask.objects.create(request=request, type=task_type) + + if prev_result is not None and prev_result.get('is_valid') is True: + task.mark_as_initiated() + return func(self, task, request, file_path, *args, **kwargs) + else: + reason = f'Skipped as prev_result = {prev_result}.' + task.mark_as_skipped(reason) + return {'is_valid': None, 'reason': reason} + return wrapper + return decorator + @shared_task(bind=True) @log_execution @@ -192,27 +267,27 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): error_task = error_handler.s(id, file_name) chord_error_task = chord_error_handler.s(id, file_name) - workflow_started = on_workflow_started.s(id, file_name) - workflow_completed = on_workflow_completed.s(id, file_name) + workflow_started = on_workflow_started.s(id=id, file_name=file_name) + workflow_completed = on_workflow_completed.s(id=id, file_name=file_name) serial_tasks = chain( - header_syntax_validation_subtask.s(id, file_name), - header_validation_subtask.s(id, file_name), - syntax_validation_subtask.s(id, file_name), - prerequisites_subtask.s(id, file_name), + header_syntax_validation_subtask.s(id=id, file_name=file_name), + header_validation_subtask.s(id=id, file_name=file_name), + syntax_validation_subtask.s(id=id, file_name=file_name), + prerequisites_subtask.s(id=id, file_name=file_name), ) parallel_tasks = group([ - digital_signatures_subtask.s(id, file_name), - schema_validation_subtask.s(id, file_name), - #bsdd_validation_subtask.s(id, file_name), # disabled - normative_rules_ia_validation_subtask.s(id, file_name), - normative_rules_ip_validation_subtask.s(id, file_name), - industry_practices_subtask.s(id, file_name) + 
digital_signatures_subtask.s(id=id, file_name=file_name), + schema_validation_subtask.s(id=id, file_name=file_name), + #bsdd_validation_subtask.s(id=id, file_name=file_name), # disabled + normative_rules_ia_validation_subtask.s(id=id, file_name=file_name), + normative_rules_ip_validation_subtask.s(id=id, file_name=file_name), + industry_practices_subtask.s(id=id, file_name=file_name) ]) final_tasks = chain( - instance_completion_subtask.s(id, file_name) + instance_completion_subtask.s(id=id, file_name=file_name) ) workflow = ( @@ -226,988 +301,477 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): workflow.apply_async() -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def instance_completion_subtask(self, prev_result, id, file_name, *args, **kwargs): - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.INSTANCE_COMPLETION) - - prev_result_succeeded = prev_result is not None and prev_result[0]['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() - - try: - ifc_file = ifcopenshell.open(file_path) - except: - reason = f'Failed to open {file_path}. Likely previous tasks also failed.' 
- task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - - if ifc_file: - - # fetch and update ModelInstance records without ifc_type - with transaction.atomic(): - model_id = request.model.id - model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) - instance_count = model_instances.count() - logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') - - for inst in model_instances.iterator(): - inst.ifc_type = ifc_file[inst.stepfile_id].is_a() - inst.save() - - # update Task info and return - reason = f'Updated {instance_count:,} ModelInstance record(s)' - task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' - task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def syntax_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # determine program/script to run - check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", '--json', file_path] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.SYNTAX) - task.mark_as_initiated() - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - # check syntax - try: - - # note: use run instead of Popen b/c PIPE output can be very big... - task.set_process_details(None, check_program) # run() has no pid... 
- proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT - ) - - # parse output - output = proc.stdout - error_output = proc.stderr - success = (len(list(filter(None, output.split("\n")))) == 0) and len(proc.stderr) == 0 - - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(id) - - # update Model info - if success: - model.status_syntax = Model.Status.VALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=output if output != '' else None - ) - - elif len(error_output) != 0: - model.status_syntax = Model.Status.INVALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=list(filter(None, proc.stderr.split("\n")))[-1] # last line of traceback - ) - - else: - messages = json.loads(output) - model.status_syntax = Model.Status.INVALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=messages['message'] if 'message' in messages else None - ) - - model.save(update_fields=['status_syntax']) - - # store and return - if success: - reason = "No IFC syntax error(s)." 
- task.mark_as_completed(reason) - return {'is_valid': True, 'reason': task.status_reason} - else: - reason = f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" - task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def header_syntax_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # determine program/script to run - check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", '--json', '--only-header', file_path] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.HEADER_SYNTAX) - task.mark_as_initiated() - - # check header syntax +@validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) +def instance_completion_subtask(self, task, request, file_path, *args, **kwargs): try: - - # note: use run instead of Popen b/c PIPE output can be very big... - task.set_process_details(None, check_program) # run() has no pid... - proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT - ) - - # parse output - output = proc.stdout - error_output = proc.stderr - success = (len(list(filter(None, output.split("\n")))) == 0) and len(proc.stderr) == 0 - + ifc_file = ifcopenshell.open(file_path) + except: + reason = f'Failed to open {file_path}. Likely previous tasks also failed.' 
+ task.marked_as_completed(reason) + return {'is_valid': None, 'reason': reason} + + if ifc_file: + # fetch and update ModelInstance records without ifc_type with transaction.atomic(): + model_id = request.model.id + model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) + instance_count = model_instances.count() + logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') + + for inst in model_instances.iterator(): + inst.ifc_type = ifc_file[inst.stepfile_id].is_a() + inst.save() - # create or retrieve Model info - model = get_or_create_ifc_model(id) + # update Task info and return + reason = f'Updated {instance_count:,} ModelInstance record(s)' + task.mark_as_completed(reason) + return {'is_valid': True, 'reason': reason} - # update Model info - if success: - model.status_header_syntax = Model.Status.VALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=output if output != '' else None - ) - elif len(error_output) != 0: - model.status_header_syntax = Model.Status.INVALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=list(filter(None, proc.stderr.split("\n")))[-1] # last line of traceback - ) +@validation_task_runner(ValidationTask.Type.NORMATIVE_IA) +def normative_rules_ia_validation_subtask(self, task, request, file_path, **kwargs): + return run_gherkin_subtask(self, task, request, file_path, 'IMPLEMENTER_AGREEMENT', 'status_ia') - else: - messages = json.loads(output) - model.status_header_syntax = Model.Status.INVALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=messages['message'] if 'message' in messages else None - ) - model.save(update_fields=['status_header_syntax']) 
+@validation_task_runner(ValidationTask.Type.NORMATIVE_IP) +def normative_rules_ip_validation_subtask(self, task, request, file_path, **kwargs): + return run_gherkin_subtask(self, task, request, file_path, 'INFORMAL_PROPOSITION', 'status_ip') - # store and return - if success: - reason = "No IFC syntax error(s)." - task.mark_as_completed(reason) - return {'is_valid': True, 'reason': task.status_reason} - else: - reason = f"Found IFC syntax errors in header:\n\nConsole: \n{output}\n\nError: {error_output}" - task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise +@validation_task_runner(ValidationTask.Type.PREREQUISITES) +def prerequisites_subtask(self, task, request, file_path, **kwargs): + return run_gherkin_subtask(self, task, request, file_path, 'CRITICAL', 'status_prereq', extra_args=['--purepythonparser']) - except Exception as err: - task.mark_as_failed(err) - raise +@validation_task_runner(ValidationTask.Type.SYNTAX) +def syntax_validation_subtask(self, task, request, file_path, **kwargs): + check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", file_path] + log_program(self.__qualname__, check_program) + return run_syntax_subtask(self, task, request, file_path, check_program, 'status_syntax') -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def header_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - """" - Parses and validates the file header - """ +@validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) +def header_syntax_validation_subtask(self, task, request, file_path, **kwargs): + check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path] + log_program(self.__qualname__, check_program) + return run_syntax_subtask(self, task, request, file_path, check_program, 'status_header_syntax') - # fetch request info - request = 
ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.HEADER) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - task.mark_as_initiated() - # check for header policy - check_script = os.path.join(os.path.dirname(__file__), "checks", "header_policy", "validate_header.py") - - try: - logger.debug(f'before header validation task, path {file_path}, script {check_script} ') - proc = subprocess.run( - [sys.executable, check_script, file_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - timeout=TASK_TIMEOUT_LIMIT # Add timeout to prevent infinite hangs +def run_syntax_subtask(self, task, request, file_path, check_program, model_status_field): + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) + output = proc.stdout + error_output = proc.stderr + success = (len(list(filter(None, output.split("\n")))) == 0) and len(error_output) == 0 + + with transaction.atomic(): + model = get_or_create_ifc_model(request.id) + + if success: + setattr(model, model_status_field, Model.Status.VALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=output if output else None ) - - - if (proc.returncode is not None and proc.returncode != 0) or (len(proc.stderr) > 0): - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - header_validation = {} - stdout_lines = proc.stdout.splitlines() - for line in stdout_lines: - try: - header_validation = json.loads(line) - except json.JSONDecodeError: - continue - - logger.debug(f'header validation output : {header_validation}') - - with 
transaction.atomic(): - # create or retrieve Model info - model = get_or_create_ifc_model(id) - - # update Model info - agg_status = task.determine_aggregate_status() - model.status_prereq = agg_status - - # size - model.size = os.path.getsize(file_path) - logger.debug(f'Detected size = {model.size} bytes') - - # schema - model.schema = header_validation.get('schema_identifier') - - logger.debug(f'The schema identifier = {header_validation.get("schema")}') - # time_stamp - if ifc_file_time_stamp := header_validation.get('time_stamp', False): - try: - logger.debug(f'Timestamp within file = {ifc_file_time_stamp}') - date = datetime.datetime.strptime(ifc_file_time_stamp, "%Y-%m-%dT%H:%M:%S") - date_with_tz = datetime.datetime( - date.year, - date.month, - date.day, - date.hour, - date.minute, - date.second, - tzinfo=datetime.timezone.utc) - model.date = date_with_tz - except ValueError: - try: - model.date = datetime.datetime.fromisoformat(ifc_file_time_stamp) - except ValueError: - pass - - # mvd - model.mvd = header_validation.get('mvd') - - app = header_validation.get('application_name') - - version = header_validation.get('version') - name = None if any(value in (None, "Not defined") for value in (app, version)) else app + ' ' + version - company_name = header_validation.get('company_name') - logger.debug(f'Detected Authoring Tool in file = {name}') - - validation_errors = header_validation.get('validation_errors', []) - invalid_marker_fields = ['originating_system', 'version', 'company_name', 'application_name'] - if any(field in validation_errors for field in invalid_marker_fields): - model.status_header = Model.Status.INVALID - else: - # parsing was successful and model can be considered for scorecards - model.status_header = Model.Status.VALID - authoring_tool = AuthoringTool.find_by_full_name(full_name=name) - if (isinstance(authoring_tool, AuthoringTool)): - - if authoring_tool.company is None: - company, _ = Company.objects.get_or_create(name=company_name) 
- authoring_tool.company = company - authoring_tool.save() - logger.debug(f'Updated existing Authoring Tool with company: {company.name}') - - model.produced_by = authoring_tool - logger.debug(f'Retrieved existing Authoring Tool from DB = {model.produced_by.full_name}') - - elif authoring_tool is None: - company, _ = Company.objects.get_or_create(name=company_name) - authoring_tool, _ = AuthoringTool.objects.get_or_create( - company=company, - name=app, - version=version - ) - model.produced_by = authoring_tool - logger.debug(f'Authoring app not found, ApplicationFullName = {app}, Version = {version} - created new instance') - else: - model.produced_by = None - logger.warning(f'Retrieved multiple Authoring Tool from DB: {authoring_tool} - could not assign any') - - # update header validation - model.header_validation = header_validation - model.save(update_fields=['status_header', 'header_validation']) - model.save() - - - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' - task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} - - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - except IntegrityError as err: - task.mark_as_failed(err) - raise - except Exception as err: - task.mark_as_failed(err) - raise - else: - reason = f'Skipped as prev_result = {prev_result}.' 
- task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def prerequisites_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.PREREQUISITES) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() - - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id), '--rule-type', 'CRITICAL', "--purepythonparser"] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - logger.debug(f"gherkin log path : {os.path.join(os.getenv('Django_LOG_FOLDER', 'logs'), 'gherkin_rules.log')}") - logger.debug(f"Log folder exists and writable: {os.access(os.getenv('Django_LOG_FOLDER', 'logs'), os.W_OK)}") - - # check Gherkin IP - try: - # note: use run instead of Popen b/c PIPE output can be very big... 
- proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT, - env=os.environ.copy() + elif error_output: + setattr(model, model_status_field, Model.Status.INVALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, + observed=list(filter(None, error_output.split("\n")))[-1] + ) + else: + messages = json.loads(output) + setattr(model, model_status_field, Model.Status.INVALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, + observed=messages.get("message") ) - task.set_process_details(None, check_program) # run() has no pid... - - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - - # @nb previously we also checked for: - # or (len(proc.stderr) > 0): - # - # but I now get warnings: - # - # - features/environment.py:86: ContextMaskWarning: user code is masking context attribute 'gherkin_outcomes'; - # see the tutorial for what this means - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout - - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(id) - # update Model info - agg_status = task.determine_aggregate_status() - model.status_prereq = agg_status - model.save(update_fields=['status_prereq']) + model.save(update_fields=[model_status_field]) - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' + if success: + reason 
= "No IFC syntax error(s)." task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' - task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def schema_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.SCHEMA) - - # TODO: revisit schema validation task, perhaps change order of flow? - prev_result_succeeded = prev_result is not None and (prev_result['is_valid'] is True or 'Unsupported schema' in prev_result['reason']) - if prev_result_succeeded: - - task.mark_as_initiated() + return {'is_valid': True, 'reason': reason} + else: + reason = f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" + task.mark_as_completed(reason) + return {'is_valid': False, 'reason': reason} + - # determine program/script to run - check_program = [sys.executable, '-m', 'ifcopenshell.validate', '--json', '--rules', '--fields', file_path] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') +@validation_task_runner(ValidationTask.Type.SCHEMA) +def schema_validation_subtask(self, task, request, file_path, *args, **kwargs): + check_program = [sys.executable, "-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path] + log_program(self.__qualname__, check_program) - # check schema + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) + def is_schema_error(line): try: - # note: use run instead of Popen b/c PIPE output can be very big... 
- proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT - ) - task.set_process_details(None, check_program) # run() has no pid... - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - except ifcopenshell.Error as err: - # logical validation error OR C++ errors - task.mark_as_failed(err) - pass - except Exception as err: - task.mark_as_failed(err) - raise - - # schema check returns either multiple JSON lines, or a single line message, or nothing. - def is_schema_error(line): - try: - json.loads(line) # ignoring non-JSON messages - except ValueError: - return False - return True - - output = list(filter(is_schema_error, proc.stdout.split("\n"))) - # success = (len(output) == 0) - # tfk: if we mark this task as failed we don't do the instance population either. - # marking as failed should probably be reserved for blocking errors (prerequisites) - # and internal errors and differentiate between valid and task_success. 
- success = proc.returncode >= 0 - valid = (len(output) == 0) + json.loads(line) # ignoring non-JSON messages + except ValueError: + return False + return True + + output = list(filter(is_schema_error, proc.stdout.split("\n"))) - with transaction.atomic(): + success = proc.returncode >= 0 + valid = len(output) == 0 - # create or retrieve Model info - model = get_or_create_ifc_model(id) + with transaction.atomic(): + model = get_or_create_ifc_model(request.id) - # update Model and Validation Outcomes - if valid: - model.status_schema = Model.Status.VALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=None + if valid: + model.status_schema = Model.Status.VALID + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=None + ) + else: + model.status_schema = Model.Status.INVALID + outcomes_to_save = list() + outcomes_instances_to_save = list() + + for line in output: + message = json.loads(line) + outcome = ValidationOutcome( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SCHEMA_ERROR, + observed=message['message'], + feature=json.dumps({ + 'type': message['type'] if 'type' in message else None, + 'attribute': message['attribute'] if 'attribute' in message else None + }), + validation_task=task ) - else: - outcomes_to_save = list() - outcomes_instances_to_save = list() - - for line in output: - message = json.loads(line) - model.status_schema = Model.Status.INVALID - outcome = ValidationOutcome( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SCHEMA_ERROR, - observed=message['message'], - feature=json.dumps({ - 'type': message['type'] if 'type' in message else None, - 'attribute': message['attribute'] if 'attribute' in message else None - }) - ) - 
outcome.validation_task = task - outcomes_to_save.append(outcome) - - if 'instance' in message and message['instance'] is not None and 'id' in message['instance'] and 'type' in message['instance']: - instance = ModelInstance( + outcomes_to_save.append(outcome) + if 'instance' in message and message['instance'] is not None and 'id' in message['instance'] and 'type' in message['instance']: + instance = ModelInstance( stepfile_id=message['instance']['id'], ifc_type=message['instance']['type'], model=model ) - outcome.instance_id = instance.stepfile_id # store for later reference (not persisted) - outcomes_instances_to_save.append(instance) - - ModelInstance.objects.bulk_create(outcomes_instances_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE, ignore_conflicts=True) # ignore existing - model_instances = dict(ModelInstance.objects.filter(model_id=model.id).values_list('stepfile_id', 'id')) # retrieve all - - for outcome in outcomes_to_save: - if outcome.instance_id: - instance_id = model_instances[outcome.instance_id] - if instance_id: - outcome.instance_id = instance_id - - ValidationOutcome.objects.bulk_create(outcomes_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - - model.save(update_fields=['status_schema']) - - # return - if success: - reason = 'No IFC schema errors.' - task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} - else: - reason = f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors : {output}" - task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' 
- task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} + outcome.instance_id = instance.stepfile_id # store for later reference (not persisted) + outcomes_instances_to_save.append(instance) + ModelInstance.objects.bulk_create(outcomes_instances_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE, ignore_conflicts=True) #ignore existing + model_instances = dict(ModelInstance.objects.filter(model_id=model.id).values_list('stepfile_id', 'id')) # retrieve all -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def digital_signatures_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) + for outcome in outcomes_to_save: + if outcome.instance_id: + instance_id = model_instances[outcome.instance_id] + if instance_id: + outcome.instance_id = instance_id - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.DIGITAL_SIGNATURES) + ValidationOutcome.objects.bulk_create(outcomes_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: + model.save(update_fields=['status_schema']) - task.mark_as_initiated() + if success: + reason = "No IFC schema errors." + task.mark_as_completed(reason) + return {'is_valid': True, 'reason': reason} + else: + reason = f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." 
+ task.mark_as_completed(reason) + return {'is_valid': False, 'reason': reason} - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") - check_program = [sys.executable, check_script, file_path] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - # check signatures +@validation_task_runner(ValidationTask.Type.HEADER) +def header_validation_subtask(self, task, request, file_path, **kwargs): + check_script = os.path.join(os.path.dirname(__file__), "checks", "header_policy", "validate_header.py") + check_program = [sys.executable, check_script, file_path] + log_program(self.__qualname__, check_program) + + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) + + + header_validation = {} + stdout_lines = proc.stdout.splitlines() + for line in stdout_lines: try: - # note: use run instead of Popen b/c PIPE output can be very big... - proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT - ) - task.set_process_details(None, check_program) # run() has no pid... 
- except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - except Exception as err: - task.mark_as_failed(err) - raise + header_validation = json.loads(line) + except json.JSONDecodeError: + continue + + with transaction.atomic(): + # create or retrieve Model info + model = get_or_create_ifc_model(request.id) + agg_status = task.determine_aggregate_status() + model.status_prereq = agg_status + model.size = os.path.getsize(file_path) + logger.debug(f'Detected size = {model.size} bytes') - output = list(map(json.loads, filter(None, map(lambda s: s.strip(), proc.stdout.split("\n"))))) - success = proc.returncode >= 0 - valid = all(m['signature'] != "invalid" for m in output) - - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(id) - model.status_signatures = Model.Status.NOT_APPLICABLE if not output else Model.Status.VALID if valid else Model.Status.INVALID - - def create_outcome(di): - return ValidationOutcome( - severity=ValidationOutcome.OutcomeSeverity.ERROR if di.get("signature") == "invalid" else ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.VALUE_ERROR if di.get("signature") == "invalid" else ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=di, - feature=json.dumps({'digital_signature': 1}), - validation_task = task + model.schema = header_validation.get('schema_identifier') + logger.debug(f'The schema identifier = {header_validation.get("schema")}') + + # time_stamp + if ifc_file_time_stamp := header_validation.get('time_stamp', False): + try: + logger.debug(f'Timestamp within file = {ifc_file_time_stamp}') + date = datetime.datetime.strptime(ifc_file_time_stamp, "%Y-%m-%dT%H:%M:%S") + date_with_tz = datetime.datetime( + date.year, + date.month, + date.day, + date.hour, + date.minute, + date.second, + tzinfo=datetime.timezone.utc) + model.date = date_with_tz + except ValueError: + try: + model.date = 
datetime.datetime.fromisoformat(ifc_file_time_stamp) + except ValueError: + pass + + # mvd + model.mvd = header_validation.get('mvd') + + app = header_validation.get('application_name') + + version = header_validation.get('version') + name = None if any(value in (None, "Not defined") for value in (app, version)) else app + ' ' + version + company_name = header_validation.get('company_name') + logger.debug(f'Detected Authoring Tool in file = {name}') + + validation_errors = header_validation.get('validation_errors', []) + invalid_marker_fields = ['originating_system', 'version', 'company_name', 'application_name'] + if any(field in validation_errors for field in invalid_marker_fields): + model.status_header = Model.Status.INVALID + else: + # parsing was successful and model can be considered for scorecards + model.status_header = Model.Status.VALID + authoring_tool = AuthoringTool.find_by_full_name(full_name=name) + if (isinstance(authoring_tool, AuthoringTool)): + + if authoring_tool.company is None: + company, _ = Company.objects.get_or_create(name=company_name) + authoring_tool.company = company + authoring_tool.save() + logger.debug(f'Updated existing Authoring Tool with company: {company.name}') + + model.produced_by = authoring_tool + logger.debug(f'Retrieved existing Authoring Tool from DB = {model.produced_by.full_name}') + + elif authoring_tool is None: + company, _ = Company.objects.get_or_create(name=company_name) + authoring_tool, _ = AuthoringTool.objects.get_or_create( + company=company, + name=app, + version=version ) - - ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - - model.save(update_fields=['status_signatures']) - - if success: - reason = 'Digital signature check completed' - task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} + model.produced_by = authoring_tool + logger.debug(f'Authoring app not found, ApplicationFullName = {app}, Version = 
{version} - created new instance') else: - reason = f"Script returned exit code {proc.returncode} and {proc.stderr}" - task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' - task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def bsdd_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.BSDD) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() - - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_bsdd.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(id)] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - - # check bSDD - try: - # note: use run instead of Popen b/c PIPE output can be very big... - proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT, - env=os.environ.copy() - ) - task.set_process_details(None, check_program) # run() has no pid... 
- - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout - - logger.info(f'Output for {self.__name__}: {raw_output}') - - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(id) - - # update Validation Outcomes - json_output = json.loads(raw_output) - for message in json_output['messages']: - - outcome = task.outcomes.create( - severity=[c[0] for c in ValidationOutcome.OutcomeSeverity.choices if c[1] == (message['severity'])][0], - outcome_code=[c[0] for c in ValidationOutcome.ValidationOutcomeCode.choices if c[1] == (message['outcome'])][0], - observed=message['message'], - feature=json.dumps({ - 'rule': message['rule'] if 'rule' in message else None, - 'category': message['category'] if 'category' in message else None, - 'dictionary': message['dictionary'] if 'dictionary' in message else None, - 'class': message['class'] if 'class' in message else None, - 'instance_id': message['instance_id'] if 'instance_id' in message else None - }) - ) - - if 'instance_id' in message and message['instance_id'] is not None: - instance, _ = model.instances.get_or_create( - stepfile_id = message['instance_id'], - model=model - ) - outcome.instance = instance - outcome.save() - - # update Model info - agg_status = task.determine_aggregate_status() - model.status_bsdd = agg_status - model.save(update_fields=['status_bsdd']) - - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" - task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': 
reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' - task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def normative_rules_ia_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.NORMATIVE_IA) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() + model.produced_by = None + logger.warning(f'Retrieved multiple Authoring Tool from DB: {authoring_tool} - could not assign any') + + # update header validation + model.header_validation = header_validation + model.save(update_fields=['status_header', 'header_validation']) + model.save() + + + # update Task info and return + is_valid = agg_status != Model.Status.INVALID + reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' + task.mark_as_completed(reason) + return {'is_valid': is_valid, 'reason': reason} - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id), '--rule-type', 'IMPLEMENTER_AGREEMENT'] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') + - # check Gherkin IA - try: - # note: use run instead of Popen b/c PIPE output can be very big... 
- proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT, - env=os.environ.copy() +@validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) +def digital_signatures_subtask(self, task, request, file_path, **kwargs): + check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") + check_program = [sys.executable, check_script, file_path] + log_program(self.__qualname__, check_program) + + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) + + output = list(map(json.loads, filter(None, map(lambda s: s.strip(), proc.stdout.split("\n"))))) + success = proc.returncode >= 0 + valid = all(m['signature'] != "invalid" for m in output) + + with transaction.atomic(): + + # create or retrieve Model info + model = get_or_create_ifc_model(request.id) + model.status_signatures = Model.Status.NOT_APPLICABLE if not output else Model.Status.VALID if valid else Model.Status.INVALID + + def create_outcome(di): + return ValidationOutcome( + severity=ValidationOutcome.OutcomeSeverity.ERROR if di.get("signature") == "invalid" else ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.VALUE_ERROR if di.get("signature") == "invalid" else ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=di, + feature=json.dumps({'digital_signature': 1}), + validation_task = task ) - task.set_process_details(None, check_program) # run() has no pid... 
- - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout - - with transaction.atomic(): - # create or retrieve Model info - model = get_or_create_ifc_model(id) + ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - # update Model info - agg_status = task.determine_aggregate_status() - logger.debug(f'Aggregate status for {self.__qualname__}: {agg_status}') - model.status_ia = agg_status - model.save(update_fields=['status_ia']) + model.save(update_fields=['status_signatures']) - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' + if success: + reason = 'Digital signature check completed' task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' 
- task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def normative_rules_ip_validation_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.NORMATIVE_IP) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() - - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id), '--rule-type', 'INFORMAL_PROPOSITION'] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - - # check Gherkin IP - try: - # note: use run instead of Popen b/c PIPE output can be very big... - proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT - ) - task.set_process_details(None, check_program) # run() has no pid... 
- - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout - - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(id) - - # update Model info - agg_status = task.determine_aggregate_status() - model.status_ip = agg_status - model.save(update_fields=['status_ip']) - - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' + return {'is_valid': True, 'reason': reason} + else: + reason = f"Script returned exit code {proc.returncode} and {proc.stderr}" task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} - - else: - reason = f'Skipped as prev_result = {prev_result}.' 
- task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -@update_progress -def industry_practices_subtask(self, prev_result, id, file_name, *args, **kwargs): - - # fetch request info - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # add task - task = ValidationTask.objects.create(request=request, type=ValidationTask.Type.INDUSTRY_PRACTICES) - - prev_result_succeeded = prev_result is not None and prev_result['is_valid'] is True - if prev_result_succeeded: - - task.mark_as_initiated() + return {'is_valid': False, 'reason': reason} - # determine program/script to run - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id), '--rule-type', 'INDUSTRY_PRACTICE'] - logger.debug(f'Command for {self.__qualname__}: {" ".join(check_program)}') - # check Gherkin IP - try: - # note: use run instead of Popen b/c PIPE output can be very big... 
- proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=True, - timeout=TASK_TIMEOUT_LIMIT +@validation_task_runner(ValidationTask.Type.BSDD) +def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): + check_script = os.path.join(os.path.dirname(__file__), "checks", "check_bsdd.py") + check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id)] + log_program(self.__qualname__, check_program) + + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) + + if proc.returncode is not None and proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) + + raw_output = proc.stdout + logger.info(f'Output for {self.__name__}: {raw_output}') + + with transaction.atomic(): + + # create or retrieve Model info + model = get_or_create_ifc_model(request.id) + + # update Validation Outcomes + json_output = json.loads(raw_output) + for message in json_output['messages']: + + outcome = task.outcomes.create( + severity=[c[0] for c in ValidationOutcome.OutcomeSeverity.choices if c[1] == (message['severity'])][0], + outcome_code=[c[0] for c in ValidationOutcome.ValidationOutcomeCode.choices if c[1] == (message['outcome'])][0], + observed=message['message'], + feature=json.dumps({ + 'rule': message['rule'] if 'rule' in message else None, + 'category': message['category'] if 'category' in message else None, + 'dictionary': message['dictionary'] if 'dictionary' in message else None, + 'class': message['class'] if 'class' in message else None, + 'instance_id': message['instance_id'] if 'instance_id' in message else None + }) ) - task.set_process_details(None, check_program) # run() has no pid... 
- - except subprocess.TimeoutExpired as err: - task.mark_as_failed(err) - raise - - except Exception as err: - task.mark_as_failed(err) - raise - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout + if 'instance_id' in message and message['instance_id'] is not None: + instance, _ = model.instances.get_or_create( + stepfile_id = message['instance_id'], + model=model + ) + outcome.instance = instance + outcome.save() + + # update Model info + agg_status = task.determine_aggregate_status() + model.status_bsdd = agg_status + model.save(update_fields=['status_bsdd']) + + # update Task info and return + is_valid = agg_status != Model.Status.INVALID + reason = f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" + task.mark_as_completed(reason) + return {'is_valid': is_valid, 'reason': reason} + + +@validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) +def industry_practices_subtask(self, task, request, file_path): + check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") + check_program = [ + sys.executable, + check_script, + '--file-name', file_path, + '--task-id', str(task.id), + '--rule-type', 'INDUSTRY_PRACTICE' + ] + log_program(self.__qualname__, check_program) + + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) - with transaction.atomic(): + if proc.returncode is not None and proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) + + raw_output = proc.stdout + + with transaction.atomic(): + model = get_or_create_ifc_model(request.id) + agg_status = 
task.determine_aggregate_status() + model.status_industry_practices = agg_status + model.save(update_fields=['status_industry_practices']) + + is_valid = agg_status != Model.Status.INVALID + reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' + task.mark_as_completed(reason) + return {'is_valid': is_valid, 'reason': reason} + + +def run_gherkin_subtask(self, task, request, file_path, gherkin_task_type, status_field, extra_args=None): + check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") + check_program = [ + sys.executable, + check_script, + '--file-name', file_path, + '--task-id', str(task.id), + '--rule-type', gherkin_task_type + ] + if extra_args: + check_program += extra_args + + log_program(self.__qualname__, check_program) + + proc = run_task( + task=task, + check_program = check_program, + task_name = self.name.split(".")[-1] + ) - # create or retrieve Model info - model = get_or_create_ifc_model(id) + if proc.returncode is not None and proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) - # update Model info - agg_status = task.determine_aggregate_status() - model.status_industry_practices = agg_status - model.save(update_fields=['status_industry_practices']) + raw_output = proc.stdout - # update Task info and return - is_valid = agg_status != Model.Status.INVALID - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' - task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} + with transaction.atomic(): + model = get_or_create_ifc_model(request.id) + agg_status = task.determine_aggregate_status() + setattr(model, status_field, agg_status) + model.save(update_fields=[status_field]) - else: - reason = f'Skipped as prev_result = {prev_result}.' 
- task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} + is_valid = agg_status != Model.Status.INVALID + reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' + task.mark_as_completed(reason) + return {'is_valid': is_valid, 'reason': reason} diff --git a/backend/core/settings.py b/backend/core/settings.py index 3fbedc16..f912512b 100644 --- a/backend/core/settings.py +++ b/backend/core/settings.py @@ -293,6 +293,7 @@ CELERY_WORKER_SEND_TASK_EVENTS = True CELERY_TASK_TRACK_STARTED = True CELERY_RESULT_EXPIRES = 90*24*3600 # Results in backend expire after 3 months +CELERY_TASK_ALLOW_ERROR_CB_ON_CHORD_HEADER = True # reliability settings - see https://www.francoisvoron.com/blog/configure-celery-for-reliable-delivery CELERY_TASK_REJECT_ON_WORKER_LOST = True From 1c1b14c7d974234f138902295a44beccdc5cf05d Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 21 Jul 2025 20:03:28 +0100 Subject: [PATCH 02/21] add task configuration --- backend/apps/ifc_validation/task_configs.py | 122 ++++++++++++++++++++ backend/apps/ifc_validation/tasks.py | 57 ++++----- 2 files changed, 146 insertions(+), 33 deletions(-) create mode 100644 backend/apps/ifc_validation/task_configs.py diff --git a/backend/apps/ifc_validation/task_configs.py b/backend/apps/ifc_validation/task_configs.py new file mode 100644 index 00000000..14bd6d51 --- /dev/null +++ b/backend/apps/ifc_validation/task_configs.py @@ -0,0 +1,122 @@ +from dataclasses import dataclass +import typing +import sys +import os +from apps.ifc_validation_models.models import ValidationTask + +def execute_check(*args: str) -> list: + return [sys.executable, *args] + +def check_syntax(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.simple_spf", "--json", file_path) + +def check_header_syntax(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path) + +def 
check_schema(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path) + +def check_validate_header(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(os.path.dirname(__file__), "checks", "header_policy", "validate_header.py"), file_path) + +def check_signatures(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py"), file_path) + +def check_bsdd(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(os.path.dirname(__file__), "checks", "check_bsdd.py"), + "--file-name", file_path, "--task-id", str(task_id)) + +def check_gherkin(file_path: str, task_id: int, rule_type: str) -> list: + return execute_check(os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py"), + "--file-name", file_path, + "--task-id", str(task_id), + "--rule-type", rule_type) + +def check_gherkin_prereq(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "CRITICAL") + ["--purepythonparser"] + +def check_gherkin_ia(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "IMPLEMENTER_AGREEMENT") + +def check_gherkin_ip(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "INFORMAL_PROPOSITION") + +def check_gherkin_best_practice(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "INDUSTRY_PRACTICE") + + +@dataclass +class TaskConfig: + type: str + increment: int + model_field: str + check_program: typing.Callable[[str], list] + +TASK_CONFIGS: typing.Dict[str, TaskConfig] = { + 'instance_completion_subtask': TaskConfig( + type=ValidationTask.Type.INSTANCE_COMPLETION, + increment=5, + model_field=None, + check_program=lambda file_path, task_id: [] # No external check + ), + 'syntax_validation_subtask': TaskConfig( + type=ValidationTask.Type.SYNTAX, + 
increment=5, + model_field='status_syntax', + check_program=check_syntax + ), + 'header_syntax_validation_subtask': TaskConfig( + type=ValidationTask.Type.HEADER_SYNTAX, + increment=5, + model_field='status_header_syntax', + check_program=check_header_syntax + ), + 'header_validation_subtask': TaskConfig( + type=ValidationTask.Type.HEADER, + increment=10, + model_field='status_header', + check_program=check_validate_header + ), + 'prerequisites_subtask': TaskConfig( + type=ValidationTask.Type.PREREQUISITES, + increment=10, + model_field='status_prereq', + check_program=check_gherkin_prereq + ), + 'schema_validation_subtask': TaskConfig( + type=ValidationTask.Type.SCHEMA, + increment=10, + model_field='status_schema', + check_program=check_schema + ), + 'digital_signatures_subtask': TaskConfig( + type=ValidationTask.Type.DIGITAL_SIGNATURES, + increment=5, + model_field='status_signatures', + check_program=check_signatures + ), + 'bsdd_validation_subtask': TaskConfig( + type=ValidationTask.Type.BSDD, + increment=0, + model_field='status_bsdd', + check_program=check_bsdd + ), + 'normative_rules_ia_validation_subtask': TaskConfig( + type=ValidationTask.Type.NORMATIVE_IA, + increment=20, + model_field='status_ia', + check_program=check_gherkin_ia + ), + 'normative_rules_ip_validation_subtask': TaskConfig( + type=ValidationTask.Type.NORMATIVE_IP, + increment=20, + model_field='status_ip', + check_program=check_gherkin_ip + ), + 'industry_practices_subtask': TaskConfig( + type=ValidationTask.Type.INDUSTRY_PRACTICES, + increment=10, + model_field='status_industry_practices', + check_program=check_gherkin_best_practice + ), +} \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index 139cffab..08f51a05 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -19,11 +19,12 @@ from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT, MEDIA_ROOT from 
apps.ifc_validation_models.decorators import requires_django_user_context from apps.ifc_validation_models.models import * +from apps.ifc_validation.task_configs import TASK_CONFIGS from .email_tasks import * logger = get_task_logger(__name__) - + PROGRESS_INCREMENTS = { 'instance_completion_subtask': 5, 'syntax_validation_subtask': 5, @@ -38,7 +39,7 @@ 'industry_practices_subtask': 10 } -assert sum(PROGRESS_INCREMENTS.values()) == 100 +assert sum(cfg.increment for cfg in TASK_CONFIGS.values()) == 100 class ValidationSubprocessError(Exception): pass class ValidationTimeoutError(ValidationSubprocessError): pass @@ -93,8 +94,9 @@ def wrapper(self, *args, **kwargs): request_id = kwargs.get("id") # @nb not the most efficient because we fetch the ValidationRequest anew, but # assuming django will cache this efficiently enough for us to keep the code clean + config = TASK_CONFIGS.get(func.__name__) request = ValidationRequest.objects.get(pk=request_id) - increment = PROGRESS_INCREMENTS.get(func.__name__, 0) + increment = config.increment request.progress = min(request.progress + increment, 100) request.save() except Exception as e: @@ -330,28 +332,35 @@ def instance_completion_subtask(self, task, request, file_path, *args, **kwargs) @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) def normative_rules_ia_validation_subtask(self, task, request, file_path, **kwargs): - return run_gherkin_subtask(self, task, request, file_path, 'IMPLEMENTER_AGREEMENT', 'status_ia') + check_program = TASK_CONFIGS['normative_rules_ia_validation_subtask'].check_program(file_path, task.id) + log_program(self.__qualname__, check_program) + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ia') @validation_task_runner(ValidationTask.Type.NORMATIVE_IP) def normative_rules_ip_validation_subtask(self, task, request, file_path, **kwargs): - return run_gherkin_subtask(self, task, request, file_path, 'INFORMAL_PROPOSITION', 'status_ip') + check_program = 
TASK_CONFIGS['normative_rules_ip_validation_subtask'].check_program(file_path, task.id) + log_program(self.__qualname__, check_program) + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ip') @validation_task_runner(ValidationTask.Type.PREREQUISITES) def prerequisites_subtask(self, task, request, file_path, **kwargs): - return run_gherkin_subtask(self, task, request, file_path, 'CRITICAL', 'status_prereq', extra_args=['--purepythonparser']) + check_program = TASK_CONFIGS['prerequisites_subtask'].check_program(file_path, task.id) + log_program(self.__qualname__, check_program) + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_prereq') + @validation_task_runner(ValidationTask.Type.SYNTAX) def syntax_validation_subtask(self, task, request, file_path, **kwargs): - check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", file_path] + check_program = TASK_CONFIGS['syntax_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) return run_syntax_subtask(self, task, request, file_path, check_program, 'status_syntax') @validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) def header_syntax_validation_subtask(self, task, request, file_path, **kwargs): - check_program = [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path] + check_program = TASK_CONFIGS['header_syntax_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) return run_syntax_subtask(self, task, request, file_path, check_program, 'status_header_syntax') @@ -406,7 +415,8 @@ def run_syntax_subtask(self, task, request, file_path, check_program, model_stat @validation_task_runner(ValidationTask.Type.SCHEMA) def schema_validation_subtask(self, task, request, file_path, *args, **kwargs): - check_program = [sys.executable, "-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path] + + 
check_program = TASK_CONFIGS['schema_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( @@ -488,8 +498,7 @@ def is_schema_error(line): @validation_task_runner(ValidationTask.Type.HEADER) def header_validation_subtask(self, task, request, file_path, **kwargs): - check_script = os.path.join(os.path.dirname(__file__), "checks", "header_policy", "validate_header.py") - check_program = [sys.executable, check_script, file_path] + check_program = TASK_CONFIGS['header_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( @@ -597,6 +606,7 @@ def header_validation_subtask(self, task, request, file_path, **kwargs): @validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) def digital_signatures_subtask(self, task, request, file_path, **kwargs): check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") + check_program = [sys.executable, check_script, file_path] log_program(self.__qualname__, check_program) @@ -641,8 +651,7 @@ def create_outcome(di): @validation_task_runner(ValidationTask.Type.BSDD) def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_bsdd.py") - check_program = [sys.executable, check_script, '--file-name', file_path, '--task-id', str(task.id)] + check_program = TASK_CONFIGS['bsdd_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( @@ -703,14 +712,7 @@ def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): @validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) def industry_practices_subtask(self, task, request, file_path): - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [ - sys.executable, - check_script, - '--file-name', file_path, - 
'--task-id', str(task.id), - '--rule-type', 'INDUSTRY_PRACTICE' - ] + check_program = TASK_CONFIGS['industry_practices_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( @@ -738,18 +740,7 @@ def industry_practices_subtask(self, task, request, file_path): return {'is_valid': is_valid, 'reason': reason} -def run_gherkin_subtask(self, task, request, file_path, gherkin_task_type, status_field, extra_args=None): - check_script = os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py") - check_program = [ - sys.executable, - check_script, - '--file-name', file_path, - '--task-id', str(task.id), - '--rule-type', gherkin_task_type - ] - if extra_args: - check_program += extra_args - +def run_gherkin_subtask(self, task, request, file_path, check_program, status_field): log_program(self.__qualname__, check_program) proc = run_task( From aa33ef040e2c1d85d16716c6f8ffa4102fcd9acb Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Wed, 23 Jul 2025 22:58:21 +0100 Subject: [PATCH 03/21] tasks configuration --- backend/apps/ifc_validation/task_configs.py | 128 +++++++++--- backend/apps/ifc_validation/tasks.py | 207 ++++++++++++-------- 2 files changed, 228 insertions(+), 107 deletions(-) diff --git a/backend/apps/ifc_validation/task_configs.py b/backend/apps/ifc_validation/task_configs.py index 14bd6d51..c52ae974 100644 --- a/backend/apps/ifc_validation/task_configs.py +++ b/backend/apps/ifc_validation/task_configs.py @@ -51,72 +51,154 @@ class TaskConfig: increment: int model_field: str check_program: typing.Callable[[str], list] + blocks: typing.Optional[typing.List[str]] + execution_stage: str = "parallel" + TASK_CONFIGS: typing.Dict[str, TaskConfig] = { - 'instance_completion_subtask': TaskConfig( - type=ValidationTask.Type.INSTANCE_COMPLETION, - increment=5, - model_field=None, - check_program=lambda file_path, task_id: [] # No external check - ), - 'syntax_validation_subtask': TaskConfig( - 
type=ValidationTask.Type.SYNTAX, - increment=5, - model_field='status_syntax', - check_program=check_syntax - ), 'header_syntax_validation_subtask': TaskConfig( type=ValidationTask.Type.HEADER_SYNTAX, increment=5, model_field='status_header_syntax', - check_program=check_header_syntax + check_program=check_header_syntax, + blocks=[ + 'header_validation_subtask', + 'syntax_validation_subtask', + 'prerequisites_subtask', + 'digital_signatures_subtask', + 'schema_validation_subtask', + 'normative_rules_ia_validation_subtask', + 'normative_rules_ip_validation_subtask', + 'industry_practices_subtask', + 'instance_completion_subtask', + ], + execution_stage="serial", ), 'header_validation_subtask': TaskConfig( type=ValidationTask.Type.HEADER, increment=10, model_field='status_header', - check_program=check_validate_header + check_program=check_validate_header, + blocks = [], + execution_stage="serial", + ), + 'syntax_validation_subtask': TaskConfig( + type=ValidationTask.Type.SYNTAX, + increment=5, + model_field='status_syntax', + check_program=check_syntax, + blocks=[ + 'digital_signatures_subtask', + 'schema_validation_subtask', + 'normative_rules_ia_validation_subtask', + 'normative_rules_ip_validation_subtask', + 'industry_practices_subtask', + 'instance_completion_subtask' + ], + execution_stage="serial", ), 'prerequisites_subtask': TaskConfig( type=ValidationTask.Type.PREREQUISITES, increment=10, model_field='status_prereq', - check_program=check_gherkin_prereq + check_program=check_gherkin_prereq, + blocks=[ + 'digital_signatures_subtask', + 'schema_validation_subtask', + 'normative_rules_ia_validation_subtask', + 'normative_rules_ip_validation_subtask', + 'industry_practices_subtask', + 'instance_completion_subtask' + ], + execution_stage="serial", ), 'schema_validation_subtask': TaskConfig( type=ValidationTask.Type.SCHEMA, increment=10, model_field='status_schema', - check_program=check_schema + check_program=check_schema, + blocks = [], + 
execution_stage="parallel", ), 'digital_signatures_subtask': TaskConfig( type=ValidationTask.Type.DIGITAL_SIGNATURES, increment=5, model_field='status_signatures', - check_program=check_signatures + check_program=check_signatures, + blocks = [], + execution_stage="parallel", ), 'bsdd_validation_subtask': TaskConfig( type=ValidationTask.Type.BSDD, increment=0, model_field='status_bsdd', - check_program=check_bsdd + check_program=check_bsdd, + blocks = [], + execution_stage="parallel", ), 'normative_rules_ia_validation_subtask': TaskConfig( type=ValidationTask.Type.NORMATIVE_IA, increment=20, model_field='status_ia', - check_program=check_gherkin_ia - ), + check_program=check_gherkin_ia, + blocks = [], + execution_stage="parallel", ), 'normative_rules_ip_validation_subtask': TaskConfig( type=ValidationTask.Type.NORMATIVE_IP, increment=20, model_field='status_ip', - check_program=check_gherkin_ip + check_program=check_gherkin_ip, + blocks = [], + execution_stage="parallel", ), 'industry_practices_subtask': TaskConfig( type=ValidationTask.Type.INDUSTRY_PRACTICES, increment=10, model_field='status_industry_practices', - check_program=check_gherkin_best_practice + check_program=check_gherkin_best_practice, + blocks = [], + execution_stage="parallel", + ), + 'instance_completion_subtask': TaskConfig( + type=ValidationTask.Type.INSTANCE_COMPLETION, + increment=5, + model_field=None, + check_program=lambda file_path, task_id: [], + blocks=[], + execution_stage="final", ), -} \ No newline at end of file +} + +class TaskRegistry: + def __init__(self, config_map: dict[str, TaskConfig]): + self._configs = config_map + self._by_task_type = {cfg.type: name for name, cfg in config_map.items()} + self._by_task_type_name = {cfg.type.name: name for name, cfg in config_map.items()} + + def get_config_by_celery_name(self, name: str) -> TaskConfig: + return self._configs.get(name) + + def get_celery_name_by_task_type(self, task_type: ValidationTask.Type) -> str: + return 
self._by_task_type.get(task_type) + + def get_celery_name_by_task_type_name(self, task_type_name: str) -> str: + return self._by_task_type_name.get(task_type_name) + + def get_blocked_tasks(self, task_name: str) -> typing.List[str]: + return self._configs[task_name].blocks or [] + + def get_tasks_by_stage(self, stage: str) -> typing.List[str]: + return [name for name, cfg in self._configs.items() if cfg.execution_stage == stage] + + def __getitem__(self, task_name: str) -> TaskConfig: + return self._configs[task_name] + + def get_blockers_of(self, task_name: str) -> typing.List[str]: + return [ + blocker_name + for blocker_name, cfg in self._configs.items() + if task_name in (cfg.blocks or []) + ] + + def all(self) -> dict[str, TaskConfig]: + return self._configs \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index 08f51a05..0a7ed80f 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -19,10 +19,11 @@ from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT, MEDIA_ROOT from apps.ifc_validation_models.decorators import requires_django_user_context from apps.ifc_validation_models.models import * -from apps.ifc_validation.task_configs import TASK_CONFIGS +from apps.ifc_validation.task_configs import TASK_CONFIGS, TaskRegistry from .email_tasks import * +task_registry = TaskRegistry(TASK_CONFIGS) logger = get_task_logger(__name__) PROGRESS_INCREMENTS = { @@ -49,7 +50,6 @@ class ValidationIntegrityError(ValidationSubprocessError): pass def run_task( task: ValidationTask, check_program: typing.List[str], - task_name: str ) -> subprocess.CompletedProcess[str]: task.set_process_details(None, check_program) try: @@ -61,30 +61,43 @@ def run_task( timeout=TASK_TIMEOUT_LIMIT, env= os.environ.copy() ) + logger.info(f'test run task task name {task.type}, task value : {task}') return proc except subprocess.TimeoutExpired as err: - 
logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task_name}") + logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task_type}") task.mark_as_failed(err) - raise ValidationTimeoutError(f"Task {task_name} timed out") from err + raise ValidationTimeoutError(f"Task {task.type} timed out") from err except ifcopenshell.Error as err: - logger.exception(f"Ifcopenshell error in task {task.id} : {task_name}") + logger.exception(f"Ifcopenshell error in task {task.id} : {task.type}") task.mark_as_failed(err) - raise ValidationOpenShellError(f"IFC parsing or validation failed during task {task_name}") from err + raise ValidationOpenShellError(f"IFC parsing or validation failed during task {task.type}") from err except IntegrityError as err: - logger.exception(f"Database integrity error in task {task.id} : {task_name}") + logger.exception(f"Database integrity error in task {task.id} : {task.type}") task.mark_as_failed(err) - raise ValidationIntegrityError(f"Database error during task {task_name}") from err + raise ValidationIntegrityError(f"Database error during task {task.type}") from err except Exception as err: - logger.exception(f"Unexpected error in task {task.id} : {task_name}") + logger.exception(f"Unexpected error in task {task.id} : {task.type}") task.mark_as_failed(err) - raise ValidationSubprocessError(f"Unknown error during validation task {task.id}: {task_name}") from err + raise ValidationSubprocessError(f"Unknown error during validation task {task.id}: {task.type}") from err def log_program(taskname, check_program): logger.debug(f'Command for {taskname}: {" ".join(check_program)}') + +def get_task_type(name): + return name.split(".")[-1] + +def get_internal_result(task_type, prev_result, is_valid, reason): + prev_result = prev_result or {} + current_result = { + 'is_valid': is_valid, + 'reason': reason + } + return {**prev_result, task_type: 
current_result} + def update_progress(func): @functools.wraps(func) @@ -94,9 +107,8 @@ def wrapper(self, *args, **kwargs): request_id = kwargs.get("id") # @nb not the most efficient because we fetch the ValidationRequest anew, but # assuming django will cache this efficiently enough for us to keep the code clean - config = TASK_CONFIGS.get(func.__name__) request = ValidationRequest.objects.get(pk=request_id) - increment = config.increment + increment = task_registry[func.__name__].increment request.progress = min(request.progress + increment, 100) request.save() except Exception as e: @@ -105,6 +117,7 @@ def wrapper(self, *args, **kwargs): return wrapper + @functools.lru_cache(maxsize=1024) def get_absolute_file_path(file_name): @@ -238,23 +251,33 @@ def decorator(func): @update_progress @functools.wraps(func) def wrapper(self, prev_result, id, file_name, *args, **kwargs): - if args and isinstance(args[0], dict) and "is_valid" in args[0]: - prev_result = args[0] - else: - prev_result = {"is_valid": True} - + + # default celery output for parallel tasks is a list, + if isinstance(prev_result, list): + merged_result = {} + for result in prev_result: + if isinstance(result, dict): + merged_result.update(result) + prev_result = merged_result + block_current_task = any( + not prev_result.get(blocker, {}).get('is_valid', True) + for blocker in task_registry.get_blockers_of(get_task_type(self.name)) + ) request = ValidationRequest.objects.get(pk=id) file_path = get_absolute_file_path(request.file.name) task = ValidationTask.objects.create(request=request, type=task_type) - if prev_result is not None and prev_result.get('is_valid') is True: + if not block_current_task: task.mark_as_initiated() - return func(self, task, request, file_path, *args, **kwargs) + return func(self, task, prev_result, request, file_path, *args, **kwargs) else: reason = f'Skipped as prev_result = {prev_result}.' 
task.mark_as_skipped(reason) - return {'is_valid': None, 'reason': reason} + is_valid = None + return get_internal_result( + task_type, prev_result, is_valid, reason + ) return wrapper return decorator @@ -304,12 +327,12 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): @validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) -def instance_completion_subtask(self, task, request, file_path, *args, **kwargs): +def instance_completion_subtask(self, task, prev_result, request, file_path, *args, **kwargs): try: ifc_file = ifcopenshell.open(file_path) except: reason = f'Failed to open {file_path}. Likely previous tasks also failed.' - task.marked_as_completed(reason) + task.mark_as_completed(reason) return {'is_valid': None, 'reason': reason} if ifc_file: @@ -327,49 +350,52 @@ def instance_completion_subtask(self, task, request, file_path, *args, **kwargs) # update Task info and return reason = f'Updated {instance_count:,} ModelInstance record(s)' task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} - + current_result = { # last result, result does not need to contain previous results + 'is_valid': True, + 'reason': reason + } + return current_result @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) -def normative_rules_ia_validation_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['normative_rules_ia_validation_subtask'].check_program(file_path, task.id) +def normative_rules_ia_validation_subtask(self, task, prev_result, request, file_path, **kwargs): + check_program = task_registry['normative_rules_ia_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ia') + return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ia') @validation_task_runner(ValidationTask.Type.NORMATIVE_IP) -def 
normative_rules_ip_validation_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['normative_rules_ip_validation_subtask'].check_program(file_path, task.id) +def normative_rules_ip_validation_subtask(self, task, prev_result, request, file_path, **kwargs): + check_program = task_registry['normative_rules_ip_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ip') + return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ip') @validation_task_runner(ValidationTask.Type.PREREQUISITES) -def prerequisites_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['prerequisites_subtask'].check_program(file_path, task.id) +def prerequisites_subtask(self, task, prev_result, request, file_path, **kwargs): + check_program = task_registry['prerequisites_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_prereq') + return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_prereq') @validation_task_runner(ValidationTask.Type.SYNTAX) -def syntax_validation_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['syntax_validation_subtask'].check_program(file_path, task.id) +def syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): + check_program = task_registry['syntax_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) - return run_syntax_subtask(self, task, request, file_path, check_program, 'status_syntax') + return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_syntax') @validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) -def 
header_syntax_validation_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['header_syntax_validation_subtask'].check_program(file_path, task.id) +def header_syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): + check_program = task_registry['header_syntax_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) - return run_syntax_subtask(self, task, request, file_path, check_program, 'status_header_syntax') + return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_header_syntax') -def run_syntax_subtask(self, task, request, file_path, check_program, model_status_field): +def run_syntax_subtask(self, task, prev_result, request, file_path, check_program, model_status_field): + task_type = get_task_type(self.name) proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) output = proc.stdout error_output = proc.stderr @@ -393,36 +419,36 @@ def run_syntax_subtask(self, task, request, file_path, check_program, model_stat observed=list(filter(None, error_output.split("\n")))[-1] ) else: - messages = json.loads(output) - setattr(model, model_status_field, Model.Status.INVALID) - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=messages.get("message") - ) + for msg in json.loads(output): + setattr(model, model_status_field, Model.Status.INVALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, + observed=msg.get("message") + ) model.save(update_fields=[model_status_field]) - if success: + is_valid = success + if is_valid: reason = "No IFC syntax error(s)." 
- task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} else: reason = f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" - task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} + task.mark_as_completed(reason) + return get_internal_result( + task_type, prev_result, is_valid, reason + ) @validation_task_runner(ValidationTask.Type.SCHEMA) -def schema_validation_subtask(self, task, request, file_path, *args, **kwargs): - - check_program = TASK_CONFIGS['schema_validation_subtask'].check_program(file_path, task.id) +def schema_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): + task_type = get_task_type(self.name) + check_program = task_registry['schema_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) def is_schema_error(line): try: @@ -486,28 +512,29 @@ def is_schema_error(line): model.save(update_fields=['status_schema']) - if success: + is_valid = success + if is_valid: reason = "No IFC schema errors." task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} else: reason = f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." 
task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} - + + return get_internal_result( + task_type, prev_result, is_valid, reason + ) @validation_task_runner(ValidationTask.Type.HEADER) -def header_validation_subtask(self, task, request, file_path, **kwargs): - check_program = TASK_CONFIGS['header_validation_subtask'].check_program(file_path, task.id) +def header_validation_subtask(self, task, prev_result, request, file_path, **kwargs): + task_type = get_task_type(self.name) + check_program = task_registry['header_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) - - + header_validation = {} stdout_lines = proc.stdout.splitlines() for line in stdout_lines: @@ -599,12 +626,15 @@ def header_validation_subtask(self, task, request, file_path, **kwargs): is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} + return get_internal_result( + task_type, prev_result, is_valid, reason + ) @validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) -def digital_signatures_subtask(self, task, request, file_path, **kwargs): +def digital_signatures_subtask(self, task, prev_result, request, file_path, **kwargs): + task_type = get_task_type(self.name) check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") check_program = [sys.executable, check_script, file_path] @@ -613,7 +643,6 @@ def digital_signatures_subtask(self, task, request, file_path, **kwargs): proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) output = list(map(json.loads, filter(None, map(lambda s: s.strip(), proc.stdout.split("\n"))))) @@ -638,26 +667,29 @@ def create_outcome(di): 
ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) model.save(update_fields=['status_signatures']) - + is_valid = True if success: reason = 'Digital signature check completed' task.mark_as_completed(reason) - return {'is_valid': True, 'reason': reason} + is_valid = True else: reason = f"Script returned exit code {proc.returncode} and {proc.stderr}" task.mark_as_completed(reason) - return {'is_valid': False, 'reason': reason} + is_valid = False + return get_internal_result( + task_type, prev_result, is_valid, reason + ) @validation_task_runner(ValidationTask.Type.BSDD) -def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): - check_program = TASK_CONFIGS['bsdd_validation_subtask'].check_program(file_path, task.id) +def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): + task_type = get_task_type(self.name) + check_program = task_registry['bsdd_validation_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) if proc.returncode is not None and proc.returncode != 0: @@ -707,18 +739,21 @@ def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): is_valid = agg_status != Model.Status.INVALID reason = f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} + return get_internal_result( + task_type, prev_result, is_valid, reason + ) + @validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) -def industry_practices_subtask(self, task, request, file_path): - check_program = TASK_CONFIGS['industry_practices_subtask'].check_program(file_path, task.id) +def industry_practices_subtask(self, task, prev_result, request, file_path): + task_type = get_task_type(self.name) + check_program 
= task_registry['industry_practices_subtask'].check_program(file_path, task.id) log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, - task_name = self.name.split(".")[-1] ) if proc.returncode is not None and proc.returncode != 0: @@ -737,16 +772,18 @@ def industry_practices_subtask(self, task, request, file_path): is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} + return get_internal_result( + task_type, prev_result, is_valid, reason + ) -def run_gherkin_subtask(self, task, request, file_path, check_program, status_field): +def run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, status_field): + task_type = get_task_type(self.name) log_program(self.__qualname__, check_program) proc = run_task( task=task, - check_program = check_program, - task_name = self.name.split(".")[-1] + check_program = check_program ) if proc.returncode is not None and proc.returncode != 0: @@ -765,4 +802,6 @@ def run_gherkin_subtask(self, task, request, file_path, check_program, status_fi is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' task.mark_as_completed(reason) - return {'is_valid': is_valid, 'reason': reason} + return get_internal_result( + task_type, prev_result, is_valid, reason + ) From 5e28a51c5a4c67fa9b7aad8de92140cdf5d1a53e Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Thu, 24 Jul 2025 00:39:26 +0100 Subject: [PATCH 04/21] small DRY improvements --- backend/apps/ifc_validation/tasks.py | 100 ++++++++------------------- 1 file changed, 28 insertions(+), 72 deletions(-) diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index 0a7ed80f..fd48fee3 100644 --- a/backend/apps/ifc_validation/tasks.py +++ 
b/backend/apps/ifc_validation/tasks.py @@ -6,6 +6,7 @@ import json import ifcopenshell import typing +import contextlib from celery import shared_task, chain, chord, group from celery.utils.log import get_task_logger @@ -25,20 +26,7 @@ task_registry = TaskRegistry(TASK_CONFIGS) logger = get_task_logger(__name__) - -PROGRESS_INCREMENTS = { - 'instance_completion_subtask': 5, - 'syntax_validation_subtask': 5, - 'header_syntax_validation_subtask': 5, - 'header_validation_subtask': 10, - 'prerequisites_subtask': 10, - 'schema_validation_subtask': 10, - 'digital_signatures_subtask': 5, - 'bsdd_validation_subtask': 0, - 'normative_rules_ia_validation_subtask': 20, - 'normative_rules_ip_validation_subtask': 20, - 'industry_practices_subtask': 10 -} + assert sum(cfg.increment for cfg in TASK_CONFIGS.values()) == 100 @@ -51,6 +39,7 @@ def run_task( task: ValidationTask, check_program: typing.List[str], ) -> subprocess.CompletedProcess[str]: + logger.debug(f'Command for {task.type}: {" ".join(check_program)}') task.set_process_details(None, check_program) try: proc = subprocess.run( @@ -65,7 +54,7 @@ def run_task( return proc except subprocess.TimeoutExpired as err: - logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task_type}") + logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task.type}") task.mark_as_failed(err) raise ValidationTimeoutError(f"Task {task.type} timed out") from err @@ -83,13 +72,22 @@ def run_task( logger.exception(f"Unexpected error in task {task.id} : {task.type}") task.mark_as_failed(err) raise ValidationSubprocessError(f"Unknown error during validation task {task.id}: {task.type}") from err - -def log_program(taskname, check_program): - logger.debug(f'Command for {taskname}: {" ".join(check_program)}') def get_task_type(name): return name.split(".")[-1] +def check_proc_success_or_fail(proc, task): + if proc.returncode is not None and 
proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) + return proc.stdout + +@contextlib.contextmanager +def with_model(request_id): + with transaction.atomic(): + yield get_or_create_ifc_model(request_id) + def get_internal_result(task_type, prev_result, is_valid, reason): prev_result = prev_result or {} current_result = { @@ -359,21 +357,19 @@ def instance_completion_subtask(self, task, prev_result, request, file_path, *ar @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) def normative_rules_ia_validation_subtask(self, task, prev_result, request, file_path, **kwargs): check_program = task_registry['normative_rules_ia_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) + logger.info(f'qualname : {self.__qualname__}') return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ia') @validation_task_runner(ValidationTask.Type.NORMATIVE_IP) def normative_rules_ip_validation_subtask(self, task, prev_result, request, file_path, **kwargs): check_program = task_registry['normative_rules_ip_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ip') @validation_task_runner(ValidationTask.Type.PREREQUISITES) def prerequisites_subtask(self, task, prev_result, request, file_path, **kwargs): check_program = task_registry['prerequisites_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_prereq') @@ -381,13 +377,11 @@ def prerequisites_subtask(self, task, prev_result, request, file_path, **kwargs) @validation_task_runner(ValidationTask.Type.SYNTAX) def 
syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): check_program = task_registry['syntax_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_syntax') @validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) def header_syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): check_program = task_registry['header_syntax_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_header_syntax') @@ -401,8 +395,7 @@ def run_syntax_subtask(self, task, prev_result, request, file_path, check_progra error_output = proc.stderr success = (len(list(filter(None, output.split("\n")))) == 0) and len(error_output) == 0 - with transaction.atomic(): - model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: if success: setattr(model, model_status_field, Model.Status.VALID) @@ -444,7 +437,6 @@ def run_syntax_subtask(self, task, prev_result, request, file_path, check_progra def schema_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) check_program = task_registry['schema_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) proc = run_task( task=task, @@ -462,8 +454,7 @@ def is_schema_error(line): success = proc.returncode >= 0 valid = len(output) == 0 - with transaction.atomic(): - model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: if valid: model.status_schema = Model.Status.VALID @@ -528,8 +519,6 @@ def is_schema_error(line): def header_validation_subtask(self, task, prev_result, request, file_path, **kwargs): task_type = get_task_type(self.name) check_program = 
task_registry['header_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) - proc = run_task( task=task, check_program = check_program, @@ -543,9 +532,7 @@ def header_validation_subtask(self, task, prev_result, request, file_path, **kwa except json.JSONDecodeError: continue - with transaction.atomic(): - # create or retrieve Model info - model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: agg_status = task.determine_aggregate_status() model.status_prereq = agg_status model.size = os.path.getsize(file_path) @@ -553,7 +540,6 @@ def header_validation_subtask(self, task, prev_result, request, file_path, **kwa model.schema = header_validation.get('schema_identifier') logger.debug(f'The schema identifier = {header_validation.get("schema")}') - # time_stamp if ifc_file_time_stamp := header_validation.get('time_stamp', False): try: @@ -618,10 +604,9 @@ def header_validation_subtask(self, task, prev_result, request, file_path, **kwa # update header validation model.header_validation = header_validation - model.save(update_fields=['status_header', 'header_validation']) + model.save(update_fields=['status_header', 'header_validation']) model.save() - # update Task info and return is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' @@ -638,7 +623,6 @@ def digital_signatures_subtask(self, task, prev_result, request, file_path, **kw check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") check_program = [sys.executable, check_script, file_path] - log_program(self.__qualname__, check_program) proc = run_task( task=task, @@ -649,10 +633,7 @@ def digital_signatures_subtask(self, task, prev_result, request, file_path, **kw success = proc.returncode >= 0 valid = all(m['signature'] != "invalid" for m in output) - with transaction.atomic(): - - # create or retrieve Model info 
- model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: model.status_signatures = Model.Status.NOT_APPLICABLE if not output else Model.Status.VALID if valid else Model.Status.INVALID def create_outcome(di): @@ -667,7 +648,6 @@ def create_outcome(di): ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) model.save(update_fields=['status_signatures']) - is_valid = True if success: reason = 'Digital signature check completed' task.mark_as_completed(reason) @@ -685,25 +665,16 @@ def create_outcome(di): def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) check_program = task_registry['bsdd_validation_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, ) - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout + raw_output = check_proc_success_or_fail(proc, task) logger.info(f'Output for {self.__name__}: {raw_output}') - with transaction.atomic(): - - # create or retrieve Model info - model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: # update Validation Outcomes json_output = json.loads(raw_output) @@ -749,22 +720,14 @@ def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, def industry_practices_subtask(self, task, prev_result, request, file_path): task_type = get_task_type(self.name) check_program = task_registry['industry_practices_subtask'].check_program(file_path, task.id) - log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program, ) - if proc.returncode is not None and 
proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) - - raw_output = proc.stdout - - with transaction.atomic(): - model = get_or_create_ifc_model(request.id) + raw_output = check_proc_success_or_fail(proc, task) + with with_model(request.id) as model: agg_status = task.determine_aggregate_status() model.status_industry_practices = agg_status model.save(update_fields=['status_industry_practices']) @@ -779,22 +742,15 @@ def industry_practices_subtask(self, task, prev_result, request, file_path): def run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, status_field): task_type = get_task_type(self.name) - log_program(self.__qualname__, check_program) proc = run_task( task=task, check_program = check_program ) - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - task.mark_as_failed(error_message) - raise RuntimeError(error_message) + raw_output = check_proc_success_or_fail(proc, task) - raw_output = proc.stdout - - with transaction.atomic(): - model = get_or_create_ifc_model(request.id) + with with_model(request.id) as model: agg_status = task.determine_aggregate_status() setattr(model, status_field, agg_status) model.save(update_fields=[status_field]) From e5d1ec9d14909ff35e36bc99e2be5e971f926d07 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sat, 26 Jul 2025 23:53:26 +0100 Subject: [PATCH 05/21] set model status to django model descriptors --- backend/apps/ifc_validation/task_configs.py | 22 ++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/apps/ifc_validation/task_configs.py b/backend/apps/ifc_validation/task_configs.py index c52ae974..66b2701d 100644 --- a/backend/apps/ifc_validation/task_configs.py +++ 
b/backend/apps/ifc_validation/task_configs.py @@ -2,7 +2,7 @@ import typing import sys import os -from apps.ifc_validation_models.models import ValidationTask +from apps.ifc_validation_models.models import ValidationTask, Model def execute_check(*args: str) -> list: return [sys.executable, *args] @@ -59,7 +59,7 @@ class TaskConfig: 'header_syntax_validation_subtask': TaskConfig( type=ValidationTask.Type.HEADER_SYNTAX, increment=5, - model_field='status_header_syntax', + model_field=Model.status_header_syntax, check_program=check_header_syntax, blocks=[ 'header_validation_subtask', @@ -77,7 +77,7 @@ class TaskConfig: 'header_validation_subtask': TaskConfig( type=ValidationTask.Type.HEADER, increment=10, - model_field='status_header', + model_field=Model.status_header, check_program=check_validate_header, blocks = [], execution_stage="serial", @@ -85,7 +85,7 @@ class TaskConfig: 'syntax_validation_subtask': TaskConfig( type=ValidationTask.Type.SYNTAX, increment=5, - model_field='status_syntax', + model_field=Model.status_syntax, check_program=check_syntax, blocks=[ 'digital_signatures_subtask', @@ -100,7 +100,7 @@ class TaskConfig: 'prerequisites_subtask': TaskConfig( type=ValidationTask.Type.PREREQUISITES, increment=10, - model_field='status_prereq', + model_field=Model.status_prereq, check_program=check_gherkin_prereq, blocks=[ 'digital_signatures_subtask', @@ -115,7 +115,7 @@ class TaskConfig: 'schema_validation_subtask': TaskConfig( type=ValidationTask.Type.SCHEMA, increment=10, - model_field='status_schema', + model_field=Model.status_schema, check_program=check_schema, blocks = [], execution_stage="parallel", @@ -123,7 +123,7 @@ class TaskConfig: 'digital_signatures_subtask': TaskConfig( type=ValidationTask.Type.DIGITAL_SIGNATURES, increment=5, - model_field='status_signatures', + model_field=Model.status_signatures, check_program=check_signatures, blocks = [], execution_stage="parallel", @@ -131,7 +131,7 @@ class TaskConfig: 'bsdd_validation_subtask': 
TaskConfig( type=ValidationTask.Type.BSDD, increment=0, - model_field='status_bsdd', + model_field=Model.status_bsdd, check_program=check_bsdd, blocks = [], execution_stage="parallel", @@ -139,14 +139,14 @@ class TaskConfig: 'normative_rules_ia_validation_subtask': TaskConfig( type=ValidationTask.Type.NORMATIVE_IA, increment=20, - model_field='status_ia', + model_field=Model.status_ia, check_program=check_gherkin_ia, blocks = [], execution_stage="parallel", ), 'normative_rules_ip_validation_subtask': TaskConfig( type=ValidationTask.Type.NORMATIVE_IP, increment=20, - model_field='status_ip', + model_field=Model.status_ip, check_program=check_gherkin_ip, blocks = [], execution_stage="parallel", @@ -154,7 +154,7 @@ class TaskConfig: 'industry_practices_subtask': TaskConfig( type=ValidationTask.Type.INDUSTRY_PRACTICES, increment=10, - model_field='status_industry_practices', + model_field=Model.status_industry_practices, check_program=check_gherkin_best_practice, blocks = [], execution_stage="parallel", From 2716657224dd8af1d0dce71839ddd554facaaf38 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 27 Jul 2025 14:30:42 +0100 Subject: [PATCH 06/21] simplify error handling --- backend/apps/ifc_validation/tasks.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index fd48fee3..89bb86e0 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -30,11 +30,6 @@ assert sum(cfg.increment for cfg in TASK_CONFIGS.values()) == 100 -class ValidationSubprocessError(Exception): pass -class ValidationTimeoutError(ValidationSubprocessError): pass -class ValidationOpenShellError(ValidationSubprocessError): pass -class ValidationIntegrityError(ValidationSubprocessError): pass - def run_task( task: ValidationTask, check_program: typing.List[str], @@ -53,25 +48,10 @@ def run_task( logger.info(f'test run task task name 
{task.type}, task value : {task}') return proc - except subprocess.TimeoutExpired as err: - logger.exception(f"TimeoutExpired while running task {task.id} with command: {' '.join(check_program)} : {task.type}") - task.mark_as_failed(err) - raise ValidationTimeoutError(f"Task {task.type} timed out") from err - - except ifcopenshell.Error as err: - logger.exception(f"Ifcopenshell error in task {task.id} : {task.type}") - task.mark_as_failed(err) - raise ValidationOpenShellError(f"IFC parsing or validation failed during task {task.type}") from err - - except IntegrityError as err: - logger.exception(f"Database integrity error in task {task.id} : {task.type}") - task.mark_as_failed(err) - raise ValidationIntegrityError(f"Database error during task {task.type}") from err - except Exception as err: - logger.exception(f"Unexpected error in task {task.id} : {task.type}") + logger.exception(f"{type(err).__name__} in task {task.id} : {task.type}") task.mark_as_failed(err) - raise ValidationSubprocessError(f"Unknown error during validation task {task.id}: {task.type}") from err + raise type(err)(f"Unknown error during validation task {task.id}: {task.type}") from err def get_task_type(name): return name.split(".")[-1] From e83cbad22223ba2652e2e6269fc47ba27c371683 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 27 Jul 2025 15:38:33 +0100 Subject: [PATCH 07/21] Add documentation for prev_result parallel tasks --- backend/apps/ifc_validation/tasks.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index 89bb86e0..a3bfb06b 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -7,6 +7,7 @@ import ifcopenshell import typing import contextlib +import operator from celery import shared_task, chain, chord, group from celery.utils.log import get_task_logger @@ -231,13 +232,8 @@ def decorator(func): def wrapper(self, prev_result, 
id, file_name, *args, **kwargs): - # default celery output for parallel tasks is a list, - if isinstance(prev_result, list): - merged_result = {} - for result in prev_result: - if isinstance(result, dict): - merged_result.update(result) - prev_result = merged_result + # Chord results from parallel tasks arrive as a list of dicts; merge them into a single dict for consistency + prev_result = functools.reduce(operator.or_, filter(lambda x: isinstance(x, dict), prev_result), {}) block_current_task = any( not prev_result.get(blocker, {}).get('is_valid', True) for blocker in task_registry.get_blockers_of(get_task_type(self.name)) From dac01c1458fd13b62b0de7e94827725817d67aca Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 27 Jul 2025 19:04:34 +0100 Subject: [PATCH 08/21] update configs --- backend/apps/ifc_validation/task_configs.py | 182 ++++++-------------- backend/apps/ifc_validation/tasks.py | 26 +-- 2 files changed, 69 insertions(+), 139 deletions(-) diff --git a/backend/apps/ifc_validation/task_configs.py b/backend/apps/ifc_validation/task_configs.py index 66b2701d..b8511ed6 100644 --- a/backend/apps/ifc_validation/task_configs.py +++ b/backend/apps/ifc_validation/task_configs.py @@ -44,6 +44,8 @@ def check_gherkin_ip(file_path: str, task_id: int) -> list: def check_gherkin_best_practice(file_path: str, task_id: int) -> list: return check_gherkin(file_path, task_id, "INDUSTRY_PRACTICE") +def check_instance_completion(file_path, task_id): + return [] @dataclass class TaskConfig: @@ -54,121 +56,42 @@ class TaskConfig: blocks: typing.Optional[typing.List[str]] execution_stage: str = "parallel" - -TASK_CONFIGS: typing.Dict[str, TaskConfig] = { - 'header_syntax_validation_subtask': TaskConfig( - type=ValidationTask.Type.HEADER_SYNTAX, - increment=5, - model_field=Model.status_header_syntax, - check_program=check_header_syntax, - blocks=[ - 'header_validation_subtask', - 'syntax_validation_subtask', - 'prerequisites_subtask', - 'digital_signatures_subtask', - 
'schema_validation_subtask', - 'normative_rules_ia_validation_subtask', - 'normative_rules_ip_validation_subtask', - 'industry_practices_subtask', - 'instance_completion_subtask', - ], - execution_stage="serial", - ), - 'header_validation_subtask': TaskConfig( - type=ValidationTask.Type.HEADER, - increment=10, - model_field=Model.status_header, - check_program=check_validate_header, - blocks = [], - execution_stage="serial", - ), - 'syntax_validation_subtask': TaskConfig( - type=ValidationTask.Type.SYNTAX, - increment=5, - model_field=Model.status_syntax, - check_program=check_syntax, - blocks=[ - 'digital_signatures_subtask', - 'schema_validation_subtask', - 'normative_rules_ia_validation_subtask', - 'normative_rules_ip_validation_subtask', - 'industry_practices_subtask', - 'instance_completion_subtask' - ], - execution_stage="serial", - ), - 'prerequisites_subtask': TaskConfig( - type=ValidationTask.Type.PREREQUISITES, - increment=10, - model_field=Model.status_prereq, - check_program=check_gherkin_prereq, - blocks=[ - 'digital_signatures_subtask', - 'schema_validation_subtask', - 'normative_rules_ia_validation_subtask', - 'normative_rules_ip_validation_subtask', - 'industry_practices_subtask', - 'instance_completion_subtask' - ], - execution_stage="serial", - ), - 'schema_validation_subtask': TaskConfig( - type=ValidationTask.Type.SCHEMA, - increment=10, - model_field=Model.status_schema, - check_program=check_schema, - blocks = [], - execution_stage="parallel", - ), - 'digital_signatures_subtask': TaskConfig( - type=ValidationTask.Type.DIGITAL_SIGNATURES, - increment=5, - model_field=Model.status_signatures, - check_program=check_signatures, - blocks = [], - execution_stage="parallel", - ), - 'bsdd_validation_subtask': TaskConfig( - type=ValidationTask.Type.BSDD, - increment=0, - model_field=Model.status_bsdd, - check_program=check_bsdd, - blocks = [], - execution_stage="parallel", - ), - 'normative_rules_ia_validation_subtask': TaskConfig( - 
type=ValidationTask.Type.NORMATIVE_IA, - increment=20, - model_field=Model.status_ia, - check_program=check_gherkin_ia, - blocks = [], - execution_stage="parallel", ), - 'normative_rules_ip_validation_subtask': TaskConfig( - type=ValidationTask.Type.NORMATIVE_IP, - increment=20, - model_field=Model.status_ip, - check_program=check_gherkin_ip, - blocks = [], - execution_stage="parallel", - ), - 'industry_practices_subtask': TaskConfig( - type=ValidationTask.Type.INDUSTRY_PRACTICES, - increment=10, - model_field=Model.status_industry_practices, - check_program=check_gherkin_best_practice, - blocks = [], - execution_stage="parallel", - ), - 'instance_completion_subtask': TaskConfig( - type=ValidationTask.Type.INSTANCE_COMPLETION, - increment=5, - model_field=None, - check_program=lambda file_path, task_id: [], +# create blueprint +def make_task(*, type, increment, field=None, check, stage="parallel"): + return TaskConfig( + type=type, + increment=increment, + model_field=getattr(Model, field) if field else None, + check_program=check, blocks=[], - execution_stage="final", - ), -} - + execution_stage=stage, + ) + +# define task info for celery +header_syntax = make_task(type=ValidationTask.Type.HEADER_SYNTAX, increment=5, field='status_header_syntax', check=check_header_syntax, stage="serial") +header = make_task(type=ValidationTask.Type.HEADER, increment=10, field='status_header', check=check_validate_header, stage="serial") +syntax = make_task(type=ValidationTask.Type.SYNTAX, increment=5, field='status_syntax', check=check_syntax, stage="serial") +prereq = make_task(type=ValidationTask.Type.PREREQUISITES, increment=10, field='status_prereq', check=check_gherkin_prereq, stage="serial") +schema = make_task(type=ValidationTask.Type.SCHEMA, increment=10, field='status_schema', check=check_schema) +digital_signatures = make_task(type=ValidationTask.Type.DIGITAL_SIGNATURES, increment=5, field='status_signatures', check=check_signatures) +bsdd = 
make_task(type=ValidationTask.Type.BSDD, increment=0, field='status_bsdd', check=check_bsdd) +normative_ia = make_task(type=ValidationTask.Type.NORMATIVE_IA, increment=20, field='status_ia', check=check_gherkin_ia) +normative_ip = make_task(type=ValidationTask.Type.NORMATIVE_IP, increment=20, field='status_ip', check=check_gherkin_ip) +industry_practices = make_task(type=ValidationTask.Type.INDUSTRY_PRACTICES, increment=10, field='status_industry_practices', check=check_gherkin_best_practice) +instance_completion = make_task(type=ValidationTask.Type.INSTANCE_COMPLETION, increment=5, field=None, check=check_instance_completion, stage="final") + +# block tasks on error +post_tasks = [digital_signatures, schema, normative_ia, normative_ip, industry_practices, instance_completion] +header_syntax.blocks = [header, syntax, prereq] + post_tasks +syntax.blocks = post_tasks.copy() +prereq.blocks = post_tasks.copy() + +# register +ALL_TASKS = [ + header_syntax, header, syntax, prereq, + schema, digital_signatures, bsdd, + normative_ia, normative_ip, industry_practices, instance_completion, +] class TaskRegistry: def __init__(self, config_map: dict[str, TaskConfig]): self._configs = config_map @@ -176,7 +99,7 @@ def __init__(self, config_map: dict[str, TaskConfig]): self._by_task_type_name = {cfg.type.name: name for name, cfg in config_map.items()} def get_config_by_celery_name(self, name: str) -> TaskConfig: - return self._configs.get(name) + return self._configs[self.get_task_type_from_celery_name(name)] def get_celery_name_by_task_type(self, task_type: ValidationTask.Type) -> str: return self._by_task_type.get(task_type) @@ -184,21 +107,28 @@ def get_celery_name_by_task_type(self, task_type: ValidationTask.Type) -> str: def get_celery_name_by_task_type_name(self, task_type_name: str) -> str: return self._by_task_type_name.get(task_type_name) - def get_blocked_tasks(self, task_name: str) -> typing.List[str]: - return self._configs[task_name].blocks or [] + def 
get_blocked_tasks(self, task_type: ValidationTask.Type) -> typing.List[TaskConfig]: + return self._configs[task_type].blocks or [] def get_tasks_by_stage(self, stage: str) -> typing.List[str]: - return [name for name, cfg in self._configs.items() if cfg.execution_stage == stage] + return [cfg for cfg in self._configs.values() if cfg.execution_stage == stage] - def __getitem__(self, task_name: str) -> TaskConfig: - return self._configs[task_name] + def __getitem__(self, task_type: ValidationTask.Type) -> TaskConfig: + return self._configs[task_type] - def get_blockers_of(self, task_name: str) -> typing.List[str]: + def get_blockers_of(self, task_type: ValidationTask.Type) -> typing.List[ValidationTask.Type]: return [ - blocker_name - for blocker_name, cfg in self._configs.items() - if task_name in (cfg.blocks or []) + blocker_type + for blocker_type, cfg in self._configs.items() + if any(block.type == task_type for block in cfg.blocks or []) ] - + def all(self) -> dict[str, TaskConfig]: - return self._configs \ No newline at end of file + return self._configs + + def total_increment(self) -> int: + return sum(cfg.increment for cfg in self._configs.values()) + +task_registry = TaskRegistry({task.type: task for task in ALL_TASKS}) + +# import pdb; pdb.set_trace() diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index a3bfb06b..f538fa57 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -21,15 +21,14 @@ from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT, MEDIA_ROOT from apps.ifc_validation_models.decorators import requires_django_user_context from apps.ifc_validation_models.models import * -from apps.ifc_validation.task_configs import TASK_CONFIGS, TaskRegistry +from apps.ifc_validation.task_configs import task_registry from .email_tasks import * -task_registry = TaskRegistry(TASK_CONFIGS) logger = get_task_logger(__name__) -assert sum(cfg.increment for cfg in 
TASK_CONFIGS.values()) == 100 +assert task_registry.total_increment() == 100 def run_task( task: ValidationTask, @@ -233,7 +232,8 @@ def wrapper(self, prev_result, id, file_name, *args, **kwargs): # Chord results from parallel tasks arrive as a list of dicts; merge them into a single dict for consistency - prev_result = functools.reduce(operator.or_, filter(lambda x: isinstance(x, dict), prev_result), {}) + if task_type == ValidationTask.Type.INSTANCE_COMPLETION: + prev_result = functools.reduce(operator.or_, filter(lambda x: isinstance(x, dict), prev_result), {}) block_current_task = any( not prev_result.get(blocker, {}).get('is_valid', True) for blocker in task_registry.get_blockers_of(get_task_type(self.name)) @@ -332,32 +332,32 @@ def instance_completion_subtask(self, task, prev_result, request, file_path, *ar @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) def normative_rules_ia_validation_subtask(self, task, prev_result, request, file_path, **kwargs): - check_program = task_registry['normative_rules_ia_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) logger.info(f'qualname : {self.__qualname__}') return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ia') @validation_task_runner(ValidationTask.Type.NORMATIVE_IP) def normative_rules_ip_validation_subtask(self, task, prev_result, request, file_path, **kwargs): - check_program = task_registry['normative_rules_ip_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ip') @validation_task_runner(ValidationTask.Type.PREREQUISITES) def prerequisites_subtask(self, task, prev_result, request, file_path, **kwargs): - check_program = task_registry['prerequisites_subtask'].check_program(file_path, task.id) + 
check_program = task_registry[task.type].check_program(file_path, task.id) return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_prereq') @validation_task_runner(ValidationTask.Type.SYNTAX) def syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): - check_program = task_registry['syntax_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_syntax') @validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) def header_syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): - check_program = task_registry['header_syntax_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_header_syntax') @@ -412,7 +412,7 @@ def run_syntax_subtask(self, task, prev_result, request, file_path, check_progra @validation_task_runner(ValidationTask.Type.SCHEMA) def schema_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) - check_program = task_registry['schema_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( task=task, @@ -494,7 +494,7 @@ def is_schema_error(line): @validation_task_runner(ValidationTask.Type.HEADER) def header_validation_subtask(self, task, prev_result, request, file_path, **kwargs): task_type = get_task_type(self.name) - check_program = task_registry['header_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( task=task, check_program = check_program, @@ -640,7 +640,7 @@ def 
create_outcome(di): @validation_task_runner(ValidationTask.Type.BSDD) def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) - check_program = task_registry['bsdd_validation_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( task=task, @@ -695,7 +695,7 @@ def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, @validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) def industry_practices_subtask(self, task, prev_result, request, file_path): task_type = get_task_type(self.name) - check_program = task_registry['industry_practices_subtask'].check_program(file_path, task.id) + check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( task=task, From e47811980a0205c7e70eabd39466e4e037d7dad2 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 27 Jul 2025 19:07:27 +0100 Subject: [PATCH 09/21] update_progress inside single decorator --- backend/apps/ifc_validation/tasks.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks.py index f538fa57..69290024 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks.py @@ -77,25 +77,6 @@ def get_internal_result(task_type, prev_result, is_valid, reason): return {**prev_result, task_type: current_result} -def update_progress(func): - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - return_value = func(self, *args, **kwargs) - try: - request_id = kwargs.get("id") - # @nb not the most efficient because we fetch the ValidationRequest anew, but - # assuming django will cache this efficiently enough for us to keep the code clean - request = ValidationRequest.objects.get(pk=request_id) - increment = task_registry[func.__name__].increment - 
request.progress = min(request.progress + increment, 100) - request.save() - except Exception as e: - print(f"Error updating progress for {func.__name__}: {e}") - return return_value - return wrapper - - - @functools.lru_cache(maxsize=1024) def get_absolute_file_path(file_name): @@ -226,7 +207,6 @@ def decorator(func): @shared_task(bind=True) @log_execution @requires_django_user_context - @update_progress @functools.wraps(func) def wrapper(self, prev_result, id, file_name, *args, **kwargs): @@ -241,6 +221,11 @@ def wrapper(self, prev_result, id, file_name, *args, **kwargs): request = ValidationRequest.objects.get(pk=id) file_path = get_absolute_file_path(request.file.name) task = ValidationTask.objects.create(request=request, type=task_type) + + # update progress + increment = task_registry[task_type].increment + request.progress = min(request.progress + increment, 100) + request.save() if not block_current_task: task.mark_as_initiated() From 3a8eebea9dbe45d3f068ca90b1d4e92441268c40 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 27 Jul 2025 19:31:06 +0100 Subject: [PATCH 10/21] Move all tasks to separate folder --- backend/apps/ifc_validation/tasks/__init__.py | 3 +++ .../{task_configs.py => tasks/configs.py} | 10 ++++++---- backend/apps/ifc_validation/{ => tasks}/email_tasks.py | 0 backend/apps/ifc_validation/{ => tasks}/tasks.py | 2 +- backend/apps/ifc_validation_bff/views_legacy.py | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 backend/apps/ifc_validation/tasks/__init__.py rename backend/apps/ifc_validation/{task_configs.py => tasks/configs.py} (93%) rename backend/apps/ifc_validation/{ => tasks}/email_tasks.py (100%) rename backend/apps/ifc_validation/{ => tasks}/tasks.py (99%) diff --git a/backend/apps/ifc_validation/tasks/__init__.py b/backend/apps/ifc_validation/tasks/__init__.py new file mode 100644 index 00000000..5db1f582 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/__init__.py @@ -0,0 +1,3 @@ +from .tasks 
import ifc_file_validation_task + +__all__ = ["ifc_file_validation_task"] \ No newline at end of file diff --git a/backend/apps/ifc_validation/task_configs.py b/backend/apps/ifc_validation/tasks/configs.py similarity index 93% rename from backend/apps/ifc_validation/task_configs.py rename to backend/apps/ifc_validation/tasks/configs.py index b8511ed6..77f66651 100644 --- a/backend/apps/ifc_validation/task_configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -4,6 +4,8 @@ import os from apps.ifc_validation_models.models import ValidationTask, Model +checks_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "checks")) + def execute_check(*args: str) -> list: return [sys.executable, *args] @@ -17,17 +19,17 @@ def check_schema(file_path: str, task_id: int) -> list: return execute_check("-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path) def check_validate_header(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(os.path.dirname(__file__), "checks", "header_policy", "validate_header.py"), file_path) + return execute_check(os.path.join(checks_dir, "header_policy", "validate_header.py"), file_path) def check_signatures(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py"), file_path) + return execute_check(os.path.join(checks_dir, "signatures", "check_signatures.py"), file_path) def check_bsdd(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(os.path.dirname(__file__), "checks", "check_bsdd.py"), + return execute_check(os.path.join(checks_dir, "check_bsdd.py"), "--file-name", file_path, "--task-id", str(task_id)) def check_gherkin(file_path: str, task_id: int, rule_type: str) -> list: - return execute_check(os.path.join(os.path.dirname(__file__), "checks", "check_gherkin.py"), + return execute_check(os.path.join(checks_dir, "check_gherkin.py"), "--file-name", file_path, 
"--task-id", str(task_id), "--rule-type", rule_type) diff --git a/backend/apps/ifc_validation/email_tasks.py b/backend/apps/ifc_validation/tasks/email_tasks.py similarity index 100% rename from backend/apps/ifc_validation/email_tasks.py rename to backend/apps/ifc_validation/tasks/email_tasks.py diff --git a/backend/apps/ifc_validation/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py similarity index 99% rename from backend/apps/ifc_validation/tasks.py rename to backend/apps/ifc_validation/tasks/tasks.py index 69290024..5dcd4fcc 100644 --- a/backend/apps/ifc_validation/tasks.py +++ b/backend/apps/ifc_validation/tasks/tasks.py @@ -21,7 +21,7 @@ from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT, MEDIA_ROOT from apps.ifc_validation_models.decorators import requires_django_user_context from apps.ifc_validation_models.models import * -from apps.ifc_validation.task_configs import task_registry +from .configs import task_registry from .email_tasks import * diff --git a/backend/apps/ifc_validation_bff/views_legacy.py b/backend/apps/ifc_validation_bff/views_legacy.py index d4d8c2c6..57304751 100644 --- a/backend/apps/ifc_validation_bff/views_legacy.py +++ b/backend/apps/ifc_validation_bff/views_legacy.py @@ -21,7 +21,7 @@ from apps.ifc_validation_models.models import Model from apps.ifc_validation_models.models import UserAdditionalInfo -from apps.ifc_validation.tasks import ifc_file_validation_task +from apps.ifc_validation.tasks.tasks import ifc_file_validation_task from core.settings import MEDIA_ROOT, MAX_FILES_PER_UPLOAD from core.settings import DEVELOPMENT, LOGIN_URL, USE_WHITELIST From 7f6f99393ada959f24526cdb7100fcf4e86b8ad0 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 28 Jul 2025 13:09:10 +0100 Subject: [PATCH 11/21] use db status rather than prev_result of for blocking tasks --- backend/apps/ifc_validation/tasks/tasks.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git 
a/backend/apps/ifc_validation/tasks/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py index 5dcd4fcc..11b53491 100644 --- a/backend/apps/ifc_validation/tasks/tasks.py +++ b/backend/apps/ifc_validation/tasks/tasks.py @@ -209,19 +209,20 @@ def decorator(func): @requires_django_user_context @functools.wraps(func) def wrapper(self, prev_result, id, file_name, *args, **kwargs): - - # Chord results from parallel tasks arrive as a list of dicts; merge them into a single dict for consistency - if task_type == ValidationTask.Type.INSTANCE_COMPLETION: - prev_result = functools.reduce(operator.or_, filter(lambda x: isinstance(x, dict), prev_result), {}) - block_current_task = any( - not prev_result.get(blocker, {}).get('is_valid', True) - for blocker in task_registry.get_blockers_of(get_task_type(self.name)) - ) request = ValidationRequest.objects.get(pk=id) file_path = get_absolute_file_path(request.file.name) + + # Always create the task record, even if it will be skipped due to blocking conditions, + # so it is logged and its status can be marked as 'skipped' task = ValidationTask.objects.create(request=request, type=task_type) + model = request.model + block_current_task = any( + getattr(model, task_registry[blocker].status_field.name) == Model.Status.INVALID + for blocker in task_registry.get_blockers_of(task_type) + ) + # update progress increment = task_registry[task_type].increment request.progress = min(request.progress + increment, 100) From f46c77995aefa142c94b18b62dfbb5960123d525 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 28 Jul 2025 13:36:25 +0100 Subject: [PATCH 12/21] rm internal results, rely on db --- backend/apps/ifc_validation/tasks/tasks.py | 115 ++++++--------------- 1 file changed, 34 insertions(+), 81 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py index 11b53491..2256ca19 100644 --- a/backend/apps/ifc_validation/tasks/tasks.py +++ 
b/backend/apps/ifc_validation/tasks/tasks.py @@ -68,14 +68,6 @@ def with_model(request_id): with transaction.atomic(): yield get_or_create_ifc_model(request_id) -def get_internal_result(task_type, prev_result, is_valid, reason): - prev_result = prev_result or {} - current_result = { - 'is_valid': is_valid, - 'reason': reason - } - return {**prev_result, task_type: current_result} - @functools.lru_cache(maxsize=1024) def get_absolute_file_path(file_name): @@ -208,7 +200,8 @@ def decorator(func): @log_execution @requires_django_user_context @functools.wraps(func) - def wrapper(self, prev_result, id, file_name, *args, **kwargs): + def wrapper(self, *args, **kwargs): + id = kwargs.get('id') request = ValidationRequest.objects.get(pk=id) file_path = get_absolute_file_path(request.file.name) @@ -218,26 +211,23 @@ def wrapper(self, prev_result, id, file_name, *args, **kwargs): task = ValidationTask.objects.create(request=request, type=task_type) model = request.model - block_current_task = any( - getattr(model, task_registry[blocker].status_field.name) == Model.Status.INVALID - for blocker in task_registry.get_blockers_of(task_type) - ) + invalid_blockers = list(filter( + lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, + task_registry.get_blockers_of(task_type) + )) # update progress increment = task_registry[task_type].increment request.progress = min(request.progress + increment, 100) request.save() - if not block_current_task: + if not invalid_blockers: task.mark_as_initiated() - return func(self, task, prev_result, request, file_path, *args, **kwargs) + return func(self, task, request, file_path) else: - reason = f'Skipped as prev_result = {prev_result}.' 
+ reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" + logger.debug(reason) task.mark_as_skipped(reason) - is_valid = None - return get_internal_result( - task_type, prev_result, is_valid, reason - ) return wrapper return decorator @@ -287,7 +277,7 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): @validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) -def instance_completion_subtask(self, task, prev_result, request, file_path, *args, **kwargs): +def instance_completion_subtask(self, task, request, file_path, *args, **kwargs): try: ifc_file = ifcopenshell.open(file_path) except: @@ -317,37 +307,37 @@ def instance_completion_subtask(self, task, prev_result, request, file_path, *ar return current_result @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) -def normative_rules_ia_validation_subtask(self, task, prev_result, request, file_path, **kwargs): +def normative_rules_ia_validation_subtask(self, task, request, file_path): check_program = task_registry[task.type].check_program(file_path, task.id) logger.info(f'qualname : {self.__qualname__}') - return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ia') + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ia') @validation_task_runner(ValidationTask.Type.NORMATIVE_IP) -def normative_rules_ip_validation_subtask(self, task, prev_result, request, file_path, **kwargs): +def normative_rules_ip_validation_subtask(self, task, request, file_path): check_program = task_registry[task.type].check_program(file_path, task.id) - return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_ip') + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ip') @validation_task_runner(ValidationTask.Type.PREREQUISITES) -def prerequisites_subtask(self, task, prev_result, request, file_path, **kwargs): +def prerequisites_subtask(self, task, 
request, file_path): check_program = task_registry[task.type].check_program(file_path, task.id) - return run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, 'status_prereq') + return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_prereq') @validation_task_runner(ValidationTask.Type.SYNTAX) -def syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): +def syntax_validation_subtask(self, task, request, file_path): check_program = task_registry[task.type].check_program(file_path, task.id) - return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_syntax') + return run_syntax_subtask(self, task, request, file_path, check_program, 'status_syntax') @validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) -def header_syntax_validation_subtask(self, task, prev_result, request, file_path, **kwargs): +def header_syntax_validation_subtask(self, task, request, file_path): check_program = task_registry[task.type].check_program(file_path, task.id) - return run_syntax_subtask(self, task, prev_result, request, file_path, check_program, 'status_header_syntax') + return run_syntax_subtask(self, task, request, file_path, check_program, 'status_header_syntax') -def run_syntax_subtask(self, task, prev_result, request, file_path, check_program, model_status_field): +def run_syntax_subtask(self, task, request, file_path, check_program, model_status_field): task_type = get_task_type(self.name) proc = run_task( task=task, @@ -390,13 +380,10 @@ def run_syntax_subtask(self, task, prev_result, request, file_path, check_progra else: reason = f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" task.mark_as_completed(reason) - return get_internal_result( - task_type, prev_result, is_valid, reason - ) @validation_task_runner(ValidationTask.Type.SCHEMA) -def schema_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): +def 
schema_validation_subtask(self, task, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) @@ -465,20 +452,12 @@ def is_schema_error(line): model.save(update_fields=['status_schema']) - is_valid = success - if is_valid: - reason = "No IFC schema errors." - task.mark_as_completed(reason) - else: - reason = f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." - task.mark_as_completed(reason) - - return get_internal_result( - task_type, prev_result, is_valid, reason - ) + reason = "No IFC schema errors." if success else f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." + task.mark_as_completed(reason) + @validation_task_runner(ValidationTask.Type.HEADER) -def header_validation_subtask(self, task, prev_result, request, file_path, **kwargs): +def header_validation_subtask(self, task, request, file_path, **kwargs): task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( @@ -570,17 +549,13 @@ def header_validation_subtask(self, task, prev_result, request, file_path, **kwa model.save() # update Task info and return - is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' task.mark_as_completed(reason) - return get_internal_result( - task_type, prev_result, is_valid, reason - ) @validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) -def digital_signatures_subtask(self, task, prev_result, request, file_path, **kwargs): +def digital_signatures_subtask(self, task, request, file_path, **kwargs): task_type = get_task_type(self.name) check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") @@ -610,21 +585,12 @@ def create_outcome(di): ValidationOutcome.objects.bulk_create(list(map(create_outcome, 
output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) model.save(update_fields=['status_signatures']) - if success: - reason = 'Digital signature check completed' - task.mark_as_completed(reason) - is_valid = True - else: - reason = f"Script returned exit code {proc.returncode} and {proc.stderr}" - task.mark_as_completed(reason) - is_valid = False - return get_internal_result( - task_type, prev_result, is_valid, reason - ) + reason = 'Digital signature check completed' if success else f"Script returned exit code {proc.returncode} and {proc.stderr}" + task.mark_as_completed(reason) @validation_task_runner(ValidationTask.Type.BSDD) -def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, **kwargs): +def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) @@ -669,17 +635,13 @@ def bsdd_validation_subtask(self, task, prev_result, request, file_path, *args, model.save(update_fields=['status_bsdd']) # update Task info and return - is_valid = agg_status != Model.Status.INVALID reason = f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" task.mark_as_completed(reason) - return get_internal_result( - task_type, prev_result, is_valid, reason - ) @validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) -def industry_practices_subtask(self, task, prev_result, request, file_path): +def industry_practices_subtask(self, task, request, file_path): task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) @@ -694,15 +656,10 @@ def industry_practices_subtask(self, task, prev_result, request, file_path): model.status_industry_practices = agg_status model.save(update_fields=['status_industry_practices']) - is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' 
task.mark_as_completed(reason) - return get_internal_result( - task_type, prev_result, is_valid, reason - ) - -def run_gherkin_subtask(self, task, prev_result, request, file_path, check_program, status_field): +def run_gherkin_subtask(self, task, request, file_path, check_program, status_field): task_type = get_task_type(self.name) proc = run_task( @@ -717,9 +674,5 @@ def run_gherkin_subtask(self, task, prev_result, request, file_path, check_progr setattr(model, status_field, agg_status) model.save(update_fields=[status_field]) - is_valid = agg_status != Model.Status.INVALID reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' - task.mark_as_completed(reason) - return get_internal_result( - task_type, prev_result, is_valid, reason - ) + task.mark_as_completed(reason) \ No newline at end of file From 3861e60e95a580d71c84251b7d077126a1c3e29e Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 29 Jul 2025 13:18:02 +0100 Subject: [PATCH 13/21] configuration improvements --- backend/apps/ifc_validation/tasks/configs.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index 77f66651..3894a94f 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -import typing +from typing import List, Optional, Callable import sys import os from apps.ifc_validation_models.models import ValidationTask, Model @@ -53,17 +53,19 @@ def check_instance_completion(file_path, task_id): class TaskConfig: type: str increment: int - model_field: str - check_program: typing.Callable[[str], list] - blocks: typing.Optional[typing.List[str]] + status_field: Optional[str] + check_program: Callable[[str], list] + blocks: Optional[List[str]] execution_stage: str = "parallel" + run: Callable | None = None + # create blueprint def 
make_task(*, type, increment, field=None, check, stage="parallel"): return TaskConfig( type=type, increment=increment, - model_field=getattr(Model, field) if field else None, + status_field=Model._meta.get_field(field) if field else None, check_program=check, blocks=[], execution_stage=stage, @@ -109,16 +111,16 @@ def get_celery_name_by_task_type(self, task_type: ValidationTask.Type) -> str: def get_celery_name_by_task_type_name(self, task_type_name: str) -> str: return self._by_task_type_name.get(task_type_name) - def get_blocked_tasks(self, task_type: ValidationTask.Type) -> typing.List[TaskConfig]: + def get_blocked_tasks(self, task_type: ValidationTask.Type) -> List[TaskConfig]: return self._configs[task_type].blocks or [] - def get_tasks_by_stage(self, stage: str) -> typing.List[str]: + def get_tasks_by_stage(self, stage: str) -> List[str]: return [cfg for cfg in self._configs.values() if cfg.execution_stage == stage] def __getitem__(self, task_type: ValidationTask.Type) -> TaskConfig: return self._configs[task_type] - def get_blockers_of(self, task_type: ValidationTask.Type) -> typing.List[ValidationTask.Type]: + def get_blockers_of(self, task_type: ValidationTask.Type) -> List[ValidationTask.Type]: return [ blocker_type for blocker_type, cfg in self._configs.items() From fa85b8586f6cd1a4c8923280c0deacf939782b53 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 29 Jul 2025 15:52:47 +0100 Subject: [PATCH 14/21] update ifcopenshell, align tests --- .github/workflows/ci_cd.yml | 2 +- backend/Makefile | 6 ++-- backend/apps/ifc_validation/tasks/__init__.py | 28 +++++++++++++++++-- backend/apps/ifc_validation/tasks/tasks.py | 14 ++++++---- docker/backend/Dockerfile | 2 +- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index fadf1f17..f2c43b1b 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -87,7 +87,7 @@ jobs: source venv/bin/activate # use version of 
ifcopenshell with desired schema parsing # TODO: revert to pyPI when schema parsing is published in the future - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.3-260bc80-linux64.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" mkdir -p venv/lib/python3.11/site-packages unzip -d venv/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip diff --git a/backend/Makefile b/backend/Makefile index 5b9377dc..a9697166 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -14,7 +14,7 @@ venv: install: venv $(PIP) install --upgrade pip $(PIP) install -r requirements.txt - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.3-260bc80-linux64.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip -f -d $(VIRTUAL_ENV)/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip rm /tmp/ifcopenshell_python.zip @@ -22,7 +22,7 @@ install: venv install-macos: venv $(PIP) install --upgrade pip $(PIP) install -r requirements.txt - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.3-260bc80-macos64.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-macos64.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip /tmp/ifcopenshell_python.zip -d $(VIRTUAL_ENV)/lib/python3.11/site-packages rm /tmp/ifcopenshell_python.zip @@ -30,7 +30,7 @@ install-macos: venv install-macos-m1: venv $(PIP) install --upgrade pip $(PIP) install -r requirements.txt - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.3-260bc80-macosm164.zip" + 
wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-macosm164.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip /tmp/ifcopenshell_python.zip -d $(VIRTUAL_ENV)/lib/python3.11/site-packages rm /tmp/ifcopenshell_python.zip diff --git a/backend/apps/ifc_validation/tasks/__init__.py b/backend/apps/ifc_validation/tasks/__init__.py index 5db1f582..98e92329 100644 --- a/backend/apps/ifc_validation/tasks/__init__.py +++ b/backend/apps/ifc_validation/tasks/__init__.py @@ -1,3 +1,27 @@ -from .tasks import ifc_file_validation_task +from .tasks import ( + ifc_file_validation_task, + header_syntax_validation_subtask, + header_validation_subtask, + syntax_validation_subtask, + prerequisites_subtask, + schema_validation_subtask, + normative_rules_ia_validation_subtask, + normative_rules_ip_validation_subtask, + bsdd_validation_subtask, + industry_practices_subtask, + instance_completion_subtask +) -__all__ = ["ifc_file_validation_task"] \ No newline at end of file +__all__ = [ + "ifc_file_validation_task", + "header_syntax_validation_subtask", + "header_validation_subtask", + "syntax_validation_subtask", + "prerequisites_subtask", + "schema_validation_subtask", + "bsdd_validation_subtask", + "normative_rules_ia_validation_subtask", + "normative_rules_ip_validation_subtask", + "industry_practices_subtask", + "instance_completion_subtask", +] \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py index 2256ca19..ebff2b64 100644 --- a/backend/apps/ifc_validation/tasks/tasks.py +++ b/backend/apps/ifc_validation/tasks/tasks.py @@ -210,12 +210,14 @@ def wrapper(self, *args, **kwargs): # so it is logged and its status can be marked as 'skipped' task = ValidationTask.objects.create(request=request, type=task_type) - model = request.model - invalid_blockers = list(filter( - lambda b: getattr(model, task_registry[b].status_field.name) 
== Model.Status.INVALID, - task_registry.get_blockers_of(task_type) - )) - + if model := request.model: + invalid_blockers = list(filter( + lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, + task_registry.get_blockers_of(task_type) + )) + else: # for testing, we're not instantiating a model + invalid_blockers = [] + # update progress increment = task_registry[task_type].increment request.progress = min(request.progress + increment, 100) diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 1b744a1a..4f9822b1 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -37,7 +37,7 @@ RUN --mount=type=cache,target=/root/.cache \ pip install --no-cache-dir -r /app/backend/requirements.txt && \ # use version of ifcopenshell with desired schema parsing # TODO: revert to pyPI when schema parsing is published in the future - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.3-260bc80-linux64.zip" && \ + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" && \ mkdir -p /opt/venv/lib/python3.11/site-packages && \ unzip -d /opt/venv/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip && \ # some cleanup From 54499a76404b9acf7b530b59e0b78c946d2ce8cc Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 29 Jul 2025 15:58:11 +0100 Subject: [PATCH 15/21] trigger tests, rm pdb --- backend/apps/ifc_validation/tasks/configs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index 3894a94f..fb612b62 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -134,5 +134,3 @@ def total_increment(self) -> int: return sum(cfg.increment for cfg in self._configs.values()) task_registry = TaskRegistry({task.type: task for 
task in ALL_TASKS}) - -# import pdb; pdb.set_trace() From 645868dbb7c525f6c59688d8a86bfb36fd227bab Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 29 Jul 2025 19:31:08 +0100 Subject: [PATCH 16/21] remove lines of code --- backend/apps/ifc_validation/tasks/tasks.py | 85 +++++++--------------- 1 file changed, 28 insertions(+), 57 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py index ebff2b64..ef3b7982 100644 --- a/backend/apps/ifc_validation/tasks/tasks.py +++ b/backend/apps/ifc_validation/tasks/tasks.py @@ -52,9 +52,7 @@ def run_task( logger.exception(f"{type(err).__name__} in task {task.id} : {task.type}") task.mark_as_failed(err) raise type(err)(f"Unknown error during validation task {task.id}: {task.type}") from err - -def get_task_type(name): - return name.split(".")[-1] + def check_proc_success_or_fail(proc, task): if proc.returncode is not None and proc.returncode != 0: @@ -225,7 +223,13 @@ def wrapper(self, *args, **kwargs): if not invalid_blockers: task.mark_as_initiated() - return func(self, task, request, file_path) + try: + reason = func(self, task, request, file_path) + task.mark_as_completed(reason) + logger.debug(f'task completed {task_type}, registered reason is {reason}') + except Exception as err: + task.mark_as_failed(str(err)) + logger.exception(f"Task {task_type} failed: {err}") else: reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" logger.debug(reason) @@ -280,33 +284,20 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): @validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) def instance_completion_subtask(self, task, request, file_path, *args, **kwargs): - try: - ifc_file = ifcopenshell.open(file_path) - except: - reason = f'Failed to open {file_path}. Likely previous tasks also failed.' 
- task.mark_as_completed(reason) - return {'is_valid': None, 'reason': reason} + ifc_file = ifcopenshell.open(file_path) - if ifc_file: - # fetch and update ModelInstance records without ifc_type - with transaction.atomic(): - model_id = request.model.id - model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) - instance_count = model_instances.count() - logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') - - for inst in model_instances.iterator(): - inst.ifc_type = ifc_file[inst.stepfile_id].is_a() - inst.save() - - # update Task info and return - reason = f'Updated {instance_count:,} ModelInstance record(s)' - task.mark_as_completed(reason) - current_result = { # last result, result does not need to contain previous results - 'is_valid': True, - 'reason': reason - } - return current_result + with transaction.atomic(): + model_id = request.model.id + model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) + instance_count = model_instances.count() + logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') + + for inst in model_instances.iterator(): + inst.ifc_type = ifc_file[inst.stepfile_id].is_a() + inst.save() + + return f'Updated {instance_count:,} ModelInstance record(s)' + @validation_task_runner(ValidationTask.Type.NORMATIVE_IA) def normative_rules_ia_validation_subtask(self, task, request, file_path): @@ -340,7 +331,6 @@ def header_syntax_validation_subtask(self, task, request, file_path): def run_syntax_subtask(self, task, request, file_path, check_program, model_status_field): - task_type = get_task_type(self.name) proc = run_task( task=task, check_program = check_program, @@ -375,18 +365,11 @@ def run_syntax_subtask(self, task, request, file_path, check_program, model_stat ) model.save(update_fields=[model_status_field]) - - is_valid = success - if is_valid: - reason = "No IFC syntax error(s)." 
- else: - reason = f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" - task.mark_as_completed(reason) + return "No IFC syntax error(s)." if success else f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" @validation_task_runner(ValidationTask.Type.SCHEMA) def schema_validation_subtask(self, task, request, file_path, *args, **kwargs): - task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( @@ -454,13 +437,11 @@ def is_schema_error(line): model.save(update_fields=['status_schema']) - reason = "No IFC schema errors." if success else f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." - task.mark_as_completed(reason) + return "No IFC schema errors." if success else f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." @validation_task_runner(ValidationTask.Type.HEADER) def header_validation_subtask(self, task, request, file_path, **kwargs): - task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( task=task, @@ -551,14 +532,12 @@ def header_validation_subtask(self, task, request, file_path, **kwargs): model.save() # update Task info and return - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' - task.mark_as_completed(reason) + return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' @validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) def digital_signatures_subtask(self, task, request, file_path, **kwargs): - task_type = get_task_type(self.name) check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") check_program = [sys.executable, check_script, file_path] @@ -587,13 +566,11 @@ def create_outcome(di): ValidationOutcome.objects.bulk_create(list(map(create_outcome, 
output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) model.save(update_fields=['status_signatures']) - reason = 'Digital signature check completed' if success else f"Script returned exit code {proc.returncode} and {proc.stderr}" - task.mark_as_completed(reason) + return 'Digital signature check completed' if success else f"Script returned exit code {proc.returncode} and {proc.stderr}" @validation_task_runner(ValidationTask.Type.BSDD) def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): - task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( @@ -636,15 +613,12 @@ def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): model.status_bsdd = agg_status model.save(update_fields=['status_bsdd']) - # update Task info and return - reason = f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" - task.mark_as_completed(reason) + return f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" @validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) def industry_practices_subtask(self, task, request, file_path): - task_type = get_task_type(self.name) check_program = task_registry[task.type].check_program(file_path, task.id) proc = run_task( @@ -658,11 +632,9 @@ def industry_practices_subtask(self, task, request, file_path): model.status_industry_practices = agg_status model.save(update_fields=['status_industry_practices']) - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' - task.mark_as_completed(reason) + return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' def run_gherkin_subtask(self, task, request, file_path, check_program, status_field): - task_type = get_task_type(self.name) proc = run_task( task=task, @@ -676,5 +648,4 @@ def run_gherkin_subtask(self, task, request, file_path, check_program, status_fi setattr(model, 
status_field, agg_status) model.save(update_fields=[status_field]) - reason = f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' - task.mark_as_completed(reason) \ No newline at end of file + return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' \ No newline at end of file From 4f2c1c032465943c634c071957de10f3dbbbb9ea Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sat, 2 Aug 2025 21:26:47 +0100 Subject: [PATCH 17/21] add separate execution layer --- .../ifc_validation/tasks/check_programs.py | 47 ++++++++++ backend/apps/ifc_validation/tasks/configs.py | 90 ++++++------------- 2 files changed, 72 insertions(+), 65 deletions(-) create mode 100644 backend/apps/ifc_validation/tasks/check_programs.py diff --git a/backend/apps/ifc_validation/tasks/check_programs.py b/backend/apps/ifc_validation/tasks/check_programs.py new file mode 100644 index 00000000..fe6754ae --- /dev/null +++ b/backend/apps/ifc_validation/tasks/check_programs.py @@ -0,0 +1,47 @@ +import os +import sys + +checks_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "checks")) + +def execute_check(*args: str) -> list: + return [sys.executable, *args] + +def check_syntax(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.simple_spf", "--json", file_path) + +def check_header_syntax(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path) + +def check_schema(file_path: str, task_id: int) -> list: + return execute_check("-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path) + +def check_header(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(checks_dir, "header_policy", "validate_header.py"), file_path) + +def check_digital_signatures(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(checks_dir, "signatures", "check_signatures.py"), file_path) + +def 
check_bsdd(file_path: str, task_id: int) -> list: + return execute_check(os.path.join(checks_dir, "check_bsdd.py"), + "--file-name", file_path, "--task-id", str(task_id)) + +def check_gherkin(file_path: str, task_id: int, rule_type: str) -> list: + return execute_check(os.path.join(checks_dir, "check_gherkin.py"), + "--file-name", file_path, + "--task-id", str(task_id), + "--rule-type", rule_type) + +def check_prerequisites(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "CRITICAL") + ["--purepythonparser"] + +def check_normative_ia(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "IMPLEMENTER_AGREEMENT") + +def check_normative_ip(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "INFORMAL_PROPOSITION") + +def check_industry_practices(file_path: str, task_id: int) -> list: + return check_gherkin(file_path, task_id, "INDUSTRY_PRACTICE") + +def check_instance_completion(file_path, task_id): + return [] \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index fb612b62..a89a5ca1 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -1,54 +1,7 @@ from dataclasses import dataclass from typing import List, Optional, Callable -import sys -import os from apps.ifc_validation_models.models import ValidationTask, Model - -checks_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "checks")) - -def execute_check(*args: str) -> list: - return [sys.executable, *args] - -def check_syntax(file_path: str, task_id: int) -> list: - return execute_check("-m", "ifcopenshell.simple_spf", "--json", file_path) - -def check_header_syntax(file_path: str, task_id: int) -> list: - return execute_check("-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path) - -def check_schema(file_path: str, task_id: int) -> list: - return 
execute_check("-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path) - -def check_validate_header(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "header_policy", "validate_header.py"), file_path) - -def check_signatures(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "signatures", "check_signatures.py"), file_path) - -def check_bsdd(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "check_bsdd.py"), - "--file-name", file_path, "--task-id", str(task_id)) - -def check_gherkin(file_path: str, task_id: int, rule_type: str) -> list: - return execute_check(os.path.join(checks_dir, "check_gherkin.py"), - "--file-name", file_path, - "--task-id", str(task_id), - "--rule-type", rule_type) - -def check_gherkin_prereq(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "CRITICAL") + ["--purepythonparser"] - -def check_gherkin_ia(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "IMPLEMENTER_AGREEMENT") - -def check_gherkin_ip(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "INFORMAL_PROPOSITION") - -def check_gherkin_best_practice(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "INDUSTRY_PRACTICE") - -def check_instance_completion(file_path, task_id): - return [] - +from . import check_programs @dataclass class TaskConfig: type: str @@ -61,38 +14,45 @@ class TaskConfig: # create blueprint -def make_task(*, type, increment, field=None, check, stage="parallel"): +def make_task(*, type, increment, field=None, stage="parallel"): + try: + check_program = getattr(check_programs, f'check_{type.name.lower()}') #e.g. check_header_syntax(), must be always the 'check' followed by the ValidationTask.Type + except AttributeError as err: + raise ImportError( + f"Missing executor function for task type '{type.name}'. 
" + f"Expected a function named 'check_{type.name.lower()}' in 'executors.py'." + ) return TaskConfig( type=type, increment=increment, status_field=Model._meta.get_field(field) if field else None, - check_program=check, + check_program=check_program, blocks=[], execution_stage=stage, ) # define task info for celery -header_syntax = make_task(type=ValidationTask.Type.HEADER_SYNTAX, increment=5, field='status_header_syntax', check=check_header_syntax, stage="serial") -header = make_task(type=ValidationTask.Type.HEADER, increment=10, field='status_header', check=check_validate_header, stage="serial") -syntax = make_task(type=ValidationTask.Type.SYNTAX, increment=5, field='status_syntax', check=check_syntax, stage="serial") -prereq = make_task(type=ValidationTask.Type.PREREQUISITES, increment=10, field='status_prereq', check=check_gherkin_prereq, stage="serial") -schema = make_task(type=ValidationTask.Type.SCHEMA, increment=10, field='status_schema', check=check_schema) -digital_signatures = make_task(type=ValidationTask.Type.DIGITAL_SIGNATURES, increment=5, field='status_signatures', check=check_signatures) -bsdd = make_task(type=ValidationTask.Type.BSDD, increment=0, field='status_bsdd', check=check_bsdd) -normative_ia = make_task(type=ValidationTask.Type.NORMATIVE_IA, increment=20, field='status_ia', check=check_gherkin_ia) -normative_ip = make_task(type=ValidationTask.Type.NORMATIVE_IP, increment=20, field='status_ip', check=check_gherkin_ip) -industry_practices = make_task(type=ValidationTask.Type.INDUSTRY_PRACTICES, increment=10, field='status_industry_practices', check=check_gherkin_best_practice) -instance_completion = make_task(type=ValidationTask.Type.INSTANCE_COMPLETION, increment=5, field=None, check=check_instance_completion, stage="final") +header_syntax = make_task(type=ValidationTask.Type.HEADER_SYNTAX, increment=5, field='status_header_syntax', stage="serial") +header = make_task(type=ValidationTask.Type.HEADER, increment=10, field='status_header', 
stage="serial") +syntax = make_task(type=ValidationTask.Type.SYNTAX, increment=5, field='status_syntax', stage="serial") +prerequisites = make_task(type=ValidationTask.Type.PREREQUISITES, increment=10, field='status_prereq', stage="serial") +schema = make_task(type=ValidationTask.Type.SCHEMA, increment=10, field='status_schema') +digital_signatures = make_task(type=ValidationTask.Type.DIGITAL_SIGNATURES, increment=5, field='status_signatures') +bsdd = make_task(type=ValidationTask.Type.BSDD, increment=0, field='status_bsdd') +normative_ia = make_task(type=ValidationTask.Type.NORMATIVE_IA, increment=20, field='status_ia') +normative_ip = make_task(type=ValidationTask.Type.NORMATIVE_IP, increment=20, field='status_ip') +industry_practices = make_task(type=ValidationTask.Type.INDUSTRY_PRACTICES, increment=10, field='status_industry_practices') +instance_completion = make_task(type=ValidationTask.Type.INSTANCE_COMPLETION, increment=5, field=None, stage="final") # block tasks on error post_tasks = [digital_signatures, schema, normative_ia, normative_ip, industry_practices, instance_completion] -header_syntax.blocks = [header, syntax, prereq] + post_tasks +header_syntax.blocks = [header, syntax, prerequisites] + post_tasks syntax.blocks = post_tasks.copy() -prereq.blocks = post_tasks.copy() +prerequisites.blocks = post_tasks.copy() # register ALL_TASKS = [ - header_syntax, header, syntax, prereq, + header_syntax, header, syntax, prerequisites, schema, digital_signatures, bsdd, normative_ia, normative_ip, industry_practices, instance_completion, ] @@ -133,4 +93,4 @@ def all(self) -> dict[str, TaskConfig]: def total_increment(self) -> int: return sum(cfg.increment for cfg in self._configs.values()) -task_registry = TaskRegistry({task.type: task for task in ALL_TASKS}) +task_registry = TaskRegistry({task.type: task for task in ALL_TASKS}) \ No newline at end of file From 25489bb1f56e77bc106fea695ca42e4d00f430ab Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Sun, 3 Aug 
2025 20:43:53 +0100 Subject: [PATCH 18/21] check_program bugfix --- backend/apps/ifc_validation/tasks/configs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index a89a5ca1..d72edfba 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -1,13 +1,13 @@ from dataclasses import dataclass from typing import List, Optional, Callable from apps.ifc_validation_models.models import ValidationTask, Model -from . import check_programs +from . import check_programs # execution layer @dataclass class TaskConfig: type: str increment: int status_field: Optional[str] - check_program: Callable[[str], list] + check_program: Callable[[str, int], list] blocks: Optional[List[str]] execution_stage: str = "parallel" run: Callable | None = None @@ -20,7 +20,7 @@ def make_task(*, type, increment, field=None, stage="parallel"): except AttributeError as err: raise ImportError( f"Missing executor function for task type '{type.name}'. " - f"Expected a function named 'check_{type.name.lower()}' in 'executors.py'." + f"Expected a function named 'check_{type.name.lower()}' in 'check_programs.py'." 
) return TaskConfig( type=type, From 1fa7c38501f92f4c3c1b81e43c831e66a4a79e52 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 4 Aug 2025 21:43:26 +0100 Subject: [PATCH 19/21] refactor processing layer, create task context --- backend/Makefile | 2 +- backend/apps/ifc_validation/tasks/__init__.py | 6 +- .../ifc_validation/tasks/check_programs.py | 210 +++++- backend/apps/ifc_validation/tasks/configs.py | 28 +- backend/apps/ifc_validation/tasks/context.py | 11 + .../apps/ifc_validation/tasks/email_tasks.py | 4 - backend/apps/ifc_validation/tasks/logger.py | 2 + .../tasks/processing/__init__.py | 13 + .../ifc_validation/tasks/processing/bsdd.py | 42 ++ .../tasks/processing/digital_signatures.py | 28 + .../tasks/processing/gherkin.py | 25 + .../ifc_validation/tasks/processing/header.py | 86 +++ .../tasks/processing/instance_completion.py | 21 + .../ifc_validation/tasks/processing/schema.py | 62 ++ .../ifc_validation/tasks/processing/syntax.py | 49 ++ .../apps/ifc_validation/tasks/task_runner.py | 236 +++++++ backend/apps/ifc_validation/tasks/tasks.py | 651 ------------------ backend/apps/ifc_validation/tasks/utils.py | 75 ++ .../apps/ifc_validation_bff/views_legacy.py | 2 +- 19 files changed, 855 insertions(+), 698 deletions(-) create mode 100644 backend/apps/ifc_validation/tasks/context.py create mode 100644 backend/apps/ifc_validation/tasks/logger.py create mode 100644 backend/apps/ifc_validation/tasks/processing/__init__.py create mode 100644 backend/apps/ifc_validation/tasks/processing/bsdd.py create mode 100644 backend/apps/ifc_validation/tasks/processing/digital_signatures.py create mode 100644 backend/apps/ifc_validation/tasks/processing/gherkin.py create mode 100644 backend/apps/ifc_validation/tasks/processing/header.py create mode 100644 backend/apps/ifc_validation/tasks/processing/instance_completion.py create mode 100644 backend/apps/ifc_validation/tasks/processing/schema.py create mode 100644 backend/apps/ifc_validation/tasks/processing/syntax.py 
create mode 100644 backend/apps/ifc_validation/tasks/task_runner.py delete mode 100644 backend/apps/ifc_validation/tasks/tasks.py create mode 100644 backend/apps/ifc_validation/tasks/utils.py diff --git a/backend/Makefile b/backend/Makefile index bcadab01..abedef64 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -66,7 +66,7 @@ stop-worker: -$(PYTHON) -m celery -A core control shutdown \ --destination=worker@$(shell hostname) || true -test: test-models test-bsdd-task test-header-validation-task test-syntax-task test-syntax-header-validation-task test-schema-task +test: test-models test-header-validation-task test-syntax-task test-syntax-header-validation-task test-schema-task test-models: MEDIA_ROOT=./apps/ifc_validation/fixtures $(PYTHON) manage.py test apps/ifc_validation_models --settings apps.ifc_validation_models.test_settings --debug-mode --verbosity 3 diff --git a/backend/apps/ifc_validation/tasks/__init__.py b/backend/apps/ifc_validation/tasks/__init__.py index 98e92329..603604bb 100644 --- a/backend/apps/ifc_validation/tasks/__init__.py +++ b/backend/apps/ifc_validation/tasks/__init__.py @@ -1,4 +1,8 @@ -from .tasks import ( +from .context import TaskContext +from .utils import with_model, get_absolute_file_path, get_or_create_ifc_model +from .logger import logger + +from .task_runner import ( ifc_file_validation_task, header_syntax_validation_subtask, header_validation_subtask, diff --git a/backend/apps/ifc_validation/tasks/check_programs.py b/backend/apps/ifc_validation/tasks/check_programs.py index fe6754ae..57cdf154 100644 --- a/backend/apps/ifc_validation/tasks/check_programs.py +++ b/backend/apps/ifc_validation/tasks/check_programs.py @@ -1,47 +1,197 @@ import os import sys +import json +import subprocess +from typing import List + +from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT +from apps.ifc_validation_models.models import ValidationTask + +from .logger import logger +from .context import TaskContext checks_dir = 
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "checks")) -def execute_check(*args: str) -> list: - return [sys.executable, *args] +def check_syntax(context:TaskContext): + proc = run_subprocess(context.task, [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", context.file_path ]) + output = proc.stdout + error_output = proc.stderr + success = (len(list(filter(None, output.split("\n")))) == 0) and len(error_output) == 0 + context.result = { + 'output': proc.stdout, + 'error_output': proc.stderr, + 'success': success + } + return context + +def check_header_syntax(context:TaskContext): + proc = run_subprocess(context.task, [sys.executable, "-m", "ifcopenshell.simple_spf", "--json", "--only-header", context.file_path]) + output = proc.stdout + error_output = proc.stderr + success = (len(list(filter(None, output.split("\n")))) == 0) and len(error_output) == 0 + context.result = { + 'output': proc.stdout, + 'error_output': proc.stderr, + 'success': success + } + return context -def check_syntax(file_path: str, task_id: int) -> list: - return execute_check("-m", "ifcopenshell.simple_spf", "--json", file_path) +def is_schema_error(line): + try: + json.loads(line) + except ValueError: + return False + return True -def check_header_syntax(file_path: str, task_id: int) -> list: - return execute_check("-m", "ifcopenshell.simple_spf", "--json", "--only-header", file_path) +def check_schema(context:TaskContext): + proc = run_subprocess( + task = context.task, + command = [sys.executable, "-m", "ifcopenshell.validate", "--json", "--rules", "--fields", context.file_path ] + ) + output = list(filter(is_schema_error, proc.stdout.split("\n"))) + success = proc.returncode >= 0 + valid = len(output) == 0 + + context.result = { + 'output': output, + 'success': success, + 'valid': valid + } + return context + + +def check_header(context:TaskContext): + proc = run_subprocess( + task=context.task, + command=[sys.executable, os.path.join(checks_dir, "header_policy", 
"validate_header.py"), context.file_path] + ) + header_validation = {} + for line in proc.stdout.splitlines(): + try: + header_validation = json.loads(line) + except json.JSONDecodeError: + continue + context.result = header_validation + return context -def check_schema(file_path: str, task_id: int) -> list: - return execute_check("-m", "ifcopenshell.validate", "--json", "--rules", "--fields", file_path) -def check_header(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "header_policy", "validate_header.py"), file_path) +def check_digital_signatures(context:TaskContext): + proc = run_subprocess( + task=context.task, + command=[sys.executable, os.path.join(checks_dir, "signatures", "check_signatures.py"), context.file_path] + ) + output = list(map(json.loads, filter(None, map(lambda s: s.strip(), proc.stdout.split("\n"))))) + success = proc.returncode >= 0 + valid = all(m['signature'] != "invalid" for m in output) + + context.result = { + 'output': output, + 'success': success, + 'valid': valid + } + return context + -def check_digital_signatures(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "signatures", "check_signatures.py"), file_path) +def check_bsdd(context:TaskContext): + proc = run_subprocess( + task=context.task, + command=[sys.executable, os.path.join(checks_dir, "check_bsdd.py"), "-file-name", context.file_path, "--task-id", str(context.task.id) ] + ) + raw_output = check_proc_success_or_fail(proc, context.task) + logger.info(f'Output for {context.config.type}: {raw_output}') + context.result = raw_output + return context -def check_bsdd(file_path: str, task_id: int) -> list: - return execute_check(os.path.join(checks_dir, "check_bsdd.py"), - "--file-name", file_path, "--task-id", str(task_id)) +def check_prerequisites(context:TaskContext): + proc = run_subprocess( + task=context.task, + command = [ + sys.executable, + os.path.join(checks_dir, "check_gherkin.py"), + 
"--file-name", context.file_path, + "--task-id", str(context.task.id), + "--rule-type", "CRITICAL", + "--purepythonparser" + ] + ) + raw_output = check_proc_success_or_fail(proc, context.task) + context.result = raw_output + return context -def check_gherkin(file_path: str, task_id: int, rule_type: str) -> list: - return execute_check(os.path.join(checks_dir, "check_gherkin.py"), - "--file-name", file_path, - "--task-id", str(task_id), - "--rule-type", rule_type) +def check_normative_ia(context:TaskContext): + proc = run_subprocess( + task=context.task, + command = [ + sys.executable, + os.path.join(checks_dir, "check_gherkin.py"), + "--file-name", context.file_path, + "--task-id", str(context.task.id), + "--rule-type", "IMPLEMENTER_AGREEMENT" + ] + ) + raw_output = check_proc_success_or_fail(proc, context.task) + context.result = raw_output + return context -def check_prerequisites(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "CRITICAL") + ["--purepythonparser"] +def check_normative_ip(context:TaskContext): + proc = run_subprocess( + task=context.task, + command = [ + sys.executable, + os.path.join(checks_dir, "check_gherkin.py"), + "--file-name", context.file_path, + "--task-id", str(context.task.id), + "--rule-type", "INFORMAL_PROPOSITION" + ] + ) + raw_output = check_proc_success_or_fail(proc, context.task) + context.result = raw_output + return context -def check_normative_ia(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "IMPLEMENTER_AGREEMENT") +def check_industry_practices(context:TaskContext): + proc = run_subprocess( + task=context.task, + command = [ + sys.executable, + os.path.join(checks_dir, "check_gherkin.py"), + "--file-name", context.file_path, + "--task-id", str(context.task.id), + "--rule-type", "INDUSTRY_PRACTICE" + ] + ) + raw_output = check_proc_success_or_fail(proc, context.task) + context.result = raw_output + return context -def check_normative_ip(file_path: str, 
task_id: int) -> list: - return check_gherkin(file_path, task_id, "INFORMAL_PROPOSITION") +def check_instance_completion(context:TaskContext): + return context -def check_industry_practices(file_path: str, task_id: int) -> list: - return check_gherkin(file_path, task_id, "INDUSTRY_PRACTICE") +def check_proc_success_or_fail(proc, task): + if proc.returncode is not None and proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) + return proc.stdout -def check_instance_completion(file_path, task_id): - return [] \ No newline at end of file +def run_subprocess( + task: ValidationTask, + command: List[str], +) -> subprocess.CompletedProcess[str]: + logger.debug(f'Command for {task.type}: {" ".join(command)}') + task.set_process_details(None, command) + try: + proc = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + timeout=TASK_TIMEOUT_LIMIT, + env= os.environ.copy() + ) + logger.info(f'test run task task name {task.type}, task value : {task}') + return proc + + except Exception as err: + logger.exception(f"{type(err).__name__} in task {task.id} : {task.type}") + task.mark_as_failed(err) + raise type(err)(f"Unknown error during validation task {task.id}: {task.type}") from err \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index d72edfba..b9e5216a 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ b/backend/apps/ifc_validation/tasks/configs.py @@ -2,6 +2,8 @@ from typing import List, Optional, Callable from apps.ifc_validation_models.models import ValidationTask, Model from . import check_programs # execution layer +from . 
import processing # processing layer + @dataclass class TaskConfig: type: str @@ -10,18 +12,23 @@ class TaskConfig: check_program: Callable[[str, int], list] blocks: Optional[List[str]] execution_stage: str = "parallel" - run: Callable | None = None - + process_results: Callable | None = None # create blueprint def make_task(*, type, increment, field=None, stage="parallel"): - try: - check_program = getattr(check_programs, f'check_{type.name.lower()}') #e.g. check_header_syntax(), must be always the 'check' followed by the ValidationTask.Type - except AttributeError as err: - raise ImportError( - f"Missing executor function for task type '{type.name}'. " - f"Expected a function named 'check_{type.name.lower()}' in 'check_programs.py'." - ) + def _load_function(module, prefix, type): + func_name = f"{prefix}_{type.name.lower()}" + try: + return getattr(module, func_name) + except AttributeError: + raise ImportError( + f"Missing `{prefix}` function for task type '{type.name}'. " + f"Expected `{func_name}()` in `{module.__name__}.py`." 
+ ) from None + + check_program = _load_function(check_programs, "check", type) + process_results = _load_function(processing, "process", type) + return TaskConfig( type=type, increment=increment, @@ -29,9 +36,10 @@ def make_task(*, type, increment, field=None, stage="parallel"): check_program=check_program, blocks=[], execution_stage=stage, + process_results = process_results ) -# define task info for celery +# define task info header_syntax = make_task(type=ValidationTask.Type.HEADER_SYNTAX, increment=5, field='status_header_syntax', stage="serial") header = make_task(type=ValidationTask.Type.HEADER, increment=10, field='status_header', stage="serial") syntax = make_task(type=ValidationTask.Type.SYNTAX, increment=5, field='status_syntax', stage="serial") diff --git a/backend/apps/ifc_validation/tasks/context.py b/backend/apps/ifc_validation/tasks/context.py new file mode 100644 index 00000000..ae918bb5 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/context.py @@ -0,0 +1,11 @@ +from typing import Any, Optional +from dataclasses import dataclass +from apps.ifc_validation_models.models import ValidationRequest, ValidationTask + +@dataclass # moving context from execution to processing layer +class TaskContext: + config: Any # Static info -- should be TaskConfig — delayed import due to modular imports + request: ValidationRequest # the current request + task: ValidationTask # the current task + file_path: str # for IFC files + result: Optional[Any] = None # result from execution layer diff --git a/backend/apps/ifc_validation/tasks/email_tasks.py b/backend/apps/ifc_validation/tasks/email_tasks.py index 713c88d2..4d86d7a7 100644 --- a/backend/apps/ifc_validation/tasks/email_tasks.py +++ b/backend/apps/ifc_validation/tasks/email_tasks.py @@ -1,5 +1,4 @@ from celery import shared_task -from celery.utils.log import get_task_logger from django.template.loader import render_to_string from core.utils import log_execution @@ -9,9 +8,6 @@ from 
apps.ifc_validation_models.models import ValidationRequest -logger = get_task_logger(__name__) - - def status_combine(*args): statuses = "-pvnwi" return statuses[max(map(statuses.index, args))] diff --git a/backend/apps/ifc_validation/tasks/logger.py b/backend/apps/ifc_validation/tasks/logger.py new file mode 100644 index 00000000..cf11ea61 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/logger.py @@ -0,0 +1,2 @@ +from celery.utils.log import get_task_logger +logger = get_task_logger("ifc_validation") \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/processing/__init__.py b/backend/apps/ifc_validation/tasks/processing/__init__.py new file mode 100644 index 00000000..40fea8ea --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/__init__.py @@ -0,0 +1,13 @@ +from .instance_completion import process_instance_completion +from .gherkin import ( + process_gherkin_outcomes, + process_normative_ia, + process_normative_ip, + process_prerequisites, + process_industry_practices +) +from .syntax import process_syntax, process_header_syntax +from .schema import process_schema +from .header import process_header +from .digital_signatures import process_digital_signatures +from .bsdd import process_bsdd \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/processing/bsdd.py b/backend/apps/ifc_validation/tasks/processing/bsdd.py new file mode 100644 index 00000000..b5990ef3 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/bsdd.py @@ -0,0 +1,42 @@ + +import json + +from apps.ifc_validation_models.models import Model, ValidationOutcome +from .. 
import TaskContext, logger, with_model + + +def process_bsdd(context:TaskContext): + + with with_model(context.request.id) as model: + + # update Validation Outcomes + json_output = json.loads(context.result) + for message in json_output['messages']: + + outcome = context.task.outcomes.create( + severity=[c[0] for c in ValidationOutcome.OutcomeSeverity.choices if c[1] == (message['severity'])][0], + outcome_code=[c[0] for c in ValidationOutcome.ValidationOutcomeCode.choices if c[1] == (message['outcome'])][0], + observed=message['message'], + feature=json.dumps({ + 'rule': message['rule'] if 'rule' in message else None, + 'category': message['category'] if 'category' in message else None, + 'dictionary': message['dictionary'] if 'dictionary' in message else None, + 'class': message['class'] if 'class' in message else None, + 'instance_id': message['instance_id'] if 'instance_id' in message else None + }) + ) + + if 'instance_id' in message and message['instance_id'] is not None: + instance, _ = model.instances.get_or_create( + stepfile_id = message['instance_id'], + model=model + ) + outcome.instance = instance + outcome.save() + + # update Model info + agg_status = context.task.determine_aggregate_status() + model.status_bsdd = agg_status + model.save(update_fields=['status_bsdd']) + + return f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/processing/digital_signatures.py b/backend/apps/ifc_validation/tasks/processing/digital_signatures.py new file mode 100644 index 00000000..231ee109 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/digital_signatures.py @@ -0,0 +1,28 @@ + +import json + +from core.settings import DJANGO_DB_BULK_CREATE_BATCH_SIZE + +from apps.ifc_validation_models.models import Model, ValidationOutcome +from .. 
import TaskContext, logger, with_model + + +def process_digital_signatures(context:TaskContext): + output, success, valid = (context.result.get(k) for k in ("output", "success", "valid")) + + with with_model(context.request.id) as model: + model.status_signatures = Model.Status.NOT_APPLICABLE if not output else Model.Status.VALID if valid else Model.Status.INVALID + + def create_outcome(di): + return ValidationOutcome( + severity=ValidationOutcome.OutcomeSeverity.ERROR if di.get("signature") == "invalid" else ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.VALUE_ERROR if di.get("signature") == "invalid" else ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=di, + feature=json.dumps({'digital_signature': 1}), + validation_task = context.task + ) + + ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) + + model.save(update_fields=['status_signatures']) + return 'Digital signature check completed' if success else f"Script returned exit code {context.result.returncode} and {context.result.stderr}" diff --git a/backend/apps/ifc_validation/tasks/processing/gherkin.py b/backend/apps/ifc_validation/tasks/processing/gherkin.py new file mode 100644 index 00000000..a38d5a93 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/gherkin.py @@ -0,0 +1,25 @@ +from apps.ifc_validation_models.models import Model + +from .. 
import TaskContext, logger, with_model + +def process_gherkin_outcomes(context:TaskContext): + with with_model(context.request.id) as model: + # @gh todo, actually write gherkin results to DB here, currently in gherkin environment.py + status_field = context.config.status_field.name + agg_status = context.task.determine_aggregate_status() + setattr(model, status_field, agg_status) + model.save(update_fields=[status_field]) + + return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {context.result}' + +def process_normative_ia(context:TaskContext): + return process_gherkin_outcomes(context) + +def process_normative_ip(context:TaskContext): + return process_gherkin_outcomes(context) + +def process_prerequisites(context:TaskContext): + return process_gherkin_outcomes(context) + +def process_industry_practices(context:TaskContext): + return process_gherkin_outcomes(context) \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/processing/header.py b/backend/apps/ifc_validation/tasks/processing/header.py new file mode 100644 index 00000000..a1d47a0a --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/header.py @@ -0,0 +1,86 @@ +import json +import datetime +import os + +from apps.ifc_validation_models.models import Model, AuthoringTool, Company +from .. 
import TaskContext, logger, with_model + +def process_header(context:TaskContext): + header_validation = context.result + + with with_model(context.request.id) as model: + agg_status = context.task.determine_aggregate_status() + model.status_prereq = agg_status + model.size = os.path.getsize(context.file_path) + logger.debug(f'Detected size = {model.size} bytes') + + model.schema = header_validation.get('schema_identifier') + logger.debug(f'The schema identifier = {header_validation.get("schema")}') + # time_stamp + if ifc_file_time_stamp := header_validation.get('time_stamp', False): + try: + logger.debug(f'Timestamp within file = {ifc_file_time_stamp}') + date = datetime.datetime.strptime(ifc_file_time_stamp, "%Y-%m-%dT%H:%M:%S") + date_with_tz = datetime.datetime( + date.year, + date.month, + date.day, + date.hour, + date.minute, + date.second, + tzinfo=datetime.timezone.utc) + model.date = date_with_tz + except ValueError: + try: + model.date = datetime.datetime.fromisoformat(ifc_file_time_stamp) + except ValueError: + pass + + # mvd + model.mvd = header_validation.get('mvd') + + app = header_validation.get('application_name') + + version = header_validation.get('version') + name = None if any(value in (None, "Not defined") for value in (app, version)) else app + ' ' + version + company_name = header_validation.get('company_name') + logger.debug(f'Detected Authoring Tool in file = {name}') + + validation_errors = header_validation.get('validation_errors', []) + invalid_marker_fields = ['originating_system', 'version', 'company_name', 'application_name'] + if any(field in validation_errors for field in invalid_marker_fields): + model.status_header = Model.Status.INVALID + else: + # parsing was successful and model can be considered for scorecards + model.status_header = Model.Status.VALID + authoring_tool = AuthoringTool.find_by_full_name(full_name=name) + if (isinstance(authoring_tool, AuthoringTool)): + + if authoring_tool.company is None: + company, _ = 
Company.objects.get_or_create(name=company_name) + authoring_tool.company = company + authoring_tool.save() + logger.debug(f'Updated existing Authoring Tool with company: {company.name}') + + model.produced_by = authoring_tool + logger.debug(f'Retrieved existing Authoring Tool from DB = {model.produced_by.full_name}') + + elif authoring_tool is None: + company, _ = Company.objects.get_or_create(name=company_name) + authoring_tool, _ = AuthoringTool.objects.get_or_create( + company=company, + name=app, + version=version + ) + model.produced_by = authoring_tool + logger.debug(f'Authoring app not found, ApplicationFullName = {app}, Version = {version} - created new instance') + else: + model.produced_by = None + logger.warning(f'Retrieved multiple Authoring Tool from DB: {authoring_tool} - could not assign any') + + # update header validation + model.header_validation = header_validation + model.save(update_fields=['status_header', 'header_validation']) + model.save() + + return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/processing/instance_completion.py b/backend/apps/ifc_validation/tasks/processing/instance_completion.py new file mode 100644 index 00000000..1e26e6c5 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/instance_completion.py @@ -0,0 +1,21 @@ +import ifcopenshell +from .. 
import TaskContext, logger + +from apps.ifc_validation_models.models import ModelInstance + +from django.db import transaction + +def process_instance_completion(context:TaskContext): + # the current task doesn't have any execution layer and links instance ids to outcomes + ifc_file = ifcopenshell.open(context.file_path) + with transaction.atomic(): + model_id = context.request.model.id + model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) + instance_count = model_instances.count() + logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') + + for inst in model_instances.iterator(): + inst.ifc_type = ifc_file[inst.stepfile_id].is_a() + inst.save() + + return f'Updated {instance_count:,} ModelInstance record(s)' diff --git a/backend/apps/ifc_validation/tasks/processing/schema.py b/backend/apps/ifc_validation/tasks/processing/schema.py new file mode 100644 index 00000000..2279461e --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/schema.py @@ -0,0 +1,62 @@ +import json +from django.db import transaction + +from core.settings import DJANGO_DB_BULK_CREATE_BATCH_SIZE + +from apps.ifc_validation_models.models import ModelInstance, Model, ValidationOutcome +from .. 
import TaskContext, logger, with_model + + +def process_schema(context:TaskContext): + output, success, valid = (context.result.get(k) for k in ("output", "success", "valid")) + + with with_model(context.request.id) as model: + + if valid: + model.status_schema = Model.Status.VALID + context.task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=None + ) + else: + model.status_schema = Model.Status.INVALID + outcomes_to_save = list() + outcomes_instances_to_save = list() + + for line in output: + message = json.loads(line) + outcome = ValidationOutcome( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SCHEMA_ERROR, + observed=message['message'], + feature=json.dumps({ + 'type': message['type'] if 'type' in message else None, + 'attribute': message['attribute'] if 'attribute' in message else None + }), + validation_task=context.task + ) + outcomes_to_save.append(outcome) + if 'instance' in message and message['instance'] is not None and 'id' in message['instance'] and 'type' in message['instance']: + instance = ModelInstance( + stepfile_id=message['instance']['id'], + ifc_type=message['instance']['type'], + model=model + ) + outcome.instance_id = instance.stepfile_id # store for later reference (not persisted) + outcomes_instances_to_save.append(instance) + + ModelInstance.objects.bulk_create(outcomes_instances_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE, ignore_conflicts=True) #ignore existing + model_instances = dict(ModelInstance.objects.filter(model_id=model.id).values_list('stepfile_id', 'id')) # retrieve all + + for outcome in outcomes_to_save: + if outcome.instance_id: + instance_id = model_instances[outcome.instance_id] + if instance_id: + outcome.instance_id = instance_id + + ValidationOutcome.objects.bulk_create(outcomes_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) + + 
model.save(update_fields=['status_schema']) + + return "No IFC schema errors." if success else f"'ifcopenshell.validate' returned exit code {context.proc.returncode} and {len(output):,} errors." diff --git a/backend/apps/ifc_validation/tasks/processing/syntax.py b/backend/apps/ifc_validation/tasks/processing/syntax.py new file mode 100644 index 00000000..112cbab9 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/processing/syntax.py @@ -0,0 +1,49 @@ + +import json + +from apps.ifc_validation_models.models import Model, ValidationOutcome +from .. import TaskContext, logger, with_model + + +def process_syntax_outcomes(context:TaskContext): + #todo - unify output for all task executions + output, error_output, success = (context.result.get(k) for k in ("output", "error_output", "success")) + + # process + with with_model(context.request.id) as model: + status_field = context.config.status_field.name + task = context.task + if success: + setattr(model, status_field, Model.Status.VALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.PASSED, + outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, + observed=output if output else None + ) + elif error_output: + setattr(model, status_field, Model.Status.INVALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, + observed=list(filter(None, error_output.split("\n")))[-1] + ) + else: + for msg in json.loads(output): + setattr(model, status_field, Model.Status.INVALID) + task.outcomes.create( + severity=ValidationOutcome.OutcomeSeverity.ERROR, + outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, + observed=msg.get("message") + ) + + model.save(update_fields=[status_field]) + + # return reason for logging + return "No IFC syntax error(s)." 
if success else f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" + + +def process_syntax(context:TaskContext): + return process_syntax_outcomes(context) + +def process_header_syntax(context:TaskContext): + return process_syntax_outcomes(context) \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/task_runner.py b/backend/apps/ifc_validation/tasks/task_runner.py new file mode 100644 index 00000000..8273be2d --- /dev/null +++ b/backend/apps/ifc_validation/tasks/task_runner.py @@ -0,0 +1,236 @@ +import functools + +from celery import shared_task, chain, chord, group + +from core.utils import log_execution + +from apps.ifc_validation_models.decorators import requires_django_user_context +from apps.ifc_validation_models.models import * +from .configs import task_registry +from .context import TaskContext +from .utils import get_absolute_file_path +from .logger import logger +from .email_tasks import * + + +assert task_registry.total_increment() == 100 + +def check_proc_success_or_fail(proc, task): + if proc.returncode is not None and proc.returncode != 0: + error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" + task.mark_as_failed(error_message) + raise RuntimeError(error_message) + return proc.stdout + +@shared_task(bind=True) +@log_execution +def error_handler(self, *args, **kwargs): + + on_workflow_failed.delay(*args, **kwargs) + + +@shared_task(bind=True) +@log_execution +def chord_error_handler(self, request, exc, traceback, *args, **kwargs): + + on_workflow_failed.apply_async([request, exc, traceback]) + +@shared_task(bind=True) +@log_execution +@requires_django_user_context +def on_workflow_started(self, *args, **kwargs): + + # update status + id = kwargs.get('id') + reason = f"args={args} kwargs={kwargs}" + request = ValidationRequest.objects.get(pk=id) + request.mark_as_initiated(reason) + + # queue sending emails + nbr_of_tasks = 
request.tasks.count() + if nbr_of_tasks == 0: + # send_acknowledgement_user_email_task.delay(id=id, file_name=request.file_name) # disabled + send_acknowledgement_admin_email_task.delay(id=id, file_name=request.file_name) + else: + # send_revalidating_user_email_task.delay(id=id, file_name=request.file_name) # disabled + send_revalidating_admin_email_task.delay(id=id, file_name=request.file_name) + + +@shared_task(bind=True) +@log_execution +@requires_django_user_context +def on_workflow_completed(self, result, **kwargs): + + # update status + id = kwargs.get('id') + if not isinstance(id, int): + raise ValueError(f"Invalid id: {id!r}") + reason = "Processing completed" + request = ValidationRequest.objects.get(pk=id) + request.mark_as_completed(reason) + + # queue sending email + send_completion_email_task.delay(id=id, file_name=request.file_name) + + +@shared_task(bind=True) +@log_execution +@requires_django_user_context +def on_workflow_failed(self, *args, **kwargs): + + logger.debug(f'Function {self.__name__} called with args={args} kwargs={kwargs}') + + # update status + id = args[1] + reason = f"Processing failed: args={args} kwargs={kwargs}" + request = ValidationRequest.objects.get(pk=id) + request.mark_as_failed(reason) + + # queue sending email + send_failure_email_task.delay(id=id, file_name=request.file_name) + send_failure_admin_email_task.delay(id=id, file_name=request.file_name) + + +def validation_task_runner(task_type): + def decorator(func): + @shared_task(bind=True) + @log_execution + @requires_django_user_context + @functools.wraps(func) + def wrapper(self, *args, **kwargs): + id = kwargs.get('id') + + request = ValidationRequest.objects.get(pk=id) + file_path = get_absolute_file_path(request.file.name) + + # Always create the task record, even if it will be skipped due to blocking conditions, + # so it is logged and its status can be marked as 'skipped' + task = ValidationTask.objects.create(request=request, type=task_type) + + if model := 
request.model: + invalid_blockers = list(filter( + lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, + task_registry.get_blockers_of(task_type) + )) + else: # for testing, we're not instantiating a model + invalid_blockers = [] + + # get task configuration + config = task_registry[task_type] + + # update progress + increment = config.increment + request.progress = min(request.progress + increment, 100) + request.save() + + if not invalid_blockers: + task.mark_as_initiated() + + # Execution Layer + try: + context = config.check_program(TaskContext( + config=config, + task=task, + request=request, + file_path=file_path, + )) + except Exception as err: + task.mark_as_failed(str(err)) + logger.exception(f"Execution failed in task {task_type}: {task}") + return + + # Processing Layer / write to DB + try: + reason = config.process_results(context) + task.mark_as_completed(reason) + logger.debug(f"Task {task_type} completed, reason: {reason}") + except Exception as err: + task.mark_as_failed(str(err)) + logger.exception(f"Processing failed in task {task_type}: {err}") + return + + # Handle skipped tasks + else: + reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" + logger.debug(reason) + task.mark_as_skipped(reason) + return wrapper + return decorator + + +@shared_task(bind=True) +@log_execution +def ifc_file_validation_task(self, id, file_name, *args, **kwargs): + + if id is None or file_name is None: + raise ValueError("Arguments 'id' and/or 'file_name' are required.") + + error_task = error_handler.s(id, file_name) + chord_error_task = chord_error_handler.s(id, file_name) + + workflow_started = on_workflow_started.s(id=id, file_name=file_name) + workflow_completed = on_workflow_completed.s(id=id, file_name=file_name) + + serial_tasks = chain( + header_syntax_validation_subtask.s(id=id, file_name=file_name), + header_validation_subtask.s(id=id, file_name=file_name), + syntax_validation_subtask.s(id=id, 
file_name=file_name), + prerequisites_subtask.s(id=id, file_name=file_name), + ) + + parallel_tasks = group([ + digital_signatures_subtask.s(id=id, file_name=file_name), + schema_validation_subtask.s(id=id, file_name=file_name), + #bsdd_validation_subtask.s(id=id, file_name=file_name), # disabled + normative_rules_ia_validation_subtask.s(id=id, file_name=file_name), + normative_rules_ip_validation_subtask.s(id=id, file_name=file_name), + industry_practices_subtask.s(id=id, file_name=file_name) + ]) + + final_tasks = chain( + instance_completion_subtask.s(id=id, file_name=file_name) + ) + + workflow = ( + workflow_started | + serial_tasks | + chord( + chord(parallel_tasks, final_tasks).on_error(chord_error_task), + workflow_completed + ).on_error(chord_error_task)) + workflow.set(link_error=[error_task]) + workflow.apply_async() + + +@validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) +def instance_completion_subtask(): pass + +@validation_task_runner(ValidationTask.Type.NORMATIVE_IA) +def normative_rules_ia_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.NORMATIVE_IP) +def normative_rules_ip_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.PREREQUISITES) +def prerequisites_subtask(): pass + +@validation_task_runner(ValidationTask.Type.SYNTAX) +def syntax_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) +def header_syntax_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.SCHEMA) +def schema_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.HEADER) +def header_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) +def digital_signatures_subtask(): pass + +@validation_task_runner(ValidationTask.Type.BSDD) +def bsdd_validation_subtask(): pass + +@validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) +def industry_practices_subtask(): pass \ No newline at end of file diff 
--git a/backend/apps/ifc_validation/tasks/tasks.py b/backend/apps/ifc_validation/tasks/tasks.py deleted file mode 100644 index ef3b7982..00000000 --- a/backend/apps/ifc_validation/tasks/tasks.py +++ /dev/null @@ -1,651 +0,0 @@ -import os -import sys -import datetime -import subprocess -import functools -import json -import ifcopenshell -import typing -import contextlib -import operator - -from celery import shared_task, chain, chord, group -from celery.utils.log import get_task_logger -from django.db import transaction -from django.db.utils import IntegrityError - - -from core.utils import log_execution -from core.settings import DJANGO_DB_BULK_CREATE_BATCH_SIZE - -from apps.ifc_validation_models.settings import TASK_TIMEOUT_LIMIT, MEDIA_ROOT -from apps.ifc_validation_models.decorators import requires_django_user_context -from apps.ifc_validation_models.models import * -from .configs import task_registry - -from .email_tasks import * - -logger = get_task_logger(__name__) - - -assert task_registry.total_increment() == 100 - -def run_task( - task: ValidationTask, - check_program: typing.List[str], -) -> subprocess.CompletedProcess[str]: - logger.debug(f'Command for {task.type}: {" ".join(check_program)}') - task.set_process_details(None, check_program) - try: - proc = subprocess.run( - check_program, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - timeout=TASK_TIMEOUT_LIMIT, - env= os.environ.copy() - ) - logger.info(f'test run task task name {task.type}, task value : {task}') - return proc - - except Exception as err: - logger.exception(f"{type(err).__name__} in task {task.id} : {task.type}") - task.mark_as_failed(err) - raise type(err)(f"Unknown error during validation task {task.id}: {task.type}") from err - - -def check_proc_success_or_fail(proc, task): - if proc.returncode is not None and proc.returncode != 0: - error_message = f"Running {' '.join(proc.args)} failed with exit code {proc.returncode}\n{proc.stdout}\n{proc.stderr}" - 
task.mark_as_failed(error_message) - raise RuntimeError(error_message) - return proc.stdout - -@contextlib.contextmanager -def with_model(request_id): - with transaction.atomic(): - yield get_or_create_ifc_model(request_id) - - -@functools.lru_cache(maxsize=1024) -def get_absolute_file_path(file_name): - - """ - Resolves the absolute file path of an uploaded file and checks if it exists. - It tries resolving Django MEDIA_ROOT and current working directory, and caches the result. - - Mandatory Args: - file_name: relative file name of the uploaded file. - - Returns: - Absolute file path of the uploaded file. - """ - - ifc_fn = os.path.join(MEDIA_ROOT, file_name) - - if not os.path.exists(ifc_fn): - ifc_fn2 = os.path.join(os.getcwd(), ifc_fn) - if not os.path.exists(ifc_fn2): - raise FileNotFoundError(f"File path for file_name={file_name} was not found (tried loading '{ifc_fn}' and '{ifc_fn2}').") - - ifc_fn = os.path.abspath(ifc_fn) - - logger.debug(f"get_absolute_file_path(): file_name={file_name} returned '{ifc_fn}'") - return ifc_fn - - -@shared_task(bind=True) -@log_execution -def error_handler(self, *args, **kwargs): - - on_workflow_failed.delay(*args, **kwargs) - - -@shared_task(bind=True) -@log_execution -def chord_error_handler(self, request, exc, traceback, *args, **kwargs): - - on_workflow_failed.apply_async([request, exc, traceback]) - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -def on_workflow_started(self, *args, **kwargs): - - # update status - id = kwargs.get('id') - reason = f"args={args} kwargs={kwargs}" - request = ValidationRequest.objects.get(pk=id) - request.mark_as_initiated(reason) - - # queue sending emails - nbr_of_tasks = request.tasks.count() - if nbr_of_tasks == 0: - # send_acknowledgement_user_email_task.delay(id=id, file_name=request.file_name) # disabled - send_acknowledgement_admin_email_task.delay(id=id, file_name=request.file_name) - else: - # send_revalidating_user_email_task.delay(id=id, 
file_name=request.file_name) # disabled - send_revalidating_admin_email_task.delay(id=id, file_name=request.file_name) - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -def on_workflow_completed(self, result, **kwargs): - - # update status - id = kwargs.get('id') - if not isinstance(id, int): - raise ValueError(f"Invalid id: {id!r}") - reason = "Processing completed" - request = ValidationRequest.objects.get(pk=id) - request.mark_as_completed(reason) - - # queue sending email - send_completion_email_task.delay(id=id, file_name=request.file_name) - - -@shared_task(bind=True) -@log_execution -@requires_django_user_context -def on_workflow_failed(self, *args, **kwargs): - - logger.debug(f'Function {self.__name__} called with args={args} kwargs={kwargs}') - - # update status - id = args[1] - reason = f"Processing failed: args={args} kwargs={kwargs}" - request = ValidationRequest.objects.get(pk=id) - request.mark_as_failed(reason) - - # queue sending email - send_failure_email_task.delay(id=id, file_name=request.file_name) - send_failure_admin_email_task.delay(id=id, file_name=request.file_name) - - -@log_execution -@requires_django_user_context -@transaction.atomic -# @requires_django_exclusive_table_lock(Model, 'EXCLUSIVE') -# --> table lock, slower - DO NOT USE -def get_or_create_ifc_model(request_id): - - id = request_id - request = ValidationRequest.objects.get(pk=id) - if request.model is None: - - # acquire row lock (... 
uses "FOR UPDATE" hint) - request = ValidationRequest.objects.select_for_update().get(pk=id) - - model, _ = Model.objects.get_or_create( - file_name=request.file_name, - file=request.file, - size=request.file.size, - uploaded_by=request.created_by - ) - request.model = model - request.save() - - return model - - else: - return request.model - -def validation_task_runner(task_type): - def decorator(func): - @shared_task(bind=True) - @log_execution - @requires_django_user_context - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - id = kwargs.get('id') - - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # Always create the task record, even if it will be skipped due to blocking conditions, - # so it is logged and its status can be marked as 'skipped' - task = ValidationTask.objects.create(request=request, type=task_type) - - if model := request.model: - invalid_blockers = list(filter( - lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, - task_registry.get_blockers_of(task_type) - )) - else: # for testing, we're not instantiating a model - invalid_blockers = [] - - # update progress - increment = task_registry[task_type].increment - request.progress = min(request.progress + increment, 100) - request.save() - - if not invalid_blockers: - task.mark_as_initiated() - try: - reason = func(self, task, request, file_path) - task.mark_as_completed(reason) - logger.debug(f'task completed {task_type}, registered reason is {reason}') - except Exception as err: - task.mark_as_failed(str(err)) - logger.exception(f"Task {task_type} failed: {err}") - else: - reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" - logger.debug(reason) - task.mark_as_skipped(reason) - return wrapper - return decorator - - -@shared_task(bind=True) -@log_execution -def ifc_file_validation_task(self, id, file_name, *args, **kwargs): - - if id is None or file_name is None: 
- raise ValueError("Arguments 'id' and/or 'file_name' are required.") - - error_task = error_handler.s(id, file_name) - chord_error_task = chord_error_handler.s(id, file_name) - - workflow_started = on_workflow_started.s(id=id, file_name=file_name) - workflow_completed = on_workflow_completed.s(id=id, file_name=file_name) - - serial_tasks = chain( - header_syntax_validation_subtask.s(id=id, file_name=file_name), - header_validation_subtask.s(id=id, file_name=file_name), - syntax_validation_subtask.s(id=id, file_name=file_name), - prerequisites_subtask.s(id=id, file_name=file_name), - ) - - parallel_tasks = group([ - digital_signatures_subtask.s(id=id, file_name=file_name), - schema_validation_subtask.s(id=id, file_name=file_name), - #bsdd_validation_subtask.s(id=id, file_name=file_name), # disabled - normative_rules_ia_validation_subtask.s(id=id, file_name=file_name), - normative_rules_ip_validation_subtask.s(id=id, file_name=file_name), - industry_practices_subtask.s(id=id, file_name=file_name) - ]) - - final_tasks = chain( - instance_completion_subtask.s(id=id, file_name=file_name) - ) - - workflow = ( - workflow_started | - serial_tasks | - chord( - chord(parallel_tasks, final_tasks).on_error(chord_error_task), - workflow_completed - ).on_error(chord_error_task)) - workflow.set(link_error=[error_task]) - workflow.apply_async() - - -@validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) -def instance_completion_subtask(self, task, request, file_path, *args, **kwargs): - ifc_file = ifcopenshell.open(file_path) - - with transaction.atomic(): - model_id = request.model.id - model_instances = ModelInstance.objects.filter(model_id=model_id, ifc_type__in=[None, '']) - instance_count = model_instances.count() - logger.info(f'Retrieved {instance_count:,} ModelInstance record(s)') - - for inst in model_instances.iterator(): - inst.ifc_type = ifc_file[inst.stepfile_id].is_a() - inst.save() - - return f'Updated {instance_count:,} ModelInstance record(s)' - - 
-@validation_task_runner(ValidationTask.Type.NORMATIVE_IA) -def normative_rules_ia_validation_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - logger.info(f'qualname : {self.__qualname__}') - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ia') - - -@validation_task_runner(ValidationTask.Type.NORMATIVE_IP) -def normative_rules_ip_validation_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_ip') - - -@validation_task_runner(ValidationTask.Type.PREREQUISITES) -def prerequisites_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - return run_gherkin_subtask(self, task, request, file_path, check_program, 'status_prereq') - - - -@validation_task_runner(ValidationTask.Type.SYNTAX) -def syntax_validation_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - return run_syntax_subtask(self, task, request, file_path, check_program, 'status_syntax') - -@validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) -def header_syntax_validation_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - return run_syntax_subtask(self, task, request, file_path, check_program, 'status_header_syntax') - - -def run_syntax_subtask(self, task, request, file_path, check_program, model_status_field): - proc = run_task( - task=task, - check_program = check_program, - ) - output = proc.stdout - error_output = proc.stderr - success = (len(list(filter(None, output.split("\n")))) == 0) and len(error_output) == 0 - - with with_model(request.id) as model: - - if success: - setattr(model, model_status_field, Model.Status.VALID) - task.outcomes.create( - 
severity=ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=output if output else None - ) - elif error_output: - setattr(model, model_status_field, Model.Status.INVALID) - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=list(filter(None, error_output.split("\n")))[-1] - ) - else: - for msg in json.loads(output): - setattr(model, model_status_field, Model.Status.INVALID) - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - outcome_code=ValidationOutcome.ValidationOutcomeCode.SYNTAX_ERROR, - observed=msg.get("message") - ) - - model.save(update_fields=[model_status_field]) - return "No IFC syntax error(s)." if success else f"Found IFC syntax errors:\n\nConsole: \n{output}\n\nError: {error_output}" - - -@validation_task_runner(ValidationTask.Type.SCHEMA) -def schema_validation_subtask(self, task, request, file_path, *args, **kwargs): - check_program = task_registry[task.type].check_program(file_path, task.id) - - proc = run_task( - task=task, - check_program = check_program, - ) - def is_schema_error(line): - try: - json.loads(line) # ignoring non-JSON messages - except ValueError: - return False - return True - - output = list(filter(is_schema_error, proc.stdout.split("\n"))) - - success = proc.returncode >= 0 - valid = len(output) == 0 - - with with_model(request.id) as model: - - if valid: - model.status_schema = Model.Status.VALID - task.outcomes.create( - severity=ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=None - ) - else: - model.status_schema = Model.Status.INVALID - outcomes_to_save = list() - outcomes_instances_to_save = list() - - for line in output: - message = json.loads(line) - outcome = ValidationOutcome( - severity=ValidationOutcome.OutcomeSeverity.ERROR, - 
outcome_code=ValidationOutcome.ValidationOutcomeCode.SCHEMA_ERROR, - observed=message['message'], - feature=json.dumps({ - 'type': message['type'] if 'type' in message else None, - 'attribute': message['attribute'] if 'attribute' in message else None - }), - validation_task=task - ) - outcomes_to_save.append(outcome) - if 'instance' in message and message['instance'] is not None and 'id' in message['instance'] and 'type' in message['instance']: - instance = ModelInstance( - stepfile_id=message['instance']['id'], - ifc_type=message['instance']['type'], - model=model - ) - outcome.instance_id = instance.stepfile_id # store for later reference (not persisted) - outcomes_instances_to_save.append(instance) - - ModelInstance.objects.bulk_create(outcomes_instances_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE, ignore_conflicts=True) #ignore existing - model_instances = dict(ModelInstance.objects.filter(model_id=model.id).values_list('stepfile_id', 'id')) # retrieve all - - for outcome in outcomes_to_save: - if outcome.instance_id: - instance_id = model_instances[outcome.instance_id] - if instance_id: - outcome.instance_id = instance_id - - ValidationOutcome.objects.bulk_create(outcomes_to_save, batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - - model.save(update_fields=['status_schema']) - - return "No IFC schema errors." if success else f"'ifcopenshell.validate' returned exit code {proc.returncode} and {len(output):,} errors." 
- - -@validation_task_runner(ValidationTask.Type.HEADER) -def header_validation_subtask(self, task, request, file_path, **kwargs): - check_program = task_registry[task.type].check_program(file_path, task.id) - proc = run_task( - task=task, - check_program = check_program, - ) - - header_validation = {} - stdout_lines = proc.stdout.splitlines() - for line in stdout_lines: - try: - header_validation = json.loads(line) - except json.JSONDecodeError: - continue - - with with_model(request.id) as model: - agg_status = task.determine_aggregate_status() - model.status_prereq = agg_status - model.size = os.path.getsize(file_path) - logger.debug(f'Detected size = {model.size} bytes') - - model.schema = header_validation.get('schema_identifier') - logger.debug(f'The schema identifier = {header_validation.get("schema")}') - # time_stamp - if ifc_file_time_stamp := header_validation.get('time_stamp', False): - try: - logger.debug(f'Timestamp within file = {ifc_file_time_stamp}') - date = datetime.datetime.strptime(ifc_file_time_stamp, "%Y-%m-%dT%H:%M:%S") - date_with_tz = datetime.datetime( - date.year, - date.month, - date.day, - date.hour, - date.minute, - date.second, - tzinfo=datetime.timezone.utc) - model.date = date_with_tz - except ValueError: - try: - model.date = datetime.datetime.fromisoformat(ifc_file_time_stamp) - except ValueError: - pass - - # mvd - model.mvd = header_validation.get('mvd') - - app = header_validation.get('application_name') - - version = header_validation.get('version') - name = None if any(value in (None, "Not defined") for value in (app, version)) else app + ' ' + version - company_name = header_validation.get('company_name') - logger.debug(f'Detected Authoring Tool in file = {name}') - - validation_errors = header_validation.get('validation_errors', []) - invalid_marker_fields = ['originating_system', 'version', 'company_name', 'application_name'] - if any(field in validation_errors for field in invalid_marker_fields): - model.status_header = 
Model.Status.INVALID - else: - # parsing was successful and model can be considered for scorecards - model.status_header = Model.Status.VALID - authoring_tool = AuthoringTool.find_by_full_name(full_name=name) - if (isinstance(authoring_tool, AuthoringTool)): - - if authoring_tool.company is None: - company, _ = Company.objects.get_or_create(name=company_name) - authoring_tool.company = company - authoring_tool.save() - logger.debug(f'Updated existing Authoring Tool with company: {company.name}') - - model.produced_by = authoring_tool - logger.debug(f'Retrieved existing Authoring Tool from DB = {model.produced_by.full_name}') - - elif authoring_tool is None: - company, _ = Company.objects.get_or_create(name=company_name) - authoring_tool, _ = AuthoringTool.objects.get_or_create( - company=company, - name=app, - version=version - ) - model.produced_by = authoring_tool - logger.debug(f'Authoring app not found, ApplicationFullName = {app}, Version = {version} - created new instance') - else: - model.produced_by = None - logger.warning(f'Retrieved multiple Authoring Tool from DB: {authoring_tool} - could not assign any') - - # update header validation - model.header_validation = header_validation - model.save(update_fields=['status_header', 'header_validation']) - model.save() - - # update Task info and return - return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {header_validation}' - - - -@validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) -def digital_signatures_subtask(self, task, request, file_path, **kwargs): - check_script = os.path.join(os.path.dirname(__file__), "checks", "signatures", "check_signatures.py") - - check_program = [sys.executable, check_script, file_path] - - proc = run_task( - task=task, - check_program = check_program, - ) - - output = list(map(json.loads, filter(None, map(lambda s: s.strip(), proc.stdout.split("\n"))))) - success = proc.returncode >= 0 - valid = all(m['signature'] != "invalid" for m in output) - - 
with with_model(request.id) as model: - model.status_signatures = Model.Status.NOT_APPLICABLE if not output else Model.Status.VALID if valid else Model.Status.INVALID - - def create_outcome(di): - return ValidationOutcome( - severity=ValidationOutcome.OutcomeSeverity.ERROR if di.get("signature") == "invalid" else ValidationOutcome.OutcomeSeverity.PASSED, - outcome_code=ValidationOutcome.ValidationOutcomeCode.VALUE_ERROR if di.get("signature") == "invalid" else ValidationOutcome.ValidationOutcomeCode.PASSED, - observed=di, - feature=json.dumps({'digital_signature': 1}), - validation_task = task - ) - - ValidationOutcome.objects.bulk_create(list(map(create_outcome, output)), batch_size=DJANGO_DB_BULK_CREATE_BATCH_SIZE) - - model.save(update_fields=['status_signatures']) - return 'Digital signature check completed' if success else f"Script returned exit code {proc.returncode} and {proc.stderr}" - - -@validation_task_runner(ValidationTask.Type.BSDD) -def bsdd_validation_subtask(self, task, request, file_path, *args, **kwargs): - check_program = task_registry[task.type].check_program(file_path, task.id) - - proc = run_task( - task=task, - check_program = check_program, - ) - - raw_output = check_proc_success_or_fail(proc, task) - logger.info(f'Output for {self.__name__}: {raw_output}') - - with with_model(request.id) as model: - - # update Validation Outcomes - json_output = json.loads(raw_output) - for message in json_output['messages']: - - outcome = task.outcomes.create( - severity=[c[0] for c in ValidationOutcome.OutcomeSeverity.choices if c[1] == (message['severity'])][0], - outcome_code=[c[0] for c in ValidationOutcome.ValidationOutcomeCode.choices if c[1] == (message['outcome'])][0], - observed=message['message'], - feature=json.dumps({ - 'rule': message['rule'] if 'rule' in message else None, - 'category': message['category'] if 'category' in message else None, - 'dictionary': message['dictionary'] if 'dictionary' in message else None, - 'class': 
message['class'] if 'class' in message else None, - 'instance_id': message['instance_id'] if 'instance_id' in message else None - }) - ) - - if 'instance_id' in message and message['instance_id'] is not None: - instance, _ = model.instances.get_or_create( - stepfile_id = message['instance_id'], - model=model - ) - outcome.instance = instance - outcome.save() - - # update Model info - agg_status = task.determine_aggregate_status() - model.status_bsdd = agg_status - model.save(update_fields=['status_bsdd']) - - return f"agg_status = {Model.Status(agg_status).label}\nmessages = {json_output['messages']}" - - - -@validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) -def industry_practices_subtask(self, task, request, file_path): - check_program = task_registry[task.type].check_program(file_path, task.id) - - proc = run_task( - task=task, - check_program = check_program, - ) - - raw_output = check_proc_success_or_fail(proc, task) - with with_model(request.id) as model: - agg_status = task.determine_aggregate_status() - model.status_industry_practices = agg_status - model.save(update_fields=['status_industry_practices']) - - return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' - -def run_gherkin_subtask(self, task, request, file_path, check_program, status_field): - - proc = run_task( - task=task, - check_program = check_program - ) - - raw_output = check_proc_success_or_fail(proc, task) - - with with_model(request.id) as model: - agg_status = task.determine_aggregate_status() - setattr(model, status_field, agg_status) - model.save(update_fields=[status_field]) - - return f'agg_status = {Model.Status(agg_status).label}\nraw_output = {raw_output}' \ No newline at end of file diff --git a/backend/apps/ifc_validation/tasks/utils.py b/backend/apps/ifc_validation/tasks/utils.py new file mode 100644 index 00000000..74f95701 --- /dev/null +++ b/backend/apps/ifc_validation/tasks/utils.py @@ -0,0 +1,75 @@ +import contextlib +import 
functools +import os + +from django.db import transaction + +from core.utils import log_execution +from celery.utils.log import get_task_logger + +from apps.ifc_validation_models.settings import MEDIA_ROOT +from apps.ifc_validation_models.decorators import requires_django_user_context +from apps.ifc_validation_models.models import ValidationRequest, Model + +logger = get_task_logger(__name__) + +@log_execution +@requires_django_user_context +@transaction.atomic +# @requires_django_exclusive_table_lock(Model, 'EXCLUSIVE') +# --> table lock, slower - DO NOT USE +def get_or_create_ifc_model(request_id): + + id = request_id + request = ValidationRequest.objects.get(pk=id) + if request.model is None: + + # acquire row lock (... uses "FOR UPDATE" hint) + request = ValidationRequest.objects.select_for_update().get(pk=id) + + model, _ = Model.objects.get_or_create( + file_name=request.file_name, + file=request.file, + size=request.file.size, + uploaded_by=request.created_by + ) + request.model = model + request.save() + + return model + + else: + return request.model + + +@contextlib.contextmanager +def with_model(request_id): + with transaction.atomic(): + yield get_or_create_ifc_model(request_id) + + +@functools.lru_cache(maxsize=1024) +def get_absolute_file_path(file_name): + + """ + Resolves the absolute file path of an uploaded file and checks if it exists. + It tries resolving Django MEDIA_ROOT and current working directory, and caches the result. + + Mandatory Args: + file_name: relative file name of the uploaded file. + + Returns: + Absolute file path of the uploaded file. 
+ """ + + ifc_fn = os.path.join(MEDIA_ROOT, file_name) + + if not os.path.exists(ifc_fn): + ifc_fn2 = os.path.join(os.getcwd(), ifc_fn) + if not os.path.exists(ifc_fn2): + raise FileNotFoundError(f"File path for file_name={file_name} was not found (tried loading '{ifc_fn}' and '{ifc_fn2}').") + + ifc_fn = os.path.abspath(ifc_fn) + + logger.debug(f"get_absolute_file_path(): file_name={file_name} returned '{ifc_fn}'") + return ifc_fn \ No newline at end of file diff --git a/backend/apps/ifc_validation_bff/views_legacy.py b/backend/apps/ifc_validation_bff/views_legacy.py index 963afa94..eacb3dc9 100644 --- a/backend/apps/ifc_validation_bff/views_legacy.py +++ b/backend/apps/ifc_validation_bff/views_legacy.py @@ -21,7 +21,7 @@ from apps.ifc_validation_models.models import Model from apps.ifc_validation_models.models import UserAdditionalInfo -from apps.ifc_validation.tasks.tasks import ifc_file_validation_task +from apps.ifc_validation.tasks import ifc_file_validation_task from core.settings import MEDIA_ROOT, MAX_FILES_PER_UPLOAD from core.settings import DEVELOPMENT, LOGIN_URL, USE_WHITELIST From 411191717b8467f310755a47fa021aba2b63e51c Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 25 Aug 2025 16:46:11 +0100 Subject: [PATCH 20/21] update ifcopenshell --- .github/workflows/ci_cd.yml | 2 +- backend/Makefile | 6 +++--- docker/backend/Dockerfile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci_cd.yml b/.github/workflows/ci_cd.yml index a7b24d30..09eb85e0 100644 --- a/.github/workflows/ci_cd.yml +++ b/.github/workflows/ci_cd.yml @@ -87,7 +87,7 @@ jobs: source venv/bin/activate # use version of ifcopenshell with desired schema parsing # TODO: revert to pyPI when schema parsing is published in the future - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" + wget -O /tmp/ifcopenshell_python.zip 
"https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-6924012-linux64.zip" mkdir -p venv/lib/python3.11/site-packages unzip -d venv/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip diff --git a/backend/Makefile b/backend/Makefile index abedef64..a8a3ad90 100644 --- a/backend/Makefile +++ b/backend/Makefile @@ -14,7 +14,7 @@ venv: install: venv $(PIP) install --upgrade pip find . -name 'requirements.txt' -exec $(PIP) install -r {} \; - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-6924012-linux64.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip -f -d $(VIRTUAL_ENV)/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip rm /tmp/ifcopenshell_python.zip @@ -22,7 +22,7 @@ install: venv install-macos: venv find . -name 'requirements.txt' -exec $(PIP) install -r {} \; $(PIP) install -r requirements.txt - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-macos64.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-6924012-macos64.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip /tmp/ifcopenshell_python.zip -d $(VIRTUAL_ENV)/lib/python3.11/site-packages rm /tmp/ifcopenshell_python.zip @@ -30,7 +30,7 @@ install-macos: venv install-macos-m1: venv find . 
-name 'requirements.txt' -exec $(PIP) install -r {} \; $(PIP) install -r requirements.txt - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-macosm164.zip" + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-6924012-macosm164.zip" mkdir -p $(VIRTUAL_ENV)/lib/python3.11/site-packages unzip /tmp/ifcopenshell_python.zip -d $(VIRTUAL_ENV)/lib/python3.11/site-packages rm /tmp/ifcopenshell_python.zip diff --git a/docker/backend/Dockerfile b/docker/backend/Dockerfile index 12f7472e..99b15cca 100644 --- a/docker/backend/Dockerfile +++ b/docker/backend/Dockerfile @@ -37,7 +37,7 @@ RUN --mount=type=cache,target=/root/.cache \ find /app/backend -name 'requirements.txt' -exec pip install --no-cache-dir -r {} \; && \ # use version of ifcopenshell with desired schema parsing # TODO: revert to pyPI when schema parsing is published in the future - wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-e13ba76-linux64.zip" && \ + wget -O /tmp/ifcopenshell_python.zip "https://s3.amazonaws.com/ifcopenshell-builds/ifcopenshell-python-311-v0.8.4-6924012-linux64.zip" && \ mkdir -p /opt/venv/lib/python3.11/site-packages && \ unzip -d /opt/venv/lib/python3.11/site-packages /tmp/ifcopenshell_python.zip && \ # some cleanup From e68e2bb764628a45638dbef8c6b2a3674f40163e Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Mon, 25 Aug 2025 19:28:28 +0100 Subject: [PATCH 21/21] use task_factory instead of decorator --- backend/apps/ifc_validation/tasks/configs.py | 4 + .../apps/ifc_validation/tasks/task_runner.py | 155 ++++++++---------- 2 files changed, 76 insertions(+), 83 deletions(-) diff --git a/backend/apps/ifc_validation/tasks/configs.py b/backend/apps/ifc_validation/tasks/configs.py index b9e5216a..029b9927 100644 --- a/backend/apps/ifc_validation/tasks/configs.py +++ 
b/backend/apps/ifc_validation/tasks/configs.py @@ -13,6 +13,10 @@ class TaskConfig: blocks: Optional[List[str]] execution_stage: str = "parallel" process_results: Callable | None = None + + @property + def celery_task_name(self) -> str: + return f"apps.ifc_validation.tasks.{self.type.name.lower()}_subtask" # create blueprint def make_task(*, type, increment, field=None, stage="parallel"): diff --git a/backend/apps/ifc_validation/tasks/task_runner.py b/backend/apps/ifc_validation/tasks/task_runner.py index 8273be2d..f21d7df8 100644 --- a/backend/apps/ifc_validation/tasks/task_runner.py +++ b/backend/apps/ifc_validation/tasks/task_runner.py @@ -91,71 +91,71 @@ def on_workflow_failed(self, *args, **kwargs): send_failure_admin_email_task.delay(id=id, file_name=request.file_name) -def validation_task_runner(task_type): - def decorator(func): - @shared_task(bind=True) - @log_execution - @requires_django_user_context - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - id = kwargs.get('id') - - request = ValidationRequest.objects.get(pk=id) - file_path = get_absolute_file_path(request.file.name) - - # Always create the task record, even if it will be skipped due to blocking conditions, - # so it is logged and its status can be marked as 'skipped' - task = ValidationTask.objects.create(request=request, type=task_type) +def task_factory(task_type): + config = task_registry[task_type] + + @shared_task(bind=True, name=config.celery_task_name) + @log_execution + @requires_django_user_context + def validation_subtask_runner(self, *args, **kwargs): + + id = kwargs.get('id') + + request = ValidationRequest.objects.get(pk=id) + file_path = get_absolute_file_path(request.file.name) + + # Always create the task record, even if it will be skipped due to blocking conditions, + # so it is logged and its status can be marked as 'skipped' + task = ValidationTask.objects.create(request=request, type=task_type) + + if model := request.model: + invalid_blockers = list(filter( + 
lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, + task_registry.get_blockers_of(task_type) + )) + else: # for testing, we're not instantiating a model + invalid_blockers = [] + + # update progress + increment = config.increment + request.progress = min(request.progress + increment, 100) + request.save() + + # run or skip + if not invalid_blockers: + task.mark_as_initiated() - if model := request.model: - invalid_blockers = list(filter( - lambda b: getattr(model, task_registry[b].status_field.name) == Model.Status.INVALID, - task_registry.get_blockers_of(task_type) + # Execution Layer + try: + context = config.check_program(TaskContext( + config=config, + task=task, + request=request, + file_path=file_path, )) - else: # for testing, we're not instantiating a model - invalid_blockers = [] + except Exception as err: + task.mark_as_failed(str(err)) + logger.exception(f"Execution failed in task {task_type}: {task}") + return + + # Processing Layer / write to DB + try: + reason = config.process_results(context) + task.mark_as_completed(reason) + logger.debug(f"Task {task_type} completed, reason: {reason}") + except Exception as err: + task.mark_as_failed(str(err)) + logger.exception(f"Processing failed in task {task_type}: {err}") + return - # get task configuration - config = task_registry[task_type] + # Handle skipped tasks + else: + reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" + logger.debug(reason) + task.mark_as_skipped(reason) - # update progress - increment = config.increment - request.progress = min(request.progress + increment, 100) - request.save() - - if not invalid_blockers: - task.mark_as_initiated() - - # Execution Layer - try: - context = config.check_program(TaskContext( - config=config, - task=task, - request=request, - file_path=file_path, - )) - except Exception as err: - task.mark_as_failed(str(err)) - logger.exception(f"Execution failed in task {task_type}: {task}") - return - 
- # Processing Layer / write to DB - try: - reason = config.process_results(context) - task.mark_as_completed(reason) - logger.debug(f"Task {task_type} completed, reason: {reason}") - except Exception as err: - task.mark_as_failed(str(err)) - logger.exception(f"Processing failed in task {task_type}: {err}") - return - - # Handle skipped tasks - else: - reason = f"Skipped due to fail in blocking tasks: {', '.join(invalid_blockers)}" - logger.debug(reason) - task.mark_as_skipped(reason) - return wrapper - return decorator + validation_subtask_runner.__doc__ = f"Validation task for {task_type} generated by the task_factory func." + return validation_subtask_runner @shared_task(bind=True) @@ -202,35 +202,24 @@ def ifc_file_validation_task(self, id, file_name, *args, **kwargs): workflow.apply_async() -@validation_task_runner(ValidationTask.Type.INSTANCE_COMPLETION) -def instance_completion_subtask(): pass +instance_completion_subtask = task_factory(ValidationTask.Type.INSTANCE_COMPLETION) -@validation_task_runner(ValidationTask.Type.NORMATIVE_IA) -def normative_rules_ia_validation_subtask(): pass +normative_rules_ia_validation_subtask = task_factory(ValidationTask.Type.NORMATIVE_IA) -@validation_task_runner(ValidationTask.Type.NORMATIVE_IP) -def normative_rules_ip_validation_subtask(): pass +normative_rules_ip_validation_subtask = task_factory(ValidationTask.Type.NORMATIVE_IP) -@validation_task_runner(ValidationTask.Type.PREREQUISITES) -def prerequisites_subtask(): pass +prerequisites_subtask = task_factory(ValidationTask.Type.PREREQUISITES) -@validation_task_runner(ValidationTask.Type.SYNTAX) -def syntax_validation_subtask(): pass +syntax_validation_subtask = task_factory(ValidationTask.Type.SYNTAX) -@validation_task_runner(ValidationTask.Type.HEADER_SYNTAX) -def header_syntax_validation_subtask(): pass +header_syntax_validation_subtask = task_factory(ValidationTask.Type.HEADER_SYNTAX) -@validation_task_runner(ValidationTask.Type.SCHEMA) -def 
schema_validation_subtask(): pass +schema_validation_subtask = task_factory(ValidationTask.Type.SCHEMA) -@validation_task_runner(ValidationTask.Type.HEADER) -def header_validation_subtask(): pass +header_validation_subtask = task_factory(ValidationTask.Type.HEADER) -@validation_task_runner(ValidationTask.Type.DIGITAL_SIGNATURES) -def digital_signatures_subtask(): pass +digital_signatures_subtask = task_factory(ValidationTask.Type.DIGITAL_SIGNATURES) -@validation_task_runner(ValidationTask.Type.BSDD) -def bsdd_validation_subtask(): pass +bsdd_validation_subtask = task_factory(ValidationTask.Type.BSDD) -@validation_task_runner(ValidationTask.Type.INDUSTRY_PRACTICES) -def industry_practices_subtask(): pass \ No newline at end of file +industry_practices_subtask = task_factory(ValidationTask.Type.INDUSTRY_PRACTICES)