Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: UN-1983 BE Enhancements for file centric logging #1157

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions backend/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,25 @@ def is_json(string: str) -> bool:
return False
return True

# TODO: Use from SDK
@staticmethod
def pretty_file_size(num: float, suffix: str = "B") -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What will be the possible value of suffix other than B ?

"""Gets the human readable size for a file,

Args:
num (int): Size in bytes to parse
suffix (str, optional): _description_. Defaults to "B".

Returns:
str: Human readable size
"""
for unit in ("", "K", "M", "G", "T"):
if abs(num) < 1024.0:
# return f"{num:3.1f} {unit}{suffix}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove commented line

return f"{num:.2f} {unit}{suffix}"
num /= 1024.0
return f"{num:.2f} {suffix}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should be return f"{num:.2f} T{suffix}" instead, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually we should extend the units all the way up to Y. But our files will never grow that large.



class ModelEnum(Enum):
@classmethod
Expand Down
2 changes: 0 additions & 2 deletions backend/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,5 @@ class ExecutionLogConstants:
LOGS_BATCH_LIMIT: int = settings.LOGS_BATCH_LIMIT
LOG_QUEUE_NAME: str = "log_history_queue"
CELERY_QUEUE_NAME = "celery_periodic_logs"
PERIODIC_TASK_NAME = "workflow_log_history"
PERIODIC_TASK_NAME_V2 = "workflow_log_history_v2"
TASK = "workflow_manager.workflow.execution_log_utils.consume_log_history"
TASK_V2 = "consume_log_history"
7 changes: 5 additions & 2 deletions backend/utils/dto.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import logging
from datetime import datetime
from datetime import datetime, timezone
from typing import Any, Optional

from django.utils import timezone as dj_timezone
from unstract.workflow_execution.enums import LogType

from unstract.core.constants import LogFieldName
Expand Down Expand Up @@ -35,7 +36,9 @@ def __init__(
self.file_execution_id: Optional[str] = file_execution_id
self.organization_id: str = organization_id
self.timestamp: int = timestamp
self.event_time: datetime = datetime.fromtimestamp(timestamp)
self.event_time: datetime = dj_timezone.make_aware(
datetime.fromtimestamp(timestamp), timezone.utc
)
self.log_type: LogType = log_type
self.data: dict[str, Any] = data

Expand Down
5 changes: 5 additions & 0 deletions backend/workflow_manager/execution/serializer/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,8 @@ def get_successful_files(self, obj: WorkflowExecution) -> int:
def get_failed_files(self, obj: WorkflowExecution) -> int:
    """Return the count of failed executed files"""
    # A file execution counts as failed when its status is ERROR.
    return obj.file_executions.filter(status=ExecutionStatus.ERROR).count()

def to_representation(self, obj: WorkflowExecution):
    """Serialize the execution, exposing a human readable execution time.

    Overrides the ``execution_time`` field in the default representation
    with the model's pretty-printed variant.
    """
    representation = super().to_representation(obj)
    representation["execution_time"] = obj.pretty_execution_time
    return representation
24 changes: 21 additions & 3 deletions backend/workflow_manager/execution/serializer/file_centric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,39 @@
from workflow_manager.file_execution.models import (
WorkflowFileExecution as FileExecution,
)
from workflow_manager.workflow_v2.enums import ExecutionStatus

# Shown while a file execution sits in PENDING/QUEUED, before any worker
# has claimed it (no logs exist yet at that point).
INIT_STATUS_MSG = "Waiting for a worker to pick up file's execution..."

# Fallback when the execution has started but no displayable log line
# (non-DEBUG/WARN with a "log" payload) is available yet.
DEFAULT_STATUS_MSG = (
    "No status message available, please check again after a few minutes."
)


class FileCentricExecutionSerializer(serializers.ModelSerializer):
    """Serializes a file's execution with a user friendly status message."""

    # NOTE: the stale `latest_log = serializers.SerializerMethodField()`
    # left over from the rename is removed here — it had no matching
    # `get_latest_log` resolver and would raise during serialization.
    status_msg = serializers.SerializerMethodField()

    class Meta:
        model = FileExecution
        exclude = ["file_hash"]

    def get_status_msg(self, obj: FileExecution) -> str:
        """Return the latest user-facing log line for this file execution.

        Args:
            obj (FileExecution): File execution being serialized

        Returns:
            str: Latest non-DEBUG/WARN log message, a waiting message for
                executions not yet picked up, or a default placeholder.
        """
        # Execution hasn't started yet — there are no logs to surface.
        if obj.status in [ExecutionStatus.PENDING, ExecutionStatus.QUEUED]:
            return INIT_STATUS_MSG

        # NOTE(review): ordering all of a file's logs by event_time per
        # serialized row may be expensive — confirm an index covers
        # (file execution, event_time).
        latest_log = (
            obj.execution_logs.exclude(data__level__in=["DEBUG", "WARN"])
            .order_by("-event_time")
            .first()
        )
        if latest_log and "log" in latest_log.data:
            return latest_log.data["log"]
        return DEFAULT_STATUS_MSG

    def to_representation(self, obj: FileExecution):
        """Add human readable file size and execution time to the payload."""
        data = super().to_representation(obj)
        data["file_size"] = obj.pretty_file_size
        data["execution_time"] = obj.pretty_execution_time
        return data
8 changes: 4 additions & 4 deletions backend/workflow_manager/execution/views/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@ def get_queryset(self) -> Optional[QuerySet]:
queryset = WorkflowExecution.objects.all()

# Filter based on execution entity
if execution_entity == ExecutionEntity.API:
if execution_entity == ExecutionEntity.API.value:
queryset = queryset.filter(
pipeline_id__in=APIDeployment.objects.values_list("id", flat=True)
)
elif execution_entity == ExecutionEntity.ETL:
elif execution_entity == ExecutionEntity.ETL.value:
queryset = queryset.filter(
pipeline_id__in=Pipeline.objects.filter(
pipeline_type=Pipeline.PipelineType.ETL
).values_list("id", flat=True)
)
elif execution_entity == ExecutionEntity.TASK:
elif execution_entity == ExecutionEntity.TASK.value:
queryset = queryset.filter(
pipeline_id__in=Pipeline.objects.filter(
pipeline_type=Pipeline.PipelineType.TASK
).values_list("id", flat=True)
)
elif execution_entity == ExecutionEntity.WORKFLOW:
elif execution_entity == ExecutionEntity.WORKFLOW.value:
queryset = queryset.filter(
pipeline_id=None,
workflow_id__in=Workflow.objects.values_list("id", flat=True),
Expand Down
20 changes: 20 additions & 0 deletions backend/workflow_manager/file_execution/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import uuid
from datetime import timedelta
from typing import Optional

from django.db import models
from django.utils import timezone
from utils.common_utils import CommonUtils
from utils.models.base_model import BaseModel
from workflow_manager.workflow_v2.enums import ExecutionStatus
from workflow_manager.workflow_v2.models.execution import WorkflowExecution
Expand Down Expand Up @@ -135,6 +137,24 @@ def update_status(
self.execution_error = execution_error
self.save()

@property
def pretty_file_size(self) -> str:
    """Convert file_size from bytes to human-readable format

    Returns:
        str: File size with a precision of 2 decimals (e.g. "1.50 MB")
    """
    # file_size is persisted in bytes; CommonUtils handles unit scaling.
    return CommonUtils.pretty_file_size(self.file_size)

@property
def pretty_execution_time(self) -> str:
    """Human readable execution time.

    Renders ``execution_time`` (seconds) via ``timedelta`` and strips any
    fractional-second part, e.g. ``"0:01:05"``. Note str(timedelta) does
    not zero-pad hours and renders >24h as "N day(s), H:MM:SS".

    Returns:
        str: Elapsed time in H:MM:SS form
    """
    elapsed = timedelta(seconds=self.execution_time)
    # Drop microseconds: "0:00:01.250000" -> "0:00:01".
    return str(elapsed).split(".")[0]

class Meta:
verbose_name = "Workflow File Execution"
verbose_name_plural = "Workflow File Executions"
Expand Down
7 changes: 5 additions & 2 deletions backend/workflow_manager/workflow_v2/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ def publish_final_workflow_logs(
Returns:
None
"""
# To not associate final logs with a file execution
self.file_execution_id = None
self.publish_update_log(LogState.END_WORKFLOW, "1", LogComponent.STATUS_BAR)
self.publish_update_log(
LogState.SUCCESS, "Executed successfully", LogComponent.WORKFLOW
Expand All @@ -321,12 +323,13 @@ def publish_initial_tool_execution_logs(
Returns:
None
"""
msg = f"Processing file '{file_name}' ({current_file_idx}/{total_files})"
self.publish_update_log(
component=LogComponent.STATUS_BAR,
state=LogState.MESSAGE,
message=f"Processing file {file_name} {current_file_idx}/{total_files}",
message=msg,
)
self.publish_log(f"Processing file {file_name}")
self.publish_log(msg)

def execute_input_file(
self,
Expand Down
10 changes: 10 additions & 0 deletions backend/workflow_manager/workflow_v2/models/execution.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import uuid
from datetime import timedelta
from typing import Optional

from api_v2.models import APIDeployment
Expand Down Expand Up @@ -129,6 +130,15 @@ def pipeline_name(self) -> Optional[str]:

return None

@property
def pretty_execution_time(self) -> str:
    """Human readable execution time.

    Builds a ``timedelta`` from ``execution_time`` (seconds) and returns
    its string form with any microsecond fraction removed.

    Returns:
        str: Elapsed time in H:MM:SS form, e.g. ``"0:02:30"``
    """
    # str(timedelta) -> "H:MM:SS[.ffffff]"; keep only the whole seconds.
    formatted = str(timedelta(seconds=self.execution_time))
    return formatted.split(".")[0]

def __str__(self) -> str:
return (
f"Workflow execution: {self.id} ("
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,6 @@ def _execute_step(
Args:
step (int): workflow step
sandbox (ToolSandbox): instance of tool sandbox
execution_type (ExecutionType): step or complete
last_step_output (list[Any]): output of previous step

Raises:
error: _description_
Expand Down