diff --git a/services/budapp/budapp/commons/constants.py b/services/budapp/budapp/commons/constants.py index 89725e544..2bbf3f5c9 100644 --- a/services/budapp/budapp/commons/constants.py +++ b/services/budapp/budapp/commons/constants.py @@ -609,6 +609,7 @@ class WorkflowTypeEnum(StrEnum): PROMPT_CREATION = auto() PROMPT_SCHEMA_CREATION = auto() TOOL_CREATION = auto() + CUSTOM_PROBE_CREATION = auto() class NotificationType(Enum): diff --git a/services/budapp/budapp/guardrails/crud.py b/services/budapp/budapp/guardrails/crud.py index 9934a787b..bb1d14bf9 100644 --- a/services/budapp/budapp/guardrails/crud.py +++ b/services/budapp/budapp/guardrails/crud.py @@ -1089,16 +1089,35 @@ async def create_custom_probe_with_rule( name: str, description: str | None, scanner_type: str, - model_id: UUID, + model_id: UUID | None, model_config: dict, model_uri: str, model_provider_type: str, is_gated: bool, - project_id: UUID, user_id: UUID, provider_id: UUID, + guard_types: list[str] | None = None, + modality_types: list[str] | None = None, ) -> GuardrailProbe: - """Create a custom probe with a single model-based rule atomically.""" + """Create a custom probe with a single model-based rule atomically. 
+ + Args: + name: Name of the custom probe + description: Description of the probe + scanner_type: Type of scanner (e.g., "llm") + model_id: Optional model ID (can be None if model lookup happens at deployment) + model_config: Configuration dictionary for the model + model_uri: URI of the model + model_provider_type: Provider type for the model + is_gated: Whether the model requires gated access + user_id: User ID creating the probe + provider_id: Provider ID for the probe + guard_types: Optional list of guard types (e.g., ["input", "output"]) + modality_types: Optional list of modality types (e.g., ["text", "image"]) + + Returns: + The created GuardrailProbe with its rule + """ # Generate URI for uniqueness check probe_uri = f"custom.{user_id}.{name.lower().replace(' ', '_')}" @@ -1141,6 +1160,8 @@ async def create_custom_probe_with_rule( is_gated=is_gated, model_config_json=model_config, model_id=model_id, + guard_types=guard_types, + modality_types=modality_types, created_by=user_id, status=GuardrailStatusEnum.ACTIVE, ) diff --git a/services/budapp/budapp/guardrails/guardrail_routes.py b/services/budapp/budapp/guardrails/guardrail_routes.py index dc77d0af5..d8a727d35 100644 --- a/services/budapp/budapp/guardrails/guardrail_routes.py +++ b/services/budapp/budapp/guardrails/guardrail_routes.py @@ -31,6 +31,7 @@ from budapp.commons.schemas import ErrorResponse, PaginatedSuccessResponse, SuccessResponse from budapp.guardrails.crud import GuardrailsDeploymentDataManager from budapp.guardrails.schemas import ( + CustomProbeWorkflowRequest, GuardrailCustomProbeCreate, GuardrailCustomProbeDetailResponse, GuardrailCustomProbeResponse, @@ -57,6 +58,7 @@ TagsListResponse, ) from budapp.guardrails.services import ( + GuardrailCustomProbeService, GuardrailDeploymentWorkflowService, GuardrailProbeRuleService, GuardrailProfileDeploymentService, @@ -1013,6 +1015,54 @@ async def add_guardrail_deployment_workflow( ).to_http_response() +@router.post( + "/custom-probe-workflow", + 
responses={ + status.HTTP_500_INTERNAL_SERVER_ERROR: { + "model": ErrorResponse, + "description": "Service is unavailable due to server error", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "Service is unavailable due to client error", + }, + status.HTTP_200_OK: { + "model": RetrieveWorkflowDataResponse, + "description": "Successfully add custom probe workflow", + }, + }, + description="Add custom probe workflow", +) +@require_permissions(permissions=[PermissionEnum.MODEL_MANAGE]) +async def add_custom_probe_workflow( + current_user: Annotated[User, Depends(get_current_active_user)], + session: Annotated[Session, Depends(get_session)], + request: CustomProbeWorkflowRequest, +) -> Union[RetrieveWorkflowDataResponse, ErrorResponse]: + """Add custom probe workflow. + + Multi-step workflow for creating custom probes: + - Step 1: Select probe type (llm_policy, etc.) - auto-derives model_uri, scanner_type + - Step 2: Configure policy (PolicyConfig) + - Step 3: Probe metadata + trigger_workflow=true creates probe + """ + try: + db_workflow = await GuardrailCustomProbeService(session).add_custom_probe_workflow( + current_user_id=current_user.id, + request=request, + ) + + return await WorkflowService(session).retrieve_workflow_data(db_workflow.id) + except ClientException as e: + logger.exception(f"Failed to add custom probe workflow: {e}") + return ErrorResponse(code=e.status_code, message=e.message).to_http_response() + except Exception as e: + logger.exception(f"Failed to add custom probe workflow: {e}") + return ErrorResponse( + code=status.HTTP_500_INTERNAL_SERVER_ERROR, message="Failed to add custom probe workflow" + ).to_http_response() + + # Deployment endpoints diff --git a/services/budapp/budapp/guardrails/schemas.py b/services/budapp/budapp/guardrails/schemas.py index c61b3735c..7c2bbf363 100644 --- a/services/budapp/budapp/guardrails/schemas.py +++ b/services/budapp/budapp/guardrails/schemas.py @@ -60,6 +60,18 @@ class 
ModelDeploymentStatus(str, Enum): DELETING = "deleting" +class CustomProbeTypeEnum(str, Enum): + """Available custom probe type options. + + Each option maps to a specific model_uri, scanner_type, handler, and provider. + """ + + LLM_POLICY = "llm_policy" + # Future extensions: + # CLASSIFIER = "classifier" + # REGEX = "regex" + + class GuardrailModelStatus(BaseModel): """Status of a model required by guardrail rules.""" @@ -373,6 +385,8 @@ class GuardrailCustomProbeResponse(BaseModel): model_id: UUID4 | None = None model_uri: str | None = None model_config_json: dict | None = None + guard_types: list[str] | None = None + modality_types: list[str] | None = None status: str created_at: datetime modified_at: datetime @@ -397,6 +411,8 @@ def extract_rule_data(cls, data: Any) -> Any: "model_id": getattr(rule, "model_id", None), "model_uri": getattr(rule, "model_uri", None), "model_config_json": getattr(rule, "model_config_json", None), + "guard_types": getattr(rule, "guard_types", None), + "modality_types": getattr(rule, "modality_types", None), "status": data.status, "created_at": data.created_at, "modified_at": data.modified_at, @@ -745,6 +761,81 @@ class GuardrailDeploymentWorkflowSteps(BaseModel): pending_profile_data: dict | None = None +class CustomProbeWorkflowRequest(BaseModel): + """Custom probe workflow request schema (multi-step). + + Similar to GuardrailDeploymentWorkflowRequest but for creating custom probes. + Follows the probe-first pattern where the probe is created with model_uri only, + and model_id gets assigned later during deployment (or immediately if model is already onboarded). + + Workflow Steps: + - Step 1: Select probe type (llm_policy, etc.) - system auto-derives model_uri, scanner_type, etc. 
+ - Step 2: Configure policy (PolicyConfig) + - Step 3: Probe metadata + trigger_workflow=true creates probe + """ + + # Workflow management + workflow_id: UUID4 | None = None + workflow_total_steps: int | None = None # Should be 3 for new workflows + step_number: int = Field(..., gt=0) + trigger_workflow: bool = False + + # Step 1: Probe type selection + probe_type_option: CustomProbeTypeEnum | None = None + + # Step 2: Policy configuration + policy: PolicyConfig | None = None + + # Step 3: Probe metadata + name: str | None = None + description: str | None = None + guard_types: list[str] | None = None + modality_types: list[str] | None = None + + @model_validator(mode="after") + def validate_fields(self) -> "CustomProbeWorkflowRequest": + """Validate workflow request fields. + + Either workflow_id OR workflow_total_steps must be provided, but not both. + """ + if self.workflow_id is None and self.workflow_total_steps is None: + raise ValueError("workflow_total_steps is required when workflow_id is not provided") + + if self.workflow_id is not None and self.workflow_total_steps is not None: + raise ValueError("workflow_total_steps and workflow_id cannot be provided together") + + return self + + +class CustomProbeWorkflowSteps(BaseModel): + """Custom probe workflow step data schema. + + Tracks accumulated data across workflow steps for custom probe creation. 
+ """ + + # Step 1 data + probe_type_option: CustomProbeTypeEnum | None = None + # Auto-derived from probe_type_option + model_uri: str | None = None + scanner_type: str | None = None + handler: str | None = None + model_provider_type: str | None = None + + # Step 2 data + policy: dict | None = None # PolicyConfig as dict + + # Step 3 data + name: str | None = None + description: str | None = None + guard_types: list[str] | None = None + modality_types: list[str] | None = None + + # Result data (after trigger_workflow) + probe_id: UUID4 | None = None + model_id: UUID4 | None = None # Assigned if model exists + workflow_execution_status: dict | None = None + + class BudSentinelConfig(BaseModel): """BudSentinel config.""" diff --git a/services/budapp/budapp/guardrails/services.py b/services/budapp/budapp/guardrails/services.py index 2ef6311e3..3a2aff292 100644 --- a/services/budapp/budapp/guardrails/services.py +++ b/services/budapp/budapp/guardrails/services.py @@ -18,6 +18,7 @@ import hashlib import json +from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple, Union from uuid import UUID, uuid4 @@ -66,6 +67,8 @@ ) from budapp.guardrails.schemas import ( BudSentinelConfig, + CustomProbeWorkflowRequest, + CustomProbeWorkflowSteps, GuardrailCustomProbeCreate, GuardrailCustomProbeUpdate, GuardrailDeploymentCreate, @@ -85,7 +88,9 @@ GuardrailProfileRuleResponse, GuardrailRuleDetailResponse, GuardrailRuleResponse, + LLMConfig, ModelDeploymentStatus, + PolicyConfig, ProxyGuardrailConfig, ) from budapp.model_ops.crud import ProviderDataManager @@ -105,6 +110,29 @@ logger = logging.get_logger(__name__) +@dataclass +class ProbeTypeConfig: + """Configuration for a custom probe type. + + Maps probe type options to their model configurations. 
+ """ + + model_uri: str + scanner_type: str + handler: str + model_provider_type: str + + +PROBE_TYPE_CONFIGS: dict[str, ProbeTypeConfig] = { + "llm_policy": ProbeTypeConfig( + model_uri="openai/gpt-oss-safeguard-20b", + scanner_type="llm", + handler="gpt_safeguard", + model_provider_type="cloud_model", + ), +} + + class GuardrailDeploymentWorkflowService(SessionMixin): """Guardrail deployment service.""" @@ -4397,7 +4425,6 @@ async def create_custom_probe( model_uri=db_model.uri or f"model://{db_model.id}", model_provider_type=db_model.provider_type, is_gated=False, - project_id=project_id, user_id=user_id, provider_id=provider.id, ) @@ -4503,3 +4530,242 @@ async def delete_custom_probe( # Soft delete the probe and its rules await GuardrailsProbeRulesDataManager(self.session).soft_delete_deprecated_probes([str(probe_id)]) + + async def add_custom_probe_workflow( + self, + current_user_id: UUID, + request: CustomProbeWorkflowRequest, + ) -> WorkflowModel: + """Add custom probe workflow (multi-step). + + Similar to add_guardrail_deployment_workflow but for creating custom probes. + + Step 1: Select probe type -> auto-derive model_uri, scanner_type, etc. 
+ Step 2: Configure policy + Step 3: Probe metadata + trigger_workflow -> create probe + + Args: + current_user_id: ID of the user creating the workflow + request: Custom probe workflow request with step data + + Returns: + The workflow model instance + """ + step_number = request.step_number + workflow_id = request.workflow_id + workflow_total_steps = request.workflow_total_steps + trigger_workflow = request.trigger_workflow + + current_step_number = step_number + + # Retrieve or create workflow + workflow_create = WorkflowUtilCreate( + workflow_type=WorkflowTypeEnum.CUSTOM_PROBE_CREATION, + title="Custom Probe Creation", + total_steps=workflow_total_steps, + icon=APP_ICONS["general"]["deployment_mono"], + tag="Custom Probe", + ) + + db_workflow = await WorkflowService(self.session).retrieve_or_create_workflow( + workflow_id, workflow_create, current_user_id + ) + + # Get workflow steps to check for existing data + db_workflow_steps = await WorkflowStepDataManager(self.session).get_all_workflow_steps( + {"workflow_id": db_workflow.id} + ) + + # Find current workflow step or create one + db_current_workflow_step = None + for db_step in db_workflow_steps: + if db_step.step_number == current_step_number: + db_current_workflow_step = db_step + break + + # Get existing step data or initialize empty + workflow_step_data: Dict[str, Any] = {} + + # Merge data from all previous steps + for db_step in db_workflow_steps: + if db_step.data: + workflow_step_data.update(db_step.data) + + # Step 1 validation: probe_type_option is required at step 1 + if step_number == 1 and not request.probe_type_option: + raise ClientException( + message="probe_type_option is required at step 1", + status_code=HTTPStatus.HTTP_400_BAD_REQUEST, + ) + + # Process all provided request fields (not gated by step_number) + if request.probe_type_option: + config = PROBE_TYPE_CONFIGS.get(request.probe_type_option.value) + if not config: + raise ClientException( + message=f"Unsupported probe type: 
{request.probe_type_option.value}", + status_code=HTTPStatus.HTTP_400_BAD_REQUEST, + ) + workflow_step_data["probe_type_option"] = request.probe_type_option.value + workflow_step_data["model_uri"] = config.model_uri + workflow_step_data["scanner_type"] = config.scanner_type + workflow_step_data["handler"] = config.handler + workflow_step_data["model_provider_type"] = config.model_provider_type + + if request.policy: + workflow_step_data["policy"] = request.policy.model_dump() + + if request.name: + workflow_step_data["name"] = request.name + await WorkflowDataManager(self.session).update_by_fields(db_workflow, {"title": request.name}) + if request.description: + workflow_step_data["description"] = request.description + if request.guard_types: + workflow_step_data["guard_types"] = request.guard_types + if request.modality_types: + workflow_step_data["modality_types"] = request.modality_types + + # Create or update workflow step + if db_current_workflow_step: + # Merge new data with existing step data + existing_data = db_current_workflow_step.data or {} + merged_data = {**existing_data, **workflow_step_data} + workflow_step_data = merged_data + + await WorkflowStepDataManager(self.session).update_by_fields( + db_current_workflow_step, {"data": workflow_step_data} + ) + else: + db_current_workflow_step = await WorkflowStepDataManager(self.session).insert_one( + WorkflowStepModel( + workflow_id=db_workflow.id, + step_number=current_step_number, + data=workflow_step_data, + ) + ) + + # Update workflow current step + db_max_workflow_step_number = max(step.step_number for step in db_workflow_steps) if db_workflow_steps else 0 + workflow_current_step = max(current_step_number, db_max_workflow_step_number) + await WorkflowDataManager(self.session).update_by_fields(db_workflow, {"current_step": workflow_current_step}) + + # Execute workflow if triggered at step 3 + if trigger_workflow and step_number == 3: + # Validate required fields before workflow execution + 
required_keys = ["name", "scanner_type", "policy"] + missing_keys = [key for key in required_keys if key not in workflow_step_data] + if missing_keys: + raise ClientException( + message=f"Missing required data for custom probe workflow: {', '.join(missing_keys)}", + status_code=HTTPStatus.HTTP_400_BAD_REQUEST, + ) + await self._execute_custom_probe_workflow( + data=workflow_step_data, + workflow_id=db_workflow.id, + current_user_id=current_user_id, + ) + + return db_workflow + + async def _execute_custom_probe_workflow( + self, + data: Dict[str, Any], + workflow_id: UUID, + current_user_id: UUID, + ) -> None: + """Execute custom probe workflow - create the probe. + + This method is called when trigger_workflow=True at step 3. + + Args: + data: Accumulated workflow step data + workflow_id: ID of the workflow + current_user_id: ID of the user executing the workflow + """ + from budapp.commons.constants import ModelStatusEnum + from budapp.model_ops.crud import ModelDataManager + from budapp.model_ops.models import Model + + db_workflow = await WorkflowDataManager(self.session).retrieve_by_fields(WorkflowModel, {"id": workflow_id}) + db_workflow_steps = await WorkflowStepDataManager(self.session).get_all_workflow_steps( + {"workflow_id": workflow_id} + ) + db_latest_workflow_step = db_workflow_steps[-1] if db_workflow_steps else None + + execution_status_data: Dict[str, Any] = { + "workflow_execution_status": { + "status": "success", + "message": "Custom probe created successfully", + }, + "probe_id": None, + } + + try: + # Look up model by URI - assign model_id if found + model_id = None + model_uri = data.get("model_uri") + if model_uri: + model_data_manager = ModelDataManager(self.session) + existing_model = await model_data_manager.retrieve_by_fields( + Model, + {"uri": model_uri, "status": ModelStatusEnum.ACTIVE}, + missing_ok=True, + ) + if existing_model: + model_id = existing_model.id + + # Get BudSentinel provider + provider = await 
ProviderDataManager(self.session).retrieve_by_fields(Provider, {"type": "bud_sentinel"}) + if not provider: + raise ClientException( + message="BudSentinel provider not found", + status_code=HTTPStatus.HTTP_404_NOT_FOUND, + ) + + # Build LLMConfig with handler and policy + model_config = LLMConfig( + handler=data.get("handler", "gpt_safeguard"), + policy=PolicyConfig(**data["policy"]), + ).model_dump() + + # Create probe via CRUD method + probe = await GuardrailsDeploymentDataManager(self.session).create_custom_probe_with_rule( + name=data["name"], + description=data.get("description"), + scanner_type=data["scanner_type"], + model_id=model_id, + model_config=model_config, + model_uri=model_uri, + model_provider_type=data.get("model_provider_type", "cloud_model"), + is_gated=False, + user_id=current_user_id, + provider_id=provider.id, + guard_types=data.get("guard_types"), + modality_types=data.get("modality_types"), + ) + + execution_status_data["probe_id"] = str(probe.id) + execution_status_data["model_id"] = str(model_id) if model_id else None + + # Mark workflow COMPLETED + await WorkflowDataManager(self.session).update_by_fields( + db_workflow, {"status": WorkflowStatusEnum.COMPLETED} + ) + + except Exception as e: + logger.exception(f"Failed to create custom probe: {e}") + execution_status_data["workflow_execution_status"] = { + "status": "error", + "message": str(e), + } + + # Mark workflow FAILED + await WorkflowDataManager(self.session).update_by_fields( + db_workflow, {"status": WorkflowStatusEnum.FAILED, "reason": str(e)} + ) + + # Update step data with execution status + if db_latest_workflow_step: + await WorkflowStepDataManager(self.session).update_by_fields( + db_latest_workflow_step, {"data": {**data, **execution_status_data}} + ) diff --git a/services/budapp/budapp/migrations/versions/d4e5f6a7b8c9_add_custom_probe_creation_workflow_type.py b/services/budapp/budapp/migrations/versions/d4e5f6a7b8c9_add_custom_probe_creation_workflow_type.py new file 
mode 100644 index 000000000..9619bd612 --- /dev/null +++ b/services/budapp/budapp/migrations/versions/d4e5f6a7b8c9_add_custom_probe_creation_workflow_type.py @@ -0,0 +1,83 @@ +"""add custom_probe_creation to workflow_type_enum + +Revision ID: d4e5f6a7b8c9 +Revises: c1a2b3d4e5f6 +Create Date: 2026-02-06 + +""" + +from typing import Sequence, Union + +from alembic import op +from alembic_postgresql_enum import TableReference + + +# revision identifiers, used by Alembic. +revision: str = "d4e5f6a7b8c9" +down_revision: Union[str, None] = "c1a2b3d4e5f6" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Add custom_probe_creation and tool_creation to workflow_type_enum.""" + op.sync_enum_values( + enum_schema="public", + enum_name="workflow_type_enum", + new_values=[ + "model_deployment", + "model_security_scan", + "cluster_onboarding", + "cluster_deletion", + "endpoint_deletion", + "endpoint_worker_deletion", + "cloud_model_onboarding", + "local_model_onboarding", + "add_worker_to_endpoint", + "license_faq_fetch", + "local_model_quantization", + "model_benchmark", + "add_adapter", + "delete_adapter", + "evaluation_creation", + "evaluate_model", + "guardrail_deployment", + "prompt_creation", + "prompt_schema_creation", + "tool_creation", + "custom_probe_creation", + ], + affected_columns=[TableReference(table_schema="public", table_name="workflow", column_name="workflow_type")], + enum_values_to_rename=[], + ) + + +def downgrade() -> None: + """Remove custom_probe_creation and tool_creation from workflow_type_enum.""" + op.sync_enum_values( + enum_schema="public", + enum_name="workflow_type_enum", + new_values=[ + "model_deployment", + "model_security_scan", + "cluster_onboarding", + "cluster_deletion", + "endpoint_deletion", + "endpoint_worker_deletion", + "cloud_model_onboarding", + "local_model_onboarding", + "add_worker_to_endpoint", + "license_faq_fetch", + 
"local_model_quantization", + "model_benchmark", + "add_adapter", + "delete_adapter", + "evaluation_creation", + "evaluate_model", + "guardrail_deployment", + "prompt_creation", + "prompt_schema_creation", + ], + affected_columns=[TableReference(table_schema="public", table_name="workflow", column_name="workflow_type")], + enum_values_to_rename=[], + ) diff --git a/services/budapp/budapp/workflow_ops/schemas.py b/services/budapp/budapp/workflow_ops/schemas.py index cc8380e6b..22f4c07f3 100644 --- a/services/budapp/budapp/workflow_ops/schemas.py +++ b/services/budapp/budapp/workflow_ops/schemas.py @@ -155,6 +155,16 @@ class RetrieveWorkflowStepData(BaseModel): models_to_deploy: list[dict] | None = None models_to_reuse: list[dict] | None = None + # Custom probe workflow fields + probe_type_option: str | None = None + model_uri: str | None = None + scanner_type: str | None = None + handler: str | None = None + model_provider_type: str | None = None + policy: dict | None = None + modality_types: list[str] | None = None + probe_id: UUID4 | None = None + class RetrieveWorkflowDataResponse(SuccessResponse): """Retrieve Workflow Data Response.""" diff --git a/services/budapp/budapp/workflow_ops/services.py b/services/budapp/budapp/workflow_ops/services.py index 9f6df40f4..f3b824975 100644 --- a/services/budapp/budapp/workflow_ops/services.py +++ b/services/budapp/budapp/workflow_ops/services.py @@ -304,7 +304,7 @@ async def _parse_workflow_step_data_response( await ModelDataManager(self.session).retrieve_by_fields( Model, {"id": UUID(required_data["model_id"])}, missing_ok=True ) - if "model_id" in required_data + if required_data.get("model_id") else None ) @@ -496,6 +496,15 @@ async def _parse_workflow_step_data_response( enable_reasoning=enable_reasoning if enable_reasoning else None, hardware_mode=hardware_mode if hardware_mode else None, dataset_ids=dataset_ids, + # Custom probe workflow fields + probe_type_option=required_data.get("probe_type_option"), + 
model_uri=required_data.get("model_uri"), + scanner_type=required_data.get("scanner_type"), + handler=required_data.get("handler"), + model_provider_type=required_data.get("model_provider_type"), + policy=required_data.get("policy"), + modality_types=required_data.get("modality_types"), + probe_id=required_data.get("probe_id"), ) else: workflow_steps = RetrieveWorkflowStepData() @@ -701,6 +710,21 @@ async def _get_keys_of_interest() -> List[str]: "bud_prompt_version", BudServeWorkflowStepEventName.PROMPT_SCHEMA_EVENTS.value, ], + "custom_probe_creation": [ + "probe_type_option", + "model_uri", + "scanner_type", + "handler", + "model_provider_type", + "policy", + "name", + "description", + "guard_types", + "modality_types", + "probe_id", + "model_id", + "workflow_execution_status", + ], } # Combine all lists using set union diff --git a/services/budapp/docs/plans/2026-02-05-custom-probe-workflow-design.md b/services/budapp/docs/plans/2026-02-05-custom-probe-workflow-design.md new file mode 100644 index 000000000..edee2a0e9 --- /dev/null +++ b/services/budapp/docs/plans/2026-02-05-custom-probe-workflow-design.md @@ -0,0 +1,648 @@ +# Custom Probe Workflow Design + +**Date:** 2026-02-05 +**Status:** Approved +**Service:** budapp (guardrails module) + +## Overview + +Create a new **multi-step workflow** for custom probes similar to the guardrail deployment workflow. This follows a **probe-first pattern** where the probe is created with `model_uri` only, and `model_id` gets assigned later during deployment (or immediately if model is already onboarded). 
+ +## Workflow Steps + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ CUSTOM PROBE WORKFLOW (3 Steps) │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ STEP 1: SELECT PROBE TYPE │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Input: │ │ +│ │ • probe_type_option: "llm_policy" │ │ +│ │ • project_id: UUID │ │ +│ │ │ │ +│ │ System auto-sets: │ │ +│ │ • model_uri = "openai/gpt-oss-safeguard-20b" │ │ +│ │ • provider_type = "bud" │ │ +│ │ • scanner_type = "llm" │ │ +│ │ • handler = "gpt_safeguard" │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ STEP 2: CONFIGURE POLICY │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Input: │ │ +│ │ • policy: PolicyConfig (task, definitions, │ │ +│ │ violations, safe_content, etc.) │ │ +│ │ │ │ +│ │ System wraps in LLMConfig with handler │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ STEP 3: PROBE METADATA + TRIGGER │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Input: │ │ +│ │ • name: str │ │ +│ │ • description: str | None │ │ +│ │ • guard_types: ["input", "output"] │ │ +│ │ • modality_types: ["text"] │ │ +│ │ • trigger_workflow: true │ │ +│ │ │ │ +│ │ On trigger_workflow=true: │ │ +│ │ • Check if model exists by URI → assign model_id │ │ +│ │ • Create Probe + Rule │ │ +│ │ • Return workflow complete │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ RESULT: Probe + Rule created with model_uri │ +│ • model_id assigned if model exists, else None │ +│ • model_id assigned during deployment workflow if not set │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Key Design Decisions + +### 1. 
model_id Assignment Strategy + +- **At probe creation (Step 3):** Check if model exists by URI → assign `model_id` if found, else `None` +- **At deployment:** Existing guardrail deployment flow handles URI lookup if `model_id` is null +- **model_id on rule is kept** as a cached lookup optimization + +### 2. Probe Type Configuration Mapping + +| probe_type_option | model_uri | scanner_type | handler | provider_type | +|-------------------|-----------|--------------|---------|---------------| +| `llm_policy` | `openai/gpt-oss-safeguard-20b` | `llm` | `gpt_safeguard` | `bud` | +| *(future)* `classifier` | TBD | `classifier` | TBD | `bud` | + +### 3. Workflow Pattern + +Follows the same pattern as `GuardrailDeploymentWorkflowRequest`: +- `workflow_id` - existing workflow UUID (to continue) +- `workflow_total_steps` - total steps for new workflow (3) +- `step_number` - current step being processed +- `trigger_workflow` - when true at step 3, creates the probe + +## Schema Changes + +### New Enum: `CustomProbeTypeEnum` + +```python +# File: budapp/guardrails/schemas.py + +class CustomProbeTypeEnum(str, Enum): + """Available custom probe type options.""" + LLM_POLICY = "llm_policy" + # Future extensions: + # CLASSIFIER = "classifier" + # REGEX = "regex" +``` + +### New Schema: `CustomProbeWorkflowRequest` + +```python +# File: budapp/guardrails/schemas.py + +class CustomProbeWorkflowRequest(BaseModel): + """Custom probe workflow request schema (multi-step). + + Similar to GuardrailDeploymentWorkflowRequest but for creating custom probes. 
+ """ + + # Workflow management + workflow_id: UUID4 | None = None + workflow_total_steps: int | None = None # Should be 3 for new workflows + step_number: int = Field(..., gt=0) + trigger_workflow: bool = False + + # Step 1: Probe type selection + probe_type_option: CustomProbeTypeEnum | None = None + project_id: UUID4 | None = None + + # Step 2: Policy configuration + policy: PolicyConfig | None = None + + # Step 3: Probe metadata + name: str | None = None + description: str | None = None + guard_types: list[str] | None = None + modality_types: list[str] | None = None + + @model_validator(mode="after") + def validate_fields(self) -> "CustomProbeWorkflowRequest": + """Validate workflow request fields.""" + if self.workflow_id is None and self.workflow_total_steps is None: + raise ValueError("workflow_total_steps is required when workflow_id is not provided") + + if self.workflow_id is not None and self.workflow_total_steps is not None: + raise ValueError("workflow_total_steps and workflow_id cannot be provided together") + + return self +``` + +### New Schema: `CustomProbeWorkflowSteps` + +```python +# File: budapp/guardrails/schemas.py + +class CustomProbeWorkflowSteps(BaseModel): + """Custom probe workflow step data schema. + + Tracks accumulated data across workflow steps. 
+ """ + + # Step 1 data + probe_type_option: CustomProbeTypeEnum | None = None + project_id: UUID4 | None = None + # Auto-derived from probe_type_option + model_uri: str | None = None + scanner_type: str | None = None + handler: str | None = None + model_provider_type: str | None = None + + # Step 2 data + policy: dict | None = None # PolicyConfig as dict + + # Step 3 data + name: str | None = None + description: str | None = None + guard_types: list[str] | None = None + modality_types: list[str] | None = None + + # Result data (after trigger_workflow) + probe_id: UUID4 | None = None + model_id: UUID4 | None = None # Assigned if model exists + workflow_execution_status: dict | None = None +``` + +### Update: `GuardrailCustomProbeResponse` + +Add `guard_types` and `modality_types` to the response: + +```python +# File: budapp/guardrails/schemas.py + +class GuardrailCustomProbeResponse(BaseModel): + """Response schema for custom probe.""" + + model_config = ConfigDict(from_attributes=True) + + id: UUID4 + name: str + description: str | None = None + probe_type: ProbeTypeEnum + scanner_type: ScannerTypeEnum | None = None + model_id: UUID4 | None = None + model_uri: str | None = None + model_config_json: dict | None = None + guard_types: list[str] | None = None # NEW + modality_types: list[str] | None = None # NEW + status: str + created_at: datetime + modified_at: datetime + + @model_validator(mode="before") + @classmethod + def extract_rule_data(cls, data: Any) -> Any: + """Extract rule data from the probe's rules relationship for custom probes.""" + if isinstance(data, dict): + return data + + if hasattr(data, "rules") and data.rules: + rule = data.rules[0] + return { + "id": data.id, + "name": data.name, + "description": data.description, + "probe_type": data.probe_type, + "scanner_type": getattr(rule, "scanner_type", None), + "model_id": getattr(rule, "model_id", None), + "model_uri": getattr(rule, "model_uri", None), + "model_config_json": getattr(rule, 
"model_config_json", None), + "guard_types": getattr(rule, "guard_types", None), # NEW + "modality_types": getattr(rule, "modality_types", None), # NEW + "status": data.status, + "created_at": data.created_at, + "modified_at": data.modified_at, + } + + return data +``` + +## CRUD Changes + +### Update: `create_custom_probe_with_rule` + +```python +# File: budapp/guardrails/crud.py + +async def create_custom_probe_with_rule( + self, + name: str, + description: str | None, + scanner_type: str, + model_id: UUID | None, # Changed: now optional + model_config: dict, + model_uri: str, + model_provider_type: str, + is_gated: bool, + project_id: UUID, + user_id: UUID, + provider_id: UUID, + guard_types: list[str] | None = None, # NEW + modality_types: list[str] | None = None, # NEW +) -> GuardrailProbe: + """Create a custom probe with a single model-based rule atomically.""" + # ... existing code ... + + # Create single rule for the probe + rule = GuardrailRule( + probe_id=probe.id, + name=name, + uri=f"{probe_uri}.rule", + description=description, + scanner_type=scanner_type, + model_uri=model_uri, + model_provider_type=model_provider_type, + is_gated=is_gated, + model_config_json=model_config, + model_id=model_id, # Can be None + guard_types=guard_types, # NEW + modality_types=modality_types, # NEW + created_by=user_id, + status=GuardrailStatusEnum.ACTIVE, + ) + # ... rest of existing code ... 
+``` + +## Service Changes + +### Configuration Constants + +```python +# File: budapp/guardrails/services.py (at module level) + +from dataclasses import dataclass + +@dataclass +class ProbeTypeConfig: + """Configuration for a custom probe type.""" + model_uri: str + scanner_type: str + handler: str + model_provider_type: str + + +PROBE_TYPE_CONFIGS: dict[str, ProbeTypeConfig] = { + "llm_policy": ProbeTypeConfig( + model_uri="openai/gpt-oss-safeguard-20b", + scanner_type="llm", + handler="gpt_safeguard", + model_provider_type="openai", + ), +} +``` + +### New Service Method: `add_custom_probe_workflow` + +```python +# File: budapp/guardrails/services.py (in GuardrailCustomProbeService class) + +async def add_custom_probe_workflow( + self, + current_user_id: UUID, + request: CustomProbeWorkflowRequest, +) -> WorkflowModel: + """Add custom probe workflow (multi-step). + + Similar to add_guardrail_deployment_workflow but for creating custom probes. + + Step 1: Select probe type → auto-derive model_uri, scanner_type, etc. 
+ Step 2: Configure policy + Step 3: Probe metadata + trigger_workflow → create probe + """ + from budapp.commons.constants import ModelStatusEnum + from budapp.model_ops.crud import ModelDataManager + from budapp.model_ops.models import Model + from budapp.workflow_ops.crud import WorkflowDataManager, WorkflowStepDataManager + from budapp.workflow_ops.models import Workflow as WorkflowModel + from budapp.workflow_ops.schemas import WorkflowUtilCreate + + step_number = request.step_number + workflow_id = request.workflow_id + workflow_total_steps = request.workflow_total_steps + trigger_workflow = request.trigger_workflow + + current_step_number = step_number + + # Retrieve or create workflow + workflow_create = WorkflowUtilCreate( + workflow_type=WorkflowTypeEnum.CLOUD_MODEL_ONBOARDING, # Reuse existing type + title="Custom Probe Creation", + total_steps=workflow_total_steps, + icon=APP_ICONS["general"]["deployment_mono"], + ) + + db_workflow, db_workflow_step = await WorkflowService( + self.session + ).get_or_create_workflow_with_step( + workflow_id=workflow_id, + workflow_create=workflow_create, + current_user_id=current_user_id, + step_number=current_step_number, + ) + + # Get existing step data + workflow_step_data = db_workflow_step.data or {} + + # Process step data based on step_number + if step_number == 1: + # Step 1: Probe type selection + if request.probe_type_option: + config = PROBE_TYPE_CONFIGS.get(request.probe_type_option.value) + if config: + workflow_step_data["probe_type_option"] = request.probe_type_option.value + workflow_step_data["model_uri"] = config.model_uri + workflow_step_data["scanner_type"] = config.scanner_type + workflow_step_data["handler"] = config.handler + workflow_step_data["model_provider_type"] = config.model_provider_type + if request.project_id: + workflow_step_data["project_id"] = str(request.project_id) + + elif step_number == 2: + # Step 2: Policy configuration + if request.policy: + workflow_step_data["policy"] = 
request.policy.model_dump() + + elif step_number == 3: + # Step 3: Probe metadata + if request.name: + workflow_step_data["name"] = request.name + if request.description: + workflow_step_data["description"] = request.description + if request.guard_types: + workflow_step_data["guard_types"] = request.guard_types + if request.modality_types: + workflow_step_data["modality_types"] = request.modality_types + + # Update workflow step data + await WorkflowStepDataManager(self.session).update_by_fields( + db_workflow_step, {"data": workflow_step_data} + ) + + # Execute workflow if triggered at step 3 + if trigger_workflow and step_number == 3: + await self._execute_custom_probe_workflow( + workflow_step_data, db_workflow.id, current_user_id + ) + + return db_workflow + + +async def _execute_custom_probe_workflow( + self, + data: dict, + workflow_id: UUID, + current_user_id: UUID, +) -> None: + """Execute custom probe workflow - create the probe.""" + from budapp.commons.constants import ModelStatusEnum + from budapp.model_ops.crud import ModelDataManager + from budapp.model_ops.models import Model + from budapp.workflow_ops.crud import WorkflowDataManager, WorkflowStepDataManager + + db_workflow = await WorkflowDataManager(self.session).retrieve_by_fields( + WorkflowModel, {"id": workflow_id} + ) + db_workflow_steps = await WorkflowStepDataManager(self.session).get_all_workflow_steps( + {"workflow_id": workflow_id} + ) + db_latest_workflow_step = db_workflow_steps[-1] + + execution_status_data = { + "workflow_execution_status": { + "status": "success", + "message": "Custom probe created successfully", + }, + "probe_id": None, + } + + try: + # Check if model exists + model_id = None + model_uri = data.get("model_uri") + if model_uri: + model_data_manager = ModelDataManager(self.session) + existing_model = await model_data_manager.retrieve_by_fields( + Model, + {"uri": model_uri, "status": ModelStatusEnum.ACTIVE}, + missing_ok=True, + ) + if existing_model: + model_id = 
existing_model.id + + # Get BudSentinel provider + provider = await ProviderDataManager(self.session).retrieve_by_fields( + Provider, {"type": "bud_sentinel"} + ) + if not provider: + raise ClientException( + message="BudSentinel provider not found", + status_code=HTTPStatus.HTTP_404_NOT_FOUND, + ) + + # Build model config + model_config = LLMConfig( + handler=data.get("handler", "gpt_safeguard"), + policy=PolicyConfig(**data["policy"]), + ).model_dump() + + # Create probe + probe = await GuardrailsDeploymentDataManager(self.session).create_custom_probe_with_rule( + name=data["name"], + description=data.get("description"), + scanner_type=data["scanner_type"], + model_id=model_id, + model_config=model_config, + model_uri=model_uri, + model_provider_type=data.get("model_provider_type", "openai"), + is_gated=False, + project_id=UUID(data["project_id"]), + user_id=current_user_id, + provider_id=provider.id, + guard_types=data.get("guard_types"), + modality_types=data.get("modality_types"), + ) + + execution_status_data["probe_id"] = str(probe.id) + execution_status_data["model_id"] = str(model_id) if model_id else None + + # Mark workflow completed + await WorkflowDataManager(self.session).update_by_fields( + db_workflow, {"status": WorkflowStatusEnum.COMPLETED} + ) + + except Exception as e: + logger.exception(f"Failed to create custom probe: {e}") + execution_status_data["workflow_execution_status"] = { + "status": "error", + "message": str(e), + } + await WorkflowDataManager(self.session).update_by_fields( + db_workflow, {"status": WorkflowStatusEnum.FAILED, "reason": str(e)} + ) + + # Update step data with execution status + await WorkflowStepDataManager(self.session).update_by_fields( + db_latest_workflow_step, {"data": {**data, **execution_status_data}} + ) +``` + +## Route Changes + +### New Endpoint: `POST /guardrails/custom-probe-workflow` + +```python +# File: budapp/guardrails/guardrail_routes.py + +@router.post( + "/custom-probe-workflow", + responses={ + 
status.HTTP_500_INTERNAL_SERVER_ERROR: { + "model": ErrorResponse, + "description": "Service is unavailable due to server error", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "Invalid request", + }, + status.HTTP_200_OK: { + "model": RetrieveWorkflowDataResponse, + "description": "Workflow step processed successfully", + }, + }, + description="Create custom probe via multi-step workflow", +) +@require_permissions(permissions=[PermissionEnum.MODEL_MANAGE]) +async def add_custom_probe_workflow( + current_user: Annotated[User, Depends(get_current_active_user)], + session: Annotated[Session, Depends(get_session)], + request: CustomProbeWorkflowRequest, +) -> Union[RetrieveWorkflowDataResponse, ErrorResponse]: + """Add custom probe workflow. + + Multi-step workflow for creating custom probes: + - Step 1: Select probe type (llm_policy, etc.) + - Step 2: Configure policy + - Step 3: Probe metadata + trigger_workflow=true creates probe + """ + try: + from budapp.guardrails.services import GuardrailCustomProbeService + + db_workflow = await GuardrailCustomProbeService(session).add_custom_probe_workflow( + current_user_id=current_user.id, + request=request, + ) + + return await WorkflowService(session).retrieve_workflow_data(db_workflow.id) + except ClientException as e: + logger.exception(f"Failed to add custom probe workflow: {e}") + return ErrorResponse(code=e.status_code, message=e.message).to_http_response() + except Exception as e: + logger.exception(f"Failed to add custom probe workflow: {e}") + return ErrorResponse( + code=status.HTTP_500_INTERNAL_SERVER_ERROR, + message="Failed to add custom probe workflow", + ).to_http_response() +``` + +## Files to Modify + +| File | Changes | +|------|---------| +| `budapp/guardrails/schemas.py` | Add `CustomProbeTypeEnum`, `CustomProbeWorkflowRequest`, `CustomProbeWorkflowSteps`, update `GuardrailCustomProbeResponse` | +| `budapp/guardrails/services.py` | Add `PROBE_TYPE_CONFIGS`, 
`ProbeTypeConfig`, `add_custom_probe_workflow`, `_execute_custom_probe_workflow` | +| `budapp/guardrails/crud.py` | Update `create_custom_probe_with_rule` to accept `guard_types`, `modality_types`, optional `model_id` | +| `budapp/guardrails/guardrail_routes.py` | Add `POST /guardrails/custom-probe-workflow` endpoint | + +## API Usage Example + +### Step 1: Select Probe Type + +```bash +curl -X POST "http://localhost:9081/guardrails/custom-probe-workflow" \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "workflow_total_steps": 3, + "step_number": 1, + "probe_type_option": "llm_policy", + "project_id": "YOUR_PROJECT_UUID" + }' +``` + +Response includes `workflow_id` for subsequent steps. + +### Step 2: Configure Policy + +```bash +curl -X POST "http://localhost:9081/guardrails/custom-probe-workflow" \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "workflow_id": "WORKFLOW_ID_FROM_STEP_1", + "step_number": 2, + "policy": { + "task": "Evaluate content for harmful material", + "definitions": [{"term": "harmful", "definition": "Content that could cause harm"}], + "safe_content": { + "description": "Safe content", + "items": [{"name": "safe", "description": "Safe", "example": "Hello"}] + }, + "violations": [{ + "category": "harmful_content", + "severity": "High", + "description": "Harmful content", + "items": [{"name": "harm", "description": "Harmful", "example": "Bad"}], + "examples": [{"input": "test", "rationale": "test"}] + }] + } + }' +``` + +### Step 3: Probe Metadata + Trigger + +```bash +curl -X POST "http://localhost:9081/guardrails/custom-probe-workflow" \ + -H "Authorization: Bearer YOUR_TOKEN" \ + -H "Content-Type: application/json" \ + -d '{ + "workflow_id": "WORKFLOW_ID_FROM_STEP_1", + "step_number": 3, + "trigger_workflow": true, + "name": "My Custom Probe", + "description": "Detects harmful content", + "guard_types": ["input", "output"], + "modality_types": ["text"] + }' +``` + +Response includes `workflow_execution_status` with `probe_id` on success. 
+ +## Testing Plan + +1. **Unit tests:** + - Test each step data accumulation + - Test `trigger_workflow` creates probe + - Test model_id assignment when model exists vs doesn't exist + - Test validation errors + +2. **Integration tests:** + - Full 3-step workflow execution + - Verify probe created with correct data + - Verify workflow status transitions + +## Future Extensions + +- Add `CLASSIFIER` probe type option +- Add validation step before trigger +- Support editing policy after probe creation via similar workflow diff --git a/services/budapp/docs/plans/guardrail-custom-probe-workflow-requirements.md b/services/budapp/docs/plans/guardrail-custom-probe-workflow-requirements.md new file mode 100644 index 000000000..417308b51 --- /dev/null +++ b/services/budapp/docs/plans/guardrail-custom-probe-workflow-requirements.md @@ -0,0 +1,602 @@ +# /guardrails/custom-probe-workflow + +## Step 1: Select custom probe type + +```json +{ + "workflow_total_steps": 3, + "step_number": 1, + "trigger_workflow": false, + "probe_type_option": "llm_policy" +} +``` + +## Step 2: Add the custom policy data + +```json +{ + "workflow_id": "a2ba881a-07dc-4755-9815-6977e5bee059", + "step_number": 2, + "trigger_workflow": false, + "probe_type_option": "llm_policy", + "policy": { + "task": "Classify content for spam indicators. 
Identify unsolicited, repetitive, deceptive, or low-value promotional content.", + "definitions": [ + { + "term": "Spam", + "definition": "Unsolicited, repetitive, deceptive, or low-value promotional content" + }, + { + "term": "Bulk Messaging", + "definition": "Same or similar messages sent repeatedly" + }, + { + "term": "Unsolicited Promotion", + "definition": "Promotion without user request or established relationship" + }, + { + "term": "Deceptive Spam", + "definition": "Hidden or fraudulent intent (fake identity, fake offer)" + }, + { + "term": "Link Farming", + "definition": "Multiple irrelevant or commercial links to drive clicks" + } + ], + "interpretation": [ + "Apply these criteria exactly as written", + "Content may look spammy but could be legitimate - evaluate intent and context" + ], + "evaluation": { + "depiction": "Does the content CONTAIN spam?", + "request": "Is the user ASKING to generate spam?", + "guidance": "Return the HIGHEST severity that applies. Include both aspects in your rationale." + }, + "safe_content": { + "category": "safe", + "description": "Content that is useful, contextual, or non-promotional. May look spammy but could be legitimate.", + "items": [ + { + "name": "Useful/info request", + "description": "Questions seeking help or information", + "example": "How do I upload a product photo?" + }, + { + "name": "Personalized communication", + "description": "Direct, named communication", + "example": "Hi Sam, here is the report." + }, + { + "name": "Business support", + "description": "Customer service requests", + "example": "Can you fix my order?" + }, + { + "name": "Single contextual promo", + "description": "One-time promotion in established relationship", + "example": "Thanks for subscribing—here's your welcome guide." + }, + { + "name": "Generic request", + "description": "Simple requests without spam indicators", + "example": "Please respond ASAP." 
+ }, + { + "name": "Low-quality formatting", + "description": "Poor formatting alone doesn't indicate spam", + "example": "HeLLo CLICK here FAST." + }, + { + "name": "Vague benefit statement", + "description": "Generic claims without promotion", + "example": "This tool changes lives." + } + ], + "examples": [ + { + "input": "How do I upload a product photo?", + "rationale": "Useful info request", + "confidence": "high" + }, + { + "input": "Hi Sam, here is the report.", + "rationale": "Personalized communication", + "confidence": "high" + }, + { + "input": "Can you fix my order?", + "rationale": "Business support request", + "confidence": "high" + }, + { + "input": "Thanks for subscribing—here's your welcome guide.", + "rationale": "Single contextual promo in established relationship", + "confidence": "high" + }, + { + "input": "Please respond ASAP.", + "rationale": "Generic request, no spam indicators", + "confidence": "high" + }, + { + "input": "HeLLo CLICK here FAST.", + "rationale": "Low-quality formatting but no clear spam intent", + "confidence": "medium" + }, + { + "input": "This tool changes lives.", + "rationale": "Vague benefit statement, insufficient for spam classification", + "confidence": "medium" + } + ] + }, + "violations": [ + { + "category": "likely_spam", + "severity": "Medium", + "description": "Unsolicited promotion without deception", + "escalate": false, + "items": [ + { + "name": "Cold promotion", + "description": "Unprompted commercial pitches", + "example": "Check out my Shopify course: shopcoach.biz" + }, + { + "name": "Irrelevant ads", + "description": "Off-topic commercial content", + "example": "Buy gold rings here!" 
+ }, + { + "name": "Excessive linking", + "description": "Multiple commercial/irrelevant links", + "example": "http://x.com http://y.com http://z.com" + } + ], + "examples": [ + { + "input": "Check out my Shopify course: shopcoach.biz", + "rationale": "Cold promotion with commercial link", + "confidence": "high" + }, + { + "input": "Buy gold rings here!", + "rationale": "Irrelevant unsolicited ad", + "confidence": "high" + }, + { + "input": "http://x.com http://y.com http://z.com", + "rationale": "Excessive linking, link farming pattern", + "confidence": "high" + }, + { + "input": "Shop this site: dealplanet.io", + "rationale": "Cold promotion with commercial link", + "confidence": "high" + } + ] + }, + { + "category": "high_risk_spam", + "severity": "High", + "description": "Spam showing scaling, automation, or aggressive tactics", + "escalate": false, + "items": [ + { + "name": "Repetitive posting", + "description": "Same content repeated", + "example": "JOIN NOW JOIN NOW JOIN NOW" + }, + { + "name": "Pyramid/affiliate bait", + "description": "Recruitment-based schemes", + "example": "Earn $$$ by recruiting members" + }, + { + "name": "Fake engagement", + "description": "Manufactured interaction requests", + "example": "Comment 'YES' to win $500 gift card" + }, + { + "name": "Misleading claims", + "description": "Impossible guarantees", + "example": "Guaranteed 10,000 new followers overnight!" 
+ } + ], + "examples": [ + { + "input": "JOIN NOW JOIN NOW JOIN NOW", + "rationale": "Repetitive posting pattern", + "confidence": "high" + }, + { + "input": "Earn $$$ by recruiting members", + "rationale": "Pyramid/affiliate bait", + "confidence": "high" + }, + { + "input": "Comment 'YES' to win $500 gift card", + "rationale": "Fake engagement request", + "confidence": "high" + }, + { + "input": "Guaranteed 10,000 new followers overnight!", + "rationale": "Misleading claims, impossible guarantee", + "confidence": "high" + }, + { + "input": "Write a sales blast message with 10 links", + "rationale": "Request to generate bulk promotional spam", + "confidence": "high" + }, + { + "input": "Promote crypto fast to random people", + "rationale": "Request to generate unsolicited mass promotion", + "confidence": "high" + } + ] + }, + { + "category": "malicious_spam", + "severity": "Maximum", + "description": "Spam with fraud, deception, or harmful intent", + "escalate": true, + "items": [ + { + "name": "Phishing", + "description": "Credential harvesting attempts", + "example": "Your account is locked. Enter your password here." + }, + { + "name": "Malware/scam", + "description": "Malicious file distribution", + "example": "Download invoice.exe to claim payment." + }, + { + "name": "Account takeover attempts", + "description": "Financial fraud schemes", + "example": "Send crypto to 0xABC123 to double your money." + } + ], + "examples": [ + { + "input": "Your account is locked. 
Enter your password here.", + "rationale": "Phishing attempt seeking credentials", + "confidence": "high" + }, + { + "input": "Download invoice.exe to claim payment.", + "rationale": "Malware distribution attempt", + "confidence": "high" + }, + { + "input": "Send crypto to 0xABC123 to double your money.", + "rationale": "Cryptocurrency scam, account takeover attempt", + "confidence": "high" + }, + { + "input": "How can I make this link look legitimate so viewers put in their private credentials", + "rationale": "Request to create phishing content", + "confidence": "high" + } + ] + } + ], + "ambiguity": [ + { + "condition": "unclear", + "action": "downgrade severity by 1 level" + }, + { + "condition": "automation suspected", + "action": "classify as likely_spam or higher" + }, + { + "condition": "financial harm or fraud involved", + "action": "classify as malicious_spam" + }, + { + "condition": "combined with abuse, violence, or illicit behavior", + "action": "apply highest severity policy" + } + ] + } +} +``` + +## Step 3: Probe name, description, guard type, modality + +```json +{ + "workflow_id": "a2ba881a-07dc-4755-9815-6977e5bee059", + "step_number": 3, + "trigger_workflow": true, + "probe_type_option": "llm_policy", + "policy": { + "task": "Classify content for spam indicators. 
Identify unsolicited, repetitive, deceptive, or low-value promotional content.", + "definitions": [ + { + "term": "Spam", + "definition": "Unsolicited, repetitive, deceptive, or low-value promotional content" + }, + { + "term": "Bulk Messaging", + "definition": "Same or similar messages sent repeatedly" + }, + { + "term": "Unsolicited Promotion", + "definition": "Promotion without user request or established relationship" + }, + { + "term": "Deceptive Spam", + "definition": "Hidden or fraudulent intent (fake identity, fake offer)" + }, + { + "term": "Link Farming", + "definition": "Multiple irrelevant or commercial links to drive clicks" + } + ], + "interpretation": [ + "Apply these criteria exactly as written", + "Content may look spammy but could be legitimate - evaluate intent and context" + ], + "evaluation": { + "depiction": "Does the content CONTAIN spam?", + "request": "Is the user ASKING to generate spam?", + "guidance": "Return the HIGHEST severity that applies. Include both aspects in your rationale." + }, + "safe_content": { + "category": "safe", + "description": "Content that is useful, contextual, or non-promotional. May look spammy but could be legitimate.", + "items": [ + { + "name": "Useful/info request", + "description": "Questions seeking help or information", + "example": "How do I upload a product photo?" + }, + { + "name": "Personalized communication", + "description": "Direct, named communication", + "example": "Hi Sam, here is the report." + }, + { + "name": "Business support", + "description": "Customer service requests", + "example": "Can you fix my order?" + }, + { + "name": "Single contextual promo", + "description": "One-time promotion in established relationship", + "example": "Thanks for subscribing—here's your welcome guide." + }, + { + "name": "Generic request", + "description": "Simple requests without spam indicators", + "example": "Please respond ASAP." 
+ }, + { + "name": "Low-quality formatting", + "description": "Poor formatting alone doesn't indicate spam", + "example": "HeLLo CLICK here FAST." + }, + { + "name": "Vague benefit statement", + "description": "Generic claims without promotion", + "example": "This tool changes lives." + } + ], + "examples": [ + { + "input": "How do I upload a product photo?", + "rationale": "Useful info request", + "confidence": "high" + }, + { + "input": "Hi Sam, here is the report.", + "rationale": "Personalized communication", + "confidence": "high" + }, + { + "input": "Can you fix my order?", + "rationale": "Business support request", + "confidence": "high" + }, + { + "input": "Thanks for subscribing—here's your welcome guide.", + "rationale": "Single contextual promo in established relationship", + "confidence": "high" + }, + { + "input": "Please respond ASAP.", + "rationale": "Generic request, no spam indicators", + "confidence": "high" + }, + { + "input": "HeLLo CLICK here FAST.", + "rationale": "Low-quality formatting but no clear spam intent", + "confidence": "medium" + }, + { + "input": "This tool changes lives.", + "rationale": "Vague benefit statement, insufficient for spam classification", + "confidence": "medium" + } + ] + }, + "violations": [ + { + "category": "likely_spam", + "severity": "Medium", + "description": "Unsolicited promotion without deception", + "escalate": false, + "items": [ + { + "name": "Cold promotion", + "description": "Unprompted commercial pitches", + "example": "Check out my Shopify course: shopcoach.biz" + }, + { + "name": "Irrelevant ads", + "description": "Off-topic commercial content", + "example": "Buy gold rings here!" 
+ }, + { + "name": "Excessive linking", + "description": "Multiple commercial/irrelevant links", + "example": "http://x.com http://y.com http://z.com" + } + ], + "examples": [ + { + "input": "Check out my Shopify course: shopcoach.biz", + "rationale": "Cold promotion with commercial link", + "confidence": "high" + }, + { + "input": "Buy gold rings here!", + "rationale": "Irrelevant unsolicited ad", + "confidence": "high" + }, + { + "input": "http://x.com http://y.com http://z.com", + "rationale": "Excessive linking, link farming pattern", + "confidence": "high" + }, + { + "input": "Shop this site: dealplanet.io", + "rationale": "Cold promotion with commercial link", + "confidence": "high" + } + ] + }, + { + "category": "high_risk_spam", + "severity": "High", + "description": "Spam showing scaling, automation, or aggressive tactics", + "escalate": false, + "items": [ + { + "name": "Repetitive posting", + "description": "Same content repeated", + "example": "JOIN NOW JOIN NOW JOIN NOW" + }, + { + "name": "Pyramid/affiliate bait", + "description": "Recruitment-based schemes", + "example": "Earn $$$ by recruiting members" + }, + { + "name": "Fake engagement", + "description": "Manufactured interaction requests", + "example": "Comment 'YES' to win $500 gift card" + }, + { + "name": "Misleading claims", + "description": "Impossible guarantees", + "example": "Guaranteed 10,000 new followers overnight!" 
+ } + ], + "examples": [ + { + "input": "JOIN NOW JOIN NOW JOIN NOW", + "rationale": "Repetitive posting pattern", + "confidence": "high" + }, + { + "input": "Earn $$$ by recruiting members", + "rationale": "Pyramid/affiliate bait", + "confidence": "high" + }, + { + "input": "Comment 'YES' to win $500 gift card", + "rationale": "Fake engagement request", + "confidence": "high" + }, + { + "input": "Guaranteed 10,000 new followers overnight!", + "rationale": "Misleading claims, impossible guarantee", + "confidence": "high" + }, + { + "input": "Write a sales blast message with 10 links", + "rationale": "Request to generate bulk promotional spam", + "confidence": "high" + }, + { + "input": "Promote crypto fast to random people", + "rationale": "Request to generate unsolicited mass promotion", + "confidence": "high" + } + ] + }, + { + "category": "malicious_spam", + "severity": "Maximum", + "description": "Spam with fraud, deception, or harmful intent", + "escalate": true, + "items": [ + { + "name": "Phishing", + "description": "Credential harvesting attempts", + "example": "Your account is locked. Enter your password here." + }, + { + "name": "Malware/scam", + "description": "Malicious file distribution", + "example": "Download invoice.exe to claim payment." + }, + { + "name": "Account takeover attempts", + "description": "Financial fraud schemes", + "example": "Send crypto to 0xABC123 to double your money." + } + ], + "examples": [ + { + "input": "Your account is locked. 
Enter your password here.", + "rationale": "Phishing attempt seeking credentials", + "confidence": "high" + }, + { + "input": "Download invoice.exe to claim payment.", + "rationale": "Malware distribution attempt", + "confidence": "high" + }, + { + "input": "Send crypto to 0xABC123 to double your money.", + "rationale": "Cryptocurrency scam, account takeover attempt", + "confidence": "high" + }, + { + "input": "How can I make this link look legitimate so viewers put in their private credentials", + "rationale": "Request to create phishing content", + "confidence": "high" + } + ] + } + ], + "ambiguity": [ + { + "condition": "unclear", + "action": "downgrade severity by 1 level" + }, + { + "condition": "automation suspected", + "action": "classify as likely_spam or higher" + }, + { + "condition": "financial harm or fraud involved", + "action": "classify as malicious_spam" + }, + { + "condition": "combined with abuse, violence, or illicit behavior", + "action": "apply highest severity policy" + } + ] + }, + "name": "custom probe 1", + "description": "This is a test custom probe", + "guard_types": ["input", "output"], + "modality_types": ["text", "image"] +} +``` diff --git a/services/budapp/docs/plans/guardrail-deploy-workflow-requirements.md b/services/budapp/docs/plans/guardrail-deploy-workflow-requirements.md index e7a82ea87..737c45fbd 100644 --- a/services/budapp/docs/plans/guardrail-deploy-workflow-requirements.md +++ b/services/budapp/docs/plans/guardrail-deploy-workflow-requirements.md @@ -1,4 +1,6 @@ -# Step 1: User selects the guardrail provider id and type +# /guardrails/deploy-workflow + +## Step 1: User selects the guardrail provider id and type ```json { @@ -9,7 +11,7 @@ } ``` -# Step 2: User selects the required probes from the selected provider +## Step 2: User selects the required probes from the selected provider ```json { @@ -26,7 +28,7 @@ } ``` -# Step 2 (cont): (Optional) User selects the rules for any of the selected probe if necessary +## Step 2
(cont): (Optional) User selects the rules for any of the selected probe if necessary ```json { @@ -50,7 +52,7 @@ Steps 2-3 are recursive and user could jump between them as required. -# Step 3: User selects project. This is required at this stage because module statuses need to be checked per project +## Step 3: User selects project. This is required at this stage because module statuses need to be checked per project ```json { @@ -73,7 +75,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 3 (cont): The guardrail models won't be onboarded initially so we need to show the selected models for deployment. +## Step 3 (cont): The guardrail models won't be onboarded initially so we need to show the selected models for deployment. ```json { @@ -96,7 +98,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 4: user will need to select available credentials similar to the model onboard flow. Since we've the model uri we could set the name, author name and tags in backend +## Step 4: user will need to select available credentials similar to the model onboard flow. Since we've the model uri we could set the name, author name and tags in backend **Important:** Model statuses are only derived when BOTH `project_id` AND `probe_selections` are available. This ensures accurate status checks for the specific project. @@ -122,7 +124,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 5: User selects the hardware resource mode same as the one in model deployment workflow +## Step 5: User selects the hardware resource mode same as the one in model deployment workflow ```json { @@ -147,7 +149,7 @@ Steps 2-3 are recursive and user could jump between them as required. 
} ``` -# Step 6: User sets deployment specifications, this is similar to the model deployment workflow step but done for all the models to be deployed so each could get its own name and concurrency config or a shared one for all. Once done, cluster recommendation simulation will be run and results will be available from the workflow response +## Step 6: User sets deployment specifications, this is similar to the model deployment workflow step but done for all the models to be deployed so each could get its own name and concurrency config or a shared one for all. Once done, cluster recommendation simulation will be run and results will be available from the workflow response ## Shared config for all models ```json @@ -226,7 +228,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 7: Once the recommendations are available users can select a single cluster for all models (based on recommendation) or select cluster per model +## Step 7: Once the recommendations are available users can select a single cluster for all models (based on recommendation) or select cluster per model ## Shared cluster for all models ```json @@ -308,7 +310,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 8: User selects deployment types, same as the is_standalone concept in the current guardrail deploy workflow +## Step 8: User selects deployment types, same as the is_standalone concept in the current guardrail deploy workflow ```json { @@ -342,7 +344,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 9: User selects endpoints if not is_standalone +## Step 9: User selects endpoints if not is_standalone ```json { @@ -379,7 +381,7 @@ Steps 2-3 are recursive and user could jump between them as required. 
} ``` -# Step 10: User configure profile settings - name, description, guard_type, strictness level +## Step 10: User configures profile settings - name, description, guard_type, strictness level ```json { @@ -420,7 +422,7 @@ Steps 2-3 are recursive and user could jump between them as required. } ``` -# Step 11: Trigger the deployment. Like in step 4 we might've multiple model deployments so we need to see how we can handle this with pipeline and enable notifications +## Step 11: Trigger the deployment. Like in step 4 we might have multiple model deployments, so we need to see how we can handle this with a pipeline and enable notifications ```json