Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prompt service v2 refactor #1149

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
68 changes: 68 additions & 0 deletions prompt-service/src/unstract/prompt_service_v2/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import logging
from logging.config import dictConfig
from os import environ as env

from dotenv import load_dotenv
from flask import Flask
from unstract.prompt_service_v2.controllers import api
from unstract.prompt_service_v2.extensions import db
from unstract.prompt_service_v2.helper.lifecycle_helper import register_lifecycle_hooks
from unstract.prompt_service_v2.helper.plugin_helper import plugin_loader
from unstract.prompt_service_v2.utils.env_loader import get_env_or_die
from unstract.sdk.constants import LogLevel

# Load environment variables from a .env file (python-dotenv) before any of
# the configuration below reads them.
load_dotenv()

# NOTE(review): a reviewer asked for a request ID to be logged with every
# record via a logging filter. Ideally the ID comes from the backend / tool;
# if it's not there, one should be created locally so logs within this
# service can be correlated. Not implemented yet.
#
# Process-wide logging setup: a single stream handler writing to Flask's WSGI
# error stream, attached to the root logger at INFO.
dictConfig(
    {
        "version": 1,
        "formatters": {
            "default": {
                "format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
            }
        },
        "handlers": {
            "wsgi": {
                "class": "logging.StreamHandler",
                "stream": "ext://flask.logging.wsgi_errors_stream",
                "formatter": "default",
            }
        },
        "root": {"level": "INFO", "handlers": ["wsgi"]},
    }
)


def create_app() -> Flask:
    """Build and configure the prompt-service Flask application.

    Steps, in order:
      1. Create the Flask app and set its logger level from ``LOG_LEVEL``
         (DEBUG and INFO are honoured; anything else becomes WARNING).
      2. Read the ``PG_BE_*`` database settings through ``get_env_or_die``
         (presumably aborts startup when a variable is missing - confirm
         against ``env_loader``).
      3. Initialise the shared ``db`` object with those settings.
      4. Load plugins, register lifecycle hooks and mount the ``api``
         blueprint.

    Returns:
        The configured Flask application.
    """
    app = Flask("prompt-service")

    # NOTE(review): reviewer NIT - the log level could instead be passed to
    # the dictConfig call and handled there with a simple conversion.
    level_by_name = {
        LogLevel.DEBUG.value: logging.DEBUG,
        LogLevel.INFO.value: logging.INFO,
    }
    log_level = env.get("LOG_LEVEL", LogLevel.WARN.value)
    # Unrecognised values (including the WARN default) fall back to WARNING.
    app.logger.setLevel(level_by_name.get(log_level, logging.WARNING))

    # Required environment variables. The lookup order is kept as
    # host, port, user, password, database so that the first missing
    # variable reported stays the same.
    db_settings = {
        "host": get_env_or_die("PG_BE_HOST"),
        "port": get_env_or_die("PG_BE_PORT"),
        "user": get_env_or_die("PG_BE_USERNAME"),
        "password": get_env_or_die("PG_BE_PASSWORD"),
        "database": get_env_or_die("PG_BE_DATABASE"),
    }
    application_name = env.get("APPLICATION_NAME", "unstract-prompt-service")

    # Initialise the database object; the application name is forwarded as a
    # connection option.
    db.init(
        **db_settings,
        options=f"-c application_name={application_name}",
    )

    # Load plugins
    plugin_loader(app)
    register_lifecycle_hooks(app)
    app.register_blueprint(api)
    return app
138 changes: 138 additions & 0 deletions prompt-service/src/unstract/prompt_service_v2/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from enum import Enum


class PromptServiceContants:
    """String constants shared across the prompt service.

    Holds plain string keys / identifiers plus two user-facing messages.

    NOTE(review): the class name is misspelled ("Contants"). It is left
    unchanged here because renaming it would break any existing importer.
    """

    WORD = "word"
    SYNONYMS = "synonyms"
    OUTPUTS = "outputs"
    TOOL_ID = "tool_id"
    RUN_ID = "run_id"
    FILE_NAME = "file_name"
    FILE_HASH = "file_hash"
    NAME = "name"
    ACTIVE = "active"
    PROMPT = "prompt"
    CHUNK_SIZE = "chunk-size"
    PROMPTX = "promptx"
    VECTOR_DB = "vector-db"
    EMBEDDING = "embedding"
    X2TEXT_ADAPTER = "x2text_adapter"
    CHUNK_OVERLAP = "chunk-overlap"
    LLM = "llm"
    IS_ASSERT = "is_assert"
    ASSERTION_FAILURE_PROMPT = "assertion_failure_prompt"
    RETRIEVAL_STRATEGY = "retrieval-strategy"
    SIMPLE = "simple"
    SUBQUESTION = "subquestion"
    TYPE = "type"
    NUMBER = "number"
    EMAIL = "email"
    DATE = "date"
    BOOLEAN = "boolean"
    JSON = "json"
    PREAMBLE = "preamble"
    SIMILARITY_TOP_K = "similarity-top-k"
    PROMPT_TOKENS = "prompt_tokens"
    COMPLETION_TOKENS = "completion_tokens"
    TOTAL_TOKENS = "total_tokens"
    RESPONSE = "response"
    POSTAMBLE = "postamble"
    GRAMMAR = "grammar"
    PLATFORM_SERVICE_API_KEY = "PLATFORM_SERVICE_API_KEY"
    EMBEDDING_SUFFIX = "embedding_suffix"
    EVAL_SETTINGS = "eval_settings"
    EVAL_SETTINGS_EVALUATE = "evaluate"
    EVAL_SETTINGS_MONITOR_LLM = "monitor_llm"
    EVAL_SETTINGS_EXCLUDE_FAILED = "exclude_failed"
    TOOL_SETTINGS = "tool_settings"
    LOG_EVENTS_ID = "log_events_id"
    CHALLENGE_LLM = "challenge_llm"
    CHALLENGE = "challenge"
    ENABLE_CHALLENGE = "enable_challenge"
    EXTRACTION = "extraction"
    SUMMARIZE = "summarize"
    SINGLE_PASS_EXTRACTION = "single-pass-extraction"
    SIMPLE_PROMPT_STUDIO = "simple-prompt-studio"
    LLM_USAGE_REASON = "llm_usage_reason"
    METADATA = "metadata"
    OUTPUT = "output"
    CONTEXT = "context"
    INCLUDE_METADATA = "include_metadata"
    TABLE = "table"
    TABLE_SETTINGS = "table_settings"
    EPILOGUE = "epilogue"
    PLATFORM_POSTAMBLE = "platform_postamble"
    HIGHLIGHT_DATA_PLUGIN = "highlight-data"
    CLEAN_CONTEXT = "clean-context"
    SUMMARIZE_AS_SOURCE = "summarize_as_source"
    VARIABLE_MAP = "variable_map"
    RECORD = "record"
    TEXT = "text"
    ENABLE_HIGHLIGHT = "enable_highlight"
    FILE_PATH = "file_path"
    HIGHLIGHT_DATA = "highlight_data"
    CONFIDENCE_DATA = "confidence_data"
    REQUIRED_FIELDS = "required_fields"
    REQUIRED = "required"
    EXECUTION_SOURCE = "execution_source"
    METRICS = "metrics"
    LINE_ITEM = "line-item"
    # User-facing message for features not available in this edition.
    PAID_FEATURE_MSG = (
        "It is a cloud / enterprise feature. If you have purchased a plan and still "
        "face this issue, please contact support"
    )
    # User-facing message for when no context could be fetched from the
    # vector DB.
    NO_CONTEXT_ERROR = (
        "Couldn't fetch context from vector DB. "
        "This happens usually due to a delay by the Vector DB "
        "provider to confirm writes to DB. "
        "Please try again after some time"
    )


class RunLevel(Enum):
    """Different stages of prompt execution.

    Covers the prompt run itself as well as the evaluation, challenge and
    table-extraction stages.
    """

    RUN = "RUN"
    EVAL = "EVAL"
    CHALLENGE = "CHALLENGE"
    TABLE_EXTRACTION = "TABLE_EXTRACTION"


class DBTableV2:
    """Database table names, exposed as string constants."""

    ORGANIZATION = "organization"
    ADAPTER_INSTANCE = "adapter_instance"
    PROMPT_STUDIO_REGISTRY = "prompt_studio_registry"
    PLATFORM_KEY = "platform_key"
    # The attribute is TOKEN_USAGE, but the table name is plain "usage".
    TOKEN_USAGE = "usage"


class FileStorageKeys:
    """String keys naming the permanent and temporary remote storage."""

    PERMANENT_REMOTE_STORAGE = "PERMANENT_REMOTE_STORAGE"
    TEMPORARY_REMOTE_STORAGE = "TEMPORARY_REMOTE_STORAGE"


class FileStorageType(Enum):
    """Kinds of file storage: permanent or temporary."""

    PERMANENT = "permanent"
    TEMPORARY = "temporary"


class ExecutionSource(Enum):
    """Where an execution originates: the IDE or a tool."""

    IDE = "ide"
    TOOL = "tool"


class VariableType(str, Enum):
    """Variable kinds; members are also ``str`` and compare equal to their values."""

    STATIC = "STATIC"
    DYNAMIC = "DYNAMIC"


class VariableConstants:
    """Regular-expression patterns for variable handling."""

    # Captures the text between double curly braces, non-greedily.
    VARIABLE_REGEX = r"{{(.+?)}}"
    # Captures the text between square brackets, non-greedily.
    DYNAMIC_VARIABLE_DATA_REGEX = r"\[(.*?)\]"
    # Case-insensitive URL matcher; group 1 is the whole URL, without
    # trailing punctuation.
    DYNAMIC_VARIABLE_URL_REGEX = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"  # noqa: E501
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from flask import Blueprint

from .answer_prompt_controller import answer_prompt_bp
from .health_controller import health_bp

# Parent blueprint that groups every controller of the service.
api = Blueprint("api", __name__)

# Attach the child blueprints in a fixed order: health first, then answer-prompt.
for _child_bp in (health_bp, answer_prompt_bp):
    api.register_blueprint(_child_bp)
del _child_bp  # keep the module namespace free of the loop variable
Loading