Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
071af24
Adding final response evaluation and some minor improvements
lotif Feb 10, 2026
cad754a
Finished online scores, need to put it in a thread
lotif Feb 10, 2026
3aedc8e
Moving the score reporting to the lasngfuse module for better reusabi…
lotif Feb 11, 2026
aed2cf2
Adding missing init files
lotif Feb 12, 2026
efa320e
Using the trace fetch functions instead of making them myself
lotif Feb 12, 2026
1de6a37
Adjusting the token threshold to 15k
lotif Feb 12, 2026
ac7ba2c
Adding log for every metric reported
lotif Feb 12, 2026
0bf2351
Adding agent.py to the demo as well
lotif Feb 12, 2026
20ede2d
Adding feedback button
lotif Feb 12, 2026
1f29c7d
Using init_tracing instead
lotif Feb 13, 2026
7d2471d
Merge branch 'main' into marcelo/online-eval
amrit110 Feb 13, 2026
3bd9f55
Merge branch 'marcelo/online-eval' into marcelo/thumbs
lotif Feb 17, 2026
046025e
Merge branch 'main' into marcelo/online-eval
lotif Feb 17, 2026
dac7beb
Merge branch 'marcelo/online-eval' into marcelo/thumbs
lotif Feb 17, 2026
5b027c9
Merge branch 'main' into marcelo/online-eval
amrit110 Feb 17, 2026
80717ba
Adding missing docstring and return type hints
lotif Feb 17, 2026
ad07efd
Merge branch 'marcelo/online-eval' into marcelo/thumbs
amrit110 Feb 17, 2026
fd69cc3
Adding max concurrency parameter to evaluation and updating the readm…
lotif Feb 17, 2026
1edacd5
Merge branch 'marcelo/online-eval' into marcelo/thumbs
lotif Feb 17, 2026
fc12d49
Adding user feedback to the readme file
lotif Feb 17, 2026
a850f09
Adding one more paragraph
lotif Feb 17, 2026
5a8b103
Merge branch 'marcelo/online-eval' into marcelo/thumbs
lotif Feb 17, 2026
02e6318
Merge branch 'main' into marcelo/online-eval
amrit110 Feb 17, 2026
c30b56f
Adding missing docstring
lotif Feb 17, 2026
9bc2437
Merge branch 'marcelo/online-eval' into marcelo/thumbs
lotif Feb 17, 2026
52ecf04
Merge branch 'main' into marcelo/thumbs
lotif Feb 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions implementations/report_generation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,10 @@ check if the final result is present and contains a link to the report.

These evaluation results are sent to Langfuse as scores, where they can be analyzed
both in aggregate and individually.

### User Feedback

On the Gradio Demo UI, there are two buttons to record user feedback: a thumbs
up button to record positive user feedback and a thumbs down button to record
negative user feedback. The buttons appear at the end of the agent's execution,
and clicking one records the user feedback as a Langfuse score.
129 changes: 105 additions & 24 deletions implementations/report_generation/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
logger = logging.getLogger(__name__)


# Module-level holder for the Langfuse trace ID of the most recent agent run.
# It is reset to None at the start of each agent session, set by the
# score-reporting callback, and read by the feedback button callbacks.
# NOTE(review): this object lives at module level and is mutated through
# `.value`, so it looks like it is shared by every user of the process rather
# than being per-session state -- confirm this is acceptable for the demo.
GRADIO_STATE = gr.State(value={"trace_id": None})


async def agent_session_handler(
query: str,
history: list[ChatMessage],
Expand All @@ -61,6 +64,9 @@ async def agent_session_handler(
AsyncGenerator[list[ChatMessage], Any]
An async chat messages generator.
"""
# Reset the trace ID in the state
GRADIO_STATE.value["trace_id"] = None

# Initialize list of chat messages for a single turn
turn_messages: list[ChatMessage] = []

Expand Down Expand Up @@ -111,15 +117,21 @@ def calculate_and_send_scores(callback_context: CallbackContext) -> None:
"""
for event in callback_context.session.events:
if event.is_final_response() and event.content and event.content.role == "model":
langfuse_client = AsyncClientManager.get_instance().langfuse_client
trace_id = langfuse_client.get_current_trace_id()

# Storing the trace ID in the state so it can be used
# in the feedback buttons callback
GRADIO_STATE.value["trace_id"] = trace_id

# Report the final response evaluation to Langfuse
report_final_response_score(event, string_match="](gradio_api/file=")

# Run usage scoring in a thread so it doesn't block the UI
langfuse_client = AsyncClientManager.get_instance().langfuse_client
thread = threading.Thread(
target=report_usage_scores,
kwargs={
"trace_id": langfuse_client.get_current_trace_id(),
"trace_id": trace_id,
"token_threshold": 15000,
"latency_threshold": 60,
},
Expand All @@ -132,6 +144,56 @@ def calculate_and_send_scores(callback_context: CallbackContext) -> None:
logger.error("No final response found in the callback context. Will not report scores to Langfuse.")


def on_feedback(liked: bool) -> tuple[dict[str, Any], dict[str, Any]]:
    """Handle thumbs up (liked=True) or thumbs down (liked=False).

    Sends the feedback to Langfuse as a score and returns the updates for
    the feedback row and the thank you message row.

    Parameters
    ----------
    liked : bool
        Whether the user liked the agent's response.

    Returns
    -------
    tuple[dict[str, Any], dict[str, Any]]
        The updates for the feedback row and the thank you message row.
        If no trace ID is found in the state, nothing is reported to
        Langfuse and two no-op updates are returned so that both rows are
        left exactly as they are.
    """
    trace_id = GRADIO_STATE.value["trace_id"]
    if trace_id is None:
        logger.error("No trace ID found in the state. Will not report feedback to Langfuse.")
        # This callback is registered with two output components, so it has
        # to hand back one value per output. A bare None does not do that;
        # empty updates leave both rows unchanged instead.
        return gr.update(), gr.update()

    # Langfuse score value: 1 for thumbs up, 0 for thumbs down.
    score = 1 if liked else 0

    logger.info("Reporting user feedback score for trace %s with value %s", trace_id, score)
    langfuse_client = AsyncClientManager.get_instance().langfuse_client
    langfuse_client.create_score(
        value=score,
        name="User Feedback",
        comment=f"The user gave this response a thumbs {'up' if liked else 'down'}.",
        trace_id=trace_id,
    )
    # Flush right away so the score is sent now rather than left buffered.
    langfuse_client.flush()

    # Clear the stored trace ID so the same response is not scored twice.
    GRADIO_STATE.value["trace_id"] = None

    # Hide the feedback buttons and show the thank you message.
    return gr.update(visible=False), gr.update(visible=True)


def toggle_feedback_row() -> tuple[dict[str, Any], dict[str, Any]]:
    """Show the feedback row only when a trace ID is currently stored.

    The thank you message row is always hidden by this callback.

    Returns
    -------
    tuple[dict[str, Any], dict[str, Any]]
        The visibility updates for the feedback row and the thank you
        message row, in that order.
    """
    current_trace_id = GRADIO_STATE.value["trace_id"]

    # A missing trace ID is either None or an empty string.
    trace_missing = current_trace_id is None or current_trace_id == ""

    feedback_row_update = gr.update(visible=not trace_missing)
    thank_you_row_update = gr.update(visible=False)
    return feedback_row_update, thank_you_row_update


@click.command()
@click.option("--enable-trace", required=False, default=True, help="Whether to enable tracing with Langfuse.")
@click.option(
Expand All @@ -154,33 +216,52 @@ def start_gradio_app(enable_trace: bool = True, enable_public_link: bool = False
"""
partial_agent_session_handler = partial(agent_session_handler, enable_trace=enable_trace)

demo = gr.ChatInterface(
partial_agent_session_handler,
chatbot=gr.Chatbot(height=600),
textbox=gr.Textbox(lines=1, placeholder="Enter your prompt"),
# Additional input to maintain session state across multiple turns
# NOTE: Examples must be a list of lists when additional inputs are provided
additional_inputs=gr.State(value={}, render=False),
examples=[
["Generate a monthly sales performance report."],
["Generate a report of the top 5 selling products per year and the total sales value for each product."],
["Generate a report of the average order value per invoice per month."],
[
"Generate a report with the month-over-month trends in sales. The report should include the monthly sales, the month-over-month change and the percentage change."
],
["Generate a report on sales revenue by country per year."],
["Generate a report on the 5 highest-value customers per year vs. the average customer."],
[
"Generate a report on the average amount spent by one time buyers for each year vs. the average customer."
],
],
title="2.1: ReAct for Retrieval-Augmented Generation with OpenAI Agent SDK",
)
with gr.Blocks(title="Report Generator Agent") as demo:
with gr.Row():
gradio_chatbot = gr.Chatbot(height=600)
gr.ChatInterface(
partial_agent_session_handler,
chatbot=gradio_chatbot,
textbox=gr.Textbox(lines=1, placeholder="Enter your prompt"),
# Additional input to maintain session state across multiple turns
# NOTE: Examples must be a list of lists when additional inputs
# are provided
additional_inputs=gr.State(value={}, render=False),
examples=[
["Generate a monthly sales performance report."],
[
"Generate a report of the top 5 selling products per year and the total sales value for each product."
],
["Generate a report of the average order value per invoice per month."],
[
"Generate a report with the month-over-month trends in sales. The report should include the monthly sales, the month-over-month change and the percentage change."
],
["Generate a report on sales revenue by country per year."],
["Generate a report on the 5 highest-value customers per year vs. the average customer."],
[
"Generate a report on the average amount spent by one time buyers for each year vs. the average customer."
],
],
)

with gr.Row(elem_id="thank_you_msg", visible=False) as thank_you_row:
gr.Markdown("Thank you for your feedback πŸ™‚")

# Feedback buttons
with gr.Row(elem_id="feedback_buttons", visible=False) as feedback_row:
gr.Markdown("Provide feedback on the response:")
thumbs_up = gr.Button("πŸ‘")
thumbs_up.click(fn=lambda: on_feedback(True), outputs=[feedback_row, thank_you_row])
thumbs_down = gr.Button("πŸ‘Ž")
thumbs_down.click(fn=lambda: on_feedback(False), outputs=[feedback_row, thank_you_row])

gradio_chatbot.change(fn=toggle_feedback_row, outputs=[feedback_row, thank_you_row])

try:
demo.launch(
share=enable_public_link,
allowed_paths=[str(get_reports_output_path().absolute())],
css="#feedback_buttons { width: 600px; }",
)
finally:
DbManager.get_instance().close()
Expand Down