Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 46 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz

# testing
/coverage
Expand All @@ -30,7 +31,51 @@ yarn-debug.log*
yarn-error.log*

# local env files
.env*.local
.env
.env.local
.env.development.local
.env.test.local
.env.production.local

# service keys
service_key.json
service_key.yaml
service_key.yml
*service_key*

# virtual environment
venv/
.venv/
env/
ENV/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# vercel
.vercel
Expand Down
83 changes: 83 additions & 0 deletions api/agents/agents.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import datetime
from zoneinfo import ZoneInfo
from google.adk.agents import Agent
from google.adk.tools.agent_tool import AgentTool
from google.adk.tools import google_search # Import the tool
from typing import Union, List, Optional, Iterable
import os
import json
import gzip

import io

# ────────────────────────────────────────────────────────────────────────────
# 1. Tools
# ────────────────────────────────────────────────────────────────────────────
# moved Mixpanel tool to its own file
from .tools.bigquery_tools import query_bigquery


#moved prompts to a separate file
from .prompts import (

ROOT_AGENT_INSTRUCTION,
GOOGLE_SEARCH_AGENT_INSTRUCTION,
BIGQUERY_QUERY_RUNNER_AGENT_INSTRUCTION,
GET_BIGQUERY_QUERY_AGENT_INSTRUCTION,
)


bigquery_query_runner_agent = Agent(
name="bigquery_query_runner_agent",
model="gemini-2.0-flash",
description="Builds & executes the BigQuery query, returns raw rows.",
instruction=BIGQUERY_QUERY_RUNNER_AGENT_INSTRUCTION,
tools=[query_bigquery],
output_key="bigquery_data"
)



get_bigquery_query_agent = Agent(
name="get_bigquery_query_agent",
model="gemini-2.0-flash",
description="Returns the BigQuery SQL query to use with bigquery_query_runner_agent which queries the table.",
instruction=GET_BIGQUERY_QUERY_AGENT_INSTRUCTION,
tools=[AgentTool(agent=bigquery_query_runner_agent)],
output_key="bigquery_query"
)


# google search agent
google_search_agent = Agent(
model='gemini-2.0-flash-exp',
name='google_search_agent',
instruction=GOOGLE_SEARCH_AGENT_INSTRUCTION,
tools=[google_search]
)

# ────────────────────────────────────────────────────────────────────────────
# 3. Root orchestration agent
# ────────────────────────────────────────────────────────────────────────────

root_agent_x = Agent(
name="agent_router",
model="gemini-2.0-flash",
description="Job is to route the user's request to the right sub-agent.",
instruction=ROOT_AGENT_INSTRUCTION,
sub_agents=[
get_bigquery_query_agent,
],
tools=[
AgentTool(agent=google_search),
]
)


root_agent = Agent(
name="agent_router",
model="gemini-2.0-flash-live-001",
description="Job is to route the user's request to the right sub-agent.",
instruction=ROOT_AGENT_INSTRUCTION
)

208 changes: 208 additions & 0 deletions api/agents/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# agent_7_first_demo/prompts.py
"""
Prompt templates for all agents in the system.
"""
BIGQUERY_SCHEMA = '''
{
"table_name": "event_data",
"columns": [
{
"name": "time",
"type": "STRING",
"description": "The timestamp of the event in ISO 8601 format (e.g., '2025-05-08T12:34:56Z')."
},
{
"name": "event",
"type": "STRING",
"description": "The name of the event (e.g., 'purchase', 'page_view', 'signup')."
},
{
"name": "device_id",
"type": "STRING",
"description": "A unique identifier for the user's device (e.g., 'abc123deviceid')."
},
{
"name": "distinct_id",
"type": "STRING",
"description": "A unique identifier for the user across devices or sessions (e.g., 'user_456')."
},
{
"name": "report_date",
"type": "STRING",
"description": "The date the event was recorded, in 'YYYY-MM-DD' format (e.g., '2025-05-08'). THIS MUST BE WRAPPED IN DATE() IN QUERY"
},
{
"name": "utm_campaign",
"type": "STRING",
"description": "UTM campaign name for marketing attribution (e.g., 'spring_sale')."
},
{
"name": "utm_source",
"type": "STRING",
"description": "UTM source (e.g., 'google', 'facebook')."
},
{
"name": "utm_medium",
"type": "STRING",
"description": "UTM medium (e.g., 'cpc', 'email')."
},
{
"name": "utm_content",
"type": "STRING",
"description": "UTM content tag (e.g., 'banner_ad')."
},
{
"name": "utm_term",
"type": "STRING",
"description": "UTM term for paid search keywords (e.g., 'running+shoes')."
},
{
"name": "utm_id",
"type": "STRING",
"description": "UTM id for custom campaign tracking (e.g., '12345')."
},
{
"name": "utm_source_platform",
"type": "FLOAT",
"description": "Source platform ID (numeric value identifying source platform)."
},
{
"name": "utm_campaign_id",
"type": "FLOAT",
"description": "Campaign ID as numeric identifier (e.g., 987654321)."
},
{
"name": "utm_creative_format",
"type": "FLOAT",
"description": "Identifier for the creative format used in the ad."
},
{
"name": "utm_marketing_tactic",
"type": "STRING",
"description": "Describes the marketing tactic used (e.g., 'retargeting')."
},
{
"name": "gclid",
"type": "STRING",
"description": "Google Click ID for tracking Google Ads clicks (e.g., 'Cj0KCQjw')."
},
{
"name": "msclkid",
"type": "FLOAT",
"description": "Microsoft Click ID for tracking Bing Ads."
},
{
"name": "fbclid",
"type": "STRING",
"description": "Facebook Click ID for tracking Facebook Ads (e.g., 'IwAR3h...')."
},
{
"name": "ttclid",
"type": "FLOAT",
"description": "TikTok Click ID for tracking TikTok Ads."
},
{
"name": "twclid",
"type": "FLOAT",
"description": "Twitter Click ID for tracking Twitter Ads."
},
{
"name": "sccid",
"type": "FLOAT",
"description": "Snapchat Click ID for tracking Snapchat Ads."
},
{
"name": "dclid",
"type": "FLOAT",
"description": "DoubleClick ID for tracking DoubleClick campaigns."
},
{
"name": "ko_click_id",
"type": "FLOAT",
"description": "Kakao Click ID for tracking Kakao campaigns."
},
{
"name": "li_fat_id",
"type": "FLOAT",
"description": "LinkedIn Click ID for tracking LinkedIn campaigns."
},
{
"name": "wbraid",
"type": "STRING",
"description": "Wbraid ID used by Google to support enhanced conversions (e.g., 'ABwEA...')."
},
{
"name": "product_price",
"type": "FLOAT",
"description": "Price of the product involved in the event, in the transaction currency (e.g., 29.99)."
}
]
}
'''

GET_BIGQUERY_QUERY_AGENT_INSTRUCTION = '''
You are a SQL query generator.
You are querying a table called event_data that contains marketing event tracking data. The dataset and table you will be querying is called 'gam-dwh.mixpanel_data_3324357.mixpanel_all_data_export_*`.
Once you get the query from the user, use the bigquery_query_runner_agent to run the querys

Below is the schema with descriptions:

{BIGQUERY_SCHEMA}

Rules:

Only query columns that exist in the schema.

Always use the column descriptions to understand what each column represents.

If a column is a string but stores IDs or numeric codes, you can filter or group by it.

Always use report_date to filter date ranges in the query, but wrap report_date in DATE() in the query.

Return valid SQL syntax compatible with BigQuery.

Example user requests:

"Show me total purchases by campaign for last month"

"Give me daily number of page views grouped by source and medium"

"What was the average product price per campaign in April 2025?"

When I ask a question, generate a SQL query using the schema.
'''



BIGQUERY_QUERY_RUNNER_AGENT_INSTRUCTION = '''
Use the run_bigquery_query tool with the SQL defined to fetch data for the selected events.
Return the rows exactly as received.
'''

GOOGLE_SEARCH_AGENT_INSTRUCTION = '''
You are a specialist in Google Search. When a user query requires up-to-date, factual, or external information, use the Google Search tool to find and summarize the most relevant and trustworthy results.

- Always prioritize official, reputable, and recent sources.
- Provide concise, actionable, and well-cited answers.
- If the user asks for sources, include URLs or references in your response.
- If the answer cannot be found, say so clearly.
- If the user query is ambiguous, ask clarifying questions before searching.

Default behavior: Use your best judgment to decide when to search and how to present the results in a user-friendly way.
'''

ROOT_AGENT_INSTRUCTION = '''
You are a helpful analytics assistant. Understand the user's request and route it to the right sub-agent. At moment we have 4 sub-agents/tools:
- mixpanel_query_agent: Use when the user wants to analyze existing data or get insights from collected events
- data_planner: Use when the user wants to track a new type of event or create tracking requirements
- google_search: Use when the user wants to search the web or needs up-to-date, factual, or external information

Routing guidelines:
- If the client is asking about analyzing existing data (e.g., "how much revenue we made last month"), route to the researcher agent.
- If the client wants to set up tracking for a new event type (e.g., "I want to track newsletter subscriptions"), route to the data_planner agent.
- If the client specifically wants to run a Mixpanel query, route to query_runner agent.
- If the client asks for information that requires a web search, or if you need to supplement your answer with up-to-date or external information, use the google_search agent.

Ask clarifying questions if needed.
If you can't find the right sub-agent, just say "I don't know"
'''
33 changes: 33 additions & 0 deletions api/agents/tools/bigquery_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Dict, List, Optional, TypedDict, Any
from datetime import datetime
import pandas as pd
from langgraph.graph import Graph, StateGraph
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.types import interrupt, Command
from google.cloud import bigquery
from google.oauth2 import service_account


def query_bigquery(query: str):
print("▶️ get_event_data()")
credentials = service_account.Credentials.from_service_account_file(
'service_key.json',
scopes=['https://www.googleapis.com/auth/cloud-platform']
)
try:

client = bigquery.Client(credentials=credentials)
job_config = bigquery.QueryJobConfig()
event_data_df = client.query(query, job_config=job_config).to_dataframe()

result = {
"data": event_data_df.to_dict(orient='records'),
"columns": event_data_df.columns.tolist(),
"row_count": len(event_data_df)
}
return result
except Exception as e:
error_msg = f"❌ Error fetching event data: {str(e)}"
print(error_msg)
return error_msg

Loading