diff --git a/prediction_prophet/app.py b/prediction_prophet/app.py index d80ce82c..06a47156 100644 --- a/prediction_prophet/app.py +++ b/prediction_prophet/app.py @@ -19,7 +19,7 @@ def research( goal: str, tavily_api_key: SecretStr, - model: str = "gpt-4-0125-preview", + model: str = "gpt-4-turbo", initial_subqueries_limit: int = 20, subqueries_limit: int = 4, scrape_content_split_chunk_size: int = 800, @@ -144,7 +144,7 @@ def research( ) with st.status("Making prediction"): - prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-0125-preview", temperature=0.0) + prediction = _make_prediction(market_question=question, additional_information=report, engine="gpt-4-turbo", temperature=0.0) if prediction.outcome_prediction == None: st.container().error("The agent failed to generate a prediction") diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py index 53d4794e..c6a9acb6 100644 --- a/prediction_prophet/autonolas/research.py +++ b/prediction_prophet/autonolas/research.py @@ -52,7 +52,7 @@ ] TOOL_TO_ENGINE = { "prediction-sentence-embedding-conservative": "gpt-3.5-turbo", - "prediction-sentence-embedding-bold": "gpt-4", + "prediction-sentence-embedding-bold": "gpt-4-turbo", } diff --git a/prediction_prophet/deployment/models.py b/prediction_prophet/deployment/models.py index 882ded17..856cc288 100644 --- a/prediction_prophet/deployment/models.py +++ b/prediction_prophet/deployment/models.py @@ -105,7 +105,7 @@ class DeployableAgentER_PredictionProphetGPT3(DeployableAgentER): class DAPredictionProphetGPT4(DeployableAgentER): - agent = PredictionProphetAgent(model="gpt-4-0125-preview") + agent = PredictionProphetAgent(model="gpt-4-turbo") # Limit to just 1, because so far it seems that 20x higher costs aren't justified by the prediction performance. max_markets_per_run = 1 diff --git a/prediction_prophet/functions/debate_prediction.py b/prediction_prophet/functions/debate_prediction.py index f2d63eba..3435f03a 100644 --- a/prediction_prophet/functions/debate_prediction.py +++ b/prediction_prophet/functions/debate_prediction.py @@ -92,7 +92,7 @@ def make_debated_prediction(prompt: str, additional_information: str, api_key: S prediction_chain = ( prediction_prompt | - ChatOpenAI(model="gpt-4-0125-preview", api_key=secretstr_to_v1_secretstr(api_key)) | + ChatOpenAI(model="gpt-4-turbo", api_key=secretstr_to_v1_secretstr(api_key)) | StrOutputParser() ) @@ -106,7 +106,7 @@ def make_debated_prediction(prompt: str, additional_information: str, api_key: S ConversableAgent( name=f"Predictor_Agent_{i}", system_message=PREDICTION_PROMPT, - llm_config={"config_list": [{"model": "gpt-4-0125-preview", "api_key": api_key.get_secret_value()}]}, + llm_config={"config_list": [{"model": "gpt-4-turbo", "api_key": api_key.get_secret_value()}]}, human_input_mode="NEVER") for i in range(2) ] diff --git a/prediction_prophet/functions/evaluate_question.py b/prediction_prophet/functions/evaluate_question.py index d971c4b9..5203809c 100644 --- a/prediction_prophet/functions/evaluate_question.py +++ b/prediction_prophet/functions/evaluate_question.py @@ -41,7 +41,7 @@ @persistent_inmemory_cache def is_predictable( question: str, - engine: str = "gpt-4-0125-preview", + engine: str = "gpt-4-turbo", prompt_template: str = QUESTION_EVALUATE_PROMPT, api_key: SecretStr | None = None ) -> tuple[bool, str]: diff --git a/prediction_prophet/functions/is_predictable_and_binary.py b/prediction_prophet/functions/is_predictable_and_binary.py index e579a091..5f93f56f 100644 --- a/prediction_prophet/functions/is_predictable_and_binary.py +++ b/prediction_prophet/functions/is_predictable_and_binary.py @@ -42,7 +42,7 @@ @persistent_inmemory_cache def is_predictable_and_binary( question: str, - engine: str = "gpt-4-0125-preview", + engine: str = "gpt-4-turbo", prompt_template: str = QUESTION_EVALUATE_PROMPT, api_key: SecretStr | None = None ) -> tuple[bool, str]: diff --git a/prediction_prophet/functions/rephrase_question.py b/prediction_prophet/functions/rephrase_question.py index 5a297f1c..08788418 100644 --- a/prediction_prophet/functions/rephrase_question.py +++ b/prediction_prophet/functions/rephrase_question.py @@ -25,7 +25,7 @@ class RephrasedQuestion(BaseModel): def rephrase_question( question: str, - engine: str = "gpt-4-0125-preview" + engine: str = "gpt-4-turbo" ) -> RephrasedQuestion: """ Rephrase the original question, by asking it in negation and universally, for example: diff --git a/prediction_prophet/functions/rerank_results.py b/prediction_prophet/functions/rerank_results.py index 8ffa08c3..4869ae2f 100644 --- a/prediction_prophet/functions/rerank_results.py +++ b/prediction_prophet/functions/rerank_results.py @@ -19,7 +19,7 @@ def rerank_results(results: list[str], goal: str) -> list[str]: rerank_results_chain = ( rerank_results_prompt | - ChatOpenAI(model="gpt-4-0125-preview") | + ChatOpenAI(model="gpt-4-turbo") | CommaSeparatedListOutputParser() ) diff --git a/prediction_prophet/functions/research.py b/prediction_prophet/functions/research.py index 4ab5f003..1cc85d6a 100644 --- a/prediction_prophet/functions/research.py +++ b/prediction_prophet/functions/research.py @@ -13,7 +13,7 @@ def research( goal: str, use_summaries: bool, - model: str = "gpt-4-0125-preview", + model: str = "gpt-4-turbo", initial_subqueries_limit: int = 20, subqueries_limit: int = 4, scrape_content_split_chunk_size: int = 800, diff --git a/prediction_prophet/main.py b/prediction_prophet/main.py index 29dfdfa9..af264334 100644 --- a/prediction_prophet/main.py +++ b/prediction_prophet/main.py @@ -40,7 +40,7 @@ def research( start = time.time() with get_openai_callback() as cb: - report = prophet_research(goal=prompt, use_summaries=False, model="gpt-4-0125-preview") + report = prophet_research(goal=prompt, use_summaries=False, model="gpt-4-turbo") end = time.time() @@ -66,7 +66,7 @@ def predict(prompt: str, path: str | None = None) -> None: else: logger = logging.getLogger("research") logger.setLevel(logging.INFO) - report = prophet_research(goal=prompt, model="gpt-4-0125-preview", use_summaries=False, logger=logger) + report = prophet_research(goal=prompt, model="gpt-4-turbo", use_summaries=False, logger=logger) prediction = make_debated_prediction(prompt=prompt, additional_information=report) diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 333ea8af..fa0f8bdd 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -83,7 +83,7 @@ def main( agent_name="prediction_prophet_gpt-3.5-turbo-0125_tavilyrawcontent", use_tavily_raw_content=True, ), - # PredictionProphetAgent(model="gpt-4-0125-preview", max_workers=max_workers, agent_name="prediction_prophet_gpt-4-0125-preview"), # Too expensive to be enabled by default. + # PredictionProphetAgent(model="gpt-4-turbo", max_workers=max_workers, agent_name="prediction_prophet_gpt-4-turbo"), # Too expensive to be enabled by default. ], cache_path=cache_path, only_cached=only_cached,