From fefa4c9e3eb7c5a99fc1bc30694760a1512f3317 Mon Sep 17 00:00:00 2001 From: Tomas Sikora Date: Tue, 24 Jun 2025 17:01:37 +0200 Subject: [PATCH 1/5] Scalar agent implementation --- prediction_prophet/autonolas/research.py | 111 +++++++++++++++++++++++ prediction_prophet/benchmark/agents.py | 24 ++++- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py index dd3ad2d0..146903cc 100644 --- a/prediction_prophet/autonolas/research.py +++ b/prediction_prophet/autonolas/research.py @@ -118,6 +118,50 @@ * The sum of "p_yes" and "p_no" must equal 1. * Output only the JSON object in your response. Do not include any other contents in your response. """ + +PREDICTION_PROMPT_SCALAR = """ +INTRODUCTION: +You are a Large Language Model (LLM) within a multi-agent system. Your primary task is to accurately estimate the 'scalar_value' for the outcome of a 'market question', \ +found in 'USER_PROMPT'. The market question is part of a prediction market, where users can place bets on the outcomes of market questions and earn rewards if the predicted 'scalar_value' is close to the actual outcome. +Each market has a closing date at which the outcome is evaluated. This date is typically stated within the market question. \ +The closing date is considered to be 23:59:59 of the date provided in the market question. \ +You are provided an itemized list of information under the label "ADDITIONAL_INFORMATION", which is \ +sourced from a Google search engine query performed a few seconds ago and is meant to assist you in your 'scalar_value' estimation. You must adhere to the following 'INSTRUCTIONS'. + + +INSTRUCTIONS: +* Examine the user's input labeled 'USER_PROMPT'. Focus on the part enclosed in double quotes, which contains the 'market question'. 
+* Estimate the 'scalar_value' for the outcome of the market question, which is can be any value between {market_upper_bound} and {market_lower_bound}, but the value can be outside of this range if you think it is more likely. +* Consider the prediction market with the market question, the closing date and the outcomes in an isolated context that has no influence on the protagonists that are involved in the event in the real world, specified in the market question. The closing date is always arbitrarily set by the market creator and has no influence on the real world. So it is likely that the protagonists of the event in the real world are not even aware of the prediction market and do not care about the market's closing date. +* The 'scalar_value' estimations of the market question outcomes must be as accurate as possible, as an inaccurate estimation will lead to financial loss for the user. +* Utilize your training data and the information provided under "ADDITIONAL_INFORMATION" to generate 'scalar_value' estimations for the outcomes of the 'market question'. +* Examine the itemized list under "ADDITIONAL_INFORMATION" thoroughly and use all the relevant information for your 'scalar_value' estimation. This data is sourced from a Google search engine query done a few seconds ago. +* Use any relevant item in "ADDITIONAL_INFORMATION" in addition to your training data to make the 'scalar_value' estimation. You can assume that you have been provided with the most current and relevant information available on the internet. Still pay close attention on the release and modification timestamps provided in parentheses right before each information item. Some information might be outdated and not relevant anymore. +* More recent information indicated by the timestamps provided in parentheses right before each information item overrides older information within ADDITIONAL_INFORMATION and holds more weight for your 'scalar_value' estimation. 
+* If there is contradicting information, evaluate the release and modification dates of that information and prioritize the information that is more recent and adjust your confidence in the 'scalar_value' estimation accordingly. +* Even if not all information was released today, you can assume that there haven't been publicly available updates in the meantime except for those inside ADDITIONAL_INFORMATION. +* You must provide your response in the format specified under "OUTPUT_FORMAT". +* Do not include any other contents in your response. + + +USER_PROMPT: +``` +{user_prompt} +``` + +ADDITIONAL_INFORMATION: +``` +{additional_information} +``` + +OUTPUT_FORMAT: +* Your output response must be only a single JSON object to be parsed by Python's "json.loads()". +* The JSON must contain {n_fields} fields: {fields_list}. +{fields_description} +* The 'scalar_value' can be any value between {market_upper_bound} and {market_lower_bound}, but the value can be outside of this range if you think it is more likely. +* Output only the JSON object in your response. Do not include any other contents in your response. +""" + PREDICTION_PROMPT_CATEGORICAL = """ INTRODUCTION: You are a Large Language Model (LLM) within a multi-agent system. Your primary task is to accurately estimate the probabilities for the outcome of a 'market question', \ @@ -184,6 +228,13 @@ "info_utility": "Utility of the information provided in 'ADDITIONAL_INFORMATION' to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).", } +FIELDS_DESCRIPTIONS_SCALAR = { + "reasoning": "A string containing the reasoning behind your decision, and the rest of the answer you're about to give.", + "scalar_value": "Predicted value is float. 
It is expected to be in [{market_upper_bound},{market_lower_bound}], but value can be outside of this range if you think it is more likely.", + "confidence": "Indicating the confidence in the estimated value you provided ranging from 0 (lowest confidence) to 1 (maximum confidence). Confidence can be calculated based on the quality and quantity of data used for the estimation.", + "info_utility": "Utility of the information provided in 'ADDITIONAL_INFORMATION' to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).", +} + URL_QUERY_PROMPT = """ You are a Large Language Model in a multi-agent system. Your task is to formulate search engine queries based on \ a user's 'event question', which specifies an event and any accompanying conditions. The 'event question' allows \ @@ -388,6 +439,13 @@ class Prediction(BaseModel): logprobs: Optional[list[FieldLogprobs]] = [] +class ScalarPrediction(TypedDict): + scalar_value: float + upperBound: float + lowerBound: float + confidence: float + info_utility: float + class CategoricalPrediction(TypedDict): decision: str probabilities: Dict[str, Probability] @@ -1324,6 +1382,59 @@ def make_prediction_categorical( return response +@observe() +def make_prediction_scalar( + prompt: str, + market_upper_bound: float, + market_lower_bound: float, + additional_information: str, + agent: Agent | None, + include_reasoning: bool = False, +) -> ScalarPrediction: + agent = agent or Agent(model="gpt-3.5-turbo-0125", model_settings=ModelSettings(temperature=0.7)) + + current_time_utc = datetime.now(timezone.utc) + formatted_time_utc = current_time_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-6] + "Z" + + field_descriptions = FIELDS_DESCRIPTIONS_SCALAR.copy() + if not include_reasoning: + field_descriptions.pop("reasoning") + prediction_prompt = PREDICTION_PROMPT_SCALAR.format( + user_prompt=prompt, + market_upper_bound=market_upper_bound, + market_lower_bound=market_lower_bound, + 
additional_information=additional_information, + n_fields=len(field_descriptions), + fields_list=list_to_list_str(list(field_descriptions)), + fields_description=fields_dict_to_bullet_list(field_descriptions), + timestamp=formatted_time_utc, + ) + result = agent.run_sync(prediction_prompt) + + logprobs = None + messages = result.all_messages() + if messages and hasattr(messages[-1], 'vendor_details'): + vendor_details = messages[-1].vendor_details + if vendor_details: + logprobs = vendor_details.get("logprobs") + + completion = result.data + logger.info(f"Completion: {completion}") + completion_clean = clean_completion_json(completion) + logger.info(f"Completion cleaned: {completion_clean}") + + try: + response: ScalarPrediction = json.loads(completion_clean) + except json.decoder.JSONDecodeError as e: + raise UnexpectedModelBehavior(f"The response from {agent=} could not be parsed as JSON: {completion_clean=}") from e + + if logprobs: + response['logprobs'] = LogprobsParser(skip_fields = ["reasoning"]).parse_logprobs(logprobs, Prediction) # type: ignore + + response['upperBound'] = market_upper_bound + response['lowerBound'] = market_lower_bound + + return response def clean_completion_json(completion: str) -> str: """ diff --git a/prediction_prophet/benchmark/agents.py b/prediction_prophet/benchmark/agents.py index 051ef0ba..eebe6b1d 100644 --- a/prediction_prophet/benchmark/agents.py +++ b/prediction_prophet/benchmark/agents.py @@ -7,7 +7,7 @@ AbstractBenchmarkedAgent, ) from prediction_market_agent_tooling.benchmark.utils import ( - Prediction, + Prediction, ScalarProbabilisticAnswer ) from prediction_market_agent_tooling.markets.data_models import ProbabilisticAnswer, CategoricalProbabilisticAnswer from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary @@ -15,7 +15,7 @@ from pydantic_ai import Agent from prediction_prophet.autonolas.research import EmbeddingModel -from prediction_prophet.autonolas.research import 
make_prediction, get_urls_from_queries, make_prediction_categorical +from prediction_prophet.autonolas.research import make_prediction, get_urls_from_queries, make_prediction_categorical, make_prediction_scalar from prediction_prophet.autonolas.research import research as research_autonolas from prediction_prophet.functions.rephrase_question import rephrase_question from prediction_prophet.functions.research import NoResulsFoundError, NotEnoughScrapedSitesError, Research, \ @@ -209,6 +209,26 @@ def research(self, market_question: str) -> Research: logger=self.logger, ) + @observe() + def _make_prediction_scalar( + self, + market_question: str, + market_upper_bound: float, + market_lower_bound: float, + additional_information: str, + agent: Agent, + include_reasoning: bool = False, + ) -> ScalarProbabilisticAnswer: + prediction = make_prediction_scalar( + prompt=market_question, + market_upper_bound=market_upper_bound, + market_lower_bound=market_lower_bound, + additional_information=additional_information, + agent=agent, + include_reasoning=include_reasoning, + ) + return ScalarProbabilisticAnswer.model_validate(prediction) + def predict(self, market_question: str) -> Prediction: try: research = self.research(market_question) From c26816ac4f2af867531c900f06180bdd5eb7ffe9 Mon Sep 17 00:00:00 2001 From: Tomas Sikora Date: Wed, 25 Jun 2025 18:32:58 +0200 Subject: [PATCH 2/5] Added possibility to process n responses for scalar agents --- prediction_prophet/autonolas/research.py | 69 +++++++++++++++--------- prediction_prophet/benchmark/agents.py | 54 ++++++++++++++++--- 2 files changed, 92 insertions(+), 31 deletions(-) diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py index 146903cc..d34e18aa 100644 --- a/prediction_prophet/autonolas/research.py +++ b/prediction_prophet/autonolas/research.py @@ -230,7 +230,7 @@ FIELDS_DESCRIPTIONS_SCALAR = { "reasoning": "A string containing the reasoning behind your decision, and the 
rest of the answer you're about to give.", - "scalar_value": "Predicted value is float. It is expected to be in [{market_upper_bound},{market_lower_bound}], but value can be outside of this range if you think it is more likely.", + "scalar_value": "Predicted value is integer. It is expected to be in [{market_upper_bound},{market_lower_bound}], but value can be outside of this range if you think it is more likely.", "confidence": "Indicating the confidence in the estimated value you provided ranging from 0 (lowest confidence) to 1 (maximum confidence). Confidence can be calculated based on the quality and quantity of data used for the estimation.", "info_utility": "Utility of the information provided in 'ADDITIONAL_INFORMATION' to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).", } @@ -441,10 +441,12 @@ class Prediction(BaseModel): class ScalarPrediction(TypedDict): scalar_value: float - upperBound: float - lowerBound: float + upperBound: int + lowerBound: int confidence: float info_utility: float + reasoning: Optional[str] + logprobs: Optional[list[FieldLogprobs]] class CategoricalPrediction(TypedDict): decision: str @@ -1382,11 +1384,15 @@ def make_prediction_categorical( return response +def avg(key: str, parsed: list[dict[str, Any]]) -> float: + vals = [p[key] for p in parsed if key in p] + return float(sum(vals) / len(vals)) if vals else float("nan") + @observe() def make_prediction_scalar( prompt: str, - market_upper_bound: float, - market_lower_bound: float, + market_upper_bound: int, + market_lower_bound: int, additional_information: str, agent: Agent | None, include_reasoning: bool = False, @@ -1410,29 +1416,42 @@ def make_prediction_scalar( timestamp=formatted_time_utc, ) result = agent.run_sync(prediction_prompt) - - logprobs = None - messages = result.all_messages() - if messages and hasattr(messages[-1], 'vendor_details'): - vendor_details = messages[-1].vendor_details - if vendor_details: - logprobs = 
vendor_details.get("logprobs") - completion = result.data - logger.info(f"Completion: {completion}") - completion_clean = clean_completion_json(completion) - logger.info(f"Completion cleaned: {completion_clean}") - try: - response: ScalarPrediction = json.loads(completion_clean) - except json.decoder.JSONDecodeError as e: - raise UnexpectedModelBehavior(f"The response from {agent=} could not be parsed as JSON: {completion_clean=}") from e + if agent.model and hasattr(agent.model, "_n") and agent.model._n > 1: + jsons = re.findall(r"\{[^{}]*\}", result.data, flags=re.S) - if logprobs: - response['logprobs'] = LogprobsParser(skip_fields = ["reasoning"]).parse_logprobs(logprobs, Prediction) # type: ignore - - response['upperBound'] = market_upper_bound - response['lowerBound'] = market_lower_bound + parsed: list[dict[str, Any]] = [] + for block in jsons: + try: + parsed.append(json.loads(block)) + except json.JSONDecodeError: + continue # silently drop malformed blocks + + responses: ScalarPrediction = { + "scalar_value": avg("scalar_value", parsed), + "confidence": avg("confidence", parsed), + "info_utility": avg("info_utility", parsed), + "upperBound": market_upper_bound, + "lowerBound": market_lower_bound, + "reasoning": "\n\n---\n\n".join(p.get("reasoning", "") for p in parsed if p.get("reasoning")), + "logprobs": [lp for p in parsed for lp in p.get("logprobs", [])] or None, + } + return responses + + else: + completion = result.data + logger.info(f"Completion: {completion}") + completion_clean = clean_completion_json(completion) + logger.info(f"Completion cleaned: {completion_clean}") + + try: + response: ScalarPrediction = json.loads(completion_clean) + except json.decoder.JSONDecodeError as e: + raise UnexpectedModelBehavior(f"The response from {agent=} could not be parsed as JSON: {completion_clean=}") from e + + response['upperBound'] = market_upper_bound + response['lowerBound'] = market_lower_bound return response diff --git 
a/prediction_prophet/benchmark/agents.py b/prediction_prophet/benchmark/agents.py index eebe6b1d..ba3125e2 100644 --- a/prediction_prophet/benchmark/agents.py +++ b/prediction_prophet/benchmark/agents.py @@ -7,9 +7,9 @@ AbstractBenchmarkedAgent, ) from prediction_market_agent_tooling.benchmark.utils import ( - Prediction, ScalarProbabilisticAnswer + Prediction, ScalarPrediction ) -from prediction_market_agent_tooling.markets.data_models import ProbabilisticAnswer, CategoricalProbabilisticAnswer +from prediction_market_agent_tooling.markets.data_models import ProbabilisticAnswer, CategoricalProbabilisticAnswer, ScalarProbabilisticAnswer from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary from prediction_market_agent_tooling.tools.langfuse_ import observe from pydantic_ai import Agent @@ -46,7 +46,24 @@ def _make_prediction( ) return ProbabilisticAnswer.model_validate(prediction) - +@observe() +def _make_prediction_scalar( + market_question: str, + market_upper_bound: int, + market_lower_bound: int, + additional_information: str, + agent: Agent, + include_reasoning: bool = False, +) -> ScalarProbabilisticAnswer: + prediction = make_prediction_scalar( + prompt=market_question, + market_upper_bound=market_upper_bound, + market_lower_bound=market_lower_bound, + additional_information=additional_information, + agent=agent, + include_reasoning=include_reasoning, + ) + return ScalarProbabilisticAnswer.model_validate(prediction) @observe() def _make_prediction_categorical( @@ -213,8 +230,8 @@ def research(self, market_question: str) -> Research: def _make_prediction_scalar( self, market_question: str, - market_upper_bound: float, - market_lower_bound: float, + market_upper_bound: int, + market_lower_bound: int, additional_information: str, agent: Agent, include_reasoning: bool = False, @@ -228,7 +245,7 @@ def _make_prediction_scalar( include_reasoning=include_reasoning, ) return ScalarProbabilisticAnswer.model_validate(prediction) - + def 
predict(self, market_question: str) -> Prediction: try: research = self.research(market_question) @@ -264,6 +281,31 @@ def predict_categorical(self, market_question: str, market_outcomes: t.Sequence[ self.logger.error(f"Error in PredictionProphet's predict_categorical: {e}") return Prediction() + def predict_scalar(self, market_question: str, market_upper_bound: int, market_lower_bound: int) -> ScalarPrediction: + try: + research = self.research(market_question) + prediction=_make_prediction_scalar( + market_question=market_question, + market_upper_bound=market_upper_bound, + market_lower_bound=market_lower_bound, + additional_information=research.report, + agent=self.prediction_agent, + include_reasoning=self.include_reasoning, + ) + return ScalarPrediction( + outcome_prediction=ScalarProbabilisticAnswer( + scalar_value=prediction.scalar_value, + upperBound=market_upper_bound, + lowerBound=market_lower_bound, + confidence=prediction.confidence, + reasoning=prediction.reasoning, + logprobs=prediction.logprobs, + ) + ) + except (NoResulsFoundError, NotEnoughScrapedSitesError) as e: + self.logger.warning(f"Problem in PredictionProphet's predict_scalar: {e}") + return ScalarPrediction() + def predict_restricted( self, market_question: str, time_restriction_up_to: datetime ) -> Prediction: From 13fed7203b259a374f5157bccc3172d9f9e41f24 Mon Sep 17 00:00:00 2001 From: Tomas Sikora Date: Fri, 27 Jun 2025 16:29:21 +0200 Subject: [PATCH 3/5] Changes in prompt and changed boundaries and response to WEI --- prediction_prophet/autonolas/research.py | 22 ++++++++++++---------- prediction_prophet/benchmark/agents.py | 7 ++++--- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py index d34e18aa..23406e53 100644 --- a/prediction_prophet/autonolas/research.py +++ b/prediction_prophet/autonolas/research.py @@ -39,6 +39,7 @@ from prediction_market_agent_tooling.loggers import logger from 
prediction_market_agent_tooling.tools.google_utils import search_google_gcp from prediction_market_agent_tooling.logprobs_parser import LogprobsParser, FieldLogprobs +from prediction_market_agent_tooling.gtypes import Wei load_dotenv() @@ -123,15 +124,16 @@ INTRODUCTION: You are a Large Language Model (LLM) within a multi-agent system. Your primary task is to accurately estimate the 'scalar_value' for the outcome of a 'market question', \ found in 'USER_PROMPT'. The market question is part of a prediction market, where users can place bets on the outcomes of market questions and earn rewards if the predicted 'scalar_value' is close to the actual outcome. +Each market has {market_upper_bound} and {market_lower_bound} values; use those values to calibrate your expectation about your prediction. Each market has a closing date at which the outcome is evaluated. This date is typically stated within the market question. \ The closing date is considered to be 23:59:59 of the date provided in the market question. \ You are provided an itemized list of information under the label "ADDITIONAL_INFORMATION", which is \ -sourced from a Google search engine query performed a few seconds ago and is meant to assist you in your 'scalar_value' estimation. You must adhere to the following 'INSTRUCTIONS'. +sourced from a Google search engine query performed a few seconds ago and is meant to assist you in your 'scalar_value' estimation. You must adhere to the following 'INSTRUCTIONS'. INSTRUCTIONS: * Examine the user's input labeled 'USER_PROMPT'. Focus on the part enclosed in double quotes, which contains the 'market question'. -* Estimate the 'scalar_value' for the outcome of the market question, which is can be any value between {market_upper_bound} and {market_lower_bound}, but the value can be outside of this range if you think it is more likely. +* Estimate the 'scalar_value' for the outcome of the market question. 
* Consider the prediction market with the market question, the closing date and the outcomes in an isolated context that has no influence on the protagonists that are involved in the event in the real world, specified in the market question. The closing date is always arbitrarily set by the market creator and has no influence on the real world. So it is likely that the protagonists of the event in the real world are not even aware of the prediction market and do not care about the market's closing date. * The 'scalar_value' estimations of the market question outcomes must be as accurate as possible, as an inaccurate estimation will lead to financial loss for the user. * Utilize your training data and the information provided under "ADDITIONAL_INFORMATION" to generate 'scalar_value' estimations for the outcomes of the 'market question'. @@ -158,7 +160,7 @@ * Your output response must be only a single JSON object to be parsed by Python's "json.loads()". * The JSON must contain {n_fields} fields: {fields_list}. {fields_description} -* The 'scalar_value' can be any value between {market_upper_bound} and {market_lower_bound}, but the value can be outside of this range if you think it is more likely. +* The 'scalar_value' is a float number. * Output only the JSON object in your response. Do not include any other contents in your response. """ @@ -230,7 +232,7 @@ FIELDS_DESCRIPTIONS_SCALAR = { "reasoning": "A string containing the reasoning behind your decision, and the rest of the answer you're about to give.", - "scalar_value": "Predicted value is integer. It is expected to be in [{market_upper_bound},{market_lower_bound}], but value can be outside of this range if you think it is more likely.", + "scalar_value": "Predicted value of the market question, as a float number", "confidence": "Indicating the confidence in the estimated value you provided ranging from 0 (lowest confidence) to 1 (maximum confidence). 
Confidence can be calculated based on the quality and quantity of data used for the estimation.", "info_utility": "Utility of the information provided in 'ADDITIONAL_INFORMATION' to help you make the probability estimation ranging from 0 (lowest utility) to 1 (maximum utility).", } @@ -440,9 +442,9 @@ class Prediction(BaseModel): class ScalarPrediction(TypedDict): - scalar_value: float - upperBound: int - lowerBound: int + scalar_value: Wei + upperBound: Wei + lowerBound: Wei confidence: float info_utility: float reasoning: Optional[str] @@ -1391,8 +1393,8 @@ def avg(key: str, parsed: list[dict[str, Any]]) -> float: @observe() def make_prediction_scalar( prompt: str, - market_upper_bound: int, - market_lower_bound: int, + market_upper_bound: Wei, + market_lower_bound: Wei, additional_information: str, agent: Agent | None, include_reasoning: bool = False, @@ -1429,7 +1431,7 @@ def make_prediction_scalar( continue # silently drop malformed blocks responses: ScalarPrediction = { - "scalar_value": avg("scalar_value", parsed), + "scalar_value": Wei(avg("scalar_value", parsed)), "confidence": avg("confidence", parsed), "info_utility": avg("info_utility", parsed), "upperBound": market_upper_bound, diff --git a/prediction_prophet/benchmark/agents.py b/prediction_prophet/benchmark/agents.py index ba3125e2..367d7c85 100644 --- a/prediction_prophet/benchmark/agents.py +++ b/prediction_prophet/benchmark/agents.py @@ -6,6 +6,7 @@ from prediction_market_agent_tooling.benchmark.agents import ( AbstractBenchmarkedAgent, ) +from prediction_market_agent_tooling.gtypes import Wei from prediction_market_agent_tooling.benchmark.utils import ( Prediction, ScalarPrediction ) @@ -49,8 +50,8 @@ def _make_prediction( @observe() def _make_prediction_scalar( market_question: str, - market_upper_bound: int, - market_lower_bound: int, + market_upper_bound: Wei, + market_lower_bound: Wei, additional_information: str, agent: Agent, include_reasoning: bool = False, @@ -281,7 +282,7 @@ def 
predict_categorical(self, market_question: str, market_outcomes: t.Sequence[ self.logger.error(f"Error in PredictionProphet's predict_categorical: {e}") return Prediction() - def predict_scalar(self, market_question: str, market_upper_bound: int, market_lower_bound: int) -> ScalarPrediction: + def predict_scalar(self, market_question: str, market_upper_bound: Wei, market_lower_bound: Wei) -> ScalarPrediction: try: research = self.research(market_question) prediction=_make_prediction_scalar( From a89f5de029a4f6655be1840c4d3fa5b92021c37f Mon Sep 17 00:00:00 2001 From: Tomas Sikora Date: Fri, 27 Jun 2025 16:38:02 +0200 Subject: [PATCH 4/5] Codestyle fixes --- prediction_prophet/autonolas/research.py | 2 +- prediction_prophet/benchmark/agents.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/prediction_prophet/autonolas/research.py b/prediction_prophet/autonolas/research.py index 23406e53..3f2fc1ff 100644 --- a/prediction_prophet/autonolas/research.py +++ b/prediction_prophet/autonolas/research.py @@ -1431,7 +1431,7 @@ def make_prediction_scalar( continue # silently drop malformed blocks responses: ScalarPrediction = { - "scalar_value": Wei(avg("scalar_value", parsed)), + "scalar_value": Wei(int(avg("scalar_value", parsed))), "confidence": avg("confidence", parsed), "info_utility": avg("info_utility", parsed), "upperBound": market_upper_bound, diff --git a/prediction_prophet/benchmark/agents.py b/prediction_prophet/benchmark/agents.py index 367d7c85..ce469bc4 100644 --- a/prediction_prophet/benchmark/agents.py +++ b/prediction_prophet/benchmark/agents.py @@ -231,8 +231,8 @@ def research(self, market_question: str) -> Research: def _make_prediction_scalar( self, market_question: str, - market_upper_bound: int, - market_lower_bound: int, + market_upper_bound: Wei, + market_lower_bound: Wei, additional_information: str, agent: Agent, include_reasoning: bool = False, From eb905cba1164921dbef8f2cfd95d474a34243a5c Mon Sep 17 00:00:00 2001 From: Tomas 
Sikora Date: Thu, 3 Jul 2025 12:58:25 +0200 Subject: [PATCH 5/5] Bump PMAT --- poetry.lock | 24 ++++++++++++------------ pyproject.toml | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/poetry.lock b/poetry.lock index 8a70b417..9bff30e7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1269,26 +1269,26 @@ test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist", "wurlitzer"] [[package]] -name = "cowdao-cowpy-kongzii" -version = "1.0.0rc2" +name = "cowdao-cowpy" +version = "1.0.0rc5" description = "" optional = false python-versions = "<4.0,>=3.10" groups = ["main"] files = [ - {file = "cowdao_cowpy_kongzii-1.0.0rc2-py3-none-any.whl", hash = "sha256:359d22d06faff41e3e84ec47c7ce7729f9f46a5af22b41cd04b4c9361c3e0628"}, - {file = "cowdao_cowpy_kongzii-1.0.0rc2.tar.gz", hash = "sha256:b78ef90e4e0ca4f1f37bae4d8f62af7270828df93b4677f5f6532432d617dab5"}, + {file = "cowdao_cowpy-1.0.0rc5-py3-none-any.whl", hash = "sha256:07ce36296ee9993ade27796757b9c84990dd9a738961651e3fb18b31b50d0e1d"}, + {file = "cowdao_cowpy-1.0.0rc5.tar.gz", hash = "sha256:5941532beb215851c37f322e05d5e258e28f9f6c7bdb53247f1a0a6dccc9b86a"}, ] [package.dependencies] aiolimiter = ">=1.1.0,<2.0.0" backoff = ">=2.2.1,<3.0.0" -httpx = ">=0.25.0,<1.0.0" +httpx = ">=0.23.0,<1.0.0" multiformats = ">=0.3.1.post4,<0.4.0" pybars3 = ">=0.9.7,<0.10.0" pydantic = ">=2.7.0,<3.0.0" pytest-mock = ">=3.14.0,<4.0.0" -web3 = ">=6.15.1,<7.0.0" +web3 = ">=6,<7" [[package]] name = "cron-validator" @@ -5219,20 +5219,20 @@ test = ["anthropic", "coverage", "django", "flake8", "freezegun (==1.5.1)", "lan [[package]] name = "prediction-market-agent-tooling" -version = "0.65.3" +version = "0.66.4" 
description = "Tools to benchmark, deploy and monitor prediction market agents." optional = false python-versions = "<3.13,>=3.10" groups = ["main"] files = [ - {file = "prediction_market_agent_tooling-0.65.3-py3-none-any.whl", hash = "sha256:58d0efec25e26dc8e3fe93cb45c14d44e152609bed177273381c02238609808c"}, - {file = "prediction_market_agent_tooling-0.65.3.tar.gz", hash = "sha256:733aedb195d0ea5b2b12cb47b4d89316a317acd0c43a515054608014bf97e9bd"}, + {file = "prediction_market_agent_tooling-0.66.4-py3-none-any.whl", hash = "sha256:5066c01cd29052daaa1428adf947cdf8b63c876ac306b4bebd1958cf48b42a30"}, + {file = "prediction_market_agent_tooling-0.66.4.tar.gz", hash = "sha256:069613ef96f234f6204ebc8e31ef13a95c88aa8d979cfc78b3c95e77fc39009d"}, ] [package.dependencies] autoflake = ">=2.2.1,<3.0.0" base58 = ">=1.0.2,<2.0" -cowdao-cowpy-kongzii = "1.0.0rc2" +cowdao-cowpy = "1.0.0rc5" cron-validator = ">=1.0.8,<2.0.0" eth-account = ">=0.8.0,<0.12.0" eth-keys = ">=0.6.1,<0.7.0" @@ -8885,4 +8885,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.13" -content-hash = "1edbb8ba2ff6e1f096d69f4680e6a950d19c1b189d348ce497e275d3ebd44bd4" +content-hash = "eed298081e18b62db6d7ef71a16b2d239d4755c2a100b77cc6a63d95c92af92e" diff --git a/pyproject.toml b/pyproject.toml index 9739f8dd..1dc835c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ scikit-learn = "^1.4.0" typer = ">=0.9.0,<1.0.0" types-requests = "^2.31.0.20240125" types-python-dateutil = "^2.9.0" -prediction-market-agent-tooling = { version = ">=0.65.3,<1", extras = ["langchain", "google"] } +prediction-market-agent-tooling = { version = ">=0.66.4,<1", extras = ["langchain", "google"] } langchain-community = "^0.3.0" memory-profiler = "^0.61.0" matplotlib = "^3.8.3"