Skip to content

Pre/beta #925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
## [1.38.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.0...v1.38.1) (2025-02-15)
## [1.39.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.1-beta.1...v1.39.0-beta.1) (2025-02-17)


### Bug Fixes

* filter links ([04b9197](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/04b91972e88b69b722454d54c8635dfb49b38b44))


### Test

* Add coverage improvement test for tests/test_scrape_do.py ([4ce6d1b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/4ce6d1b94306d0ae94a74748726468a5132b7969))

### Features

### CI
* add the new handling exception ([5c0bc46](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c0bc46c6322ea07efa31d95819d7da47462f981))

* **release:** 1.38.1-beta.1 [skip ci] ([83be82a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/83be82a11e83eb2be60a945deac361c46526c785))

## [1.38.1-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.38.0...v1.38.1-beta.1) (2025-02-13)

Expand Down
Binary file modified docs/assets/api-banner.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[project]
name = "scrapegraphai"

version = "1.38.1"
version = "1.39.0b1"


description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
Expand Down
1 change: 1 addition & 0 deletions scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
"llama3.2": 128000,
"llama3.2:1b": 128000,
"llama3.2:3b": 128000,
"llama3.3": 128000,
"llama3.3:70b": 128000,
"scrapegraph": 8192,
"mistral-small": 128000,
Expand Down
54 changes: 33 additions & 21 deletions scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import time
import json
from typing import List, Optional

from langchain.prompts import PromptTemplate
Expand Down Expand Up @@ -120,7 +121,11 @@ def execute(self, state: dict) -> dict:
else:
if not isinstance(self.llm_model, ChatBedrock):
output_parser = JsonOutputParser()
format_instructions = output_parser.get_format_instructions()
format_instructions = (
"You must respond with a JSON object. Your response should be formatted as a valid JSON "
"with a 'content' field containing your analysis. For example:\n"
'{"content": "your analysis here"}'
)
else:
output_parser = None
format_instructions = ""
Expand All @@ -131,13 +136,25 @@ def execute(self, state: dict) -> dict:
and not self.script_creator
or self.is_md_scraper
):
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS_MD
template_chunks_prompt = TEMPLATE_CHUNKS_MD
template_merge_prompt = TEMPLATE_MERGE_MD
template_no_chunks_prompt = (
TEMPLATE_NO_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
)
template_chunks_prompt = (
TEMPLATE_CHUNKS_MD + "\n\nIMPORTANT: " + format_instructions
)
template_merge_prompt = (
TEMPLATE_MERGE_MD + "\n\nIMPORTANT: " + format_instructions
)
else:
template_no_chunks_prompt = TEMPLATE_NO_CHUNKS
template_chunks_prompt = TEMPLATE_CHUNKS
template_merge_prompt = TEMPLATE_MERGE
template_no_chunks_prompt = (
TEMPLATE_NO_CHUNKS + "\n\nIMPORTANT: " + format_instructions
)
template_chunks_prompt = (
TEMPLATE_CHUNKS + "\n\nIMPORTANT: " + format_instructions
)
template_merge_prompt = (
TEMPLATE_MERGE + "\n\nIMPORTANT: " + format_instructions
)

if self.additional_info is not None:
template_no_chunks_prompt = self.additional_info + template_no_chunks_prompt
Expand All @@ -161,8 +178,9 @@ def execute(self, state: dict) -> dict:
answer = self.invoke_with_timeout(
chain, {"question": user_prompt}, self.timeout
)
except Timeout:
state.update({self.output[0]: {"error": "Response timeout exceeded"}})
except (Timeout, json.JSONDecodeError) as e:
error_msg = "Response timeout exceeded" if isinstance(e, Timeout) else "Invalid JSON response format"
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
return state

state.update({self.output[0]: answer})
Expand Down Expand Up @@ -191,14 +209,9 @@ def execute(self, state: dict) -> dict:
batch_results = self.invoke_with_timeout(
async_runner, {"question": user_prompt}, self.timeout
)
except Timeout:
state.update(
{
self.output[0]: {
"error": "Response timeout exceeded during chunk processing"
}
}
)
except (Timeout, json.JSONDecodeError) as e:
error_msg = "Response timeout exceeded during chunk processing" if isinstance(e, Timeout) else "Invalid JSON response format in chunk processing"
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
return state

merge_prompt = PromptTemplate(
Expand All @@ -216,10 +229,9 @@ def execute(self, state: dict) -> dict:
{"context": batch_results, "question": user_prompt},
self.timeout,
)
except Timeout:
state.update(
{self.output[0]: {"error": "Response timeout exceeded during merge"}}
)
except (Timeout, json.JSONDecodeError) as e:
error_msg = "Response timeout exceeded during merge" if isinstance(e, Timeout) else "Invalid JSON response format during merge"
state.update({self.output[0]: {"error": error_msg, "raw_response": str(e)}})
return state

state.update({self.output[0]: answer})
Expand Down