diff --git a/g4f/gui/client/static/js/chat.v1.js b/g4f/gui/client/static/js/chat.v1.js
index f6569ffcb03..9537f78cdb5 100644
--- a/g4f/gui/client/static/js/chat.v1.js
+++ b/g4f/gui/client/static/js/chat.v1.js
@@ -402,7 +402,7 @@ const handle_ask = async (do_ask_gpt = true) => {
     await add_conversation(window.conversation_id);
 
     // Is message a url?
-    const expression = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/gi;
+    const expression = /^https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$/gi;
     const regex = new RegExp(expression);
     if (message.match(regex)) {
         paperclip.classList.add("blink");
diff --git a/g4f/tools/files.py b/g4f/tools/files.py
index 6d6d22068fe..1c7257c0266 100644
--- a/g4f/tools/files.py
+++ b/g4f/tools/files.py
@@ -416,7 +416,7 @@ def read_links(html: str, base: str) -> set[str]:
 async def download_urls(
     bucket_dir: Path,
     urls: list[str],
-    max_depth: int = 1,
+    max_depth: int = 0,
     loading_urls: set[str] = set(),
     lock: asyncio.Lock = None,
     delay: int = 3,
@@ -515,7 +515,7 @@ def stream_chunks(bucket_dir: Path, delete_files: bool = False, refine_chunks_wi
     if refine_chunks_with_spacy:
         for chunk in stream_read_parts_and_refine(bucket_dir, delete_files):
             if event_stream:
-                size += len(chunk.decode('utf-8'))
+                size += len(chunk.encode())
                 yield f'data: {json.dumps({"action": "refine", "size": size})}\n\n'
             else:
                 yield chunk
@@ -524,7 +524,7 @@ def stream_chunks(bucket_dir: Path, delete_files: bool = False, refine_chunks_wi
         streaming = cache_stream(streaming, bucket_dir)
         for chunk in streaming:
             if event_stream:
-                size += len(chunk.decode('utf-8'))
+                size += len(chunk.encode())
                 yield f'data: {json.dumps({"action": "load", "size": size})}\n\n'
             else:
                 yield chunk
diff --git a/g4f/tools/web_search.py b/g4f/tools/web_search.py
index 9ff6bf21666..f8f9b53b03c 100644
--- a/g4f/tools/web_search.py
+++ b/g4f/tools/web_search.py
@@ -89,10 +89,10 @@ def scrape_text(html: str, max_words: int = None, add_source=True, count_images:
         if select:
             select.extract()
 
-    image_select = "img[alt][src^=http]:not([alt='']):not(.avatar)"
+    image_select = "img[alt][src^=http]:not([alt='']):not(.avatar):not([width])"
     image_link_select = f"a:has({image_select})"
     yield_words = []
-    for paragraph in soup.select(f"h1, h2, h3, h4, h5, h6, p, table:not(:has(p)), ul:not(:has(p)), {image_link_select}"):
+    for paragraph in soup.select(f"h1, h2, h3, h4, h5, h6, p, pre, table:not(:has(p)), ul:not(:has(p)), {image_link_select}"):
        if count_images > 0:
            image = paragraph.select_one(image_select)
            if image:
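
Note on the size-counting change in stream_chunks: the switch from len(chunk.decode('utf-8')) to len(chunk.encode()) suggests the chunks yielded there are str, on which .decode('utf-8') raises AttributeError in Python 3; len(chunk.encode()) instead reports the chunk's UTF-8 byte length rather than its character count. A minimal standalone sketch of that counting, using a hypothetical stream_with_progress helper that is not part of the patch:

import json

def stream_with_progress(chunks):
    # Emit one SSE progress event per str chunk, accumulating UTF-8
    # byte length the same way the patched stream_chunks does.
    size = 0
    for chunk in chunks:
        size += len(chunk.encode())  # byte length, not len(chunk) characters
        yield f'data: {json.dumps({"action": "load", "size": size})}\n\n'

if __name__ == "__main__":
    # 'héllo ' is 6 characters but 7 UTF-8 bytes, so the reported size
    # differs from a plain character count.
    for event in stream_with_progress(["héllo ", "wörld"]):
        print(event, end="")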