Skip to content

Commit a71f68f

Browse files
enhance url2note
1 parent 42490b2 commit a71f68f

File tree

2 files changed

+82 -32 lines changed

2 files changed

+82 -32 lines changed

dialoghelper/core.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,12 +272,16 @@ def run_msg(
272272
def url2note(
273273
url:str, # URL to read
274274
extract_section:bool=True, # If url has an anchor, return only that section
275-
selector:str=None # Select section(s) using BeautifulSoup.select (overrides extract_section)
275+
selector:str=None, # Select section(s) using BeautifulSoup.select (overrides extract_section)
276+
ai_img:bool=True, # Make images visible to the AI
277+
split_re:str=r'(?=^#{1,6} .+)' # Regex to split content into multiple notes, set to False for single note
276278
):
277-
"Read URL as markdown, and add a note below current message with the result"
278-
res = read_url(url, as_md=True, extract_section=extract_section, selector=selector)
279+
"Read URL as markdown, and add note(s) below current message with the result"
280+
res = read_url(url, as_md=True, extract_section=extract_section, selector=selector, ai_img=ai_img)
281+
if split_re: return [add_msg(s) for s in re.split(split_re, res, flags=re.MULTILINE) if s.strip()]
279282
return add_msg(res)
280283

284+
281285
# %% ../nbs/00_core.ipynb
282286
def ast_py(code:str):
283287
"Get an SgRoot root node for python `code`"

nbs/00_core.ipynb

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@
614614
"output_type": "stream",
615615
"text": [
616616
"_9c544573\n",
617-
"_1ddfd0ed\n"
617+
"_9558b075\n"
618618
]
619619
}
620620
],
@@ -811,6 +811,16 @@
811811
"_edit_id = add_msg('This message should be found.\\n\\nThis is a multiline message.')"
812812
]
813813
},
814+
{
815+
"cell_type": "markdown",
816+
"id": "57797a25",
817+
"metadata": {},
818+
"source": [
819+
"This message should be found.\n",
820+
"\n",
821+
"This is a multiline message."
822+
]
823+
},
814824
{
815825
"cell_type": "code",
816826
"execution_count": null,
@@ -892,6 +902,16 @@
892902
"id": "6e354677",
893903
"metadata": {},
894904
"outputs": [
905+
{
906+
"data": {
907+
"text/plain": [
908+
"2"
909+
]
910+
},
911+
"execution_count": null,
912+
"metadata": {},
913+
"output_type": "execute_result"
914+
},
895915
{
896916
"data": {
897917
"text/plain": [
@@ -949,11 +969,14 @@
949969
"def url2note(\n",
950970
" url:str, # URL to read\n",
951971
" extract_section:bool=True, # If url has an anchor, return only that section\n",
952-
" selector:str=None # Select section(s) using BeautifulSoup.select (overrides extract_section)\n",
972+
" selector:str=None, # Select section(s) using BeautifulSoup.select (overrides extract_section)\n",
973+
" ai_img:bool=True, # Make images visible to the AI\n",
974+
" split_re:str=r'(?=^#{1,6} .+)' # Regex to split content into multiple notes, set to False for single note\n",
953975
"):\n",
954-
" \"Read URL as markdown, and add a note below current message with the result\"\n",
955-
" res = read_url(url, as_md=True, extract_section=extract_section, selector=selector)\n",
956-
" return add_msg(res)"
976+
" \"Read URL as markdown, and add note(s) below current message with the result\"\n",
977+
" res = read_url(url, as_md=True, extract_section=extract_section, selector=selector, ai_img=ai_img)\n",
978+
" if split_re: return [add_msg(s) for s in re.split(split_re, res, flags=re.MULTILINE) if s.strip()]\n",
979+
" return add_msg(res)\n"
957980
]
958981
},
959982
{
@@ -976,6 +999,26 @@
976999
"del_msg(_id)"
9771000
]
9781001
},
1002+
{
1003+
"cell_type": "code",
1004+
"execution_count": null,
1005+
"id": "43554bd9",
1006+
"metadata": {},
1007+
"outputs": [],
1008+
"source": [
1009+
"_ids = url2note('https://www.answer.ai/posts/2025-10-01-cachy.html')"
1010+
]
1011+
},
1012+
{
1013+
"cell_type": "code",
1014+
"execution_count": null,
1015+
"id": "b02115e6",
1016+
"metadata": {},
1017+
"outputs": [],
1018+
"source": [
1019+
"_ = [del_msg(i) for i in _ids]"
1020+
]
1021+
},
9791022
{
9801023
"cell_type": "code",
9811024
"execution_count": null,
@@ -1045,25 +1088,25 @@
10451088
{
10461089
"data": {
10471090
"text/plain": [
1048-
"[(\"xpost('http://localhost:5001/pop_data_blocking_', data={'data_id': idx})\",\n",
1049-
" {'B': {'text': \"{'data_id': idx}\",\n",
1050-
" 'range': {'byteOffset': {'start': 1185, 'end': 1201},\n",
1051-
" 'start': {'line': 38, 'column': 72},\n",
1052-
" 'end': {'line': 38, 'column': 88}}},\n",
1053-
" 'A': {'text': \"'http://localhost:5001/pop_data_blocking_'\",\n",
1054-
" 'range': {'byteOffset': {'start': 1136, 'end': 1178},\n",
1055-
" 'start': {'line': 38, 'column': 23},\n",
1056-
" 'end': {'line': 38, 'column': 65}}}},\n",
1057-
" 'dialoghelper/experimental.py'),\n",
1058-
" ('xpost(f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\', data=data)',\n",
1059-
" {'A': {'text': 'f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\'',\n",
1060-
" 'range': {'byteOffset': {'start': 2624, 'end': 2672},\n",
1061-
" 'start': {'line': 70, 'column': 16},\n",
1062-
" 'end': {'line': 70, 'column': 64}}},\n",
1063-
" 'B': {'text': 'data',\n",
1064-
" 'range': {'byteOffset': {'start': 2679, 'end': 2683},\n",
1091+
"[('xpost(f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\', data=data)',\n",
1092+
" {'B': {'text': 'data',\n",
1093+
" 'range': {'byteOffset': {'start': 2753, 'end': 2757},\n",
10651094
" 'start': {'line': 70, 'column': 71},\n",
1066-
" 'end': {'line': 70, 'column': 75}}}},\n",
1095+
" 'end': {'line': 70, 'column': 75}}},\n",
1096+
" 'A': {'text': 'f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\'',\n",
1097+
" 'range': {'byteOffset': {'start': 2698, 'end': 2746},\n",
1098+
" 'start': {'line': 70, 'column': 16},\n",
1099+
" 'end': {'line': 70, 'column': 64}}}},\n",
1100+
" 'dialoghelper/core.py'),\n",
1101+
" (\"xpost(url, data={'data_id': idx, 'timeout': timeout})\",\n",
1102+
" {'B': {'text': \"{'data_id': idx, 'timeout': timeout}\",\n",
1103+
" 'range': {'byteOffset': {'start': 4450, 'end': 4486},\n",
1104+
" 'start': {'line': 121, 'column': 36},\n",
1105+
" 'end': {'line': 121, 'column': 72}}},\n",
1106+
" 'A': {'text': 'url',\n",
1107+
" 'range': {'byteOffset': {'start': 4440, 'end': 4443},\n",
1108+
" 'start': {'line': 121, 'column': 26},\n",
1109+
" 'end': {'line': 121, 'column': 29}}}},\n",
10671110
" 'dialoghelper/core.py')]"
10681111
]
10691112
},
@@ -1112,7 +1155,7 @@
11121155
{
11131156
"data": {
11141157
"text/plain": [
1115-
"{'success': 'Inserted text after line 5 in message _c3581eea'}"
1158+
"{'success': 'Inserted text after line 5 in message _f813f590'}"
11161159
]
11171160
},
11181161
"execution_count": null,
@@ -1176,7 +1219,7 @@
11761219
{
11771220
"data": {
11781221
"text/plain": [
1179-
"{'success': 'Replaced text in message _c3581eea'}"
1222+
"{'success': 'Replaced text in message _f813f590'}"
11801223
]
11811224
},
11821225
"execution_count": null,
@@ -1238,7 +1281,7 @@
12381281
{
12391282
"data": {
12401283
"text/plain": [
1241-
"{'success': 'Successfully replaced all the strings in message _c3581eea'}"
1284+
"{'success': 'Successfully replaced all the strings in message _f813f590'}"
12421285
]
12431286
},
12441287
"execution_count": null,
@@ -1301,7 +1344,7 @@
13011344
{
13021345
"data": {
13031346
"text/plain": [
1304-
"{'success': 'Replaced lines 2 to 4 in message _c3581eea'}"
1347+
"{'success': 'Replaced lines 2 to 4 in message _f813f590'}"
13051348
]
13061349
},
13071350
"execution_count": null,
@@ -1774,9 +1817,12 @@
17741817
"- &`find_var`: Search for var in all frames of the call stack\n",
17751818
"- &`set_var`: Set var to val after finding it in all frames of the call stack\n",
17761819
"- &`find_dname`: Get the message id by searching the call stack for __dialog_id.\n",
1777-
"- &`find_msg_id`: Get the message id by searching the call stack for __dialog_id.\n",
1820+
"- &`find_msg_id`: Get the message id by searching the call stack for __msg_id.\n",
17781821
"- &`curr_dialog`: Get the current dialog info.\n",
17791822
"- &`msg_idx`: Get absolute index of message in dialog.\n",
1823+
"- &`add_scr`: Swap a script element to the end of the js-script element\n",
1824+
"- &`iife`: Wrap javascript code string in an IIFE and execute it via `add_html`\n",
1825+
"- &`event_get`: Call `fire_event` and then `pop_data` to get a response\n",
17801826
"- &`find_msgs`: Find `list[dict]` of messages in current specific dialog that contain the given information. To refer to a message found later, use its `id` field.\n",
17811827
"- &`add_html`: Send HTML to the browser to be swapped into the DOM\n",
17821828
"- &`read_msg`: Get the message indexed in the current dialog.\n",
@@ -1789,7 +1835,7 @@
17891835
" - Use `content` param to update contents.\n",
17901836
" - Only include parameters to update--missing ones will be left unchanged.\n",
17911837
"- &`run_msg`: Adds a message to the run queue. Use read_msg to see the output once it runs.\n",
1792-
"- &`url2note`: Read URL as markdown, and add a note below current message with the result\n",
1838+
"- &`url2note`: Read URL as markdown, and add note(s) below current message with the result\n",
17931839
"- &`ast_py`: Get an SgRoot root node for python `code`\n",
17941840
"- &`ast_grep`: Use the `ast-grep` command to find `pattern` in `path`\n",
17951841
"- &`msg_insert_line`: Insert text at a specific line number in a message\n",

0 commit comments

Comments (0)