From 3b982ab448d1ed338ec52dfe40842505b4ac7097 Mon Sep 17 00:00:00 2001 From: zonghui <243156680@qq.com> Date: Sun, 5 Oct 2025 01:26:57 +0800 Subject: [PATCH 1/3] make new rag agent file --- .../rag/main_test.ipynb" | 323 ++++++++++++++++++ .../rag/new_test.py" | 174 ++++++++++ 2 files changed, 497 insertions(+) create mode 100644 "\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main_test.ipynb" create mode 100644 "\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/new_test.py" diff --git "a/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main_test.ipynb" "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main_test.ipynb" new file mode 100644 index 0000000..4d9afce --- /dev/null +++ "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/main_test.ipynb" @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "id": "cf723ed9-d8e0-4f1a-911f-887b927f8569", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0] # 哆啦A梦与超级赛亚人:时空之战\n", + "\n", + "[1] 在一个寻常的午后,大雄依旧坐在书桌前发呆,作业堆得像山,连第一页都没动。哆啦A梦在一旁翻着漫画,时不时叹口气,觉得这孩子还是一如既往的不靠谱。正当他们的生活照常进行时,一道强光突然从天而降,整个房间震动不已。光芒中走出一名金发少年,身披战甲、气势惊人,他就是来自未来的超级赛亚人——特兰克斯。他一出现便说出了惊人的话:未来的地球即将被黑暗势力摧毁,他来此是为了寻求哆啦A梦的帮助。\n", + "\n", + "[2] 哆啦A梦与大雄听后大惊,但也从特兰克斯坚定的眼神中读出了不容拒绝的决心。特兰克斯解释说,未来的敌人并非普通反派,而是一个名叫“黑暗赛亚人”的存在,他由邪恶科学家复制了贝吉塔的基因并加以改造,实力超乎想象。这个敌人不仅拥有赛亚人战斗力,还能操纵扭曲的时间能量,几乎无人可敌。特兰克斯已经独自战斗多年,但每一次都以惨败告终。他说:“科技,是我那个时代唯一缺失的武器,而你们,正好拥有它。”\n", + "\n", + "[3] 于是,哆啦A梦带着特兰克斯与大雄启动时光机,穿越到了那个即将崩溃的未来世界。眼前的景象令人震撼:城市沦为废墟,大地裂痕纵横,天空中浮动着压抑的黑雾。特兰克斯说,这正是黑暗赛亚人带来的结果,一切生命几乎都被抹杀,只剩他在苦苦支撑。大雄虽感到恐惧,但看到无辜的人类遭殃,内心逐渐燃起斗志。哆啦A梦则冷静地分析局势,决定使用他最强的三样秘密道具来对抗黑暗势力。\n", + "\n", + "[4] 三件秘密道具分别是:可以临时赋予超级战力的“复制斗篷”,能暂停时间五秒的“时间停止手表”,以及可在一分钟中完成一年修行的“精神与时光屋便携版”。大雄被推进精神屋内,在其中接受密集的训练,虽然只有几分钟现实时间,他却经历了整整一年的苦修。刚开始他依旧软弱,想放弃、想逃跑,但当他想起静香、父母,还有哆啦A梦那坚定的眼神时,他终于咬牙坚持了下来。出来之后,他的身体与精神都焕然一新,眼神中多了一份成熟与自信。\n", + "\n", + "[5] 最终战在黑暗赛亚人的空中要塞前爆发,特兰克斯率先出击,释放全力与敌人正面对决。哆啦A梦则用任意门和道具支援,从各个方向制造混乱,尽量压制敌人的时空能力。但黑暗赛亚人太过强大,仅凭特兰克斯一人根本无法压制,更别说击败。就在特兰克斯即将被击倒之际,大雄披上复制斗篷、冲破恐惧从高空跃下。他的拳头燃烧着金色光焰,目标直指敌人心脏。\n", + "\n", + "[6] 时间停止装置在关键时刻启动,世界陷入静止,大雄用这个短短五秒接近了敌人的盲点。他集中全力,一记重拳击穿了黑暗赛亚人的能量核心,引发巨大的能量反冲。黑暗赛亚人尖叫着化为碎光,天空中的黑雾瞬间散去,阳光重新洒落大地。特兰克斯倒在地上,看着眼前这个曾经懦弱的少年,露出了欣慰的笑容。他知道,这一次,是大雄救了世界。\n", + "\n", + "[7] 战后,未来世界开始恢复,植物重新生长,人类重建家园。特兰克斯告别时紧紧握住大雄的手,说:“你是我见过最特别的战士。”哆啦A梦也为大雄感到骄傲,说他终于真正成长了一次。三人站在山丘上,看着远方重新明亮的地平线,心中感受到从未有过的安宁。随后,哆啦A梦与大雄乘坐时光机返回了属于他们的那个年代,一切仿佛又恢复平静。\n", + "\n", + "[8] 回到现代后,大雄仿佛变了一个人,不再轻易抱怨、不再逃避责任。他认真写完作业,帮妈妈买菜,甚至主动练习体育,哆啦A梦惊讶得说不出话来。他知道,这不是一时兴起,而是大雄真正内心成长的结果。大雄有时会望着天空出神,仿佛还能看见未来世界的那一片废墟与重生的希望。他不会说出来,但他心中永远铭记那一战。\n", + "\n", + "[9] 几天后,电视新闻中突然出现一则画面:一位金发少年在街头击退了失控的机器人,引发市民围观与猜测。大雄放下手中的课本,望向哆啦A梦,两人心照不宣地笑了。也许,特兰克斯又回来了,也许,新的敌人正在逼近。冒险从未真正结束,而他们,早已准备好了。无论时空如何动荡,他们将永远并肩作战。\n", + "\n" + ] + } + ], + "source": [ + "from typing import List\n", + "\n", + "def split_into_chunks(doc_file: str) -> List[str]:\n", + " with open(doc_file, 'r', encoding=\"utf-8\") as file:\n", + " content = file.read()\n", + "\n", + " return [chunk for chunk in content.split(\"\\n\\n\")]\n", + "\n", + "chunks = split_into_chunks(\"doc.md\")\n", + "\n", + "for i, chunk in enumerate(chunks):\n", + " print(f\"[{i}] {chunk}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "cfe9bf60-5d21-4696-99a5-7e7f3b94dd06", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "768\n", + "[0.02680545672774315, 0.008382013067603111, 0.00034338096156716347, 0.007298988290131092, 0.0543331615626812, -0.053255874663591385, 0.0013655376387760043, -0.0013182086404412985, -0.036711182445287704, 0.07188180834054947, -0.007270656060427427, -0.007053027860820293, 0.04253281280398369, -0.036752745509147644, -0.054750557988882065, -0.009598535485565662, 0.01710556633770466, 0.05915365368127823, -0.03334999457001686, 0.06237657368183136, -0.004888511728495359, -0.03453953564167023, -0.07407599687576294, 0.044221941381692886, 0.010516931302845478, -0.037077855318784714, -0.027029821649193764, 0.038303639739751816, 0.021282462403178215, -0.011811506003141403, -0.0054087648168206215, 0.002659057267010212, -0.0232985969632864, 0.05299097299575806, 0.005149415228515863, 0.029624195769429207, -0.03080972470343113, -0.01785614900290966, 0.042446069419384, -0.0076923188753426075, -0.010638098232448101, 0.032108668237924576, -0.06592470407485962, -0.012100904248654842, 0.006814638618379831, -0.0011549569899216294, -0.020827798172831535, 0.027529586106538773, -0.04546991363167763, 0.0517701655626297, -0.051474303007125854, 0.2564859688282013, 0.050316303968429565, 0.017564255744218826, -0.01156520377844572, -0.013389557600021362, 0.016082564368844032, 0.018451189622282982, -0.006780544761568308, 0.031050026416778564, 0.050575438886880875, -0.012529836036264896, 0.06348630785942078, -0.0484028123319149, 0.0009181015775538981, 0.02100476250052452, -0.010703859850764275, -0.060850370675325394, 0.00680469861254096, 0.05067189037799835, -0.02494342438876629, -0.012202014215290546, 0.018374791368842125, 0.032339852303266525, 0.04231114685535431, 0.00020392672740854323, -0.017155056819319725, -0.01603800058364868, -0.03760530799627304, -0.04362509027123451, -0.04164882004261017, 0.016573457047343254, 0.009964766912162304, -0.0601021908223629, -0.027282947674393654, 0.020031146705150604, 0.010414338670670986, -0.047514408826828, -0.018612220883369446, -0.011965067125856876, -0.06262185424566269, -0.040165022015571594, -0.00942965131253004, 0.019113559275865555, 0.009682372212409973, 0.00197153864428401, 0.04423249140381813, -0.003862475510686636, 0.0002937867830041796, 0.035808976739645004, -0.03436824679374695, -0.006869373377412558, -0.029506148770451546, 0.0021211456041783094, -0.01658952236175537, -0.014159905724227428, -0.01872507855296135, 1.7324486179859377e-05, 0.0009454170940443873, 0.04507480189204216, 0.006473745219409466, 0.03398652747273445, -0.004506762605160475, 0.04930449649691582, -0.012377961538732052, 0.02852027863264084, 0.028152700513601303, -0.012006602250039577, 0.012232234701514244, 0.0034028650261461735, -0.015172946266829967, -0.02224711887538433, -0.014169580303132534, 0.0231088325381279, 0.009890848770737648, 0.04284046217799187, -0.030760997906327248, -0.033989448100328445, 0.018358461558818817, 0.03800961747765541, 0.02349192276597023, 0.02149427868425846, 0.0005227977526374161, -0.09578023105859756, 0.020698750391602516, -0.012416617013514042, 9.56628646235913e-05, -0.004413300193846226, -0.06176115199923515, -0.06370136141777039, 0.04251594468951225, -0.01927574910223484, -0.021949488669633865, 0.009037368930876255, 0.015709184110164642, 0.03664666786789894, -0.011252621188759804, 0.03090802952647209, -0.007981711067259312, -0.02050383761525154, 0.018466167151927948, 0.039743516594171524, -0.025002015754580498, 0.050263628363609314, 0.007021953817456961, 0.00883780512958765, 0.05353932082653046, -0.022880349308252335, -0.06497207283973694, 0.045547906309366226, -0.01892911083996296, -0.009569504298269749, 0.01566331833600998, -0.019115205854177475, -0.03303706273436546, 0.009130405262112617, -0.014804601669311523, -0.0005449393065646291, 0.037823598831892014, -0.02482825331389904, -0.020784858614206314, 0.0655641257762909, 0.05158456042408943, 0.027921020984649658, 0.021408144384622574, 0.018681319430470467, -0.006588073447346687, 0.04769238457083702, 0.011029910296201706, 0.022862423211336136, -0.1066974475979805, 0.017980746924877167, 0.06006710231304169, -0.020247280597686768, -0.013238539919257164, -0.0037571254651993513, -0.03314712643623352, 0.0349603071808815, 0.0029006621334701777, 0.005402370356023312, -0.011998461559414864, -0.0029791025444865227, -0.019136959686875343, -0.01963259093463421, 0.042408112436532974, -0.009020988829433918, -0.007898883894085884, -0.021550849080085754, -0.03288105130195618, -0.03879507631063461, -0.00441418681293726, -0.013777362182736397, 0.002389701548963785, 0.008905721828341484, 0.01703585870563984, 0.018444115296006203, -0.0013178186491131783, -0.03948662430047989, -0.041275616735219955, -0.01126676332205534, 0.020374303683638573, -0.03370970860123634, 0.00020459984079934657, -0.009100588038563728, 0.0252525694668293, 0.03549853339791298, 0.03128746896982193, 0.014305452816188335, -0.060188885778188705, -0.014268000610172749, -0.025481753051280975, -0.02024471014738083, -0.026308320462703705, 0.031466078013181686, -0.0014750019181519747, 0.002453559311106801, -0.003878361778333783, -0.07658117264509201, 0.03446296602487564, -0.021065598353743553, -0.02600516565144062, -0.03663502261042595, -0.042249616235494614, 0.043299224227666855, 0.013586850836873055, 0.01875089667737484, 0.025812413543462753, 0.039144739508628845, -0.03560351952910423, -0.03973193094134331, -0.01938369870185852, 0.038813188672065735, -0.028673527762293816, -0.04575640335679054, -0.030043575912714005, -0.005910512525588274, 0.030279841274023056, -0.00714161666110158, 0.019756365567445755, -0.02329571545124054, -0.03364109620451927, 0.028905002400279045, -0.024780623614788055, 0.0008984786691144109, 0.06980860233306885, 0.020071782171726227, 0.02953667752444744, 0.011422592215240002, 0.043476253747940063, -0.06445600837469101, -0.01440176460891962, 0.014391295611858368, 0.05619253218173981, -0.024886639788746834, -0.04148560389876366, 0.03721334785223007, -0.06418786942958832, -0.004165215417742729, 0.00036514384555630386, -0.029715675860643387, -0.019475223496556282, 0.029696673154830933, -0.012616616673767567, -0.0030557289719581604, -0.002162736840546131, -0.0742952898144722, 0.012262576259672642, -0.00017994933295994997, 0.012211671099066734, -0.02500840462744236, -0.01811891980469227, -0.051545627415180206, 0.01281850878149271, -0.04199879989027977, -0.011001660488545895, -0.03452015668153763, -0.048933472484350204, 0.026315128430724144, 0.04358912631869316, -0.025372864678502083, 0.013372937217354774, -0.015835780650377274, -0.018673595041036606, 0.00046083368943072855, 0.14620471000671387, -0.02234950289130211, 0.0020660196896642447, 0.02097468078136444, 0.05730254575610161, 0.05779646337032318, -0.007015390787273645, -0.05949034541845322, -0.05624832957983017, 0.07196555286645889, 0.008784071542322636, 0.031273532658815384, -0.03282938897609711, -0.031061802059412003, -0.03810758888721466, -0.08086159825325012, -0.022178180515766144, 0.01967567577958107, 0.06560680270195007, -0.013616591691970825, -0.0432109571993351, 3.215598189854063e-05, 0.007617859169840813, 0.05968445912003517, 0.011839330196380615, -0.0077759758569300175, -0.022673407569527626, 0.060271427035331726, 0.030707674100995064, 0.10095945000648499, -0.018273772671818733, 0.005221754312515259, -0.018574098125100136, 0.04666252061724663, -0.03984097018837929, 0.06332745403051376, 0.017829347401857376, -0.01672130636870861, 0.05946601182222366, 0.05359216779470444, 0.03375041484832764, 0.016197973862290382, -0.02781630866229534, -0.046106159687042236, 0.02885262481868267, 0.007923408411443233, -0.0038214686792343855, -0.02448604442179203, 0.016442393884062767, -0.01432204619050026, -0.01953461579978466, -0.02292843908071518, 0.0008579075220040977, -0.05842095986008644, -0.02213693968951702, 0.008564683608710766, 0.0034135151654481888, 0.06435391306877136, -0.02846105955541134, -0.01898171193897724, -0.003873697482049465, -0.015333324670791626, 0.03334102779626846, 0.004719573073089123, 0.051634207367897034, 0.012878542765974998, 0.01577749475836754, -0.017106913030147552, -0.02635161019861698, -0.0024792454205453396, 0.023901553824543953, 0.020814191550016403, -0.007952230051159859, 0.029625192284584045, -0.0962781012058258, 0.04161232337355614, 0.014889311045408249, 0.03456719592213631, 0.021369846537709236, -0.023458002135157585, -0.0010088596027344465, 0.02120639942586422, 0.020208759233355522, 0.05265497788786888, -0.014558163471519947, -0.007277484517544508, -0.02099815383553505, -0.013605242595076561, 0.0324534997344017, -0.05961483344435692, -0.03337838500738144, 0.020222235471010208, 0.012565594166517258, -0.03519878908991814, -0.007077865302562714, -0.028356298804283142, -0.08278725296258926, 0.01319239754229784, 0.011490881443023682, -0.010274969972670078, 0.1108994409441948, 0.007383056916296482, -0.024794939905405045, 0.07341893017292023, -0.03335026279091835, -0.02382080815732479, -0.0029024542309343815, 0.0020614725071936846, -0.0057262638583779335, 0.024756094440817833, 0.05605892464518547, -0.1111818253993988, -0.021946683526039124, -0.01614011451601982, 0.043390464037656784, 0.0037102061323821545, -0.03505485877394676, 0.03899580240249634, 0.011435099877417088, 0.020221702754497528, -0.02669031172990799, 0.004832306411117315, -0.015849681571125984, -0.05317364260554314, 0.08263996988534927, -0.027410464361310005, 0.00380671676248312, 0.021086186170578003, 0.011895693838596344, 0.004174353089183569, -0.010561530478298664, -0.04180792346596718, -0.034174732863903046, -0.04522694647312164, 0.010197068564593792, -0.030837582424283028, -0.004010622389614582, -0.06798151880502701, -0.01155101414769888, 0.007941692136228085, -0.0156096825376153, 0.002579117426648736, -0.015110064297914505, -0.008954649791121483, 0.02007051557302475, -0.03537007421255112, -0.05616573616862297, -0.002300516003742814, 0.024881331250071526, -0.008479481562972069, 0.03194728121161461, 0.04894423112273216, 0.021891983225941658, -0.03589446097612381, 0.032447777688503265, -0.0005914715584367514, 0.004301981069147587, 0.04572702571749687, -0.04888239502906799, -0.059860747307538986, 0.06363468617200851, -0.024566134437918663, -0.0077338311821222305, -0.001632812898606062, 0.0020885036792606115, -0.04106181859970093, 0.06061312183737755, -0.021700801327824593, -0.06142508238554001, 0.028309090062975883, 0.044431742280721664, -0.020188920199871063, -0.0032107389997690916, -0.006333114579319954, 0.05330382287502289, 0.03841032087802887, 0.023930702358484268, 0.07729266583919525, -0.007035836111754179, 0.010095838457345963, 0.003508212510496378, -0.04364698380231857, 0.019586000591516495, -0.02730482444167137, -0.03865279629826546, -0.008418343961238861, 0.016983970999717712, -0.08224364370107651, -0.003916498739272356, -0.03603892773389816, -0.002917575417086482, -0.01797187142074108, -0.019280629232525826, 0.03086366131901741, 0.04893186688423157, -0.009585113264620304, -0.08360493183135986, -0.022593555971980095, -0.012387734837830067, -0.011543926782906055, -0.0378672294318676, -0.06550993025302887, 0.03519187867641449, 0.041023895144462585, -0.08397971838712692, -0.01796313188970089, 0.006989792454987764, -0.0484766811132431, 0.015128003433346748, -0.041086550801992416, -0.012682567350566387, -0.00676239188760519, -0.08201915770769119, -0.02128657139837742, 0.015313142910599709, 0.07352940738201141, -0.038938969373703, -0.01536314096301794, 0.00020182890875730664, 0.03244830667972565, -0.025801466777920723, 0.012067257426679134, 0.023155761882662773, 0.05740971863269806, 0.03198213875293732, 0.0014487067237496376, -0.007571092341095209, 0.0018173549324274063, -0.014613238163292408, -0.020077301189303398, -0.019916843622922897, 0.022061720490455627, -0.020931994542479515, -0.007911395281553268, -0.015219923108816147, 0.04887213557958603, -0.02920399233698845, -0.017386117950081825, 0.0005062664858996868, 0.01998252049088478, 0.03453020378947258, 0.036018434911966324, -0.022892391309142113, -0.03786936029791832, 0.012512446381151676, -0.022280888631939888, -0.06235316023230553, 0.03337499126791954, 0.024999123066663742, -0.0029817752074450254, -0.03126863017678261, 0.04454130306839943, -0.02072736620903015, -0.050438910722732544, 0.027192559093236923, 0.00459620077162981, 0.011618764139711857, -0.015578383579850197, -0.010279949754476547, 0.03370492905378342, 0.013912416063249111, -0.04848866909742355, -0.025850331410765648, 0.015299155376851559, 0.0294966883957386, 0.00893339142203331, 0.022771712392568588, 0.05251256376504898, -0.05872466415166855, -0.016355643048882484, 0.0043891132809221745, -0.004956144839525223, -0.007998066022992134, -0.01745356246829033, -0.05093008279800415, -0.03511336073279381, -0.041876088827848434, -0.029814571142196655, -0.0313619002699852, 0.016929293051362038, 0.036951977759599686, 0.016865476965904236, 0.014302478171885014, 0.007788642309606075, 0.016147229820489883, 0.0008381165680475533, 0.037243399769067764, 0.02870718576014042, 0.03142762929201126, 0.021611150354146957, 0.021806776523590088, 0.04758168011903763, -0.027289945632219315, -0.02252698317170143, 0.022611673921346664, 0.02077341638505459, -0.0410599447786808, -0.007196098566055298, -0.036041900515556335, -0.01956062950193882, 0.03793586418032646, 0.033026691526174545, -0.00551182497292757, 0.02156761847436428, -0.03302361071109772, 0.030616017058491707, -0.01310468465089798, -0.02195853367447853, 0.003641830524429679, 0.02930266037583351, 0.03702780604362488, -0.023991471156477928, -0.01368630863726139, 0.0018303184770047665, 0.01949324831366539, -0.030949240550398827, -0.021212473511695862, -0.016473688185214996, 0.011225881054997444, 0.03063664212822914, -0.006342778448015451, 0.02723691612482071, -0.021282276138663292, -0.014434609562158585, -0.012314235791563988, -0.00016164248518180102, -0.0018798577366396785, 0.05209753289818764, -0.035325877368450165, -0.06617747247219086, 0.03259540721774101, -0.014060654677450657, 0.06100106239318848, 0.07049667835235596, -0.006168799474835396, 0.00529314624145627, -0.0500921756029129, -0.03336140885949135, -0.015566741116344929, 0.029270607978105545, -0.013397591188549995, -0.03303976356983185, 0.0014945378061383963, 0.007068077567964792, -0.014381800778210163, 0.02100297622382641, 0.04071948304772377, -0.0381028912961483, 0.036459244787693024, -0.01619097962975502, 0.009750700555741787, 0.043142132461071014, 0.0357314869761467, 0.03532516583800316, -0.0162619948387146, -0.01936044543981552, -0.01356697827577591, -0.024645069614052773, 0.05361146852374077, -0.008533887565135956, 0.04902371019124985, 0.024370459839701653, 0.04396418109536171, -0.014774004928767681, -0.01016099564731121, -0.053560011088848114, 0.006330367177724838, -0.040334030985832214, 0.022206781432032585, -0.0013751245569437742, -0.03988051414489746, -0.06964969635009766, 0.00015011003415565938, -0.000504028401337564, 0.06797169148921967, 0.03835142403841019, 0.078280508518219, 0.022008784115314484, 0.005389402154833078, 0.017445208504796028, 0.00010459028999321163, 0.02805521711707115, 0.0318351611495018, 0.014127428643405437, -0.045648254454135895, -0.049907613545656204, -0.010870414786040783, 0.005909312982112169, 0.04774390161037445, 0.005949071142822504, -0.009914555586874485, -0.05552609637379646, -0.01754576526582241, -0.008860040456056595, -0.04103095829486847, -0.05164513736963272, 0.012700868770480156, 0.06362908333539963, -0.023315828293561935, 0.013669322244822979, 0.016287067905068398, 0.28642988204956055, -0.029474984854459763, 0.010040832683444023, -0.0447385273873806, 0.03887912631034851, 0.006894036196172237, 0.022399308159947395, -0.0007777736755087972, -0.015402178280055523, 0.00022744473244529217, 0.0025212489999830723, 0.012183398008346558, 0.037950336933135986, -0.03939296677708626, 0.0036948046181350946, 0.00881923921406269, 0.01187853328883648, -0.0031688010785728693, 0.014764880761504173, 0.02646557241678238, 0.0054681552574038506, -0.014688610099256039, 0.010408890433609486, -0.0008929488831199706, 0.03106280229985714, 0.005883801728487015, 0.0022634007036685944, 0.03287530317902565, -0.021614378318190575, 0.061867605894804, 0.03122793138027191, -0.014700361527502537, 0.043684203177690506, -0.002161747310310602, 0.0046876720152795315, -0.03353724628686905, -0.0183130893856287, 0.01288651954382658, -0.017576588317751884, 0.013265094719827175, 0.016103120520710945, 0.008023972623050213, -0.044100720435380936, -0.009516917169094086, 0.026512037962675095, -0.045305285602808, 0.0014347424730658531, 0.0005016771028749645, -0.019016485661268234, -0.032560572028160095, -0.06451265513896942, 0.025237517431378365, -0.030762219801545143, -0.01027892529964447, -0.025683093816041946, 0.00099581154063344, 0.001753860735334456, -0.048313967883586884, -0.024519676342606544, 0.06815529614686966, -0.024713266640901566, 0.044039662927389145, 0.002624423010274768, -0.020195702090859413, 0.025076067075133324, 0.013134357519447803, 0.03032144531607628, 0.04385117441415787, -0.01843116246163845, -0.07384990155696869, -0.03507031872868538, -0.053929828107357025, -0.00839767511934042]\n" + ] + } + ], + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "\n", + "embedding_model = SentenceTransformer(\"shibing624/text2vec-base-chinese\", cache_folder=r\"E:\\huggingface_cache\")\n", + "\n", + "def embed_chunk(chunk: str) -> List[float]:\n", + " embedding = embedding_model.encode(chunk, normalize_embeddings=True)\n", + " return embedding.tolist()\n", + "\n", + "\n", + "embedding = embed_chunk(\"测试内容\")\n", + "print(len(embedding))\n", + "print(embedding)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "87f48192-d9f7-4270-ae08-e5e0300bbb32", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10\n", + "[-0.019575251266360283, 0.007184455171227455, 0.02307003363966942, -0.012436430901288986, 0.039207518100738525, -0.053741805255413055, 0.028527148067951202, -0.021042052656412125, -0.0017695847200229764, 0.04136238247156143, -0.025198310613632202, -0.05593809485435486, 0.0725792944431305, 0.021626582369208336, -0.004362826235592365, -0.0002864825364667922, 0.06021153926849365, 0.026215121150016785, -0.04922761023044586, 0.009307672269642353, 0.013933547772467136, -0.005938069429248571, -0.036834150552749634, 0.023301661014556885, 0.010850664228200912, 0.0042643160559237, 0.0037719926331192255, -0.02469753846526146, 0.0013592997565865517, 0.0558088980615139, 0.021838363260030746, 0.046078361570835114, -0.06695901602506638, 0.02910565584897995, 0.01936660148203373, -0.021051181480288506, 0.015360511839389801, -0.003088689176365733, 0.010731683112680912, 0.02203546091914177, 0.03437018021941185, 0.04636267572641373, -0.057696688920259476, -0.05955018848180771, 0.0017393060261383653, 0.05571894720196724, 0.000427858904004097, 0.047776561230421066, -0.032583363354206085, 0.03387891501188278, -0.055904045701026917, 0.3162909150123596, 0.031006362289190292, -0.024298125877976418, -0.009092366322875023, 0.06546440720558167, 0.023344943299889565, 0.006341041065752506, 0.018706751987338066, 0.022990936413407326, 0.013833885081112385, -0.013355112634599209, -0.01658429019153118, -0.024968065321445465, -0.014794421382248402, -0.01368589699268341, -0.024329233914613724, -0.03959382697939873, -0.04097596928477287, -0.045947689563035965, 0.029489288106560707, 0.03400910645723343, -0.06798189878463745, -0.0006202664808370173, 0.03166962042450905, 0.0025442824698984623, 0.03436622768640518, -0.02024359628558159, 0.002167297527194023, 0.010164228267967701, 0.008175177499651909, -0.06498740613460541, -0.008412396535277367, -0.03819175437092781, -0.005446240771561861, -0.011968792416155338, 0.00968853197991848, -0.019435057416558266, 0.07741902023553848, -0.047102879732847214, -0.018973268568515778, -0.032317839562892914, -0.02241707779467106, 0.026514258235692978, -0.0012653270969167352, -0.012755249626934528, 0.02653946727514267, -0.016730768606066704, -0.032214343547821045, -0.03031822480261326, 0.03064882569015026, -0.008278422057628632, -0.0463629812002182, -0.04409265145659447, -1.9279934804217191e-07, -0.03114074282348156, 0.03442677855491638, 0.008577431552112103, 0.0038429265841841698, 0.03769106790423393, -0.025073127821087837, -0.048826415091753006, 0.049011699855327606, 0.049355946481227875, -0.009122971445322037, 0.051346343010663986, -0.043780770152807236, 0.03778241202235222, -0.005145940463989973, 0.044100575149059296, 0.02287045679986477, 0.025381727144122124, -0.008953521028161049, -0.01889835111796856, -0.0040758708491921425, 0.009194869548082352, 0.0036783854011446238, -0.010642198845744133, -0.0008921885746531188, 0.005787290632724762, 0.010112789459526539, -0.009371737949550152, -0.08127477020025253, -0.00020790072449017316, 0.012738284654915333, -0.02297128178179264, -0.04988085851073265, -0.043112218379974365, 0.004414520692080259, -0.035868000239133835, 0.04770401492714882, -0.01699790172278881, 0.041053589433431625, 0.021058136597275734, 0.054827213287353516, -0.018240004777908325, 0.033797506242990494, -0.014233243651688099, 0.012422489002346992, -0.013608183711767197, 0.018477650359272957, 0.019524376839399338, -0.0533285029232502, 0.056810520589351654, 0.003922652453184128, -0.03331362456083298, -0.0509318970143795, 0.0021759839728474617, -0.07231572270393372, 0.0241323821246624, -0.023186897858977318, 0.01914399117231369, -0.008616648614406586, 0.0268754530698061, 0.027584044262766838, 0.0437968447804451, -0.025746334344148636, 0.012599880807101727, 0.09022201597690582, -0.020604439079761505, -0.0356358140707016, -0.006897665560245514, 0.014002724550664425, 0.01763490028679371, 0.028757089748978615, 0.029499627649784088, 0.046383049339056015, -0.024498820304870605, -0.06291425228118896, -0.049077220261096954, -0.021351447328925133, 0.026465274393558502, 0.04578236863017082, -0.00586283253505826, -0.03215055540204048, 0.0030406564474105835, -0.010758347809314728, -0.010290564969182014, 0.02317490242421627, 0.007631361950188875, -0.03857004642486572, -0.005427168682217598, -0.04518400877714157, -0.012234360910952091, 0.009643793106079102, 0.025938350707292557, 0.022813979536294937, -0.039789292961359024, -0.04089173674583435, -0.011615586467087269, -0.03620956093072891, 0.0012964762281626463, -0.02004282735288143, -0.05397961661219597, 0.008687205612659454, 0.03580185025930405, -0.034619301557540894, -0.032284874469041824, 0.021346990019083023, 0.01985388621687889, -0.007873228751122952, 0.0013645989820361137, 0.014947439543902874, 0.014988511800765991, 0.027462482452392578, 6.534192652907223e-05, -0.030344929546117783, 0.0026997255627065897, -0.020725013688206673, 0.03037252649664879, -0.04398975148797035, -0.028308242559432983, 0.029070334509015083, 0.0018611910054460168, -0.011147873476147652, 0.03601811081171036, -0.008285618387162685, -0.04343605786561966, -0.0023190395440906286, 0.030167335644364357, -0.004698507022112608, -0.015231081284582615, -0.042644184082746506, -0.022069057449698448, -0.0012205112725496292, -0.018037285655736923, -0.04970845207571983, 0.008847898803651333, -0.04872329160571098, -0.03382817283272743, 0.04059728980064392, 0.00846568401902914, -0.025226743891835213, -0.0006588103133253753, 0.01767011173069477, 0.013550370000302792, 0.005875382572412491, -0.027556132525205612, 0.0028684078715741634, 0.005567182321101427, 0.052889931946992874, 0.002786352764815092, 0.011704697273671627, -0.05445023253560066, 0.049064815044403076, 0.02999345026910305, -0.0010608008597046137, -0.05468842014670372, 0.02185436338186264, 0.017882538959383965, 0.0498606413602829, -0.08198913931846619, 0.02732446976006031, -0.0029180103447288275, 0.03234078735113144, -0.020740199834108353, -0.0005490780458785594, -0.028165698051452637, -0.03474273905158043, 0.07092525064945221, -0.025202777236700058, -0.0014516079099848866, 0.01010479498654604, -0.055701952427625656, -0.002845433307811618, -0.014473427087068558, 0.024009695276618004, -0.0297626294195652, -0.0388142429292202, 0.007417464628815651, -0.005394780542701483, 0.021935105323791504, 0.02445458620786667, 0.0004472240980248898, 0.0007251670467667282, -0.019695723429322243, -0.021868376061320305, 9.932041575666517e-05, -0.038301240652799606, -0.03355368226766586, -0.018670985475182533, -0.05672973394393921, 0.023213626816868782, -0.004610552452504635, 0.06102435663342476, -0.03861944377422333, -0.029414307326078415, -0.023253435268998146, 0.062324684113264084, -0.020949050784111023, 0.008914695121347904, 0.019913911819458008, -0.014146707952022552, 0.023585261777043343, -0.029241500422358513, -0.015371210873126984, 0.07652346789836884, 0.0312560498714447, -0.0226167980581522, 0.037117719650268555, -0.015307099558413029, -0.004552456084638834, 0.025201184675097466, -0.03407273441553116, 0.0007222200511023402, 0.024411480873823166, -0.001728990115225315, 0.0138123519718647, 0.08538073301315308, 0.023145539686083794, 0.02020256407558918, 0.05879046767950058, 0.04270468279719353, 0.059611476957798004, -0.04502655565738678, -0.033291857689619064, -0.010204355232417583, -0.031464628875255585, -0.011219717562198639, 0.0054338229820132256, -0.0056101856753230095, -0.013676022179424763, -0.014545722864568233, 0.054272133857011795, -0.009991344064474106, 0.018363244831562042, 0.028053410351276398, 0.0049317749217152596, -0.04974064230918884, -0.07254651933908463, 0.0021212315186858177, -0.03046795167028904, -0.030541827902197838, -0.03873365372419357, 0.014980845153331757, -0.02760285884141922, -0.0015530993696302176, -0.026116497814655304, -0.07665720582008362, 0.0384332574903965, 0.029880056157708168, 0.03893925994634628, 0.02296687848865986, -0.006102382205426693, -0.01258459035307169, -0.010450910776853561, 0.030525023117661476, 0.02369798719882965, 0.019688881933689117, 0.019036881625652313, -0.03237951174378395, 0.02658090554177761, -0.0081338444724679, -0.011671943590044975, 0.029618678614497185, 0.009454024024307728, 0.032548725605010986, -0.034176766872406006, -0.016558626666665077, -0.0026682838797569275, 0.026708073914051056, 0.05767471343278885, 0.012620345689356327, -0.02012227661907673, -0.04360421374440193, -0.008301271125674248, -0.027785686776041985, -0.012901523150503635, 0.027029305696487427, 0.03629692643880844, 0.035488855093717575, -0.03295552730560303, 0.021105023100972176, -0.042404547333717346, 0.026610244065523148, -0.026204992085695267, 0.001453594071790576, 0.0044026863761246204, -0.014022812247276306, -0.03229159489274025, -0.010603916831314564, -0.01765233837068081, -0.001992808422073722, -0.04984705150127411, -0.036025986075401306, 0.005037286784499884, -0.04908370226621628, 0.013484405353665352, -0.026446666568517685, 0.019589988514780998, -0.024231083691120148, -0.009068620391190052, -0.01086485106498003, -0.02524660900235176, -0.00032512936741113663, -0.01643010601401329, 0.019868040457367897, 0.03290139511227608, 0.05426554009318352, -0.07008349150419235, -0.021705375984311104, 0.04489170387387276, -0.004063727799803019, -0.020953208208084106, 0.005982259754091501, -0.03865431994199753, -0.061571136116981506, 0.00543921859934926, 0.027117028832435608, -0.008448577485978603, -0.01644066907465458, -0.02595209889113903, 0.022699281573295593, -0.02252832055091858, 0.06538814306259155, -0.08372665196657181, -0.05089447274804115, -0.031535252928733826, -0.014182116836309433, -0.0245163943618536, 0.05083751305937767, 0.0011484043207019567, -0.011153033003211021, 0.01942560076713562, -0.004413404036313295, -0.011585780419409275, -0.008473953232169151, 0.006571292411535978, 0.005964442156255245, 0.004132512956857681, -0.03928954526782036, 0.016739338636398315, 0.059326086193323135, 0.029311468824744225, 0.03729157894849777, -0.032178040593862534, 0.09054069221019745, 0.0035413666628301144, -0.02040100283920765, 0.033569060266017914, 0.002342680236324668, 0.0011930913897231221, -0.009094802662730217, -0.05840176343917847, 0.07354900240898132, -0.020121941342949867, 0.012296426109969616, -0.0022190422751009464, -0.006940566934645176, 0.05098889395594597, 0.025116536766290665, -0.02006465382874012, 0.025356940925121307, 0.05129018798470497, 0.02122398279607296, -0.005277230869978666, -0.0513467937707901, 0.01607401855289936, 0.0034901422914117575, 0.075712189078331, 0.06060842424631119, 0.01965726912021637, -0.006239638663828373, -0.021264422684907913, 0.01771720126271248, -0.028178969398140907, -0.05186501517891884, -0.03138476610183716, -0.027058297768235207, -0.10381457209587097, 0.030338797718286514, -0.06478022038936615, -0.060983411967754364, 0.02107156626880169, 0.025293050333857536, 0.06755193322896957, -0.03659011051058769, -0.032003168016672134, 0.021191349253058434, 0.012678739614784718, -0.020296242088079453, -0.006004476919770241, 0.04356568679213524, 0.008365025743842125, -0.03111179545521736, -0.14221766591072083, 0.013288440182805061, -0.029158705845475197, -0.031019870191812515, 0.014649777673184872, 0.022822536528110504, -0.0026551522314548492, 0.04423387721180916, 0.04054328426718712, -0.014595555141568184, 0.011473341844975948, -0.09130791574716568, 0.005705442279577255, 0.0074379220604896545, 0.0034434255212545395, -0.012913963757455349, -0.028172103688120842, 0.04175342246890068, -0.02622818760573864, 0.057913754135370255, 0.01109427958726883, -0.02007254585623741, 0.018362635746598244, 0.039502546191215515, 0.014905662275850773, 0.05232277140021324, -0.013581573031842709, -0.02619059570133686, 0.021684015169739723, -0.048471659421920776, 0.017087170854210854, -0.036277979612350464, 0.010164432227611542, -0.05211498960852623, 0.029964536428451538, 0.017332442104816437, 0.05204577371478081, -0.07522930949926376, -0.04038762301206589, 0.024310991168022156, 0.044961247593164444, 0.053168393671512604, 0.05639899894595146, 0.046994663774967194, -0.01425096020102501, -0.019624512642621994, 0.0009378197719343007, 0.031094498932361603, -0.017893236130475998, -0.03482428565621376, -0.005534043535590172, 0.015844427049160004, -0.016579151153564453, -0.010253782384097576, -0.0047560581006109715, -0.00924755074083805, -0.030618900433182716, 0.008117240853607655, 0.01956489309668541, 0.01977531611919403, -0.0282338447868824, -0.0736929252743721, 0.002558609703555703, -0.00937797874212265, -0.0032988362945616245, -0.0027087805792689323, -0.05723098665475845, 0.010283361189067364, -0.04216745123267174, -0.03646434098482132, -0.016274170950055122, 0.02037033997476101, -0.032444436103105545, -0.051685333251953125, -0.001773986965417862, 0.027014005929231644, -0.018742822110652924, 0.0422854945063591, 0.08014561980962753, -0.024416934698820114, 0.017382996156811714, -0.011250916868448257, -0.039670754224061966, 0.00026210176292806864, -0.03529912605881691, 0.03928031027317047, 0.014210453256964684, 0.031595177948474884, -0.04646534100174904, 0.02043098583817482, 0.003706033807247877, 0.0017986869206652045, -0.056359149515628815, -0.0013847334776073694, 0.014804858714342117, -0.017672264948487282, 0.00022008948144502938, 0.009399876929819584, 0.059968214482069016, -0.014980431646108627, 0.015514204278588295, -0.05771442875266075, 0.009875739924609661, 0.029005272313952446, 0.003136205021291971, 0.0012562497286126018, 0.10040702670812607, -0.02637237310409546, -0.04681915417313576, 0.0291389562189579, -0.007474660407751799, 0.04505525901913643, 0.008728796616196632, -0.011935579590499401, -0.07580936700105667, -0.034775588661432266, 0.008312659338116646, -0.05544693395495415, 0.04934822767972946, -0.007774950936436653, 0.04374038800597191, -0.020004864782094955, 0.03448215872049332, 0.01088371779769659, -0.06000566855072975, -0.0187141764909029, -0.03956097364425659, -0.014007270336151123, -0.01143618207424879, 0.008793538436293602, -0.004188937600702047, -0.023026466369628906, 0.00079348124563694, 0.006244283635169268, 0.029402894899249077, -0.037492938339710236, 0.0026707188226282597, 0.06962321698665619, -0.04893491417169571, 0.009129677899181843, 0.03313241899013519, -0.007266656961292028, -0.012852249667048454, 0.003959850873798132, 0.019211065024137497, 0.033672213554382324, 0.02461634762585163, -0.027333421632647514, -0.04864327609539032, 0.07117064297199249, 0.006693932693451643, 0.0496504083275795, 0.004121765960007906, -0.014042004942893982, 0.0042402842082083225, -0.0017320626648142934, 0.0068808868527412415, 0.05296021327376366, 0.012016513384878635, 0.008241659961640835, -0.05940235033631325, 0.07657932490110397, -0.03656348958611488, -0.052963655441999435, -0.049769122153520584, 0.02750290557742119, -0.007105795666575432, -0.036572087556123734, -0.03655020520091057, -0.0018084811745211482, -0.07224298268556595, 0.009658253751695156, -0.011073511093854904, 0.008183703757822514, 0.01194060780107975, 0.11663929373025894, 0.07610756158828735, -0.006856649182736874, -0.0034990820568054914, -0.019065409898757935, 0.030794845893979073, 0.009789157658815384, 0.016759157180786133, 0.02532179467380047, 0.002362749306485057, -0.025858156383037567, 0.012383234687149525, -0.0023598051629960537, -0.01874879188835621, -0.01522752270102501, -0.02255314402282238, -0.0406072624027729, -0.02734001725912094, -0.011350843124091625, -0.004592748824506998, -0.036703288555145264, 0.0441879965364933, -0.0038624682929366827, -0.03565923497080803, 0.03560398519039154, 0.12003617733716965, -0.049474820494651794, 0.013182542286813259, 0.004166133236140013, 0.03974482789635658, 0.02396700344979763, -0.014209208078682423, 0.04398912563920021, -0.06685982644557953, 0.0017545870505273342, 0.02253366820514202, -0.050653085112571716, -0.0992334708571434, -0.004559328779578209, -0.0017454829066991806, -0.03800128400325775, 0.04672865569591522, -0.013196355663239956, 0.04957873374223709, 0.017210159450769424, -0.04172078147530556, 0.011581012979149818, 0.061649780720472336, 0.054958276450634, 0.023676065728068352, 0.022868208587169647, -0.026328900828957558, 0.05087104067206383, 0.007852423936128616, 0.0493755005300045, -0.047566015273332596, 0.03424824774265289, 0.05096571892499924, 0.03547510877251625, 0.0010320801520720124, -0.01896739937365055, 0.037362825125455856, 0.012608674354851246, -0.03988951817154884, -0.050076261162757874, 0.04336462914943695, -0.020738635212183, 0.0872432142496109, -0.01960838958621025, -0.007971313782036304, 0.009576483629643917, -0.018797826021909714, 0.007388936821371317, -0.010044346563518047, -0.003023626748472452, 0.012985956855118275, 0.013595462776720524, 0.03613189607858658, -0.03293899819254875, 0.004596285987645388, 0.02112470380961895, -0.03887394815683365, -0.0007361902971751988, 0.0021365561988204718, 0.01691422238945961, -0.048614099621772766, 0.0870027095079422, -0.029589220881462097, 0.06149928644299507, -0.013487732037901878, -0.003979076165705919, 0.020023053511977196, 0.09603308141231537, 0.02000700868666172, -0.019597256556153297, -0.002821275033056736, -0.055465202778577805, -0.0405719056725502]\n" + ] + } + ], + "source": [ + "embeddings = [embed_chunk(chunk) for chunk in chunks]\n", + "\n", + "print(len(embeddings))\n", + "print(embeddings[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7595f15", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "babfbd91-76fc-4467-9ff7-ccaf5ffbbd54", + "metadata": {}, + "outputs": [], + "source": [ + "import chromadb\n", + "\n", + "chromadb_client = chromadb.EphemeralClient()\n", + "chromadb_collection = chromadb_client.get_or_create_collection(name=\"default\")\n", + "\n", + "def save_embeddings(chunks: List[str], embeddings: List[List[float]]) -> None:\n", + " for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):\n", + " chromadb_collection.add(\n", + " documents=[chunk],\n", + " embeddings=[embedding],\n", + " ids=[str(i)]\n", + " )\n", + "\n", + "save_embeddings(chunks, embeddings)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9e47b06d-3f7a-40bd-886a-aca6c7e19f0b", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0] # 哆啦A梦与超级赛亚人:时空之战\n", + "\n", + "[1] 三件秘密道具分别是:可以临时赋予超级战力的“复制斗篷”,能暂停时间五秒的“时间停止手表”,以及可在一分钟中完成一年修行的“精神与时光屋便携版”。大雄被推进精神屋内,在其中接受密集的训练,虽然只有几分钟现实时间,他却经历了整整一年的苦修。刚开始他依旧软弱,想放弃、想逃跑,但当他想起静香、父母,还有哆啦A梦那坚定的眼神时,他终于咬牙坚持了下来。出来之后,他的身体与精神都焕然一新,眼神中多了一份成熟与自信。\n", + "\n", + "[2] 最终战在黑暗赛亚人的空中要塞前爆发,特兰克斯率先出击,释放全力与敌人正面对决。哆啦A梦则用任意门和道具支援,从各个方向制造混乱,尽量压制敌人的时空能力。但黑暗赛亚人太过强大,仅凭特兰克斯一人根本无法压制,更别说击败。就在特兰克斯即将被击倒之际,大雄披上复制斗篷、冲破恐惧从高空跃下。他的拳头燃烧着金色光焰,目标直指敌人心脏。\n", + "\n", + "[3] 战后,未来世界开始恢复,植物重新生长,人类重建家园。特兰克斯告别时紧紧握住大雄的手,说:“你是我见过最特别的战士。”哆啦A梦也为大雄感到骄傲,说他终于真正成长了一次。三人站在山丘上,看着远方重新明亮的地平线,心中感受到从未有过的安宁。随后,哆啦A梦与大雄乘坐时光机返回了属于他们的那个年代,一切仿佛又恢复平静。\n", + "\n", + "[4] 哆啦A梦与大雄听后大惊,但也从特兰克斯坚定的眼神中读出了不容拒绝的决心。特兰克斯解释说,未来的敌人并非普通反派,而是一个名叫“黑暗赛亚人”的存在,他由邪恶科学家复制了贝吉塔的基因并加以改造,实力超乎想象。这个敌人不仅拥有赛亚人战斗力,还能操纵扭曲的时间能量,几乎无人可敌。特兰克斯已经独自战斗多年,但每一次都以惨败告终。他说:“科技,是我那个时代唯一缺失的武器,而你们,正好拥有它。”\n", + "\n" + ] + } + ], + "source": [ + "def retrieve(query: str, top_k: int) -> List[str]:\n", + " query_embedding = embed_chunk(query)\n", + " results = chromadb_collection.query(\n", + " query_embeddings=[query_embedding],\n", + " n_results=top_k\n", + " )\n", + " return results['documents'][0]\n", + "\n", + "query = \"哆啦A梦使用的3个秘密道具分别是什么?\"\n", + "retrieved_chunks = retrieve(query, 5)\n", + "\n", + "for i, chunk in enumerate(retrieved_chunks):\n", + " print(f\"[{i}] {chunk}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e57ac85d-d634-4c1d-93fa-e627cf09a6f1", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0] 三件秘密道具分别是:可以临时赋予超级战力的“复制斗篷”,能暂停时间五秒的“时间停止手表”,以及可在一分钟中完成一年修行的“精神与时光屋便携版”。大雄被推进精神屋内,在其中接受密集的训练,虽然只有几分钟现实时间,他却经历了整整一年的苦修。刚开始他依旧软弱,想放弃、想逃跑,但当他想起静香、父母,还有哆啦A梦那坚定的眼神时,他终于咬牙坚持了下来。出来之后,他的身体与精神都焕然一新,眼神中多了一份成熟与自信。\n", + "\n", + "[1] 最终战在黑暗赛亚人的空中要塞前爆发,特兰克斯率先出击,释放全力与敌人正面对决。哆啦A梦则用任意门和道具支援,从各个方向制造混乱,尽量压制敌人的时空能力。但黑暗赛亚人太过强大,仅凭特兰克斯一人根本无法压制,更别说击败。就在特兰克斯即将被击倒之际,大雄披上复制斗篷、冲破恐惧从高空跃下。他的拳头燃烧着金色光焰,目标直指敌人心脏。\n", + "\n", + "[2] 战后,未来世界开始恢复,植物重新生长,人类重建家园。特兰克斯告别时紧紧握住大雄的手,说:“你是我见过最特别的战士。”哆啦A梦也为大雄感到骄傲,说他终于真正成长了一次。三人站在山丘上,看着远方重新明亮的地平线,心中感受到从未有过的安宁。随后,哆啦A梦与大雄乘坐时光机返回了属于他们的那个年代,一切仿佛又恢复平静。\n", + "\n" + ] + } + ], + "source": [ + "from sentence_transformers import CrossEncoder\n", + "\n", + "def rerank(query: str, retrieved_chunks: List[str], top_k: int) -> List[str]:\n", + " cross_encoder = CrossEncoder('cross-encoder/mmarco-mMiniLMv2-L12-H384-v1')\n", + " pairs = [(query, chunk) for chunk in retrieved_chunks]\n", + " scores = cross_encoder.predict(pairs)\n", + "\n", + " scored_chunks = list(zip(retrieved_chunks, scores))\n", + " scored_chunks.sort(key=lambda x: x[1], reverse=True)\n", + "\n", + " return [chunk for chunk, _ in scored_chunks][:top_k]\n", + "\n", + "reranked_chunks = rerank(query, retrieved_chunks, 3)\n", + "\n", + "for i, chunk in enumerate(reranked_chunks):\n", + " print(f\"[{i}] {chunk}\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79d844d8-846e-4a88-a19f-c8e282839b99", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "你是一位知识助手,请根据用户的问题和下列片段生成准确的回答。\n", + "\n", + "用户问题: 哆啦A梦使用的3个秘密道具分别是什么?\n", + "\n", + "相关片段:\n", + "三件秘密道具分别是:可以临时赋予超级战力的“复制斗篷”,能暂停时间五秒的“时间停止手表”,以及可在一分钟中完成一年修行的“精神与时光屋便携版”。大雄被推进精神屋内,在其中接受密集的训练,虽然只有几分钟现实时间,他却经历了整整一年的苦修。刚开始他依旧软弱,想放弃、想逃跑,但当他想起静香、父母,还有哆啦A梦那坚定的眼神时,他终于咬牙坚持了下来。出来之后,他的身体与精神都焕然一新,眼神中多了一份成熟与自信。\n", + "\n", + "最终战在黑暗赛亚人的空中要塞前爆发,特兰克斯率先出击,释放全力与敌人正面对决。哆啦A梦则用任意门和道具支援,从各个方向制造混乱,尽量压制敌人的时空能力。但黑暗赛亚人太过强大,仅凭特兰克斯一人根本无法压制,更别说击败。就在特兰克斯即将被击倒之际,大雄披上复制斗篷、冲破恐惧从高空跃下。他的拳头燃烧着金色光焰,目标直指敌人心脏。\n", + "\n", + "战后,未来世界开始恢复,植物重新生长,人类重建家园。特兰克斯告别时紧紧握住大雄的手,说:“你是我见过最特别的战士。”哆啦A梦也为大雄感到骄傲,说他终于真正成长了一次。三人站在山丘上,看着远方重新明亮的地平线,心中感受到从未有过的安宁。随后,哆啦A梦与大雄乘坐时光机返回了属于他们的那个年代,一切仿佛又恢复平静。\n", + "\n", + "请基于上述内容作答,不要编造信息。\n", + "\n", + "---\n", + "\n", + "\n", + "{\"model\":\"qwen3:latest\",\"created_at\":\"2025-10-04T13:07:40.4939377Z\",\"response\":\"\\u003cthink\\u003e\\n好的,用户问的是哆啦A梦使用的三个秘密道具分别是什么。我需要根据提供的片段来回答,不能编造信息。\\n\\n首先,我得仔细看一下相关片段。片段里明确提到三件道具:复制斗篷、时间停止手表,还有精神与时光屋便携版。接下来要确认每个道具的描述是否正确。\\n\\n复制斗篷的作用是临时赋予超级战力,这在片段中提到大雄使用它冲破恐惧,所以没问题。时间停止手表能暂停时间五秒,虽然片段里没有直接提到五秒,但根据常见设定,可能是指这个。不过用户的问题可能只需要名称,所以可能不需要详细说明时间长度。不过原文里确实提到五秒,应该包括进去。\\n\\n第三个是精神与时光屋便携版,用来在一分钟内完成一年修行。片段中大雄在里面训练,现实时间几分钟,但经历一年,所以这个描述正确。需要确认是否在片段中有提到“便携版”,是的,原文有“精神与时光屋便携版”。\\n\\n然后要确保没有其他道具被提及。用户的问题是三个,而片段里确实只提到了这三个,所以答案应该是这三个。需要注意的是,虽然最终战中提到任意门,但任意门可能属于其他道具,但根据片段内容,三个秘密道具是上述三个。需要确认是否在片段中明确提到这三个是秘密道具,是的,开头就说了“三件秘密道具分别是”,所以正确。\\n\\n最后,组织语言,用简洁的方式列出三个道具,每个的名称和功能,确保准确,不添加额外信息。\\n\\u003c/think\\u003e\\n\\n哆啦A梦使用的三个秘密道具分别是: \\n1. **复制斗篷**:可临时赋予使用者超级战力,用于战斗中增强能力。 \\n2. **时间停止手表**:能暂停时间五秒,用于短暂冻结时间以应对危机。 \\n3. **精神与时光屋便携版**:可在一分钟内完成一年的修行,通过高强度训练提升使用者的身体与精神力量。 \\n\\n(注:片段中提到的“任意门”虽为哆啦A梦的道具,但未被明确列为“三件秘密道具”之一,故未列入答案。)\",\"done\":true,\"done_reason\":\"stop\",\"context\":[151644,872,198,56568,109182,100032,110498,37945,100345,20002,103936,33108,107976,115076,43959,102188,111423,3407,20002,86119,25,4891,241,228,104060,32,99815,105899,18,18947,102848,107182,101127,102021,26850,78556,115076,510,44991,14224,102848,107182,107163,5122,73670,104875,106908,104453,99191,47534,9370,2073,105173,99761,103640,33590,26232,107276,20450,75108,45918,9370,2073,20450,104390,111844,33590,101034,108436,115067,15946,60548,100695,107908,9370,2073,100150,57218,105129,100436,99364,100914,40301,55807,26288,100759,99250,99664,100150,100436,31843,96050,90919,100669,105807,9370,104034,3837,103925,101043,112535,101163,20450,3837,42411,99786,106014,110167,100695,9370,99746,23081,1773,108250,42411,102448,64272,99932,3837,99172,102256,5373,99172,114563,3837,77288,110780,105545,99541,99662,5373,103953,3837,100626,119356,104060,32,99815,99212,102405,109563,13343,3837,42411,104020,105118,100446,100356,107008,1773,99898,103922,3837,100648,101099,57218,100150,71268,103493,60894,14777,16628,3837,108564,15946,42140,113361,102226,57218,102435,3407,103941,99191,18493,105913,99273,99449,103947,105438,30534,101202,24562,104716,3837,65278,99533,107229,105019,112816,3837,104739,101341,57218,105076,36556,101928,99351,1773,119356,104060,32,99815,46448,11622,108112,64689,33108,107182,106385,3837,45181,101284,100696,100184,107977,3837,104638,111509,101271,103947,109113,99788,1773,77288,105913,99273,99449,17340,108288,102553,3837,99373,100284,65278,99533,107229,104855,100232,101068,111509,3837,33126,110725,109055,1773,104158,65278,99533,107229,102238,99250,52726,99805,106254,3837,26288,100759,101488,17447,105173,99761,103640,5373,99907,99577,106904,45181,110331,100560,16872,1773,100648,114441,106578,99164,108979,99225,103369,3837,100160,73145,63367,101271,102677,100568,3407,99191,33447,3837,100353,99489,55286,102005,3837,104155,70361,104569,3837,103971,107295,106549,1773,65278,99533,107229,108526,13343,102987,116504,26288,100759,105871,3837,36587,36987,56568,104927,102877,31235,100654,9370,100853,32945,119356,104060,32,99815,109001,26288,100759,104048,107640,3837,36587,42411,104020,100690,100423,99593,32571,1773,106569,104224,57811,105697,17447,3837,101952,111993,70361,108575,116241,49111,43268,3837,104219,105322,106111,18830,105565,114219,1773,104221,3837,119356,104060,32,99815,57218,26288,100759,106825,105129,32648,31526,34187,100409,104056,101095,104227,3837,101109,104700,99518,102005,106837,3407,14880,104210,104120,43815,19403,99590,3837,100148,30868,66078,27369,1773,151645,198,151644,77091,198,151667,198,99692,3837,20002,56007,100146,119356,104060,32,99815,105899,101124,102848,107182,101127,102021,1773,35946,85106,100345,103008,115076,36407,102104,3837,53153,30868,66078,27369,3407,101140,3837,35946,49828,104857,111320,78556,115076,1773,115076,69249,100692,104496,44991,14224,107182,5122,105173,99761,103640,5373,20450,104390,111844,3837,100626,100150,57218,105129,100436,99364,100914,40301,1773,104326,30534,81167,103991,107182,9370,53481,64471,88991,3407,105173,99761,103640,104149,20412,104875,106908,104453,99191,47534,3837,43288,18493,115076,15946,104496,26288,100759,37029,99652,99907,99577,106904,3837,99999,114815,1773,20450,104390,111844,26232,107276,20450,75108,45918,3837,103925,115076,69249,80443,101041,104496,75108,45918,3837,77288,100345,101536,105924,3837,87267,104442,99487,1773,100632,20002,103936,87267,107525,29991,3837,99999,87267,104689,100700,66394,20450,98402,1773,100632,103283,69249,102068,104496,75108,45918,3837,99730,100630,104880,3407,112363,20412,100150,57218,105129,100436,99364,100914,40301,3837,102688,114846,83031,31843,60548,100695,107908,1773,115076,15946,26288,100759,111267,104034,3837,101163,20450,112535,3837,77288,100798,100695,3837,99999,99487,53481,88991,1773,85106,81167,64471,18493,115076,105656,104496,2073,99364,100914,40301,33590,20412,9370,3837,103283,18830,2073,100150,57218,105129,100436,99364,100914,40301,96332,101889,30534,103944,80443,92894,107182,99250,109361,1773,20002,103936,20412,101124,3837,68536,115076,69249,102068,91680,28072,99495,112935,3837,99999,102349,104583,112935,1773,107916,100146,3837,103925,103941,99191,15946,104496,108112,64689,3837,77288,108112,64689,87267,100409,92894,107182,3837,77288,100345,115076,43815,3837,101124,102848,107182,20412,104120,101124,1773,85106,81167,64471,18493,115076,15946,100692,104496,112935,20412,102848,107182,3837,20412,9370,3837,111749,80158,105317,2073,44991,14224,102848,107182,107163,33590,99999,88991,3407,100161,3837,99877,102064,3837,11622,110485,101990,114116,101124,107182,3837,103991,9370,29991,33108,98380,3837,103944,102188,3837,16530,42855,108593,27369,8997,151668,271,119356,104060,32,99815,105899,101124,102848,107182,107163,5122,2303,16,13,3070,105173,99761,103640,334,5122,30440,104875,106908,101468,104453,99191,47534,3837,100751,101272,15946,101138,99788,1773,2303,17,13,3070,20450,104390,111844,334,5122,26232,107276,20450,75108,45918,3837,100751,108658,111831,20450,23031,104397,102323,1773,2303,18,13,3070,100150,57218,105129,100436,99364,100914,40301,334,5122,108436,115067,31843,60548,100695,9370,107908,3837,67338,115432,104034,100341,101468,106214,57218,100150,101102,1773,18611,9909,25074,5122,115076,15946,104496,9370,2073,108112,64689,854,100628,17714,119356,104060,32,99815,9370,107182,3837,77288,38342,99250,100692,113507,2073,44991,14224,102848,107182,854,100653,3837,99535,38342,103057,102349,1773,7552],\"total_duration\":230250378100,\"load_duration\":9994500400,\"prompt_eval_count\":404,\"prompt_eval_duration\":20025606500,\"eval_count\":464,\"eval_duration\":199923876100}\n" + ] + } + ], + "source": [ + "import requests\n", + "import json\n", + "from typing import List\n", + "\n", + "def generate(url: str, model: str, query: str, chunks: List[str]) -> str:\n", + " prompt = f\"\"\"你是一位知识助手,请根据用户的问题和下列片段生成准确的回答。\n", + "\n", + "用户问题: {query}\n", + "\n", + "相关片段:\n", + "{\"\\n\\n\".join(chunks)}\n", + "\n", + "请基于上述内容作答,不要编造信息。\"\"\"\n", + "\n", + " print(f\"{prompt}\\n\\n---\\n\")\n", + " pyload = {\n", + " \"model\": model,\n", + " \"prompt\": prompt,\n", + " \"stream\": False\n", + " }\n", + " response = requests.post(url=url, json=pyload)\n", + " print(response)\n", + " return response.text\n", + "\n", + "url = \"http://localhost:11434/api/generate\"\n", + "model = \"qwen3:latest\"\n", + "answer = generate(url, model, query, reranked_chunks)\n", + "print(answer.json()[\"response\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git "a/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/new_test.py" "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/new_test.py" new file mode 100644 index 0000000..71e5aac --- /dev/null +++ "b/\344\275\277\347\224\250Python\346\236\204\345\273\272RAG\347\263\273\347\273\237/rag/new_test.py" @@ -0,0 +1,174 @@ +import os +from typing import List +import chromadb +import requests +import json +from sentence_transformers import SentenceTransformer, CrossEncoder + + + +class RAGAgent(object): + def __init__(self, embeding_mode_config, cross_encoder_config, llm_config, db_config): + self.embeding_mode_config = embeding_mode_config + self.cross_encoder_config = cross_encoder_config + self.llm_config =llm_config + self.db_config = db_config + # ==================================== + self.embedding_model = SentenceTransformer(self.embeding_mode_config["model"], cache_folder=self.embeding_mode_config["cache_folder"]) + self.cross_encoder_model = CrossEncoder(self.cross_encoder_config["model"]) + self.chromadb_collection = self.create_db(db_config["db_name"], db_config["collection_name"]) + self.llm_url = self.llm_config["url"] + self.llm_model = self.llm_config["model"] + + + def load_file(self, file): + chunks = self.split_into_chuncks(file) + embeds = [] + for chunk in chunks: + embeds.append(self.embed_chunk(chunk)) + self.save_embeddings(chunks, embeds) + + def query_answer(self, query, db_query_k=5, rerank_k=3): + retrieved_chunks = self.retrieve(query, self.db_config["db_name"], self.db_config["collection_name"], db_query_k) + if rerank_k > db_query_k: + rerank_k = db_query_k + reranked_chunks = self.rerank(query, retrieved_chunks, rerank_k) + + answer = self.request_answer(self.llm_url, self.llm_model, query, reranked_chunks) + print(f"\n完整回答:\n{answer}") + + + def split_into_chuncks(self, file): + chunks = [] + if os.path.exists(file): + with open(file, "r", encoding="utf-8") as f: + content = f.read() + + chunks = content.split("\n\n") + + return chunks + + def embed_chunk(self, chunk): + embeded = self.embedding_model.encode(chunk, normalize_embeddings=True) + embedding_list = embeded.tolist() + return embedding_list + + def create_db(self, db_name, collection_name): + chromadb_client = chromadb.PersistentClient(path=db_name) + chromadb_collection = chromadb_client.get_or_create_collection(collection_name) + return chromadb_collection + + def save_embeddings(self, chunks, embeds): + for i, (chunk, embedding) in enumerate(zip(chunks, embeds)): + self.chromadb_collection.add( + documents=[chunk], + embeddings = [embedding], + ids = [str(i)] + ) + + def retrieve(self, query, db_name, collection_name, top_k): + chromadb_client = chromadb.PersistentClient(path=db_name) + chromadb_collection = chromadb_client.get_or_create_collection(collection_name) + query_embedding = self.embed_chunk(query) + results = chromadb_collection.query( + query_embeddings=[query_embedding], + n_results=top_k + ) + documents = results['documents'][0] + return documents + + def rerank(self, query, retrieved_chunks, top_k): + pairs = [] + for chunk in retrieved_chunks: + pairs.append((query, chunk)) + scores = self.cross_encoder_model.predict(pairs) + scored_chunks = list(zip(retrieved_chunks, scores)) + scored_chunks.sort(key=lambda x: x[1], reverse=True) + documents = [chunk for chunk, score in scored_chunks[:top_k]] + + return documents + + def request_answer(self, url: str, model: str, query: str, chunks: List[str]) -> str: + prompt = f"""你是一位知识助手,请根据用户的问题和下列片段生成准确的回答。 + + 用户问题: {query} + + 相关片段: + {"\n\n".join(chunks)} + + 请基于上述内容作答,不要编造信息。""" + + print(f"{prompt}\n\n---\n") + payload = { + "model": model, + "prompt": prompt, + "stream": True + } + + # 用于汇总流式输出的变量 + full_response = "" + + try: + response = requests.post(url=url, json=payload, stream=True) + response.raise_for_status() + + print("流式输出:") + for line in response.iter_lines(): + if line: + try: + # 解析JSON响应 + data = json.loads(line.decode('utf-8')) + + # 检查是否包含响应内容 + if 'response' in data: + chunk_text = data['response'] + print(chunk_text, end='', flush=True) # 实时显示 + full_response += chunk_text # 累积到汇总变量 + + # 检查是否完成 + if data.get('done', False): + break + + except json.JSONDecodeError: + continue + + except requests.exceptions.RequestException as e: + print(f"请求错误: {e}") + return "" + + print("\n\n---流式输出完成---") + return full_response + + + + + +if __name__ == "__main__": + embeding_mode_config={ + "model":"shibing624/text2vec-base-chinese", + "cache_folder": r"E:\huggingface_cache" + } + cross_encoder_config={ + "model":'cross-encoder/mmarco-mMiniLMv2-L12-H384-v1' + } + llm_config={ + "url": "http://localhost:11434/api/generate", + "model": "qwen3:latest" + } + db_config={ + "db_name": "rag_test", + "collection_name": "default" + } + rag_agent = RAGAgent(embeding_mode_config, cross_encoder_config, llm_config, db_config) + # rag_agent.load_file(file = r"E:\code_project\VideoCode\使用Python构建RAG系统\rag\doc.md") + rag_agent.query_answer(query = "哆啦A梦使用的3个秘密道具分别是什么?") + + + + + + + + + + From e3b337b43a2be35442530964912da0903cd8217c Mon Sep 17 00:00:00 2001 From: zonghui <243156680@qq.com> Date: Mon, 6 Oct 2025 20:58:02 +0800 Subject: [PATCH 2/3] add new langraph and mcp test --- langgraph_mcp/new_test.py | 199 +++++++++++++++++++++++++++++ langgraph_mcp/simple_mpc_server.py | 77 +++++++++++ 2 files changed, 276 insertions(+) create mode 100644 langgraph_mcp/new_test.py create mode 100644 langgraph_mcp/simple_mpc_server.py diff --git a/langgraph_mcp/new_test.py b/langgraph_mcp/new_test.py new file mode 100644 index 0000000..f875342 --- /dev/null +++ b/langgraph_mcp/new_test.py @@ -0,0 +1,199 @@ +import asyncio +from langgraph.checkpoint.memory import InMemorySaver +from langgraph.prebuilt import create_react_agent +from langchain_mcp_adapters.client import MultiServerMCPClient +from langchain_core.messages import SystemMessage, HumanMessage +from langchain.chat_models import init_chat_model +from typing import Dict, List, Any + + + +# Author:@南哥AGI研习社 (B站 or YouTube 搜索“南哥AGI研习社”) + +awap_mpas_api_keys = "2fbca7baa159b56b10e513d4cd98dc19" + +# 使用langgraph推荐方式定义大模型 +llm = init_chat_model( + model="openai:deepseek-v3", + temperature=0, + base_url="https://nangeai.top/v1", + api_key="sk-qafWNW0qbyMhfH3MJL2FknaKB2DVgzBFyDKukenUdRaW0op8" +) + + +# 解析消息列表 +def parse_messages(messages: List[Any]) -> None: + """ + 解析消息列表,打印 HumanMessage、AIMessage 和 ToolMessage 的详细信息 + + Args: + messages: 包含消息的列表,每个消息是一个对象 + """ + print("=== 消息解析结果 ===") + for idx, msg in enumerate(messages, 1): + print(f"\n消息 {idx}:") + # 获取消息类型 + msg_type = msg.__class__.__name__ + print(f"类型: {msg_type}") + # 提取消息内容 + content = getattr(msg, 'content', '') + print(f"内容: {content if content else '<空>'}") + # 处理附加信息 + additional_kwargs = getattr(msg, 'additional_kwargs', {}) + if additional_kwargs: + print("附加信息:") + for key, value in additional_kwargs.items(): + if key == 'tool_calls' and value: + print(" 工具调用:") + for tool_call in value: + print(f" - ID: {tool_call['id']}") + print(f" 函数: {tool_call['function']['name']}") + print(f" 参数: {tool_call['function']['arguments']}") + else: + print(f" {key}: {value}") + # 处理 ToolMessage 特有字段 + if msg_type == 'ToolMessage': + tool_name = getattr(msg, 'name', '') + tool_call_id = getattr(msg, 'tool_call_id', '') + print(f"工具名称: {tool_name}") + print(f"工具调用 ID: {tool_call_id}") + # 处理 AIMessage 的工具调用和元数据 + if msg_type == 'AIMessage': + tool_calls = getattr(msg, 'tool_calls', []) + if tool_calls: + print("工具调用:") + for tool_call in tool_calls: + print(f" - 名称: {tool_call['name']}") + print(f" 参数: {tool_call['args']}") + print(f" ID: {tool_call['id']}") + # 提取元数据 + metadata = getattr(msg, 'response_metadata', {}) + if metadata: + print("元数据:") + token_usage = metadata.get('token_usage', {}) + print(f" 令牌使用: {token_usage}") + print(f" 模型名称: {metadata.get('model_name', '未知')}") + print(f" 完成原因: {metadata.get('finish_reason', '未知')}") + # 打印消息 ID + msg_id = getattr(msg, 'id', '未知') + print(f"消息 ID: {msg_id}") + print("-" * 50) + + +# 保存状态图的可视化表示 +def save_graph_visualization(graph, filename: str = "graph.png") -> None: + """保存状态图的可视化表示。 + + Args: + graph: 状态图实例。 + filename: 保存文件路径。 + """ + # 尝试执行以下代码块 + try: + # 以二进制写模式打开文件 + with open(filename, "wb") as f: + # 将状态图转换为Mermaid格式的PNG并写入文件 + f.write(graph.get_graph().draw_mermaid_png()) + # 记录保存成功的日志 + print(f"Graph visualization saved as {filename}") + # 捕获IO错误 + except IOError as e: + # 记录警告日志 + print(f"Failed to save graph visualization: {e}") + + +# 定义并运行agent +async def run_agent(): + # 实例化MCP Server客户端 + client = MultiServerMCPClient({ + # 高德地图MCP Server + # "amap-amap-sse": { + # "url": f"https://mcp.amap.com/sse?key={awap_mpas_api_keys}", + # "transport": "sse", + # }, + "simple-mcp":{ + "url": f"http://0.0.0.0:5001/mcp", + "transport": "streamable_http", + } + }) + + # 从MCP Server中获取可提供使用的全部工具 + tools = await client.get_tools() + print(f"tools:{tools}\n") + + # 基于内存存储的short-term + checkpointer = InMemorySaver() + + # 定义系统消息,指导如何使用工具 + system_message = SystemMessage(content=( + "你是一个AI助手,使用高德地图工具获取信息。" + )) + + # 创建ReAct风格的agent + agent = create_react_agent( + model=llm, + tools=tools, + prompt=system_message, + # prompt=f"你是一个乐于助人的AI助手。", + checkpointer=checkpointer + ) + + # 将定义的agent的graph进行可视化输出保存至本地 + # save_graph_visualization(agent) + + # 定义short-term需使用的thread_id + config = {"configurable": {"thread_id": "1"}} + + # # 1、非流式处理查询 + # # 高德地图接口测试 + # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="这个118.79815,32.01112经纬度对应的地方是哪里")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="夫子庙的经纬度坐标是多少")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="112.10.22.229这个IP所在位置")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="上海的天气如何")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="我要从苏州的虎丘区骑行到相城区,帮我规划下路径")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="我要从上海豫园骑行到上海人民广场,帮我规划下路径")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="我要从上海豫园步行到上海人民广场,帮我规划下路径")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="我要从上海豫园驾车到上海人民广场,帮我规划下路径")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="我要从上海豫园坐公共交通到上海人民广场,帮我规划下路径")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="测量下从上海豫园到上海人民广场驾车距离是多少")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="在上海豫园附近的中石化的加油站有哪些,需要有POI的ID")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="POI为B00155LA8A的详细信息")]}, config) + # # agent_response = await agent.ainvoke({"messages": [HumanMessage(content="在上海豫园周围10公里的中石化的加油站")]}, config) + # # 将返回的messages进行格式化输出 + # parse_messages(agent_response['messages']) + # agent_response_content = agent_response["messages"][-1].content + # print(f"agent_response:{agent_response_content}") + + + # 2、流式处理查询 + async for message_chunk, metadata in agent.astream( + # input={"messages": [HumanMessage(content="这个118.79815,32.01112经纬度对应的地方是哪里。输出的内容中不要出现{}")]}, + input={"messages": [HumanMessage(content="帮我读取一个abcdefg.txt文件,输出其中的文件内容,若为空,则输出“此文件为空”")]}, + config=config, + stream_mode="messages" + ): + # 测试原始输出 + # print(f"Token:{message_chunk}\n") + # print(f"Metadata:{metadata}\n\n") + + # # 跳过工具输出 + # if metadata["langgraph_node"]=="tools": + # continue + node_name = metadata.get("langgraph_node", "unknown") + # print(f"\n[节点: {node_name}]", end="") + + if node_name == "tools": + print(f" 工具调用: {message_chunk}") + continue + + # 输出最终结果 + if message_chunk.content: + print(message_chunk.content, end="|", flush=True) + + + +if __name__ == "__main__": + asyncio.run(run_agent()) + + + diff --git a/langgraph_mcp/simple_mpc_server.py b/langgraph_mcp/simple_mpc_server.py new file mode 100644 index 0000000..d1517e7 --- /dev/null +++ b/langgraph_mcp/simple_mpc_server.py @@ -0,0 +1,77 @@ +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("simplemcp", port=5001) + + +@mcp.tool() +def read_file(file_path: str) -> str: + """ + 读取指定文件的内容 + + Args: + file_path (str): 要读取的文件路径,支持相对路径和绝对路径 + + Returns: + str: 文件的内容,以字符串形式返回 + + Raises: + FileNotFoundError: 当文件不存在时抛出 + PermissionError: 当没有读取权限时抛出 + UnicodeDecodeError: 当文件编码不是UTF-8时抛出 + """ + with open(file_path, "r", encoding="utf-8") as f: + return f.read() + +@mcp.tool() +def write_to_file(file_path: str, content: str) -> str: + """ + 将指定内容写入到文件中 + + Args: + file_path (str): 要写入的文件路径,支持相对路径和绝对路径 + content (str): 要写入的内容,会自动将\\n转换为换行符 + + Returns: + str: 操作结果信息,"写入成功"表示成功 + + Raises: + PermissionError: 当没有写入权限时抛出 + OSError: 当文件路径无效或磁盘空间不足时抛出 + """ + with open(file_path, "w", encoding="utf-8") as f: + f.write(content.replace("\\n", "\n")) + return "写入成功" + +@mcp.tool() +def run_terminal_command(command: str) -> str: + """ + 在终端中执行指定的命令 + + Args: + command (str): 要执行的终端命令,支持shell命令 + + Returns: + str: 命令执行结果,成功时返回"执行成功",失败时返回错误信息 + + Note: + 此函数会使用shell=True执行命令,请确保命令来源可信 + 对于长时间运行的命令,可能会阻塞执行 + """ + import subprocess + run_result = subprocess.run(command, shell=True, capture_output=True, text=True) + return "执行成功" if run_result.returncode == 0 else run_result.stderr + +@mcp.resource("greeting://{name}") +def greeting(name: str) -> str: + """Greet a person by name.""" + print(f"roy mcp demo called : greeting({name})") + return f"Hello, {name}!" + +if __name__ == "__main__": + # 以sse协议暴露服务。 + mcp.settings.host = "0.0.0.0" + mcp.run(transport='streamable-http') + # 以stdio协议暴露服务。 + # mcp.run(transport='stdio') + + \ No newline at end of file From f2e1ebd23c6a8c3b241dd80472335d7a204f4ef9 Mon Sep 17 00:00:00 2001 From: zonghui <243156680@qq.com> Date: Tue, 7 Oct 2025 00:19:45 +0800 Subject: [PATCH 3/3] add es as mcp --- langgraph_mcp/elasticsearch_demo.py | 351 ++++++++++++++++++++++++++++ langgraph_mcp/mcp_es_query.py | 80 +++++++ langgraph_mcp/new_test.py | 13 +- 3 files changed, 440 insertions(+), 4 deletions(-) create mode 100644 langgraph_mcp/elasticsearch_demo.py create mode 100644 langgraph_mcp/mcp_es_query.py diff --git a/langgraph_mcp/elasticsearch_demo.py b/langgraph_mcp/elasticsearch_demo.py new file mode 100644 index 0000000..e7b3360 --- /dev/null +++ b/langgraph_mcp/elasticsearch_demo.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Elasticsearch 极简交互示例 +包括数据写入和查询功能 +""" + +from elasticsearch import Elasticsearch +import json +from datetime import datetime +from typing import Dict, List, Any + + +class ElasticsearchDemo: + def __init__(self, host: str = "localhost", port: int = 9200): + """ + 初始化 Elasticsearch 客户端 + + Args: + host: Elasticsearch 服务器地址 + port: Elasticsearch 服务器端口 + """ + # Elasticsearch 9.x 版本需要不同的配置 + self.es = Elasticsearch( + [f"http://{host}:{port}"], + verify_certs=False, + ssl_show_warn=False, + request_timeout=30 + ) + self.index_name = "demo_index" + + def check_connection(self) -> bool: + """检查 Elasticsearch 连接状态""" + try: + if self.es.ping(): + print("Elasticsearch 连接成功") + return True + else: + print("Elasticsearch 连接失败") + return False + except Exception as e: + print(f"连接错误: {e}") + return False + + def create_index(self) -> bool: + """创建索引""" + try: + if not self.es.indices.exists(index=self.index_name): + # 定义索引映射 + mapping = { + "mappings": { + "properties": { + "title": {"type": "text"}, + "content": {"type": "text"}, + "author": {"type": "keyword"}, + "tags": {"type": "keyword"}, + "created_at": {"type": "date"}, + "views": {"type": "integer"}, + "is_published": {"type": "boolean"} + } + } + } + self.es.indices.create(index=self.index_name, **mapping) + print(f"索引 '{self.index_name}' 创建成功") + else: + print(f"索引 '{self.index_name}' 已存在") + return True + except Exception as e: + print(f"创建索引失败: {e}") + return False + + def insert_document(self, doc: Dict[str, Any], doc_id: str = None) -> bool: + """ + 插入单个文档 + + Args: + doc: 要插入的文档数据 + doc_id: 文档ID,如果不提供则自动生成 + + Returns: + bool: 插入是否成功 + """ + try: + # 添加时间戳 + doc["created_at"] = datetime.now().isoformat() + + if doc_id: + result = self.es.index(index=self.index_name, id=doc_id, document=doc) + else: + result = self.es.index(index=self.index_name, document=doc) + + print(f"文档插入成功,ID: {result['_id']}") + return True + except Exception as e: + print(f"插入文档失败: {e}") + return False + + def bulk_insert(self, docs: List[Dict[str, Any]]) -> bool: + """ + 批量插入文档 + + Args: + docs: 要插入的文档列表 + + Returns: + bool: 批量插入是否成功 + """ + try: + actions = [] + for i, doc in enumerate(docs): + # 添加时间戳 + doc["created_at"] = datetime.now().isoformat() + + action = { + "index": { + "_index": self.index_name, + "_source": doc + } + } + actions.append(action) + + # 执行批量插入 + result = self.es.bulk(operations=actions) + + if result["errors"]: + print("批量插入部分失败") + for item in result["items"]: + if "error" in item["index"]: + print(f"错误: {item['index']['error']}") + else: + print(f"批量插入成功,共 {len(docs)} 个文档") + + return True + except Exception as e: + print(f"批量插入失败: {e}") + return False + + def search_documents(self, query: Dict[str, Any], size: int = 10) -> List[Dict[str, Any]]: + """ + 搜索文档 + + Args: + query: 搜索查询 + size: 返回结果数量 + + Returns: + List[Dict]: 搜索结果列表 + """ + try: + result = self.es.search( + index=self.index_name, + query=query["query"], + size=size + ) + + hits = result["hits"]["hits"] + print(f"找到 {result['hits']['total']['value']} 个结果") + + documents = [] + for hit in hits: + doc = hit["_source"] + doc["_id"] = hit["_id"] + doc["_score"] = hit["_score"] + documents.append(doc) + + return documents + except Exception as e: + print(f"搜索失败: {e}") + return [] + + def get_document_by_id(self, doc_id: str) -> Dict[str, Any]: + """ + 根据ID获取文档 + + Args: + doc_id: 文档ID + + Returns: + Dict: 文档数据 + """ + try: + result = self.es.get(index=self.index_name, id=doc_id) + doc = result["_source"] + doc["_id"] = result["_id"] + return doc + except Exception as e: + print(f"获取文档失败: {e}") + return {} + + def delete_document(self, doc_id: str) -> bool: + """ + 删除文档 + + Args: + doc_id: 文档ID + + Returns: + bool: 删除是否成功 + """ + try: + result = self.es.delete(index=self.index_name, id=doc_id) + print(f"文档 {doc_id} 删除成功") + return True + except Exception as e: + print(f"删除文档失败: {e}") + return False + + def get_index_stats(self) -> Dict[str, Any]: + """获取索引统计信息""" + try: + stats = self.es.indices.stats(index=self.index_name) + return stats["indices"][self.index_name] + except Exception as e: + print(f"获取统计信息失败: {e}") + return {} + + +def main(): + """主函数 - 演示 Elasticsearch 基本操作""" + print("Elasticsearch 极简交互示例") + print("=" * 50) + + # 初始化客户端 + es_demo = ElasticsearchDemo() + + # 检查连接 + if not es_demo.check_connection(): + print("请确保 Elasticsearch 服务正在运行") + return + + # 创建索引 + es_demo.create_index() + + # 示例数据 + sample_docs = [ + { + "title": "Python 编程入门", + "content": "Python 是一种简单易学的编程语言,适合初学者学习。", + "author": "张三", + "tags": ["Python", "编程", "入门"], + "views": 100, + "is_published": True + }, + { + "title": "Elasticsearch 基础教程", + "content": "Elasticsearch 是一个分布式搜索和分析引擎。", + "author": "李四", + "tags": ["Elasticsearch", "搜索", "数据库"], + "views": 150, + "is_published": True + }, + { + "title": "机器学习实战", + "content": "机器学习是人工智能的一个重要分支。", + "author": "王五", + "tags": ["机器学习", "AI", "算法"], + "views": 200, + "is_published": False + } + ] + + print("\n插入示例数据") + print("-" * 30) + + # 插入单个文档 + es_demo.insert_document(sample_docs[0], "doc_1") + + # 批量插入文档 + es_demo.bulk_insert(sample_docs[1:]) + + print("\n搜索示例") + print("-" * 30) + + # 1. 全文搜索 + print("1. 全文搜索 'Python':") + query = { + "query": { + "match": { + "content": "Python" + } + } + } + results = es_demo.search_documents(query) + for doc in results: + print(f" - {doc['title']} (作者: {doc['author']})") + + # 2. 精确匹配 + print("\n2. 精确匹配作者 '张三':") + query = { + "query": { + "term": { + "author": "张三" + } + } + } + results = es_demo.search_documents(query) + for doc in results: + print(f" - {doc['title']}") + + # 3. 范围查询 + print("\n3. 浏览量大于 120 的文章:") + query = { + "query": { + "range": { + "views": { + "gte": 120 + } + } + } + } + results = es_demo.search_documents(query) + for doc in results: + print(f" - {doc['title']} (浏览量: {doc['views']})") + + # 4. 布尔查询 + print("\n4. 已发布且包含 '搜索' 标签的文章:") + query = { + "query": { + "bool": { + "must": [ + {"term": {"is_published": True}}, + {"term": {"tags": "搜索"}} + ] + } + } + } + results = es_demo.search_documents(query) + for doc in results: + print(f" - {doc['title']} (标签: {doc['tags']})") + + print("\n获取文档详情") + print("-" * 30) + + # 根据ID获取文档 + doc = es_demo.get_document_by_id("doc_1") + if doc: + print(f"文档详情: {json.dumps(doc, ensure_ascii=False, indent=2)}") + + print("\n索引统计信息") + print("-" * 30) + + stats = es_demo.get_index_stats() + if stats: + print(f"文档总数: {stats['total']['docs']['count']}") + print(f"索引大小: {stats['total']['store']['size_in_bytes']} 字节") + + print("\n演示完成!") + + +if __name__ == "__main__": + main() diff --git a/langgraph_mcp/mcp_es_query.py b/langgraph_mcp/mcp_es_query.py new file mode 100644 index 0000000..bcb0dd8 --- /dev/null +++ b/langgraph_mcp/mcp_es_query.py @@ -0,0 +1,80 @@ +from mcp.server.fastmcp import FastMCP +from elasticsearch import Elasticsearch +import json +from datetime import datetime +from typing import Dict, List, Any + +from elasticsearch_demo import ElasticsearchDemo + +mcp = FastMCP("es_query", port=5005) + + +@mcp.tool() +def search_and_display(query_text: str, max_results: int = 10) -> str: + """ + 搜索并显示结果(类似 main 函数的功能) + + Args: + query_text: 搜索查询文本 + max_results: 最大返回结果数量 + """ + try: + # 初始化客户端 + es_demo = ElasticsearchDemo() + + # 检查连接 + if not es_demo.check_connection(): + return "Elasticsearch 连接失败,请确保服务正在运行" + + # 确保索引存在 + es_demo.create_index() + + # 构建搜索查询 + query = { + "query": { + "multi_match": { + "query": query_text, + "fields": ["title", "content", "tags"], + "type": "best_fields", + "fuzziness": "AUTO" + } + } + } + + # 执行搜索 + results = es_demo.search_documents(query, size=max_results) + + if not results: + return "未找到相关文档" + + # 构建结果字符串 + result_lines = [] + result_lines.append(f"找到 {len(results)} 个相关文档:") + result_lines.append("-" * 30) + + for i, doc in enumerate(results, 1): + result_lines.append(f"{i}. 标题: {doc['title']}") + result_lines.append(f" 作者: {doc['author']}") + result_lines.append(f" 内容: {doc['content'][:100]}...") + result_lines.append(f" 标签: {doc['tags']}") + result_lines.append(f" 浏览量: {doc['views']}") + result_lines.append(f" 相关度分数: {doc.get('_score', 'N/A')}") + result_lines.append("-" * 30) + + return "\n".join(result_lines) + + except Exception as e: + return f"搜索出错: {str(e)}" + + + + + +if __name__ == "__main__": + # 以sse协议暴露服务。 + mcp.settings.host = "0.0.0.0" + mcp.run(transport='streamable-http') + # 以stdio协议暴露服务。 + # mcp.run(transport='stdio') + + \ No newline at end of file diff --git a/langgraph_mcp/new_test.py b/langgraph_mcp/new_test.py index f875342..5abb766 100644 --- a/langgraph_mcp/new_test.py +++ b/langgraph_mcp/new_test.py @@ -111,8 +111,12 @@ async def run_agent(): # "url": f"https://mcp.amap.com/sse?key={awap_mpas_api_keys}", # "transport": "sse", # }, - "simple-mcp":{ - "url": f"http://0.0.0.0:5001/mcp", + # "simple-mcp":{ + # "url": f"http://0.0.0.0:5001/mcp", + # "transport": "streamable_http", + # }, + "es-query":{ + "url": f"http://0.0.0.0:5005/mcp", "transport": "streamable_http", } }) @@ -126,7 +130,7 @@ async def run_agent(): # 定义系统消息,指导如何使用工具 system_message = SystemMessage(content=( - "你是一个AI助手,使用高德地图工具获取信息。" + "" )) # 创建ReAct风格的agent @@ -168,7 +172,8 @@ async def run_agent(): # 2、流式处理查询 async for message_chunk, metadata in agent.astream( # input={"messages": [HumanMessage(content="这个118.79815,32.01112经纬度对应的地方是哪里。输出的内容中不要出现{}")]}, - input={"messages": [HumanMessage(content="帮我读取一个abcdefg.txt文件,输出其中的文件内容,若为空,则输出“此文件为空”")]}, + # input={"messages": [HumanMessage(content="帮我读取一个abcdefg.txt文件,输出其中的文件内容,若为空,则输出“此文件为空”")]}, + input={"messages": [HumanMessage(content="帮我查询下本地数据库,有关机器学习的文档”")]}, config=config, stream_mode="messages" ):