-
Notifications
You must be signed in to change notification settings - Fork 498
Description
I am new to both Python and AI. Please help.
`(langchain-rag-tutorial) aimlstudio@aimlstudio-Alpha-15-A3DD:/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial$ python3 create_database.py
Error loading file data/books/alice_in_wonderland.md
Traceback (most recent call last):
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/create_database.py", line 71, in <module>
main()
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/create_database.py", line 25, in main
generate_data_store()
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/create_database.py", line 29, in generate_data_store
documents = load_documents()
^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/create_database.py", line 36, in load_documents
documents = loader.load()
^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 117, in load
return list(self.lazy_load())
^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 182, in lazy_load
yield from self._lazy_load_file(i, p, pbar)
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 220, in _lazy_load_file
raise e
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/directory.py", line 210, in _lazy_load_file
for subdoc in loader.lazy_load():
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/unstructured.py", line 88, in lazy_load
elements = self._get_elements()
^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/langchain_community/document_loaders/unstructured.py", line 180, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/auto.py", line 415, in partition
elements = _partition_md(
^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/elements.py", line 591, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 618, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/md.py", line 112, in partition_md
return partition_html(
^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/elements.py", line 591, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 618, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/file_utils/filetype.py", line 582, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/chunking/dispatch.py", line 74, in wrapper
elements = func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/html.py", line 149, in partition_html
document_to_element_list(
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/common.py", line 559, in document_to_element_list
num_pages = len(document.pages)
^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/xml.py", line 54, in pages
self._pages = self._parse_pages_from_element_tree()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/html.py", line 173, in _parse_pages_from_element_tree
_page_elements, descendanttag_elems = _process_text_tag(tag_elem)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/html.py", line 630, in _process_text_tag
element = _parse_tag(tag_elem, include_tail_text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/html.py", line 438, in _parse_tag
return _text_to_element(
^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/html.py", line 486, in _text_to_element
elif is_narrative_tag(text, tag):
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/documents/html.py", line 536, in is_narrative_tag
return tag not in HEADING_TAGS and is_possible_narrative_text(text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 80, in is_possible_narrative_text
if exceeds_cap_ratio(text, threshold=cap_threshold):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 276, in exceeds_cap_ratio
if sentence_count(text, 3) > 1:
^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/partition/text_type.py", line 225, in sentence_count
sentences = sent_tokenize(text)
^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/unstructured/nlp/tokenize.py", line 30, in sent_tokenize
return _sent_tokenize(text)
^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/nltk/tokenize/__init__.py", line 119, in sent_tokenize
tokenizer = _get_punkt_tokenizer(language)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/nltk/tokenize/__init__.py", line 105, in _get_punkt_tokenizer
return PunktTokenizer(language)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/nltk/tokenize/punkt.py", line 1744, in __init__
self.load_lang(lang)
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/nltk/tokenize/punkt.py", line 1749, in load_lang
lang_dir = find(f"tokenizers/punkt_tab/{lang}/")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/python3.12/site-packages/nltk/data.py", line 579, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt_tab not found.
Please use the NLTK Downloader to obtain the resource:
import nltk
nltk.download('punkt_tab')
For more information see: https://www.nltk.org/data.html
Attempted to load tokenizers/punkt_tab/english/
Searched in:
- '/home/aimlstudio/nltk_data'
- '/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/nltk_data'
- '/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/share/nltk_data'
- '/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial/lib/nltk_data'
- '/usr/share/nltk_data'
- '/usr/local/share/nltk_data'
- '/usr/lib/nltk_data'
- '/usr/local/lib/nltk_data'
(langchain-rag-tutorial) aimlstudio@aimlstudio-Alpha-15-A3DD:/media/aimlstudio/c72fb1a2-b0e8-4777-9e09-f86e94a685be/home/aimlstudio/Documents/workspace/aiml/projects/rag_langchain/langchain-rag-tutorial$
`