Skip to content

Commit

Permalink
feat complete embeddings
Browse files Browse the repository at this point in the history
  • Loading branch information
Gupta-Anubhav12 committed Feb 24, 2023
1 parent 3e582a5 commit a277947
Show file tree
Hide file tree
Showing 224 changed files with 12,381,060 additions and 430,083 deletions.
12 changes: 9 additions & 3 deletions Vectorise_Script/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,30 @@

@retry(wait=wait_fixed(2))
def get_embedding(text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` from the OpenAI embeddings API.

    The input is encoded with the module-level ``tokenizer``, cut down to its
    first 8000 tokens, and decoded back into a string before it is sent, so
    over-long passages are truncated rather than rejected.

    The function is wrapped by ``retry`` configured with ``wait_fixed(2)``;
    no stop condition is passed to the decorator here.

    Args:
        text: The passage to embed.
        model: Name of the embedding model passed through to the API call.

    Returns:
        The ``"embedding"`` entry of the first item in the response's
        ``"data"`` list.
    """
    token_ids = tokenizer.encode(text)
    # Keep only the leading 8000 tokens, then turn them back into text.
    truncated_text = tokenizer.decode(token_ids[:8000])
    print("embedding")
    response = openai.Embedding.create(input=[truncated_text], model=model)
    first_item = response["data"][0]
    return first_item["embedding"]




canto_path = "srimad-bhagavatam/With Purport/"
vectorized_canto_path = "srimad-bhagavatam/vectorized/"
tokens = 0
for canto_number in range(5,13): #12 cantos in Srimad Bhagavatam
for canto_number in range(9,13): #12 cantos in Srimad Bhagavatam
# canto_number = 8
# done_chapters = ["1","2","3","4","5","8","9","13","14","15","18","19","22","23","25"]
print("enterint canto",canto_number)
for file in os.listdir(f"{canto_path}/Canto {canto_number}"):
print(file)
chapter_number = file.replace(".json","").replace("Chapter","")
# if chapter_number in done_chapters:
# continue
with open(f'{canto_path}/Canto {canto_number}/{file}', encoding='utf-8') as fh:
dataset = json.load(fh)
embeddings = []
for verse in dataset:
chapter_number = file.replace(".json","").replace("Chapter","")

emb = {
"id": verse["verse"],
"devnagari":verse["devanagari"],
Expand Down
98,577 changes: 98,577 additions & 0 deletions srimad-bhagavatam/vectorized/Canto_10/chapter1.json

Large diffs are not rendered by default.

Loading

0 comments on commit a277947

Please sign in to comment.