-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata.py
59 lines (51 loc) · 1.43 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
from client import Client
# Schema with a single "Article" class that uses the "text2vec-cohere"
# vectorizer. Each property is plain text; the (name, description) pairs
# below are expanded into the full property dictionaries.
# NOTE(review): nothing visible in this file passes `schema` to the client —
# presumably it is registered elsewhere; confirm.
schema = {
    "classes": [
        {
            "class": "Article",
            "vectorizer": "text2vec-cohere",
            "properties": [
                {"name": prop, "dataType": ["text"], "description": summary}
                for prop, summary in (
                    ("category", "article category"),
                    ("heading", "article title"),
                    ("article", "article content"),
                )
            ],
        }
    ]
}
# Batch import settings, applied once before any objects are queued.
Client.batch.configure(
    timeout_retries=3,  # presumably the retry count after a timeout — confirm
    batch_size=10,  # starting batch size
    dynamic=True,  # let `batch_size` be updated dynamically from import speed
)
# Import every row of 'headings.csv' as an 'Article' object through the
# client's batch API. The CSV must provide 'Article', 'Category' and
# 'Heading' columns.
# NOTE(review): empty cells are read by pandas as NaN and str() turns them
# into the literal text 'nan' — confirm that this is acceptable.
data = pd.read_csv('headings.csv')
last_index = len(data) - 1  # loop-invariant; only used for progress output

for i, (_, item) in enumerate(data.iterrows()):
    # Map the capitalised CSV columns onto the lower-case property names.
    article_object = {
        'article': str(item['Article']),
        'category': str(item['Category']),
        'heading': str(item['Heading']),
    }
    try:
        Client.batch.add_data_object(article_object, 'Article')
    except Exception as error:
        # Exception (not BaseException) so that Ctrl-C / SystemExit still
        # terminate the script instead of being reported as an import error.
        print("Import Failed at: ", i)
        print("An exception occurred: {}".format(error))
        # Stop the import on error
        break
    print("Status: ", "{}/{}".format(i, last_index))

# Flush the batch; this runs after a full pass and after an early stop alike.
Client.batch.flush()
print('Job done...')