1
1
from typing import Any , no_type_check
2
2
3
3
import pytest
4
+ from requests import ConnectionError
4
5
5
6
from arango_datasets .datasets import Datasets
6
7
7
- from .conftest import db
8
+ from .conftest import cleanup_collections , db
8
9
10
# Metadata endpoints shared by the tests in this module.  Assignments at module
# scope already create module globals, so no ``global`` statements are needed.
test_metadata_url = (
    "https://arangodb-dataset-library.s3.amazonaws.com/test_metadata.json"  # noqa: E501
)
root_metadata_url = (
    "https://arangodb-dataset-library.s3.amazonaws.com/root_metadata.json"  # noqa: E501
)
# URL passed by the tests that expect a ConnectionError to be raised.
bad_metadata_url = "http://bad_url.arangodb.com/"
9
20
21
+
22
+ @no_type_check
10
23
def test_dataset_constructor () -> None :
11
24
assert Datasets (db )
12
25
assert Datasets (db , batch_size = 1000 )
@@ -17,21 +30,31 @@ def test_dataset_constructor() -> None:
17
30
assert Datasets (
18
31
db ,
19
32
batch_size = 1000 ,
20
- metadata_file = "https://arangodb-dataset-library.s3.amazonaws.com/root_metadata.json" , # noqa: E501
33
+ metadata_file = root_metadata_url ,
21
34
)
35
+ with pytest .raises (TypeError ):
36
+ assert Datasets (
37
+ db = "some none db object" ,
38
+ batch_size = 1000 ,
39
+ metadata_file = root_metadata_url ,
40
+ )
22
41
with pytest .raises (Exception ):
23
42
assert Datasets ({})
24
43
25
- with pytest .raises (Exception ):
26
- assert Datasets (db , metadata_file = "bad_url" )
44
+ with pytest .raises (ConnectionError ):
45
+ assert Datasets (db , metadata_file = bad_metadata_url )
27
46
28
47
48
@no_type_check
def test_list_datasets(capfd: Any) -> None:
    """Check that list_datasets() prints and returns the "TEST" dataset name.

    Args:
        capfd: pytest capture fixture used to read what was written to stdout.
    """
    datasets = Datasets(
        db,
        metadata_file=test_metadata_url,
    ).list_datasets()
    # Only stdout is inspected; stderr is read but intentionally discarded.
    out, _ = capfd.readouterr()
    # The dataset name carries no trailing space, matching the "TEST" key
    # used by the dataset_info test in this module.
    assert "TEST" in out
    assert isinstance(datasets, list)
    assert "TEST" in datasets
35
58
36
59
37
60
@no_type_check
@@ -42,17 +65,101 @@ def test_dataset_info(capfd: Any) -> None:
42
65
with pytest .raises (Exception ):
43
66
Datasets (db ).dataset_info (2 )
44
67
45
- dataset = Datasets (db ).dataset_info ("FLIGHTS" )
68
+ dataset = Datasets (
69
+ db ,
70
+ metadata_file = test_metadata_url ,
71
+ ).dataset_info ("TEST" )
46
72
assert type (dataset ) is dict
47
73
74
+ assert dataset ["TEST" ]["file_type" ] == "json"
75
+
48
76
out , err = capfd .readouterr ()
49
77
assert len (out ) > 0
50
78
51
79
80
@no_type_check
def test_load_file() -> None:
    """Expect load_file() to raise when called with these bogus arguments."""
    # NOTE(review): load_file is invoked on the class here, whereas the other
    # loader tests pass a Datasets(db) instance.  If load_file is an instance
    # method, this may raise because ``self`` is missing rather than because
    # of the bad URL -- confirm the intent.
    bogus_args = {
        "collection_name": "test",
        "edge_type": None,
        "file_url": "false",
    }
    with pytest.raises(Exception):
        Datasets.load_file(**bogus_args)
84
+
85
+
86
@no_type_check
def test_load_json() -> None:
    """Load the sample JSON vertex file and expect load_json() to return None.

    Calls cleanup_collections() first, then creates the "test_vertex"
    collection that the file is loaded into.
    """
    cleanup_collections()
    collection_name = "test_vertex"
    file_url = "https://arangodb-dataset-library.s3.amazonaws.com/test_files/json/vertex_collection/test_vertex.json"  # noqa: E501
    collection = db.create_collection(collection_name)

    result = Datasets.load_json(
        Datasets(db),
        collection_name=collection_name,
        edge_type=False,
        file_url=file_url,
        collection=collection,
    )
    # Identity check: ``== None`` could be satisfied by a custom __eq__.
    assert result is None
102
+
103
+
104
@no_type_check
def test_json_bad_url() -> None:
    """Expect load_json() to raise ConnectionError for bad_metadata_url.

    Calls cleanup_collections() first, then creates the "test_vertex"
    collection that is handed to load_json().
    """
    cleanup_collections()
    collection_name = "test_vertex"
    collection = db.create_collection(collection_name)

    with pytest.raises(ConnectionError):
        Datasets.load_json(
            Datasets(db),
            collection_name=collection_name,
            edge_type=False,
            file_url=bad_metadata_url,
            collection=collection,
        )


# Backward-compatible alias: the previous name lacked the ``test_`` prefix, so
# pytest's default discovery rules never collected (and never ran) this test.
json_bad_url = test_json_bad_url
119
+
120
+
121
@no_type_check
def test_load_jsonl() -> None:
    """Load the sample JSONL vertex file and expect load_jsonl() to return None.

    Calls cleanup_collections() first, then creates the "test_vertex"
    collection that the file is loaded into.
    """
    cleanup_collections()
    collection_name = "test_vertex"
    file_url = "https://arangodb-dataset-library.s3.amazonaws.com/test_files/jsonl/vertex_collection/test_vertex.jsonl"  # noqa: E501
    collection = db.create_collection(collection_name)

    result = Datasets.load_jsonl(
        Datasets(db),
        collection_name=collection_name,
        edge_type=False,
        file_url=file_url,
        collection=collection,
    )
    # Identity check: ``== None`` could be satisfied by a custom __eq__.
    assert result is None
137
+
138
+
139
@no_type_check
def test_jsonl_bad_url() -> None:
    """Expect load_jsonl() to raise ConnectionError for bad_metadata_url.

    Calls cleanup_collections() first, then creates the "test_vertex"
    collection that is handed to load_jsonl().
    """
    cleanup_collections()
    collection_name = "test_vertex"
    collection = db.create_collection(collection_name)

    with pytest.raises(ConnectionError):
        Datasets.load_jsonl(
            Datasets(db),
            collection_name=collection_name,
            edge_type=False,
            file_url=bad_metadata_url,
            collection=collection,
        )


# Backward-compatible alias: the previous name lacked the ``test_`` prefix, so
# pytest's default discovery rules never collected (and never ran) this test.
jsonl_bad_url = test_jsonl_bad_url
153
+
154
+
52
155
@no_type_check
def test_load() -> None:
    """Load the "TEST" dataset and verify the resulting collection counts.

    Also checks that load() raises when given a non-string dataset name.
    """
    cleanup_collections()
    Datasets(
        db,
        metadata_file=test_metadata_url,
    ).load("TEST")

    with pytest.raises(Exception):
        Datasets(db).load(2)

    # Collection names carry no trailing space, matching the "test_vertex"
    # name the other loader tests in this module create.
    assert db.collection("test_vertex").count() == 2
    assert db.collection("test_edge").count() == 1
0 commit comments