@@ -20,27 +20,78 @@ class GitlabConfig(BaseModel):
20
20
21
21
22
22
class GitlabDataSource (BaseDataSource ):
23
+
24
def _parse_issues(self, documents: list, project_id: str, project_url: str):
    """Fetch a project's issues and append the recently updated ones to ``documents``.

    Args:
        documents: List that receives one ``BasicDocument`` per kept issue;
            it is mutated in place and nothing is returned.
        project_id: Project id interpolated into the issues endpoint URL.
        project_url: Stored as the ``location`` of every document created here.

    Raises:
        Whatever ``raise_for_status()`` raises when the issues request
        comes back with an error status.
    """
    issues_url = f"{GITLAB_BASE_URL}/projects/{project_id}/issues"

    # NOTE(review): GitLab list endpoints are paginated and only the first
    # response page is read here -- confirm whether that is intended.
    issues_response = self._session.get(issues_url)
    issues_response.raise_for_status()

    for issue in issues_response.json():
        last_modified = datetime.strptime(issue["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
        # Skip issues whose last update precedes self._last_index_time.
        if last_modified < self._last_index_time:
            continue

        author = issue["author"]
        documents.append(BasicDocument(
            id=issue["id"],
            data_source_id=self._data_source_id,
            type=DocumentType.GIT_ISSUE,
            title=issue["title"],
            # The description may be None. The previous guard
            # (`... if not None else ""`) was always true, so None leaked
            # through as the document content.
            content=issue["description"] or "",
            author=author["name"],
            author_image_url=author["avatar_url"],
            location=project_url,
            url=issue["web_url"],
            timestamp=last_modified,
        ))
48
+
49
def _parse_pull_requests(self, documents: list, project_id: str, project_url: str):
    """Fetch a project's merge requests and append the recently updated ones to ``documents``.

    Args:
        documents: List that receives one ``BasicDocument`` per kept merge
            request; it is mutated in place and nothing is returned.
        project_id: Project id interpolated into the merge-requests endpoint URL.
        project_url: Stored as the ``location`` of every document created here.

    Raises:
        Whatever ``raise_for_status()`` raises when the merge-requests
        request comes back with an error status.
    """
    pull_requests_url = f"{GITLAB_BASE_URL}/projects/{project_id}/merge_requests"

    # NOTE(review): GitLab list endpoints are paginated and only the first
    # response page is read here -- confirm whether that is intended.
    pull_requests_response = self._session.get(pull_requests_url)
    pull_requests_response.raise_for_status()

    for pull_request in pull_requests_response.json():
        last_modified = datetime.strptime(pull_request["updated_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
        # Skip merge requests whose last update precedes self._last_index_time.
        if last_modified < self._last_index_time:
            continue

        author = pull_request["author"]
        documents.append(BasicDocument(
            id=pull_request["id"],
            data_source_id=self._data_source_id,
            type=DocumentType.GIT_PR,
            title=pull_request["title"],
            # The description may be None. The previous guard
            # (`... if not None else ""`) was always true, so None leaked
            # through as the document content.
            content=pull_request["description"] or "",
            author=author["name"],
            author_image_url=author["avatar_url"],
            location=project_url,
            url=pull_request["web_url"],
            timestamp=last_modified,
        ))
73
+
23
74
@staticmethod
def validate_config(config: Dict) -> None:
    """Check that ``config`` parses and that its access token can list projects.

    Args:
        config: Raw configuration mapping; it is expanded into ``GitlabConfig``.

    Raises:
        InvalidDataSourceConfig: If the config cannot be parsed (``KeyError`` /
            ``ValueError``) or the projects request returns an error status.
    """
    try:
        parsed_config = GitlabConfig(**config)
        # The session is only needed for this probe request, so close it on exit.
        with requests.Session() as session:
            session.headers.update({"PRIVATE-TOKEN": parsed_config.access_token})
            projects_response = session.get(PROJECTS_URL)
            projects_response.raise_for_status()
    except (KeyError, ValueError, requests.HTTPError) as e:
        # raise_for_status() raises requests.HTTPError, which is not a
        # ValueError; without catching it here a rejected token would escape
        # as a raw HTTP error instead of InvalidDataSourceConfig.
        raise InvalidDataSourceConfig from e
34
84
35
85
def __init__(self, *args, **kwargs):
    """Initialise the base data source, parse the GitLab config and open a session.

    The configured access token is sent as the ``PRIVATE-TOKEN`` header on
    every request made through ``self._session``. Create an access token with
    sufficient permissions at
    https://gitlab.com/-/profile/personal_access_tokens
    """
    super().__init__(*args, **kwargs)
    self.gitlab_config = GitlabConfig(**self._config)

    session = requests.Session()
    session.headers.update({"PRIVATE-TOKEN": self.gitlab_config.access_token})
    self._session = session
41
91
42
92
def _feed_new_documents(self) -> None:
    """Fetch the project list and pass it to ``_parse_projects_in_parallel``.

    An error status on the projects request is raised by ``raise_for_status()``
    before the response body is decoded.
    """
    response = self._session.get(PROJECTS_URL)
    response.raise_for_status()

    projects = response.json()
    self._parse_projects_in_parallel(projects)
@@ -51,49 +102,9 @@ def _parse_projects_worker(self, projects):
51
102
52
103
for project in projects :
53
104
project_id = project ["id" ]
54
- issues_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /issues"
55
- issues_response = self .session .get (issues_url )
56
- issues_json = issues_response .json ()
57
-
58
- for issue in issues_json :
59
- last_modified = datetime .strptime (issue ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
60
- if last_modified < self ._last_index_time :
61
- continue
62
-
63
- documents .append (BasicDocument (
64
- id = issue ["id" ],
65
- data_source_id = self ._data_source_id ,
66
- type = DocumentType .DOCUMENT ,
67
- title = issue ['title' ],
68
- content = issue ["description" ] if not None else "" ,
69
- author = issue ['author' ]['name' ],
70
- author_image_url = issue ['author' ]['avatar_url' ],
71
- location = project ["web_url" ],
72
- url = issue ['web_url' ],
73
- timestamp = last_modified
74
- ))
75
-
76
- pull_requests_url = f"{ GITLAB_BASE_URL } /projects/{ project_id } /merge_requests"
77
- pull_requests_response = self .session .get (pull_requests_url )
78
- pull_requests_json = pull_requests_response .json ()
79
-
80
- for pull_request in pull_requests_json :
81
- last_modified = datetime .strptime (pull_request ["updated_at" ], "%Y-%m-%dT%H:%M:%S.%fZ" )
82
- if last_modified < self ._last_index_time :
83
- continue
84
-
85
- documents .append (BasicDocument (
86
- id = pull_request ["id" ],
87
- data_source_id = self ._data_source_id ,
88
- type = DocumentType .DOCUMENT ,
89
- title = pull_request ['title' ],
90
- content = pull_request ["description" ] if not None else "" ,
91
- author = pull_request ['author' ]['name' ],
92
- author_image_url = pull_request ['author' ]['avatar_url' ],
93
- location = project ["web_url" ],
94
- url = pull_request ['web_url' ],
95
- timestamp = last_modified
96
- ))
105
+ project_url = project ["web_url" ]
106
+ self ._parse_issues (documents , project_id , project_url )
107
+ self ._parse_pull_requests (documents , project_id , project_url )
97
108
98
109
IndexingQueue .get ().feed (documents )
99
110
0 commit comments