diff --git a/backend/analytics_server/mhq/exapi/github.py b/backend/analytics_server/mhq/exapi/github.py index 50cd1929..acf77e39 100644 --- a/backend/analytics_server/mhq/exapi/github.py +++ b/backend/analytics_server/mhq/exapi/github.py @@ -1,7 +1,8 @@ import contextlib +import dataclasses from datetime import datetime from http import HTTPStatus -from typing import Optional, Dict, Tuple, List +from typing import Any, Optional, Dict, Tuple, List import requests from github import Github, UnknownObjectException @@ -12,6 +13,16 @@ from github.Repository import Repository as GithubRepository from mhq.exapi.models.github import GitHubContributor +from mhq.exapi.types.timeline import ( + GitHubReadyForReviewEventDict, + GitHubReviewEventFullDict, +) +from mhq.exapi.models.timeline import ( + GitHubTimeline, + GitHubReadyForReviewEvent, + GitHubReviewEvent, + GitHubUser, +) from mhq.utils.log import LOG PAGE_SIZE = 100 @@ -114,6 +125,230 @@ def get_pr_commits(self, pr: GithubPullRequest): def get_pr_reviews(self, pr: GithubPullRequest) -> GithubPaginatedList: return pr.get_reviews() + def get_pr_timeline( + self, org_login: str, repo_name: str, pr_number: int + ) -> List[GitHubTimeline]: + """ + Fetches the timeline events for a pull request. + + Args: + org_login: The organization login name + repo_name: The repository name + pr_number: The pull request number + + Returns: + List of GitHub timeline events + + Raises: + GithubException: If the API request fails + """ + try: + timeline_events = self._fetch_all_timeline_events( + org_login, repo_name, pr_number + ) + return self._process_timeline_events(timeline_events) + except GithubException as e: + LOG.error( + f"Failed to fetch PR timeline for {org_login}/{repo_name}#{pr_number}: {str(e)}" + ) + raise + + def _process_timeline_events( + self, timeline_events: List[Dict[str, Any]] + ) -> List[GitHubTimeline]: + """Process raw timeline events into model objects.""" + result = [] + for event in timeline_events: + if not self._is_supported_event(event): + continue + + timeline_model = self._convert_to_timeline_model(event) + if timeline_model: + result.append(timeline_model) + + return result + + def _fetch_all_timeline_events( + self, org_login: str, repo_name: str, pr_number: int + ) -> List[Dict[str, Any]]: + """ + Fetches all timeline events with pagination. + + Args: + org_login: The organization login name + repo_name: The repository name + pr_number: The pull request number + + Returns: + List of raw timeline events + + Raises: + GithubException: If the API request fails + """ + all_events = [] + page = 1 + + while True: + github_url = f"{self.base_url}/repos/{org_login}/{repo_name}/issues/{pr_number}/timeline" + query_params = {"per_page": PAGE_SIZE, "page": page} + + response = requests.get( + github_url, + headers={ + **self.headers, + "Accept": "application/vnd.github.mockingbird-preview+json", + }, + params=query_params, + timeout=15, + ) + + if response.status_code != HTTPStatus.OK: + raise GithubException(response.status_code, response.json()) + + data = response.json() + all_events.extend(data) + + if len(data) < PAGE_SIZE: + break + + page += 1 + + return all_events + + def _is_supported_event(self, timeline_event: Dict[str, Any]) -> bool: + """ + Checks if the event type is supported. 
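For reference, a minimal standalone sketch of the pagination loop that _fetch_all_timeline_events implements against the issues timeline endpoint. The token, org, repo, and PR number are placeholders, and the auth header is assumed to be a plain bearer token rather than whatever self.headers actually carries.

import requests

PAGE_SIZE = 100

def fetch_timeline(token: str, org: str, repo: str, pr_number: int) -> list:
    # Same endpoint and preview media type used by _fetch_all_timeline_events.
    url = f"https://api.github.com/repos/{org}/{repo}/issues/{pr_number}/timeline"
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.mockingbird-preview+json",
    }
    events, page = [], 1
    while True:
        resp = requests.get(
            url,
            headers=headers,
            params={"per_page": PAGE_SIZE, "page": page},
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json()
        events.extend(data)
        if len(data) < PAGE_SIZE:  # a short page means there is nothing left to fetch
            break
        page += 1
    return events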
+ + Args: + timeline_event: Raw timeline event data + + Returns: + True if event type is supported, False otherwise + """ + SUPPORTED_EVENTS = {"ready_for_review", "reviewed"} + return timeline_event.get("event") in SUPPORTED_EVENTS + + def _convert_to_timeline_model( + self, timeline_event: Dict[str, Any] + ) -> Optional[GitHubTimeline]: + """ + Converts raw event data into a typed model instance. + + Args: + timeline_event: Raw timeline event data + + Returns: + GitHubTimeline object or None if event type is not supported + """ + event_type = timeline_event.get("event") + + event_converters = { + "ready_for_review": self._create_ready_for_review_event, + "reviewed": self._create_review_event, + } + + converter = event_converters.get(event_type) + if not converter: + return None + + return converter(timeline_event) + + def _create_ready_for_review_event( + self, event_data: Dict[str, Any] + ) -> GitHubReadyForReviewEvent: + """ + Creates a ReadyForReview event from raw data. + + Args: + event_data: Raw event data + + Returns: + GitHubReadyForReviewEvent object + """ + typed_dict = GitHubReadyForReviewEventDict( + id=event_data.get("id"), + node_id=event_data.get("node_id"), + url=event_data.get("url"), + actor=event_data.get("actor"), + event=event_data.get("event"), + commit_id=event_data.get("commit_id"), + commit_url=event_data.get("commit_url"), + created_at=event_data.get("created_at"), + performed_via_github_app=event_data.get("performed_via_github_app"), + ) + + return GitHubReadyForReviewEvent( + id=typed_dict["id"], + node_id=typed_dict["node_id"], + url=typed_dict["url"], + actor=( + GitHubUser( + **{ + k: v + for k, v in typed_dict["actor"].items() + if k in {f.name for f in dataclasses.fields(GitHubUser)} + }, + ) + if typed_dict["actor"] + else None + ), + event=typed_dict["event"], + commit_id=typed_dict.get("commit_id"), + commit_url=typed_dict.get("commit_url"), + created_at=typed_dict["created_at"], + performed_via_github_app=typed_dict.get("performed_via_github_app"), + ) + + def _create_review_event(self, event_data: Dict[str, Any]) -> GitHubReviewEvent: + """ + Creates a Review event from raw data. 
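The actor/user payloads above are narrowed to the dataclass's known fields before construction. A small self-contained sketch of that idiom, using a trimmed stand-in for GitHubUser (the real model lives in mhq.exapi.models.timeline):

import dataclasses
from dataclasses import dataclass
from typing import Optional

@dataclass
class GitHubUser:  # trimmed stand-in for mhq.exapi.models.timeline.GitHubUser
    login: str
    id: int
    node_id: str
    type: str
    avatar_url: Optional[str] = None

def to_user(raw: Optional[dict]) -> Optional[GitHubUser]:
    if not raw:
        return None
    allowed = {f.name for f in dataclasses.fields(GitHubUser)}
    # Drop keys GitHub returns that the dataclass does not define (e.g. site_admin).
    return GitHubUser(**{k: v for k, v in raw.items() if k in allowed})

print(to_user({"login": "octocat", "id": 1, "node_id": "U_1", "type": "User", "site_admin": False}))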
+ + Args: + event_data: Raw event data + + Returns: + GitHubReviewEvent object + """ + typed_dict = GitHubReviewEventFullDict( + id=event_data.get("id"), + node_id=event_data.get("node_id"), + user=event_data.get("user"), + body=event_data.get("body"), + commit_id=event_data.get("commit_id"), + submitted_at=event_data.get("submitted_at"), + state=event_data.get("state"), + html_url=event_data.get("html_url"), + pull_request_url=event_data.get("pull_request_url"), + author_association=event_data.get("author_association"), + _links=event_data.get("_links"), + event=event_data.get("event"), + ) + + return GitHubReviewEvent( + id=typed_dict["id"], + node_id=typed_dict["node_id"], + user=( + GitHubUser( + **{ + k: v + for k, v in typed_dict["user"].items() + if k in {f.name for f in dataclasses.fields(GitHubUser)} + }, + ) + if typed_dict["user"] + else None + ), + body=typed_dict["body"], + commit_id=typed_dict["commit_id"], + submitted_at=typed_dict["submitted_at"], + state=typed_dict["state"], + html_url=typed_dict["html_url"], + pull_request_url=typed_dict["pull_request_url"], + author_association=typed_dict["author_association"], + _links=typed_dict["_links"], + event=typed_dict["event"], + ) + def get_contributors( self, org_login: str, repo_name: str ) -> List[GitHubContributor]: diff --git a/backend/analytics_server/mhq/exapi/models/timeline.py b/backend/analytics_server/mhq/exapi/models/timeline.py new file mode 100644 index 00000000..5d6013bc --- /dev/null +++ b/backend/analytics_server/mhq/exapi/models/timeline.py @@ -0,0 +1,115 @@ +from typing import Optional, Dict, Literal, Union +from dataclasses import dataclass, field +from datetime import datetime + + +@dataclass +class GitHubUser: + """GitHub user information. + + Required fields: + login: GitHub username + id: Unique user identifier + node_id: GitHub node identifier + type: User type (e.g., "User", "Organization") + """ + + # Required core fields + login: str + id: int + node_id: str + type: str + + # Optional URL fields + avatar_url: Optional[str] = None + html_url: Optional[str] = None + url: Optional[str] = None + gravatar_id: Optional[str] = None + + # Optional relation URLs + followers_url: Optional[str] = None + following_url: Optional[str] = None + gists_url: Optional[str] = None + starred_url: Optional[str] = None + subscriptions_url: Optional[str] = None + organizations_url: Optional[str] = None + repos_url: Optional[str] = None + events_url: Optional[str] = None + received_events_url: Optional[str] = None + + +@dataclass +class GitHubReviewEvent: + """GitHub pull request review event. 
+ + Required fields: + id: Unique event identifier + node_id: GitHub node identifier + user: User who performed the review + event: Event type (always "reviewed") + """ + + # Required core fields + id: int + node_id: str + user: GitHubUser + event: Literal["reviewed"] + + # Optional content fields + body: Optional[str] = None + state: Optional[str] = None + + # Optional reference fields + commit_id: Optional[str] = None + html_url: Optional[str] = None + pull_request_url: Optional[str] = None + author_association: Optional[str] = None + + # Optional metadata + submitted_at: Optional[Union[str, datetime]] = None + _links: Optional[Dict] = field(default_factory=dict) + + def __post_init__(self): + # Convert string timestamps to datetime objects + if isinstance(self.submitted_at, str) and self.submitted_at: + self.submitted_at = datetime.fromisoformat( + self.submitted_at.replace("Z", "+00:00") + ) + + +@dataclass +class GitHubReadyForReviewEvent: + """GitHub ready for review event for pull requests. + + Required fields: + id: Unique event identifier + node_id: GitHub node identifier + actor: User who marked PR as ready for review + event: Event type (always "ready_for_review") + """ + + # Required core fields + id: int + node_id: str + actor: GitHubUser + event: Literal["ready_for_review"] + + # Optional reference fields + url: Optional[str] = None + commit_id: Optional[str] = None + commit_url: Optional[str] = None + + # Optional metadata + created_at: Optional[Union[str, datetime]] = None + performed_via_github_app: Optional[str] = None + + def __post_init__(self): + # Convert string timestamps to datetime objects + if isinstance(self.created_at, str) and self.created_at: + self.created_at = datetime.fromisoformat( + self.created_at.replace("Z", "+00:00") + ) + + +# Type alias for timeline events +GitHubTimeline = Union[GitHubReviewEvent, GitHubReadyForReviewEvent] diff --git a/backend/analytics_server/mhq/exapi/types/__init__.py b/backend/analytics_server/mhq/exapi/types/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/analytics_server/mhq/exapi/types/timeline.py b/backend/analytics_server/mhq/exapi/types/timeline.py new file mode 100644 index 00000000..d071b6e6 --- /dev/null +++ b/backend/analytics_server/mhq/exapi/types/timeline.py @@ -0,0 +1,106 @@ +from typing import TypedDict, Dict, Union, Literal + + +class GitHubUserDict(TypedDict): + """GitHub user information as a dictionary.""" + + # Required fields + login: str + id: int + node_id: str + type: str + + +class GitHubUserOptionalDict(TypedDict, total=False): + """Optional fields for GitHub users.""" + + # URL fields + avatar_url: str + html_url: str + url: str + gravatar_id: str + + # Relation URLs + followers_url: str + following_url: str + gists_url: str + starred_url: str + subscriptions_url: str + organizations_url: str + repos_url: str + events_url: str + received_events_url: str + + +class GitHubUserFullDict(GitHubUserDict, GitHubUserOptionalDict): + """Complete GitHub user dictionary with both required and optional fields.""" + + +# Review event required fields +class GitHubReviewEventDict(TypedDict): + """GitHub review event information.""" + + # Required fields + id: int + node_id: str + user: GitHubUserFullDict + event: Literal["reviewed"] + submitted_at: str + state: str + html_url: str + pull_request_url: str + + +# Review event optional fields +class GitHubReviewEventOptionalDict(TypedDict, total=False): + """Optional fields for GitHub review events.""" + + body: str + commit_id: str + 
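The __post_init__ hooks above normalize GitHub's ISO-8601 timestamps (with a trailing "Z") into timezone-aware datetimes. The same conversion shown in isolation:

from datetime import datetime

def parse_github_timestamp(value: str) -> datetime:
    # fromisoformat does not accept the "Z" suffix on older Python versions,
    # so it is rewritten as an explicit UTC offset first.
    return datetime.fromisoformat(value.replace("Z", "+00:00"))

print(parse_github_timestamp("2024-01-15T10:30:00Z").isoformat())
# 2024-01-15T10:30:00+00:00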
author_association: str + _links: Dict + + +# Combined GitHub review event with both required and optional fields +class GitHubReviewEventFullDict(GitHubReviewEventDict, GitHubReviewEventOptionalDict): + """Complete GitHub review event dictionary.""" + + +# Base event required fields +class GitHubEventBaseDict(TypedDict): + """Base GitHub event information.""" + + id: int + node_id: str + url: str + created_at: str + event: str + + +# Base event optional fields +class GitHubEventBaseOptionalDict(TypedDict, total=False): + """Optional fields for base GitHub events.""" + + actor: GitHubUserFullDict + commit_id: str + commit_url: str + performed_via_github_app: str + + +# Combined base event with both required and optional fields +class GitHubEventBaseFullDict(GitHubEventBaseDict, GitHubEventBaseOptionalDict): + """Complete base GitHub event dictionary.""" + + +# Ready for review event +class GitHubReadyForReviewEventDict(GitHubEventBaseFullDict): + """GitHub ready for review event information.""" + + event: Literal["ready_for_review"] + + +# Timeline item union type +GitHubTimelineItemDict = Union[ + GitHubReviewEventFullDict, + GitHubReadyForReviewEventDict, +] diff --git a/backend/analytics_server/mhq/service/code/sync/etl_code_analytics.py b/backend/analytics_server/mhq/service/code/sync/etl_code_analytics.py index fb85dd21..fd36c7aa 100644 --- a/backend/analytics_server/mhq/service/code/sync/etl_code_analytics.py +++ b/backend/analytics_server/mhq/service/code/sync/etl_code_analytics.py @@ -8,7 +8,9 @@ PullRequestCommit, PullRequestEventState, PullRequestState, + PullRequestEventType, ) +from mhq.service.code.sync.utils.timeline import TimelineEventUtils from mhq.utils.time import Interval @@ -53,18 +55,23 @@ def create_pr_metrics( @staticmethod def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]): - pr_events.sort(key=lambda x: x.created_at) - first_review = pr_events[0] if pr_events else None + review_events = TimelineEventUtils.get_sorted_events_by_type( + pr_events, PullRequestEventType.REVIEW.value + ) + ready_for_review_events = TimelineEventUtils.get_sorted_events_by_type( + pr_events, PullRequestEventType.READY_FOR_REVIEW.value + ) + first_review = review_events[0] if review_events else None approved_reviews = list( filter( lambda x: x.data["state"] == PullRequestEventState.APPROVED.value, - pr_events, + review_events, ) ) blocking_reviews = list( filter( lambda x: x.data["state"] != PullRequestEventState.APPROVED.value, - pr_events, + review_events, ) ) @@ -86,8 +93,12 @@ def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]): ).total_seconds() # Prevent garbage state when PR is approved post merging merge_time = -1 if merge_time < 0 else merge_time - - cycle_time = pr.state_changed_at - pr.created_at + pr_ready_time = ( + ready_for_review_events[0].created_at + if ready_for_review_events + else pr.created_at + ) + cycle_time = pr.state_changed_at - pr_ready_time if isinstance(cycle_time, timedelta): cycle_time = cycle_time.total_seconds() @@ -101,7 +112,7 @@ def get_pr_performance(pr: PullRequest, pr_events: [PullRequestEvent]): merge_time=merge_time, cycle_time=cycle_time if pr.state == PullRequestState.MERGED else -1, blocking_reviews=len(blocking_reviews), - approving_reviews=len(pr_events) - len(blocking_reviews), + approving_reviews=len(review_events) - len(blocking_reviews), requested_reviews=len(pr.requested_reviews), ) @@ -118,23 +129,25 @@ def get_rework_cycles( if not pr_commits: return 0 - pr_events.sort(key=lambda x: x.created_at) + 
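The cycle-time change in get_pr_performance starts the clock at the earliest READY_FOR_REVIEW event when the PR went through a draft phase, and falls back to the PR's creation time otherwise. A behavior sketch with illustrative stand-ins rather than the real PullRequest/PullRequestEvent models:

from dataclasses import dataclass
from datetime import datetime

@dataclass
class SimpleEvent:  # stand-in for a READY_FOR_REVIEW PullRequestEvent
    created_at: datetime

created_at = datetime(2024, 1, 1, 9, 0)
state_changed_at = datetime(2024, 1, 1, 14, 0)  # merged here
ready_events = [SimpleEvent(datetime(2024, 1, 1, 11, 0))]

pr_ready_time = (
    min(ready_events, key=lambda e: e.created_at).created_at
    if ready_events
    else created_at
)
cycle_time = (state_changed_at - pr_ready_time).total_seconds()
print(cycle_time)  # 10800.0, i.e. 3h instead of the 5h measured from created_at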
review_events = TimelineEventUtils.get_sorted_events_by_type( + pr_events, PullRequestEventType.REVIEW.value + ) pr_commits.sort(key=lambda x: x.created_at) first_blocking_review = None last_relevant_approval_review = None pr_reviewers = dict.fromkeys(pr.reviewers, True) - for pr_event in pr_events: + for review_event in review_events: if ( - pr_event.state != PullRequestEventState.APPROVED.value - and pr_reviewers.get(pr_event.actor_username) + review_event.state != PullRequestEventState.APPROVED.value + and pr_reviewers.get(review_event.actor_username) and not first_blocking_review ): - first_blocking_review = pr_event + first_blocking_review = review_event - if pr_event.state == PullRequestEventState.APPROVED.value: - last_relevant_approval_review = pr_event + if review_event.state == PullRequestEventState.APPROVED.value: + last_relevant_approval_review = review_event break if not first_blocking_review: @@ -161,7 +174,7 @@ def get_rework_cycles( and x.actor_username != pr.author and pr_reviewers.get(x.actor_username) and x.created_at in interval, - pr_events, + review_events, ) ) all_events = sorted(pr_commits + blocking_reviews, key=lambda x: x.created_at) diff --git a/backend/analytics_server/mhq/service/code/sync/etl_github_handler.py b/backend/analytics_server/mhq/service/code/sync/etl_github_handler.py index 8f5c1acf..74206c87 100644 --- a/backend/analytics_server/mhq/service/code/sync/etl_github_handler.py +++ b/backend/analytics_server/mhq/service/code/sync/etl_github_handler.py @@ -1,11 +1,11 @@ +from dataclasses import asdict import uuid from datetime import datetime -from typing import List, Dict, Optional, Tuple, Set +from typing import Any, List, Dict, Optional, Tuple, Set import pytz from github.PaginatedList import PaginatedList as GithubPaginatedList from github.PullRequest import PullRequest as GithubPullRequest -from github.PullRequestReview import PullRequestReview as GithubPullRequestReview from github.Repository import Repository as GithubRepository from mhq.exapi.github import GithubApiService @@ -26,10 +26,16 @@ PullRequestRevertPRMapping, CodeProvider, ) +from mhq.exapi.models.timeline import ( + GitHubTimeline, + GitHubReviewEvent, + GitHubReadyForReviewEvent, +) from mhq.store.repos.code import CodeRepoService from mhq.store.repos.core import CoreRepoService from mhq.utils.log import LOG from mhq.utils.time import time_now, ISO_8601_DATE_FORMAT +from mhq.service.code.sync.utils.timeline import TimelineEventUtils PR_PROCESSING_CHUNK_SIZE = 100 @@ -169,11 +175,21 @@ def process_pr( ) pr_commits_model_list: List = [] - reviews: List[GithubPullRequestReview] = list(self._api.get_pr_reviews(pr)) + owner_login = pr.base.repo.owner.login + timeline: List[GitHubTimeline] = self._api.get_pr_timeline( + owner_login, pr.base.repo.name, pr.number + ) + reviews: List[GitHubReviewEvent] = TimelineEventUtils.get_review_events( + timeline + ) + ready_for_review: Optional[GitHubReadyForReviewEvent] = ( + TimelineEventUtils.get_earliest_ready_for_review_event(timeline) + ) pr_model: PullRequest = self._to_pr_model(pr, pr_model, repo_id, len(reviews)) pr_events_model_list: List[PullRequestEvent] = self._to_pr_events( - reviews, pr_model, pr_event_model_list + reviews, ready_for_review, pr_model, pr_event_model_list ) + if pr.merged_at: commits: List[Dict] = list( map( @@ -282,34 +298,82 @@ def _get_state(pr: GithubPullRequest) -> PullRequestState: @staticmethod def _to_pr_events( - reviews: [GithubPullRequestReview], + reviews: List[GitHubReviewEvent], + ready_for_review: 
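A usage sketch of the new sync flow in process_pr: one timeline fetch feeds both the review events and the earliest ready-for-review event. The api/org/repo arguments are placeholders, and the snippet assumes the modules introduced in this PR are importable as shown.

from mhq.service.code.sync.utils.timeline import TimelineEventUtils

def collect_pr_events(api, org_login: str, repo_name: str, pr_number: int):
    # api is a GithubApiService-like object exposing the new get_pr_timeline method.
    timeline = api.get_pr_timeline(org_login, repo_name, pr_number)
    reviews = TimelineEventUtils.get_review_events(timeline)
    ready_for_review = TimelineEventUtils.get_earliest_ready_for_review_event(timeline)
    return reviews, ready_for_review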
Optional[GitHubReadyForReviewEvent], pr_model: PullRequest, - pr_events_model: [PullRequestEvent], + pr_events_model: List[PullRequestEvent], ) -> List[PullRequestEvent]: pr_events: List[PullRequestEvent] = [] pr_event_id_map = {event.idempotency_key: event.id for event in pr_events_model} + # Process reviews for review in reviews: if not review.submitted_at: - continue # Discard incomplete reviews - - actor = review.raw_data.get("user", {}) - username = actor.get("login", "") if actor else "" + continue # Skip incomplete reviews pr_events.append( - PullRequestEvent( - id=pr_event_id_map.get(str(review.id), uuid.uuid4()), - pull_request_id=str(pr_model.id), - type=PullRequestEventType.REVIEW.value, - data=review.raw_data, - created_at=review.submitted_at.replace(tzinfo=pytz.UTC), + GithubETLHandler._create_pr_event( + event_id=pr_event_id_map.get(str(review.id), uuid.uuid4()), + pr_id=str(pr_model.id), + event_type=PullRequestEventType.REVIEW.value, + event_data=asdict(review), + created_at=review.submitted_at, idempotency_key=str(review.id), - org_repo_id=pr_model.repo_id, - actor_username=username, + repo_id=pr_model.repo_id, + actor=review.user, + ) + ) + + # Process ready_for_review event + if ready_for_review: + pr_events.append( + GithubETLHandler._create_pr_event( + event_id=pr_event_id_map.get( + str(ready_for_review.id), uuid.uuid4() + ), + pr_id=str(pr_model.id), + event_type=PullRequestEventType.READY_FOR_REVIEW.value, + event_data=asdict(ready_for_review), + created_at=ready_for_review.created_at, + idempotency_key=str(ready_for_review.id), + repo_id=pr_model.repo_id, + actor=ready_for_review.actor, ) ) + return pr_events + @staticmethod + def _create_pr_event( + event_id: uuid.UUID, + pr_id: str, + event_type: str, + event_data: Dict[str, Any], + created_at: datetime, + idempotency_key: str, + repo_id: str, + actor: Any, + ) -> PullRequestEvent: + """Helper method to create a PullRequestEvent with consistent formatting""" + data_copy = event_data.copy() + if "submitted_at" in data_copy: + del data_copy["submitted_at"] + if "created_at" in data_copy: + del data_copy["created_at"] + + username = actor.login if actor else "" + + return PullRequestEvent( + id=event_id, + pull_request_id=pr_id, + type=event_type, + data=data_copy, + created_at=created_at, + idempotency_key=idempotency_key, + org_repo_id=repo_id, + actor_username=username, + ) + def _to_pr_commits( self, commits: List[Dict], diff --git a/backend/analytics_server/mhq/service/code/sync/utils/__init__.py b/backend/analytics_server/mhq/service/code/sync/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/analytics_server/mhq/service/code/sync/utils/timeline.py b/backend/analytics_server/mhq/service/code/sync/utils/timeline.py new file mode 100644 index 00000000..49f3d868 --- /dev/null +++ b/backend/analytics_server/mhq/service/code/sync/utils/timeline.py @@ -0,0 +1,94 @@ +from typing import List, TypeVar, Optional, Type + +from mhq.exapi.models.timeline import ( + GitHubTimeline, + GitHubReviewEvent, + GitHubReadyForReviewEvent, +) +from mhq.store.models.code import PullRequestEvent + +T = TypeVar("T") +E = TypeVar("E") + + +class TimelineEventUtils: + @staticmethod + def filter_by_type(events: List[T], event_type: str) -> List[T]: + """ + Filter events by their type attribute. 
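The _create_pr_event helper dumps the dataclass with asdict() and drops the raw timestamp keys from the stored payload; the event's timestamp is passed separately as created_at. A self-contained sketch with a hypothetical ReviewLike stand-in:

from dataclasses import asdict, dataclass
from datetime import datetime

@dataclass
class ReviewLike:  # hypothetical stand-in for GitHubReviewEvent
    id: int
    state: str
    submitted_at: datetime

def to_event_payload(event_obj) -> dict:
    data = asdict(event_obj)
    data.pop("submitted_at", None)  # timestamps stay out of the stored blob;
    data.pop("created_at", None)    # they are carried on the event's created_at field
    return data

print(to_event_payload(ReviewLike(1, "APPROVED", datetime(2024, 1, 1))))
# {'id': 1, 'state': 'APPROVED'}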
+ + Args: + events: List of events to filter + event_type: Type to filter by + + Returns: + List of events matching the specified type + """ + return [event for event in events if event.type == event_type] + + @staticmethod + def filter_by_instance_type(events: List[E], event_class: Type[T]) -> List[T]: + """ + Filter events by their class type. + + Args: + events: List of events to filter + event_class: Class type to filter by + + Returns: + List of events that are instances of the specified class + """ + return [event for event in events if isinstance(event, event_class)] + + @staticmethod + def get_review_events(timeline: List[GitHubTimeline]) -> List[GitHubReviewEvent]: + """ + Extract review events from the PR timeline. + + Args: + timeline: List of GitHub timeline events + + Returns: + List of GitHub review events + """ + return TimelineEventUtils.filter_by_instance_type(timeline, GitHubReviewEvent) + + @staticmethod + def get_earliest_ready_for_review_event( + timeline: List[GitHubTimeline], + ) -> Optional[GitHubReadyForReviewEvent]: + """ + Find the earliest ready for review event from the PR timeline. + + Args: + timeline: List of GitHub timeline events + + Returns: + The earliest ready for review event, or None if no such events exist + """ + ready_events = TimelineEventUtils.filter_by_instance_type( + timeline, GitHubReadyForReviewEvent + ) + return ( + min(ready_events, key=lambda event: event.created_at) + if ready_events + else None + ) + + @staticmethod + def get_sorted_events_by_type( + pr_events: List[PullRequestEvent], event_type: str + ) -> List[PullRequestEvent]: + """ + Get pull request events filtered by type and sorted by creation time. + + Args: + pr_events: List of pull request events + event_type: Type to filter by (can be enum or string value) + + Returns: + List of filtered events sorted by creation time + """ + + events = TimelineEventUtils.filter_by_type(pr_events, event_type) + return sorted(events, key=lambda x: x.created_at) diff --git a/backend/analytics_server/mhq/store/models/code/enums.py b/backend/analytics_server/mhq/store/models/code/enums.py index 5494e536..ef9e01ce 100644 --- a/backend/analytics_server/mhq/store/models/code/enums.py +++ b/backend/analytics_server/mhq/store/models/code/enums.py @@ -29,6 +29,7 @@ class PullRequestEventState(Enum): class PullRequestEventType(Enum): REVIEW = "REVIEW" + READY_FOR_REVIEW = "READY_FOR_REVIEW" class PullRequestRevertPRMappingActorType(Enum): diff --git a/backend/analytics_server/tests/factories/models/code.py b/backend/analytics_server/tests/factories/models/code.py index 793dc262..af110a24 100644 --- a/backend/analytics_server/tests/factories/models/code.py +++ b/backend/analytics_server/tests/factories/models/code.py @@ -86,6 +86,7 @@ def get_pull_request_event( idempotency_key=None, org_repo_id=None, data=None, + actor_username=None, ): return PullRequestEvent( id=id or uuid4(), @@ -103,7 +104,7 @@ def get_pull_request_event( created_at=created_at or time_now(), idempotency_key=idempotency_key or str(randint(10, 100)), org_repo_id=org_repo_id or uuid4(), - actor_username=reviewer or "randomuser", + actor_username=reviewer or actor_username or "randomuser", ) diff --git a/backend/analytics_server/tests/factories/models/exapi/github.py b/backend/analytics_server/tests/factories/models/exapi/github.py index 2f5615ac..70da23b6 100644 --- a/backend/analytics_server/tests/factories/models/exapi/github.py +++ b/backend/analytics_server/tests/factories/models/exapi/github.py @@ -29,11 +29,16 @@ def 
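A behavior sketch for TimelineEventUtils.get_sorted_events_by_type as the analytics service uses it: filter on the event's type string, then sort ascending by created_at. Ev is a lightweight stand-in for the PullRequestEvent ORM model, and the snippet assumes the repo's modules are importable.

from dataclasses import dataclass
from datetime import datetime

from mhq.service.code.sync.utils.timeline import TimelineEventUtils
from mhq.store.models.code import PullRequestEventType

@dataclass
class Ev:  # lightweight stand-in for PullRequestEvent
    type: str
    created_at: datetime

events = [
    Ev(PullRequestEventType.REVIEW.value, datetime(2024, 1, 2)),
    Ev(PullRequestEventType.READY_FOR_REVIEW.value, datetime(2024, 1, 1)),
    Ev(PullRequestEventType.REVIEW.value, datetime(2024, 1, 1, 12)),
]

reviews = TimelineEventUtils.get_sorted_events_by_type(
    events, PullRequestEventType.REVIEW.value
)
print([e.created_at.isoformat() for e in reviews])
# ['2024-01-01T12:00:00', '2024-01-02T00:00:00']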
get_github_commit_dict( } +@dataclass +class GitHubUser: + login: str + + @dataclass class GithubPullRequestReview: id: str submitted_at: datetime - user_login: str + user: GitHubUser @property def raw_data(self): @@ -41,7 +46,7 @@ def raw_data(self): "id": self.id, "submitted_at": self.submitted_at, "user": { - "login": self.user_login, + "login": self.user.login, }, } @@ -51,8 +56,8 @@ def get_github_pull_request_review( submitted_at: datetime = time_now(), user_login: str = "abc", ) -> GithubPullRequestReview: - - return GithubPullRequestReview(review_id, submitted_at, user_login) + # Create with a proper GitHubUser instance + return GithubPullRequestReview(review_id, submitted_at, GitHubUser(user_login)) Branch = namedtuple("Branch", ["ref"]) diff --git a/backend/analytics_server/tests/service/code/sync/test_etl_code_analytics.py b/backend/analytics_server/tests/service/code/sync/test_etl_code_analytics.py index 77e7ca1f..de3caaaf 100644 --- a/backend/analytics_server/tests/service/code/sync/test_etl_code_analytics.py +++ b/backend/analytics_server/tests/service/code/sync/test_etl_code_analytics.py @@ -1,7 +1,11 @@ from datetime import timedelta from mhq.service.code.sync.etl_code_analytics import CodeETLAnalyticsService -from mhq.store.models.code import PullRequestState, PullRequestEventState +from mhq.store.models.code import ( + PullRequestState, + PullRequestEventState, + PullRequestEventType, +) from mhq.utils.time import time_now from tests.factories.models.code import ( get_pull_request, @@ -500,3 +504,113 @@ def test_rework_cycles_returs_1_for_multiple_approvals(): ) == 1 ) + + +def test_pr_performance_calculates_cycle_time_from_ready_for_review_event(): + pr_service = CodeETLAnalyticsService() + t1 = time_now() + t2 = t1 + timedelta(hours=2) + t3 = t1 + timedelta(hours=5) + + pr = get_pull_request( + state=PullRequestState.MERGED, state_changed_at=t3, created_at=t1, updated_at=t3 + ) + + ready_for_review_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.READY_FOR_REVIEW.value, + created_at=t2, + ) + + performance = pr_service.get_pr_performance(pr, [ready_for_review_event]) + + expected_cycle_time = (t3 - t2).total_seconds() + assert performance.cycle_time == expected_cycle_time + + +def test_pr_performance_uses_pr_creation_time_when_no_ready_for_review_event(): + pr_service = CodeETLAnalyticsService() + t1 = time_now() + t2 = t1 + timedelta(hours=3) + + pr = get_pull_request( + state=PullRequestState.MERGED, state_changed_at=t2, created_at=t1, updated_at=t2 + ) + + performance = pr_service.get_pr_performance(pr, []) + + expected_cycle_time = (t2 - t1).total_seconds() + assert performance.cycle_time == expected_cycle_time + + +def test_pr_performance_uses_earliest_ready_for_review_event(): + pr_service = CodeETLAnalyticsService() + t1 = time_now() + t2 = t1 + timedelta(hours=1) + t3 = t1 + timedelta(hours=2) + t4 = t1 + timedelta(hours=5) + + pr = get_pull_request( + state=PullRequestState.MERGED, state_changed_at=t4, created_at=t1, updated_at=t4 + ) + + first_ready_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.READY_FOR_REVIEW.value, + created_at=t2, + ) + + second_ready_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.READY_FOR_REVIEW.value, + created_at=t3, + ) + + performance = pr_service.get_pr_performance( + pr, [second_ready_event, first_ready_event] + ) + + expected_cycle_time = (t4 - t2).total_seconds() + assert performance.cycle_time == expected_cycle_time + 
+ +def test_pr_performance_includes_review_and_ready_for_review_events(): + pr_service = CodeETLAnalyticsService() + t1 = time_now() + t2 = t1 + timedelta(hours=1) + t3 = t1 + timedelta(hours=2) + t4 = t1 + timedelta(hours=3) + t5 = t1 + timedelta(hours=4) + + pr = get_pull_request( + state=PullRequestState.MERGED, state_changed_at=t5, created_at=t1, updated_at=t5 + ) + + ready_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.READY_FOR_REVIEW.value, + created_at=t2, + ) + + review_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.REVIEW.value, + state=PullRequestEventState.CHANGES_REQUESTED.value, + created_at=t3, + ) + + approval_event = get_pull_request_event( + pull_request_id=pr.id, + type=PullRequestEventType.REVIEW.value, + state=PullRequestEventState.APPROVED.value, + created_at=t4, + ) + + performance = pr_service.get_pr_performance( + pr, [ready_event, review_event, approval_event] + ) + + assert performance.cycle_time == (t5 - t2).total_seconds() + assert performance.first_review_time == (t3 - t1).total_seconds() + assert performance.rework_time == (t4 - t3).total_seconds() + assert performance.merge_time == (t5 - t4).total_seconds() diff --git a/backend/analytics_server/tests/service/code/sync/test_etl_github_handler.py b/backend/analytics_server/tests/service/code/sync/test_etl_github_handler.py index a51b7a79..d398fdd2 100644 --- a/backend/analytics_server/tests/service/code/sync/test_etl_github_handler.py +++ b/backend/analytics_server/tests/service/code/sync/test_etl_github_handler.py @@ -1,3 +1,4 @@ +from dataclasses import asdict from datetime import datetime import pytz @@ -194,7 +195,7 @@ def test__to_pr_model_given_a_github_pr_and_db_pr_returns_updated_pr_model(): def test__to_pr_events_given_an_empty_list_of_events_returns_an_empty_list(): pr_model = get_pull_request() - assert GithubETLHandler._to_pr_events([], pr_model, []) == [] + assert GithubETLHandler._to_pr_events([], None, pr_model, []) == [] def test__to_pr_events_given_a_list_of_only_new_events_returns_a_list_of_pr_events(): @@ -203,26 +204,32 @@ def test__to_pr_events_given_a_list_of_only_new_events_returns_a_list_of_pr_even event2 = get_github_pull_request_review() events = [event1, event2] - pr_events = GithubETLHandler._to_pr_events(events, pr_model, []) + pr_events = GithubETLHandler._to_pr_events(events, None, pr_model, []) + + # Create expected event data without submitted_at field + event1_data = asdict(event1) + event1_data.pop("submitted_at", None) + event2_data = asdict(event2) + event2_data.pop("submitted_at", None) expected_pr_events = [ get_pull_request_event( pull_request_id=str(pr_model.id), org_repo_id=pr_model.repo_id, - data=event1.raw_data, + data=event1_data, created_at=event1.submitted_at, type="REVIEW", idempotency_key=event1.id, - reviewer=event1.user_login, + actor_username=event1.user.login, ), get_pull_request_event( pull_request_id=str(pr_model.id), org_repo_id=pr_model.repo_id, - data=event2.raw_data, + data=event2_data, created_at=event2.submitted_at, type="REVIEW", idempotency_key=event2.id, - reviewer=event2.user_login, + actor_username=event2.user.login, ), ] @@ -236,28 +243,34 @@ def test__to_pr_events_given_a_list_of_new_events_and_old_events_returns_a_list_ event2 = get_github_pull_request_review() events = [event1, event2] + # Create event data without submitted_at field for the old event + event1_data = asdict(event1) + event1_data.pop("submitted_at", None) + event2_data = asdict(event2) + 
event2_data.pop("submitted_at", None) + old_event = get_pull_request_event( pull_request_id=str(pr_model.id), org_repo_id=pr_model.repo_id, - data=event1.raw_data, + data=event1_data, created_at=event1.submitted_at, type="REVIEW", idempotency_key=event1.id, - reviewer=event1.user_login, + actor_username=event1.user.login, ) - pr_events = GithubETLHandler._to_pr_events(events, pr_model, [old_event]) + pr_events = GithubETLHandler._to_pr_events(events, None, pr_model, [old_event]) expected_pr_events = [ old_event, get_pull_request_event( pull_request_id=str(pr_model.id), org_repo_id=pr_model.repo_id, - data=event2.raw_data, + data=event2_data, created_at=event2.submitted_at, type="REVIEW", idempotency_key=event2.id, - reviewer=event2.user_login, + actor_username=event2.user.login, ), ]