From e6e637ab63bfde270e371ea5adcc1cf9e4c9934f Mon Sep 17 00:00:00 2001 From: "seer-by-sentry[bot]" <157164994+seer-by-sentry[bot]@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:13:35 +0000 Subject: [PATCH] fix: Enhance data integrity checks for GitLab groups and Owner usernames --- .../webhook_handlers/views/gitlab.py | 29 +++++++++++++++++-- apps/worker/tasks/sync_teams.py | 12 ++++++++ libs/shared/shared/torngit/gitlab.py | 1 + 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/apps/codecov-api/webhook_handlers/views/gitlab.py b/apps/codecov-api/webhook_handlers/views/gitlab.py index 9e44cbc112..954a751620 100644 --- a/apps/codecov-api/webhook_handlers/views/gitlab.py +++ b/apps/codecov-api/webhook_handlers/views/gitlab.py @@ -83,9 +83,32 @@ def post(self, request, *args, **kwargs): try: # all other events should correspond to a repo in the db - repo = get_object_or_404( - Repository, author__service=self.service_name, service_id=project_id - ) + # Filter out repositories owned by accounts with None usernames + # to avoid MultipleObjectsReturned errors + repos = Repository.objects.filter( + author__service=self.service_name, service_id=project_id + ).exclude(author__username=None) + + if repos.count() == 0: + # If no valid repos found, try without the exclusion + # This handles the case where the only repo has a None username owner + repo = get_object_or_404( + Repository, author__service=self.service_name, service_id=project_id + ) + elif repos.count() == 1: + repo = repos.first() + else: + # Multiple valid repositories found - log and return the first one + log.warning( + "Multiple repositories found for service_id", + extra={ + "service": self.service_name, + "service_id": project_id, + "repo_count": repos.count(), + "repo_ids": list(repos.values_list("repoid", flat=True)), + }, + ) + repo = repos.first() except Exception as e: self._inc_err("repo_not_found") raise e diff --git a/apps/worker/tasks/sync_teams.py b/apps/worker/tasks/sync_teams.py index cdff6738cd..dfac8ee7fe 100644 --- a/apps/worker/tasks/sync_teams.py +++ b/apps/worker/tasks/sync_teams.py @@ -33,6 +33,18 @@ def run_impl(self, db_session, ownerid, *, username=None, **kwargs): updated_teams = [] for team in teams: + # Skip teams with None username to prevent creating invalid Owner records + if team.get("username") is None: + log.warning( + "Skipping team with None username", + extra={ + "service": service, + "team_id": team.get("id"), + "team_name": team.get("name"), + }, + ) + continue + team_data = { "username": team["username"], "name": team["name"], diff --git a/libs/shared/shared/torngit/gitlab.py b/libs/shared/shared/torngit/gitlab.py index 763bc5da0e..74eafa00db 100644 --- a/libs/shared/shared/torngit/gitlab.py +++ b/libs/shared/shared/torngit/gitlab.py @@ -849,6 +849,7 @@ async def list_teams(self, token=None): "parent_id": g["parent_id"], } for g in groups + if g.get("full_path") # Filter out groups with null/missing full_path ] ) return all_groups