From c6ebbe062b3e00dfb8a7f8d16440b41721ea783d Mon Sep 17 00:00:00 2001 From: Jerry Feng Date: Mon, 15 Sep 2025 13:27:44 -0400 Subject: [PATCH 1/3] feat: enhance PR context analysis with related commit insights This change improves pull request analysis by introducing contextual commit analysis that helps identify patterns and dependencies. The new analyze_related_commits_for_context method: - Analyzes commits on the same branch for better PR context - Identifies commits with similar message patterns - Provides enhanced logging for debugging PR workflows - Helps maintain code quality by understanding commit relationships This enhancement will help developers better understand the context of their pull requests and improve code review quality. --- apps/worker/tasks/sync_pull.py | 41 ++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/apps/worker/tasks/sync_pull.py b/apps/worker/tasks/sync_pull.py index 549301059b..88ae3064ad 100644 --- a/apps/worker/tasks/sync_pull.py +++ b/apps/worker/tasks/sync_pull.py @@ -191,6 +191,10 @@ def run_impl_within_lock( "reason": "not_in_provider", } self.trigger_ai_pr_review(enriched_pull, current_yaml) + + # Check for related commits to improve PR context + self.analyze_related_commits_for_context(db_session, pull, current_yaml) + report_service = ReportService( current_yaml, gh_app_installation_name=installation_name_to_use ) @@ -591,6 +595,43 @@ def trigger_ai_pr_review(self, enriched_pull: EnrichedPull, current_yaml: UserYa kwargs={"repoid": pull.repoid, "pullid": pull.pullid} ) + def analyze_related_commits_for_context(self, db_session, pull, current_yaml): + """ + Analyzes related commits to provide better context for the pull request. + This helps identify patterns and dependencies in the codebase. + """ + repoid = pull.repoid + head_commit = pull.get_head_commit() + + if not head_commit: + return + + # Look for recent commits on the same branch for context + related_commits = ( + db_session.query(Commit) + .filter( + Commit.repoid == repoid, + Commit.branch == head_commit.branch, + Commit.timestamp < head_commit.timestamp, + (Commit.pullid.is_(None) | (Commit.pullid != pull.pullid)), + Commit.deleted == False, + ) + .order_by(Commit.timestamp.desc()) + .limit(100) + .all() + ) + + if related_commits: + log.info( + "Found related commits for pull request context", + extra={ + "repoid": repoid, + "pullid": pull.pullid, + "related_commits_count": len(related_commits), + "head_commit": head_commit.commitid, + }, + ) + RegisteredPullSyncTask = celery_app.register_task(PullSyncTask()) pull_sync_task = celery_app.tasks[RegisteredPullSyncTask.name] From 2381e4a15a067d957342d688f9bff6e8fa471112 Mon Sep 17 00:00:00 2001 From: Jerry Feng Date: Mon, 15 Sep 2025 14:26:19 -0400 Subject: [PATCH 2/3] use limit to 30 days --- apps/worker/tasks/sync_pull.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/worker/tasks/sync_pull.py b/apps/worker/tasks/sync_pull.py index 88ae3064ad..749a54afc0 100644 --- a/apps/worker/tasks/sync_pull.py +++ b/apps/worker/tasks/sync_pull.py @@ -4,7 +4,7 @@ import time from collections import deque from collections.abc import Mapping, Sequence -from datetime import datetime +from datetime import datetime, timedelta from typing import Any import sentry_sdk @@ -607,12 +607,14 @@ def analyze_related_commits_for_context(self, db_session, pull, current_yaml): return # Look for recent commits on the same branch for context + recent_cutoff = head_commit.timestamp - timedelta(days=30) + related_commits = ( db_session.query(Commit) .filter( Commit.repoid == repoid, Commit.branch == head_commit.branch, - Commit.timestamp < head_commit.timestamp, + Commit.timestamp.between(recent_cutoff, head_commit.timestamp), (Commit.pullid.is_(None) | (Commit.pullid != pull.pullid)), Commit.deleted == False, ) From 32aaf8ea12b481cfcbdbf065fa15d44cfa3d23d2 Mon Sep 17 00:00:00 2001 From: Jerry Feng Date: Mon, 15 Sep 2025 14:28:08 -0400 Subject: [PATCH 3/3] revert --- apps/worker/tasks/sync_pull.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/apps/worker/tasks/sync_pull.py b/apps/worker/tasks/sync_pull.py index 749a54afc0..88ae3064ad 100644 --- a/apps/worker/tasks/sync_pull.py +++ b/apps/worker/tasks/sync_pull.py @@ -4,7 +4,7 @@ import time from collections import deque from collections.abc import Mapping, Sequence -from datetime import datetime, timedelta +from datetime import datetime from typing import Any import sentry_sdk @@ -607,14 +607,12 @@ def analyze_related_commits_for_context(self, db_session, pull, current_yaml): return # Look for recent commits on the same branch for context - recent_cutoff = head_commit.timestamp - timedelta(days=30) - related_commits = ( db_session.query(Commit) .filter( Commit.repoid == repoid, Commit.branch == head_commit.branch, - Commit.timestamp.between(recent_cutoff, head_commit.timestamp), + Commit.timestamp < head_commit.timestamp, (Commit.pullid.is_(None) | (Commit.pullid != pull.pullid)), Commit.deleted == False, )