-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcode_churn.py
138 lines (116 loc) · 4.7 KB
/
code_churn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Calculates and analyzes code churn metrics for repository files.
Code churn represents the magnitude of changes in terms of line modifications.
"""
from statistics import mean
from typing import Dict, Optional, Tuple
from gitanalyzer import ModificationType
from gitanalyzer.metrics.process.base_metric import BaseProcessMetric
class ChangeVolume(BaseProcessMetric):
    """
    Analyzes the volume of changes (code churn) in repository files over time.

    Measures file modifications using one of two approaches:

    1. Net change volume: (additions - deletions)
    2. Total change volume: (additions + deletions)

    Provides metrics for:

    - Total change volume over time
    - Largest single-commit change
    - Average change volume per commit
    - Cumulative added/deleted line counts per file
    """

    def __init__(self, repository_path: str,
                 start_date=None,
                 end_date=None,
                 start_commit: Optional[str] = None,
                 end_commit: Optional[str] = None,
                 skip_new_files=False,
                 use_total_changes=False):
        """
        Initialize the change volume analyzer and collect all statistics.

        The repository/date/commit arguments are forwarded unchanged to the
        base metric initializer; the history is analyzed immediately, before
        this constructor returns.

        Args:
            repository_path: Path to git repository
            start_date: Starting date for analysis
            end_date: Ending date for analysis
            start_commit: Starting commit hash
            end_commit: Ending commit hash
            skip_new_files: Exclude file additions (ADD changes) from analysis
            use_total_changes: Use sum instead of difference of changes
        """
        super().__init__(
            repository_path,
            start_date=start_date,
            end_date=end_date,
            start_commit=start_commit,
            end_commit=end_commit
        )
        self.skip_new_files = skip_new_files
        self.use_total_changes = use_total_changes
        # path -> (total added lines, total deleted lines) over all commits
        self.line_changes: Dict[str, Tuple[int, int]] = {}
        # path -> list of per-commit change volumes
        self.file_changes: Dict[str, list] = {}
        self._collect_changes()

    def _collect_changes(self) -> None:
        """
        Analyze repository history and collect change statistics for each file.

        Populates ``self.line_changes`` with cumulative (additions, deletions)
        and ``self.file_changes`` with one change-volume entry per commit that
        touched the file.
        """
        # Maps a file's old path to the path its statistics are stored under.
        file_renames = {}

        # NOTE(review): self.repository is not set in this class; presumably
        # the base metric initializer provides it -- confirm.
        for commit in self.repository.traverse_commits():
            for changed_file in commit.modified_files:
                # NOTE(review): if new_path is None for some change types
                # (e.g. deletions), those entries are all keyed under None --
                # confirm whether that is intended.
                current_path = file_renames.get(
                    changed_file.new_path,
                    changed_file.new_path
                )

                # Track file renames so that subsequently visited commits that
                # reference the old path are recorded under the same key.
                if changed_file.change_type == ModificationType.RENAME:
                    file_renames[changed_file.old_path] = current_path

                # Skip new files if configured
                if (self.skip_new_files
                        and changed_file.change_type == ModificationType.ADD):
                    continue

                additions = changed_file.added_lines
                deletions = changed_file.deleted_lines

                # Accumulate raw line counts. Plain assignment here would keep
                # only the counts of the last visited commit for each file.
                prev_added, prev_deleted = self.line_changes.get(
                    current_path, (0, 0)
                )
                self.line_changes[current_path] = (
                    prev_added + additions,
                    prev_deleted + deletions,
                )

                # Calculate change volume based on configuration
                if self.use_total_changes:
                    change_volume = additions + deletions
                else:
                    change_volume = additions - deletions

                # Store change history
                self.file_changes.setdefault(current_path, []).append(
                    change_volume
                )

    def get_line_modifications(self) -> Dict[str, Tuple[int, int]]:
        """
        Get the raw line modification counts for each file.

        Returns:
            Dictionary mapping file paths to (additions, deletions) tuples,
            summed over every analyzed commit. The internal dictionary is
            returned directly, not a copy.
        """
        return self.line_changes

    def total_changes(self) -> Dict[str, int]:
        """
        Calculate total change volume for each file across all commits.

        Returns:
            Dictionary mapping file paths to total change volumes
        """
        return {
            path: sum(changes)
            for path, changes in self.file_changes.items()
        }

    def peak_change(self) -> Dict[str, int]:
        """
        Find the maximum change volume for each file in any single commit.

        Returns:
            Dictionary mapping file paths to maximum change volumes
        """
        return {
            path: max(changes)
            for path, changes in self.file_changes.items()
        }

    def average_change(self) -> Dict[str, int]:
        """
        Calculate the average change volume per commit for each file.

        Returns:
            Dictionary mapping file paths to average change volumes, rounded
            to an integer with the built-in ``round``
        """
        return {
            path: round(mean(changes))
            for path, changes in self.file_changes.items()
        }