You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
528 lines
16 KiB
528 lines
16 KiB
"""Diffs one repo source tree an upstream repo source tree.
|
|
|
|
Matches the projects from a Gerrit repo workspace to the projects
|
|
of an upstream workspace. After identifying projects that exist both in the
|
|
downstream and the upstream workspace it then diffs each project.
|
|
|
|
Finally, the results of the project matching and diffing are reported.
|
|
|
|
"""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
import argparse
|
|
import csv
|
|
import datetime
|
|
import multiprocessing
|
|
import multiprocessing.pool
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import xml.etree.ElementTree as et
|
|
import git_commits_not_upstreamed
|
|
|
|
|
|
def get_projects(source_tree):
    """Collect the checked-out projects listed in a repo manifest.

    Parses ``<source_tree>/.repo/manifest.xml`` and inspects every
    ``project`` element directly under the manifest root.

    Args:
      source_tree: A path to the source tree.

    Returns:
      A dict mapping each project name to the absolute path of its
      checkout. Projects whose ``groups`` attribute mentions 'notdefault'
      and projects whose path does not exist on disk are left out.
    """
    manifest_root = et.parse(source_tree + '/.repo/manifest.xml').getroot()

    found = {}
    for element in manifest_root.findall('project'):
        # Ignore projects that are not synced by default.
        if 'notdefault' in element.get('groups', ''):
            continue

        project_name = element.get('name')
        # Without an explicit 'path' attribute the project name doubles as
        # the checkout location relative to the source tree.
        relative_path = element.get('path', project_name)
        checkout = os.path.abspath(os.path.join(source_tree, relative_path))

        # Only report projects whose files actually exist.
        if os.path.exists(checkout):
            found[project_name] = checkout

    return found
|
|
|
|
|
|
def git(args):
    """Run a git command and return its standard output as text.

    Args:
      args: A list of arguments to be sent to the git command.

    Returns:
      The standard output of the git command as a text string.

    Raises:
      subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ['git'] + list(args)
    # Anything git writes to stderr is discarded.
    with open(os.devnull, 'w') as devnull:
        # universal_newlines=True makes check_output return text rather than
        # bytes on Python 3, so callers can split the output and write it to
        # CSV files; on Python 2 the result is still a plain str.
        return subprocess.check_output(
            command, stderr=devnull, universal_newlines=True)
|
|
|
|
|
|
def get_revision_diff_stats(directory, rev_a, rev_b):
    """Summarize the size of the diff between two git revisions.

    Runs ``git diff --shortstat`` in the given directory and reads the
    counts out of its comma-separated output.

    Args:
      directory: A path to the git directory to diff.
      rev_a: A git revision to diff.
      rev_b: A git revision to diff.

    Returns:
      A dict with the keys 'file', 'insertion' and 'deletion' holding the
      count of files modified, lines added and lines removed. A count
      stays 0 when the output has no segment mentioning its key.
    """
    counts = dict.fromkeys(('file', 'insertion', 'deletion'), 0)

    shortstat = git(['-C', directory, 'diff', '--shortstat', rev_a, rev_b])

    for segment in shortstat.split(','):
        # A segment is attributed to every key whose name it contains; the
        # count is the first whitespace-separated token of the segment.
        labels = [label for label in counts if label in segment]
        if labels:
            amount = int(segment.split()[0])
            for label in labels:
                counts[label] = amount

    return counts
|
|
|
|
|
|
def get_project_stats(upstream_dir, downstream_dir):
    """Measure how far a downstream project has drifted from upstream.

    Fetches the upstream directory into the downstream git project and
    diffs FETCH_HEAD against HEAD there.
    Lines that exist only in the downstream directory are considered
    insertions.
    Lines that exist only in the upstream directory are considered
    deletions.

    Args:
      upstream_dir: A path to the upstream directory to compare.
      downstream_dir: A path to the downstream directory to compare.

    Returns:
      A dict with the count of files modified, lines added
      and lines removed. All counts are zero when either directory is
      missing (None or empty), in which case git is not run.
    """
    # Nothing to compare unless both sides of the pair are present.
    if not (upstream_dir and downstream_dir):
        return {'file': 0, 'insertion': 0, 'deletion': 0}

    print('Diffing %s vs %s' % (downstream_dir, upstream_dir))
    # The fetch leaves the upstream tip in FETCH_HEAD of the downstream repo.
    git(['-C', downstream_dir, 'fetch', '--update-shallow', upstream_dir])
    return get_revision_diff_stats(downstream_dir, 'FETCH_HEAD', 'HEAD')
|
|
|
|
|
|
def match_project_by_root_commits(
        downstream_project_name, downstream_project_path,
        upstream_root_commits):
    """Pair a downstream project with an upstream one via shared history.

    Looks up each root commit of the downstream project in the upstream
    root-commit index. Only the first root commit found in the index is
    considered; later root commits are not examined.

    Args:
      downstream_project_name: A string with the downstream project name.
      downstream_project_path: A string with the downstream project path.
      upstream_root_commits: A dict of root commits and their upstream
        projects (each value is a list of dicts with a 'name' key).

    Returns:
      A string with the matched upstream project name, or None when no
      root commit is shared or the shared root commit belongs to more
      than one upstream project.
    """
    for commit in find_root_commits_in_path(downstream_project_path):
        if commit not in upstream_root_commits:
            continue

        # Once there's a root commit match, stop looking for a project match.
        candidates = upstream_root_commits[commit]
        if len(candidates) > 1:
            # Ambiguous: report the candidates and leave it unmatched.
            print('Warning: ' + downstream_project_name +
                  ' matches multiple projects')
            print(candidates)
            return None
        return candidates[0]['name']

    return None
|
|
|
|
|
|
def match_projects(upstream_projects, downstream_projects):
    """Match downstream projects to upstream projects.

    Each downstream project is matched by name first and, failing that,
    by a root commit it shares with an upstream project.

    Args:
      upstream_projects: A dict of upstream project paths keyed by name.
      downstream_projects: A dict of downstream project paths keyed by name.

    Returns:
      A list of dicts with the keys 'upstream' and 'downstream' holding
      project names. 'upstream' is None for downstream projects without a
      match; 'downstream' is None for upstream projects nothing matched.
    """
    project_matches = []

    # Keep track of upstream projects that have not been matched.
    unmatched_upstream_projects = set(upstream_projects)

    upstream_root_commits = find_root_commits_in_projects(upstream_projects)

    # Match all downstream projects to an upstream project.
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for downstream_name, downstream_path in downstream_projects.items():
        if downstream_name in upstream_projects:
            # First try to match projects by name.
            upstream_match = downstream_name
        else:
            # If there is no project name match then try matching by commit.
            upstream_match = match_project_by_root_commits(
                downstream_name, downstream_path, upstream_root_commits)

        project_matches.append({
            'upstream': upstream_match,
            'downstream': downstream_name,
        })
        unmatched_upstream_projects.discard(upstream_match)

    # Add all upstream projects that have not been matched, sorted so the
    # report order does not depend on set iteration order.
    for project in sorted(unmatched_upstream_projects):
        project_matches.append({
            'upstream': project,
            'downstream': None,
        })

    return project_matches
|
|
|
|
|
|
def filter_exclusion_list(projects, exclusion_file):
    """Removes all projects that match the exclusion patterns.

    Every non-blank line of the exclusion file is a regular expression.
    A project is excluded when any of them matches at the start of its
    name.

    Args:
      projects: A dict of project paths keyed by project names.
      exclusion_file: A path to the exclusion file, or an empty value to
        skip filtering.

    Returns:
      A dict of the projects that were not excluded. When there are no
      patterns the input dict itself is returned.
    """
    patterns = []
    if exclusion_file:
        with open(exclusion_file) as f:
            patterns = [line.strip() for line in f]
        # A blank line would add an empty alternative to the combined
        # regex, which matches every name and would exclude all projects.
        patterns = [pattern for pattern in patterns if pattern]

    if not patterns:
        return projects

    exclusion_regex = re.compile('|'.join(patterns))

    filtered = {}
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for name, path in projects.items():
        if exclusion_regex.match(name):
            print('Excluding ' + name)
        else:
            filtered[name] = path

    return filtered
|
|
|
|
|
|
def get_all_projects_stats(upstream_source_tree,
                           downstream_source_tree,
                           exclusion_file):
    """Finds the stats of all projects in a source tree.

    Args:
      upstream_source_tree: A string with the path to the upstream gerrit
        source tree.
      downstream_source_tree: A string with the path to the downstream
        gerrit source tree.
      exclusion_file: A string with the path to the exclusion file.

    Returns:
      A list of dicts of matching upstream and downstream projects
      including stats for projects that match.
    """
    upstream_projects = get_projects_with_filter(
        upstream_source_tree, exclusion_file)
    downstream_projects = get_projects_with_filter(
        downstream_source_tree, exclusion_file)
    matches = match_projects(upstream_projects, downstream_projects)

    # One worker thread per CPU; each worker diffs one matched pair.
    pool = multiprocessing.pool.ThreadPool(
        processes=multiprocessing.cpu_count())
    try:
        return pool.map(
            lambda match: stats_from_match(
                upstream_projects,
                downstream_projects,
                match,
            ),
            matches,
        )
    finally:
        # Shut the workers down so the threads do not outlive this call.
        pool.close()
        pool.join()
|
|
|
|
|
|
def stats_from_match(upstream_projects, downstream_projects, match):
    """Builds the report entry for one upstream/downstream project pair.

    Args:
      upstream_projects: dict of upstream project paths, looked up by the
        match's 'upstream' name
      downstream_projects: dict of downstream project paths, looked up by
        the match's 'downstream' name
      match: a single match dict obtained from match_projects

    Returns:
      A dict holding the diff stats for this match plus the keys
      'status', 'downstream_path', and every key of the match itself.
    """
    upstream_name = match['upstream']
    downstream_name = match['downstream']
    downstream_path = downstream_projects.get(downstream_name)

    entry = get_project_stats(
        upstream_projects.get(upstream_name),
        downstream_path,
    )

    # Classify the pair; the first rule that applies wins.
    if not upstream_name:
        status = 'Downstream Only Projects'
    elif not downstream_name:
        status = 'Upstream Only Projects'
    elif entry['file'] == 0:
        status = 'Intact Projects'
    elif upstream_name == downstream_name:
        status = 'Modified Projects'
    else:
        status = 'Forked Projects'

    entry['status'] = status
    entry['downstream_path'] = downstream_path
    # Applied last, so the match's own keys take precedence on a clash.
    entry.update(match)
    return entry
|
|
|
|
|
|
def get_projects_with_filter(source_tree, exclusion_file):
    """Lists the projects of a source tree, minus the excluded ones.

    Args:
      source_tree: A path to the source tree.
      exclusion_file: A path to the exclusion file.

    Returns:
      The result of filter_exclusion_list applied to the projects that
      get_projects finds in the source tree.
    """
    all_projects = get_projects(source_tree)
    return filter_exclusion_list(all_projects, exclusion_file)
|
|
|
|
|
|
def find_root_commits_in_path(path):
    """Lists the parentless commits reachable from HEAD in a git project.

    Args:
      path: A path to the git project.

    Returns:
      A list with one entry per line of ``git rev-list --max-parents=0
      HEAD`` output.
    """
    print('Analyzing history of ' + path)
    rev_list_args = ['-C', path, 'rev-list', '--max-parents=0', 'HEAD']
    return git(rev_list_args).splitlines()
|
|
|
|
|
|
def find_root_commits_in_projects(projects):
    """Returns a dict of root commits with all projects with that root commit.

    Args:
      projects: A dict of project paths keyed by project names.

    Returns:
      A dict keyed by root commit. Each value is a list of dicts with the
      keys 'name' and 'path', one per project containing that root commit.
    """
    root_commits = {}
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for name, path in projects.items():
        for root in find_root_commits_in_path(path):
            root_commits.setdefault(root, []).append({
                'name': name,
                'path': path,
            })
    return root_commits
|
|
|
|
|
|
def get_commit_stats_in_project(project):
    """Describe the commits of one project that have not been upstreamed.

    The commits come from git_commits_not_upstreamed.find, called with
    'FETCH_HEAD', 'HEAD' and the project's downstream path; it presumably
    yields commit ids (not visible from here, confirm against that
    module).

    Args:
      project: A dict with the keys 'name' and 'downstream_path'.

    Returns:
      A dict with the project 'name' and a 'stats' list holding one dict
      per commit with the keys 'commit', 'author' and 'subject'.
    """
    name = project['name']
    path = project['downstream_path']

    def show_field(commit, placeholder):
        # Runs 'git show' for a single format placeholder of one commit.
        output = git(['-C', path, 'show', '--no-patch',
                      '--format=' + placeholder, commit])
        return output.strip()

    print('Finding commits not upstreamed in ' + name)
    commits = git_commits_not_upstreamed.find('FETCH_HEAD', 'HEAD', path)
    print('Found commits not upstreamed in ' + name)

    stats = [
        {
            'commit': commit,
            'author': show_field(commit, '%ae'),
            'subject': show_field(commit, '%s'),
        }
        for commit in commits
    ]

    return {
        'name': name,
        'stats': stats,
    }
|
|
|
|
|
|
def get_all_commits_stats(project_stats):
    """Extract commits that have not been upstreamed in all projects.

    Only projects whose status starts with 'Modified' are analyzed.

    Args:
      project_stats: A list of dicts of matching upstream and downstream
        projects including stats for projects that match. The dicts of
        the analyzed projects gain a 'name' key as a side effect.

    Returns:
      A dict keyed by downstream project name whose values are the lists
      of commits not upstreamed.
    """
    downstream_stats = {match['downstream']: match for match in project_stats}

    # Only analyze modified projects.
    modified_projects = []
    # items() rather than iteritems() so this runs on Python 2 and 3.
    for name, stats in downstream_stats.items():
        if stats['status'].startswith('Modified'):
            stats['name'] = name
            modified_projects.append(stats)

    # Avoid spawning worker processes when there is nothing to analyze.
    if not modified_projects:
        return {}

    pool = multiprocessing.Pool()
    try:
        results = pool.map(get_commit_stats_in_project, modified_projects)
    finally:
        # Release the worker processes instead of leaking them.
        pool.close()
        pool.join()

    return {result['name']: result['stats'] for result in results}
|
|
|
|
|
|
def write_commit_csv(commit_stats, commit_output_file):
    """Write the commits that have not been upstreamed to a CSV file.

    One row is written per commit, stamped with today's date. Projects
    are written in sorted order so repeated runs produce comparable files.

    Args:
      commit_stats: The dict of the stats for all commits, keyed by
        downstream project name. Each value is a list of dicts with the
        keys 'commit', 'author' and 'subject'.
      commit_output_file: Path to the output file.
    """
    fieldnames = [
        'Date',
        'Commit',
        'Downstream Project',
        'Author',
        'Subject',
    ]
    today = datetime.datetime.today().strftime('%Y/%m/%d')
    with open(commit_output_file, 'w') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # items() rather than iteritems() so this runs on Python 2 and 3.
        for project, stats in sorted(commit_stats.items()):
            for stat in stats:
                writer.writerow({
                    'Date': today,
                    'Commit': stat['commit'],
                    'Downstream Project': project,
                    'Author': stat['author'],
                    'Subject': stat['subject'],
                })
    print('Wrote commit stats to ' + commit_output_file)
|
|
|
|
|
|
def write_project_csv(project_stats, commit_stats, project_output_file):
    """Dump the per-project comparison results into a CSV file.

    One row is written per entry of project_stats, stamped with today's
    date.

    Args:
      project_stats: The list of stats dicts for all projects; each one
        is read for the keys 'downstream', 'upstream', 'status', 'file',
        'insertion' and 'deletion'.
      commit_stats: The dict of the stats for all commits, keyed by
        downstream project name.
      project_output_file: Path to the output file.
    """
    columns = [
        'Date',
        'Downstream Project',
        'Upstream Project',
        'Diff Status',
        'Files Changed',
        'Line Insertions',
        'Line Deletions',
        'Line Changes',
        'Commits Not Upstreamed',
    ]
    with open(project_output_file, 'w') as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()
        date_stamp = datetime.datetime.today().strftime('%Y/%m/%d')
        for entry in project_stats:
            downstream = entry['downstream']
            # Projects without analyzed commits are reported as zero.
            if downstream in commit_stats:
                pending = len(commit_stats[downstream])
            else:
                pending = 0
            row = {
                'Date': date_stamp,
                'Downstream Project': downstream,
                'Upstream Project': entry['upstream'],
                'Diff Status': entry['status'],
                'Files Changed': entry['file'],
                'Line Insertions': entry['insertion'],
                'Line Deletions': entry['deletion'],
                'Line Changes': entry['insertion'] + entry['deletion'],
                'Commits Not Upstreamed': pending,
            }
            writer.writerow(row)
    print('Wrote project stats to ' + project_output_file)
|
|
|
|
|
|
def diff(upstream_source_tree, downstream_source_tree, project_output_file,
         commit_output_file, exclusions_file):
    """Compare two repo source trees and write both CSV reports.

    Gathers the per-project stats first, derives the per-commit stats
    from them, then writes the commit report followed by the project
    report.

    Args:
      upstream_source_tree: A string with the path to a gerrit source tree.
      downstream_source_tree: A string with the path to a gerrit source
        tree.
      project_output_file: Path to the project output file.
      commit_output_file: Path to the commit output file.
      exclusions_file: Path to exclusions file.
    """
    per_project = get_all_projects_stats(
        upstream_source_tree, downstream_source_tree, exclusions_file)
    per_commit = get_all_commits_stats(per_project)

    write_commit_csv(per_commit, commit_output_file)
    write_project_csv(per_project, per_commit, project_output_file)
|
|
|
|
|
|
def main():
    """Parse the command line and diff the two source trees.

    All paths given on the command line are converted to absolute paths
    before being handed to diff(). The exclusions file is optional and is
    passed on as an empty string when it is not given.
    """
    parser = argparse.ArgumentParser(
        description='Diff a repo source tree against an upstream source tree.')
    parser.add_argument('upstream_path', help='Path to an upstream source tree.')
    parser.add_argument(
        'downstream_path', help='Path to a downstream source tree.')
    parser.add_argument(
        '-p',
        '--project_output_file',
        help='Path to write the project output file',
        default='project.csv',)
    parser.add_argument(
        '-c',
        '--commit_output_file',
        help='Path to write the commit output file',
        default='commit.csv',)
    parser.add_argument(
        '-e',
        '--exclusions_file',
        # Adjacent string literals are concatenated verbatim, so each piece
        # must carry its own trailing space.
        help='Path to file with a list of project names to be excluded from '
        'the diff. You may use a regular expression to match project names as '
        'described in https://docs.python.org/2/howto/regex.html',
        default='',
    )
    args = parser.parse_args()

    upstream_source_tree = os.path.abspath(args.upstream_path)
    downstream_source_tree = os.path.abspath(args.downstream_path)
    project_output_file = os.path.abspath(args.project_output_file)
    commit_output_file = os.path.abspath(args.commit_output_file)
    # Leave the exclusions path empty when it is not given, so no filtering
    # is requested; abspath('') would turn it into the current directory.
    exclusions_file = ''
    if args.exclusions_file:
        exclusions_file = os.path.abspath(args.exclusions_file)

    diff(upstream_source_tree, downstream_source_tree, project_output_file,
         commit_output_file, exclusions_file)


# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
|