-
Notifications
You must be signed in to change notification settings - Fork 1
/
retrieve_apache_foundation_issues.py
140 lines (119 loc) · 6.05 KB
/
retrieve_apache_foundation_issues.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 9 19:27:41 2018
@author: nkanak
"""
import csv
import json
import logging
from jira import JIRA
# Number of issues requested per search call; handed to DataRetriever as
# block_size, which forwards it to the JIRA search as maxResults.
BLOCK_SIZE = 500
# Upper bound on the number of search iterations performed per project.
# Set this variable to None when the retrieval of all the issues is needed.
MAX_NUMBER_OF_ITERATIONS = None
def read_project_names_from_csv_file(filename='projects.csv'):
    """Return the values of the ``project_name`` column of a CSV file.

    The first row of the file is treated as the header row.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one ``project_name`` value per data row, in file order.

    Raises:
        KeyError: If a data row is reached and the header has no
            ``project_name`` column.
    """
    # newline='' is what the csv module asks for: it lets the reader do its
    # own line splitting, so newlines embedded in quoted fields stay intact.
    with open(filename, newline='') as f:
        return [row['project_name'] for row in csv.DictReader(f)]
# NOTE: evaluated when the module is loaded -- reads the function's default
# file ('projects.csv'), so that file must be resolvable at import time.
PROJECT_NAMES = read_project_names_from_csv_file()
# Configure the root logger at DEBUG level; the message part of every record
# is wrapped in ANSI escape sequences (\033[35m ... \033[0m) to colour it.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s \033[35m%(message)s\033[0m', datefmt='[%d/%b/%Y %H:%M:%S]')
class DataReader(object):
    """Load issues back from the JSON files produced by this script."""

    def read_issues_from_json_file(self, filename, keep_only_issues_with_assignee=False):
        """Return the list stored under the ``issues`` key of a JSON file.

        Args:
            filename: Path of the JSON file to load.
            keep_only_issues_with_assignee: When exactly ``True``, issues
                whose ``fields`` have no (or a null) ``assignee`` are dropped.

        Returns:
            The list of issue dictionaries, optionally filtered.
        """
        with open(filename) as json_file:
            payload = json.load(json_file)
        loaded_issues = payload['issues']

        # Guard clause: without the flag the list is returned untouched.
        if keep_only_issues_with_assignee is not True:
            return loaded_issues

        assigned_issues = []
        for entry in loaded_issues:
            if entry['fields'].get('assignee') is not None:
                assigned_issues.append(entry)
        return assigned_issues
class DataRetriever(object):
    """Fetch project details and issues through a JIRA client object."""

    def __init__(self, jira_client, block_size=1000, max_number_of_iterations=None):
        """Store the client and the paging configuration.

        Args:
            jira_client: Object exposing ``project(name)`` and
                ``search_issues(jql, ...)`` (here: a ``jira.JIRA`` instance).
            block_size: Number of issues requested per search call.
            max_number_of_iterations: Maximum number of search calls per
                project, or ``None`` to keep paging until a page is empty.
        """
        self.__jira_client = jira_client
        self.__block_size = block_size
        self.__max_number_of_iterations = max_number_of_iterations

    def retrieve_project_data(self, project_name):
        """Return the ``raw`` attribute of the project looked up by name."""
        logging.info('Retrieve project data for %s', project_name)
        # Get X project details.
        project = self.__jira_client.project(project_name)
        return project.raw

    def retrieve_issues(self, project_name):
        """Page through the issues of a project and return them as a list.

        Each search requests the summary, assignee and description fields
        with the changelog expanded, as raw JSON. Paging stops at the first
        empty page or once ``max_number_of_iterations`` searches were made.

        Args:
            project_name: Value interpolated into the ``project=...`` query.

        Returns:
            A list with the issue dictionaries of all retrieved pages.
        """
        logging.info('Retrieve issues for %s project', project_name)
        logging.info('Retrieval block size: %s, Max number of iterations: %s',
                     self.__block_size, self.__max_number_of_iterations)
        issues = []
        start_idx = 0
        number_of_iterations = 0
        while True:
            number_of_iterations += 1
            if (self.__max_number_of_iterations is not None
                    and number_of_iterations > self.__max_number_of_iterations):
                break
            logging.debug('%s iteration out of %s',
                          number_of_iterations, self.__max_number_of_iterations)
            # Get X project issues.
            retrieved_issues = self.__jira_client.search_issues(
                'project=%s' % (project_name),
                startAt=start_idx,
                maxResults=self.__block_size,
                json_result=True,
                expand='changelog',
                fields='summary,assignee,description',
            )
            page = retrieved_issues['issues']
            if not page:
                break
            issues += page
            # Advance by what was actually returned, not by block_size: if a
            # page comes back shorter than requested, a fixed stride would
            # silently skip the issues in between.
            start_idx += len(page)
        logging.debug('Retrieved total %s unique issues of project %s',
                      len({issue['key'] for issue in issues}), project_name)
        return issues
class DataWriter(object):
    """Write project details and issues to JSON or CSV files."""

    def __init__(self, indent=2):
        """Store the indentation passed to ``json.dump`` by the JSON writers.

        Args:
            indent: ``indent`` argument used for every JSON file written.
        """
        self.__indent = indent

    def save_project_data_to_json(self, filename, project_data):
        """Write ``project_data`` to ``filename`` as ``{"data": ...}``."""
        logging.info('Write project information to file: %s', filename)
        # Save X project details.
        with open(filename, 'w') as f:
            json.dump({'data': project_data}, f, indent=self.__indent)

    def save_issues_to_json(self, filename, issues):
        """Write ``issues`` to ``filename`` as ``{"issues": [...]}``."""
        logging.info('Write project issues to file: %s', filename)
        # Save X project issues to JSON file.
        with open(filename, 'w') as f:
            json.dump({'issues': issues}, f, indent=self.__indent)

    def save_issues_to_json_minified(self, filename, issues):
        """Write a reduced form of ``issues`` to ``filename``.

        Only the issue key, summary, description and the assignee's key are
        kept. Every issue is assumed to have an assignee; an issue whose
        ``assignee`` field is ``None`` makes the assignee key lookup fail.
        """
        logging.info('Write project issues to file (minified): %s', filename)
        # Assuming that the issue has an assignee.
        issues = [{
            'key': issue['key'],
            'fields': {
                'summary': issue['fields']['summary'],
                'description': issue['fields']['description'],
                'assignee': {
                    'key': issue['fields']['assignee']['key']
                }
            }
        } for issue in issues]
        # Save X project issues to JSON file.
        with open(filename, 'w') as f:
            json.dump({'issues': issues}, f, indent=self.__indent)

    def save_issues_to_csv(self, filename, issues, keep_only_issues_with_assignee=False):
        """Generate a CSV file for descriptive statistics.

        Columns are ``key``, ``summary``, ``description`` and ``assignee``.
        A ``None`` description is written as an empty string and a missing
        assignee as ``N/A``.

        Args:
            filename: Path of the CSV file to (over)write.
            issues: Iterable of issue dictionaries with ``key`` and ``fields``.
            keep_only_issues_with_assignee: When exactly ``True``, issues
                without an assignee are left out of the file.
        """
        logging.info('Write project issues to csv file: %s', filename)
        logging.debug('Keep only issues with assignee: %s', keep_only_issues_with_assignee)
        fieldnames = ['key', 'summary', 'description', 'assignee']
        # newline='' stops the text layer from rewriting the csv module's own
        # '\r\n' terminators (which yields '\r\r\n' on Windows); an explicit
        # UTF-8 encoding keeps non-ASCII issue text from failing under
        # locale-dependent default encodings.
        with open(filename, 'w', newline='', encoding='utf-8') as f:
            csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
            csv_writer.writeheader()
            for issue in issues:
                fields = issue['fields']
                assignee = fields.get('assignee')
                if keep_only_issues_with_assignee is True and assignee is None:
                    continue
                description = fields['description']
                csv_writer.writerow({
                    'key': issue['key'],
                    'summary': fields['summary'],
                    # The original tested the constant `not None` (always
                    # true); test the description itself instead.
                    'description': description if description is not None else '',
                    'assignee': assignee['key'] if assignee is not None else 'N/A',
                })
if __name__ == '__main__':
    # Script entry point: for every configured project, download its details
    # and issues from the Apache JIRA and store them under data/, then write
    # the combined issues of all projects.
    import os

    # open(..., 'w') does not create missing directories; create the output
    # directory up front so the run cannot fail only after the retrieval.
    os.makedirs('data', exist_ok=True)

    jira = JIRA('https://issues.apache.org/jira')
    all_issues = []
    writer = DataWriter()
    retriever = DataRetriever(jira_client=jira, block_size=BLOCK_SIZE, max_number_of_iterations=MAX_NUMBER_OF_ITERATIONS)
    for project_name in PROJECT_NAMES:
        # Per-project details.
        project_data = retriever.retrieve_project_data(project_name)
        writer.save_project_data_to_json('data/project_data_%s.json' % (project_name), project_data)

        # Per-project issues, also accumulated for the combined output below.
        issues = retriever.retrieve_issues(project_name)
        all_issues += issues
        writer.save_issues_to_json('data/issues_%s.json' % (project_name), issues)
        writer.save_issues_to_csv('data/issues_%s.csv' % (project_name), issues, keep_only_issues_with_assignee=False)

    # Combined output across all projects.
    writer.save_issues_to_json('data/all_issues.json', all_issues)
    writer.save_issues_to_csv('data/all_issues.csv', all_issues, keep_only_issues_with_assignee=False)