-
Notifications
You must be signed in to change notification settings - Fork 1
/
github-data.sh
125 lines (85 loc) · 3.27 KB
/
github-data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env bash
# Script to grab key data on GitHub organizations, their repos,
# and their contributors and export it to a JSON file.
#
# Requires bash (arrays are used throughout), curl and jq.

# Add your 40-character GitHub authentication token here, or export it
# as GITHUB_TOKEN before running so it does not have to be saved in
# this file.
TOKEN=${GITHUB_TOKEN:-put-your-github-token-here}

# List all GitHub organizations you would like to collect data on
# (each name is queried through the /orgs/<name>/repos API endpoint);
# separate with spaces or line breaks, no commas
ORGS=(
gettypubs
thegetty
)
# Loop through the list of organizations, call the GitHub API for each,
# construct a new JSON object per repository with the needed information,
# and append it to a temporary file (REPOS.txt).
#
# Repositories are requested 100 at a time; pages are fetched until one
# comes back short or empty, so organizations with more than 100
# repositories are covered completely.

# Start from an empty file so leftovers from an interrupted earlier run
# are not mixed into this run's data.
: > REPOS.txt

for org in "${ORGS[@]}"
do
  page=1
  while :
  do
    response=$(curl -H "Authorization: token $TOKEN" "https://api.github.com/orgs/$org/repos?per_page=100&page=$page")

    # A successful reply is a JSON array. Anything else (an error object,
    # an empty or unparsable body) is reported as -1 and ends this org.
    count=$(printf '%s' "$response" | jq 'if type == "array" then length else -1 end' 2>/dev/null)
    case "${count:--1}" in
      -1)
        echo "Could not list repositories for $org (page $page)" >&2
        break
        ;;
      0)
        break
        ;;
    esac

    printf '%s' "$response" | jq '.[] | {
      name_full: .full_name,
      name: .name,
      url: .html_url,
      description: .description,
      type: (if .fork == true then "Fork" else "Original" end),
      date_created: (.created_at | fromdateiso8601 | strftime("%Y-%m-%d") ),
      date_pushed: (.pushed_at | fromdateiso8601 | strftime("%Y-%m-%d") ),
      size_kb: .size,
      size_span: (
        if (.size/1000) < 1 then "0-1 MB"
        elif (.size/1000) < 10 then "1-10 MB"
        elif (.size/1000) < 100 then "10-100 MB"
        elif (.size/1000) < 1000 then "100-1000 MB"
        else "1000+ MB" end),
      stars: .stargazers_count,
      language: .language,
      forks: .forks_count,
      license: (if .license.name then .license.name else .license end),
      owner_name: .owner.login,
      owner_url: .owner.html_url }' >> REPOS.txt

    # A page with fewer than 100 entries is the last one; skip the
    # extra request that would only return an empty array.
    if [ "$count" -lt 100 ]
    then
      break
    fi
    page=$((page + 1))
  done
done
# The sections below follow the same pattern:
#
# 1. Build an array of repository names from the
#    repository data gathered in the step above
#
# 2. Loop through that array, make a call to the GitHub API
#    for each repository, and append one JSON object per
#    repository to a temporary file

# Get contributor names and contribution counts
# for all museum repositories

# Read the names straight into the array rather than writing a shell
# snippet to disk and sourcing it: text that came from the API is never
# executed as code, and entries without a name are skipped instead of
# turning into a literal "null" element.
REPO_NAMES=()
while IFS= read -r repo_name
do
  REPO_NAMES+=("$repo_name")
done < <(jq -r '.name_full // empty' REPOS.txt)

# Start from an empty file: leftovers from an interrupted earlier run are
# dropped, and the file exists for the final merge even when nothing is
# appended below.
: > NAMES.txt

for repo_name in "${REPO_NAMES[@]}"
do
  # per_page=100 matches the repository listing above. Only this single
  # page is read, so contributor_count tops out at 100 per repository.
  # Replies that are not a JSON array (error objects) produce no record.
  curl -H "Authorization: token $TOKEN" "https://api.github.com/repos/$repo_name/contributors?per_page=100" | jq --arg repo "$repo_name" '
    if type == "array" then {
      name_full: $repo,
      contributor_count: (length),
      contributor: [.[] | {name: .login, contributions: .contributions}]}
    else empty end' >> NAMES.txt
done
# Get the parent information for any museum
# repository identified as a Fork

# Collect the fork names directly into the array instead of generating
# a shell snippet and sourcing it, so API-provided text is never run
# as code.
REPO_FORKS=()
while IFS= read -r fork_name
do
  REPO_FORKS+=("$fork_name")
done < <(jq -r 'select(.type == "Fork") | .name_full // empty' REPOS.txt)

# Always (re)create the file: the final merge reads FORKS.txt even when
# no repository is a fork, and stale content from an interrupted earlier
# run must not leak into this one.
: > FORKS.txt

for fork_name in "${REPO_FORKS[@]}"
do
  curl -H "Authorization: token $TOKEN" "https://api.github.com/repos/$fork_name" | jq --arg repo "$fork_name" '{
    name_full: $repo,
    parent: .parent.full_name}' >> FORKS.txt
done
# Combine the three JSON texts produced above (NAMES, REPOS and FORKS),
# merge the objects that share a repository name, and write the result
# to a JSON file with a timestamp in its name.

#######################################
# Merge the temporary files into GITHUB-DATA-<timestamp>.json, then
# delete the temporary files.
# Globals:   NOW (written) - timestamp used in the output file name
# Outputs:   success message on stdout, failure message on stderr
# Returns:   0 if the JSON file was written, 1 otherwise
#######################################
write_github_data() {
  local output part
  local inputs=()
  local status=0

  NOW=$(date +%F-%s)
  output="GITHUB-DATA-$NOW.json"

  # Hand jq only the files that actually exist, so a section that
  # produced no file does not make the whole merge report an error.
  for part in NAMES.txt REPOS.txt FORKS.txt
  do
    if [ -f "$part" ]
    then
      inputs+=("$part")
    fi
  done

  if [ "${#inputs[@]}" -eq 0 ]
  then
    echo "No repository data was collected; nothing to write." >&2
    status=1
  elif jq --slurp --sort-keys '[group_by(.name_full) | .[] | add ]' "${inputs[@]}" > "$output"
  then
    # Announce the file only once jq has really produced it.
    echo "Your JSON file is ready! --> GITHUB-DATA-$NOW.json"
  else
    echo "jq failed; $output was not written." >&2
    # Do not leave a truncated or empty result behind.
    rm -f "$output"
    status=1
  fi

  # Remove all temporary files
  rm -f REPOS.txt REPO_NAMES.txt NAMES.txt REPO_FORKS.txt FORKS.txt

  return "$status"
}

# Last command of the script, so its status becomes the exit status.
write_github_data