-
Notifications
You must be signed in to change notification settings - Fork 20
/
minset.py
146 lines (112 loc) · 4.2 KB
/
minset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
"""
Tool to calculate the minimum set of files that have
approximatelly the best coverage.
The tool needs an input directory that contains the input
files which were used along with the trace files.
The name of the trace file must be <input_file>.log. This
way when we are processing the trace, we can refer to the
input file.
Example:
$ ls /tmp/InputFiles
-rw------- 1 anon staff 47K Jan 4 01:16 ffeb4d186925e9538707719e8374d217e1030943.log
-rw------- 1 anon staff 8.8K Jan 3 14:05 ffeb4d186925e9538707719e8374d217e1030943
$ python minset.py -i /tmp/InputFiles -o /tmp/OutputFiles
Size of the universe: 6257
Current covered=0 new=4053
Current covered=4053 new=581
...
Original trace count: 1484
Min set trace count : 48
This will copy the input files that cover the most blocks
from the input directory into the output directory.
"""
import os
import re
import sys
import struct
import shutil
import argparse
from collections import namedtuple
TraceFile = namedtuple("TraceFile", [
"path",
"entries"
])
BasicBlock = namedtuple("BasicBlock", [
"start",
"size",
"id"
])
def load_drcov_trace(filename):
"""
A drcov trace file consists of a preamble with ascii data up to
the start of the basic block table. The basic block table can be
located by searching for the ascii header:
'BB Table: <number> bbs'
Following the header, the table consists of N structures:
struct __attribute__((packed)) drcov_bb {
uint32_t start;
uint16_t size;
uint16_t id;
};
"""
trace = open(filename, "rb").read()
match = re.search(r"BB Table:\s+(\d+)\s+bbs\n", trace)
if not match:
return TraceFile(filename, set())
block_count = int(match.group(1))
# An empty trace is possible so bail before tryint to process.
if not block_count:
return TraceFile(filename, set())
# Size of a single basic block entry.
entry_size = 8
# Extract the basic block table.
table = trace[match.end():]
if len(table) != entry_size * block_count:
print "The size of the table does not match the count of basic blocks."
# Unpack the entries into a set to avoid repeats.
entries = set()
for i in range(0, block_count):
block_offset = i * entry_size
block_data = table[block_offset:block_offset+entry_size]
start, size, id_ = struct.unpack("<LHH", block_data)
entries.add(BasicBlock(start, size, id_))
return TraceFile(filename, entries)
parser = argparse.ArgumentParser(description="Coverage Minimum Set")
parser.add_argument("-i", dest="input_dir")
parser.add_argument("-o", dest="output_dir")
args = parser.parse_args()
if not args.input_dir:
print "No input directory supplied, use option -i to supply one."
sys.exit(-1)
if not args.output_dir:
print "No output directory supplied, use option -o to supply one."
sys.exit(-1)
output_dir = os.path.abspath(args.output_dir)
# Get all trace files in the traces directory.
input_dir = os.path.abspath(args.input_dir)
traces_paths = filter(lambda fn: fn.endswith(".log"), os.listdir(input_dir))
traces_paths = map(lambda fn: os.path.join(input_dir, fn), traces_paths)
# Load the actual traces.
traces = map(load_drcov_trace, traces_paths)
# Implementation of https://en.wikipedia.org/wiki/Set_cover_problem#Greedy_algorithm.
universe = map(lambda x: x.entries, traces)
universe = set.union(*universe)
# Set of covered blocks.
covered = set()
# List with the actual files that create the min set.
min_set = []
print "Size of the universe: %u" % len(universe)
# Iterate until the 'covered' set equals the 'universe'.
while covered != universe:
# Get the set that adds the most hits to the 'covered' set.
subset = max(traces, key=lambda x: len(x.entries - covered))
print "Current covered=%u new=%u" % (
len(covered), len(subset.entries - covered))
min_set.append(subset)
covered |= subset.entries
print "Original trace count: %u" % len(traces)
print "Min set trace count : %u" % len(min_set)
# Get the input file name from the trace file and copy it to the output directory.
for trace in min_set:
path = trace.path.replace(".log", "").replace("drcov.", "")
shutil.copy(path, output_dir)