-
Notifications
You must be signed in to change notification settings - Fork 0
/
seq_tagging_baseline.py
147 lines (132 loc) · 5.54 KB
/
seq_tagging_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import localized_narratives
import os
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import pickle as pkl
from scipy.spatial import ConvexHull
from matplotlib.path import Path
from PIL import Image
import itertools
import string
# https://github.com/google/localized-narratives/blob/master/demo.py
local_dir = os.getcwd()
data_loader = localized_narratives.DataLoader(local_dir)
data_loader.download_annotations('ade20k_val')
# Get object name lookup table
with open('/Volumes/gordonssd/ADE20K_2021_17_01/index_ade20k.pkl', 'rb') as f:
index_ade20k = pkl.load(f)
# Replace "-" with "other"
index_ade20k['objectnames'][0] = "other"
def get_documents(num_narratives):
loc_narrs = data_loader.load_annotations(
'ade20k_val', num_narratives) # Change number of narratives used
raw_word_documents = []
raw_labels_documents = []
for loc_narr in loc_narrs:
raw_labels_doc = ''
raw_word_documents.append(clean_text(loc_narr.caption))
image_id = loc_narr.image_id
path_to_image = f'/Volumes/gordonssd/ADE20K_2021_17_01/images/ADE/validation/*/*/{image_id}.jpg'
path_to_image_seg = f'/Volumes/gordonssd/ADE20K_2021_17_01/images/ADE/validation/*/*/{image_id}_seg.png'
path_to_image_json = f'/Volumes/gordonssd/ADE20K_2021_17_01/images/ADE/validation/*/*/{image_id}.json'
# Get image
for filename in glob.glob(path_to_image):
path_to_image = filename
image = mpimg.imread(path_to_image)
# Get image segmentation
for filename in glob.glob(path_to_image_seg):
path_to_image_seg = filename
image_seg = mpimg.imread(path_to_image_seg)
# Get image height, width
image_height = image.shape[0]
image_width = image.shape[1]
# Get Object Labels
with Image.open(path_to_image_seg) as io:
seg = np.array(io)
R = seg[:, :, 0]
G = seg[:, :, 1]
B = seg[:, :, 2]
ObjectClassMasks = (R/10).astype(np.int32)*256+(G.astype(np.int32))
# Concatenate all trace segments
traces = list(itertools.chain.from_iterable(loc_narr.traces))
# Align traces to word #
# Get list of word_trace_align objects
save_word = ''
for word in loc_narr.timed_caption:
w = save_word + word['utterance']
w = clean_text(w)
save_word = ''
# Check if start_time match end_time
if (word['start_time'] == word['end_time']):
# Make phrase with next word
save_word = word['utterance'] + ' '
continue
# Convert (start_time, end_time) to trace coordinates that fall within the time window
start_time = word['start_time']
end_time = word['end_time']
# Filter trace_seg for items with t value within start_time:end_time
points = list(filter(lambda coord: start_time <=
coord['t'] <= end_time, traces))
points = np.array([[point['x'], point['y']] for point in points])
try:
hull = ConvexHull(points)
except:
pass
# hull_to_name
# make a polygon from the hull verticies
points = hull.points
points = np.array([[point[0]*image_width, point[1]*image_height]
for point in points])
tupVerts = [(points[vtx, 0], points[vtx, 1])
for vtx in hull.vertices]
p = Path(tupVerts)
# make a canvas of coordinates corresponding to the image
x, y = np.meshgrid(np.arange(image_width), np.arange(image_height))
x, y = x.flatten(), y.flatten()
points = np.vstack((x, y)).T
# get points in image within hull
grid = p.contains_points(points)
# now you have a mask with points inside a polygon
mask = grid.reshape(image_height, image_width)
# get labels at those points
labels_within_hull = mask*ObjectClassMasks
labels = labels_within_hull[np.nonzero(labels_within_hull)]
# get count for unique labels
values, counts = np.unique(labels, return_counts=True)
# if empty set label to 0
# Edge case: some utterances the traces entirely out of the image
if len(counts) == 0:
tag = index_ade20k['objectnames'][0]
else:
# get most frequent label within hull
ind = np.argmax(counts)
label = values[ind]
# object name lookup with label
tag = index_ade20k['objectnames'][label - 1]
# one tag may have multiple words eg. person;individual;someone;somebody;mortal;soul
# taking only first
tag = tag.split()[0]
tag = clean_text(tag)
# Add labels to labels document
for i in range(len(w.split())):
raw_labels_doc += tag + ' '
raw_labels_documents.append(raw_labels_doc.strip())
return raw_word_documents, raw_labels_documents
##################
####HELPERS#######
##################
def clean_text(text):
return text.translate(str.maketrans('', '', string.punctuation)).lower()
# for printing with colors
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'