-
Notifications
You must be signed in to change notification settings - Fork 42
/
DataLoader.py
131 lines (110 loc) · 4.99 KB
/
DataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
''' Imports '''
import numpy as np
import tensorflow as tf
import os
import argparse
from Var import Var
class DataLoader:
    '''Load gesture samples stored as .npz/.txt pairs and assemble training arrays.

    Files are looked up below the directory the process was started in:
    data/GestureData/<num_frames>/gestureData<i>.npz holds the features and
    data/Labels/<num_frames>/label<i>.txt holds the class name, with <i>
    counting up from 1.
    '''

    def __init__(self, num_frames, use_arm, m_score):
        '''Store the configuration and read the class/feature layout from Var.

        num_frames -- number that selects the data and label sub-folders
        use_arm    -- forwarded unchanged to Var
        m_score    -- when true, every non-score feature is multiplied by the
                      score feature (the last one) and the score feature
                      itself is left out of the loaded data
        '''
        self.num_frames = num_frames
        self.use_arm = use_arm
        self.m_score = m_score
        self.debug = False
        self.working_dir = os.getcwd() + "/"
        self.v = Var(use_arm)
        self.classes = self.v.get_classes()
        self.num_classes = self.v.get_num_classes()
        self.num_features = self.v.get_num_features()
        self.num_joints = self.v.get_size()

    def setDebug(self):
        '''Turn on the diagnostic prints emitted while loading.'''
        self.debug = True

    def npz_to_npy(self, fn, label_fn):
        '''Convert one loaded sample into per-feature arrays and one-hot labels.

        fn       -- mapping-like object (for example the result of np.load)
                    where fn['data'].item() is a dict keyed by feature number
                    and fn['isBadData'] flags the frames to discard
        label_fn -- path of the text file holding the class name

        Returns (new_data, new_labels). new_data maps every feature number to
        the rows that were kept; with m_score enabled the score entry stays
        empty. new_labels repeats the one-hot label once per kept row.
        '''
        data = fn['data'].item()
        det = fn['isBadData']
        score_idx = self.num_features - 1
        score = data[score_idx]
        data_size = data[0].shape[1]
        feature_ids = range(self.num_features)

        # Features may disagree on length; only the common prefix is usable.
        num_data = min(data[feature_num].shape[0] for feature_num in feature_ids)

        # Read the label; the context manager guarantees the file is closed.
        with open(label_fn, 'r') as label_file:
            raw_name = label_file.read()
        # Label files commonly end with a newline, which must not prevent a match.
        clean_name = raw_name.strip()

        # One-hot encode the class named in the label file.
        label = np.zeros(self.num_classes)
        for key, val in self.classes.items():
            if val == raw_name or val == clean_name:
                label[key] = 1

        # Indices of the frames that are not flagged as bad.
        keep = np.array([idx for idx in range(num_data) if not det[idx]], dtype=int)
        kept = len(keep)

        new_data = {}
        for feature_num in feature_ids:
            # The float64 seed fixes the result dtype and the expected row width.
            rows = np.zeros((0, data_size))
            if kept and not self.m_score:
                picked = data[feature_num][keep].reshape(kept, self.num_joints)
                rows = np.concatenate((rows, picked))
            elif kept and feature_num != score_idx:
                weights = score[keep].reshape(kept, self.num_joints)
                picked = data[feature_num][keep].reshape(kept, self.num_joints)
                rows = np.concatenate((rows, picked * weights))
            new_data[feature_num] = rows

        # Every kept frame of this sample shares the same label.
        new_labels = np.tile(label, (kept, 1))

        if self.debug:
            print("SHAPE OF INPUTS: ", new_data[0].shape)
        return new_data, new_labels

    def load_npz_data(self):
        '''Read every sample in the data folder and stack the results.

        The number of samples is the number of files in the label folder.

        Returns (info, labels): info maps feature number to the stacked rows
        of all samples (the score feature is omitted when m_score is set) and
        labels holds the matching one-hot rows.

        Raises Exception when the label folder cannot be listed.
        '''
        score_idx = self.num_features - 1

        # File name prefixes; the 1-based sample number and extension follow.
        data_path = self.working_dir + "data/GestureData/%d/gestureData" % self.num_frames
        label_path = self.working_dir + 'data/Labels/%d/label' % self.num_frames
        label_dir = self.working_dir + 'data/Labels/%d' % self.num_frames

        # os.walk yields nothing for a missing directory, hence the default.
        top_level = next(os.walk(label_dir), None)
        if top_level is None:
            raise Exception("your data cannot be found in %s. If you have data in this folder, the next function (for iterators) is not working properly." % data_path)
        if self.debug:
            print(list(os.walk(label_dir)))
        data_amount = len(top_level[2])

        # With m_score the trailing score feature is not part of the output.
        if self.m_score:
            kept_features = range(score_idx)
        else:
            kept_features = range(self.num_features)

        # Collect the pieces and stack once at the end instead of re-copying
        # the growing arrays for every file. The empty seeds keep the shape
        # and dtype defined when there is nothing to load.
        pieces = {}
        for feature_num in kept_features:
            pieces[feature_num] = [np.zeros((0, self.num_joints))]
        label_pieces = [np.zeros((0, self.num_classes))]

        for i in range(1, data_amount + 1):
            # NOTE: the 'data' entry is a pickled dict, so allow_pickle is
            # required; only load archives that come from a trusted source.
            with np.load(data_path + str(i) + '.npz', allow_pickle=True) as archive:
                datum, label = self.npz_to_npy(archive, label_path + str(i) + '.txt')
            for feature_num in kept_features:
                pieces[feature_num].append(datum[feature_num])
            label_pieces.append(label)

        info = {}
        for feature_num in kept_features:
            info[feature_num] = np.vstack(pieces[feature_num])
        labels = np.vstack(label_pieces)

        if self.debug:
            print("FULL INPUT SHAPE: ", labels.shape)
        return info, labels

    def load_all(self):
        '''Load all samples, join the features column-wise and shuffle the rows.

        Returns (combined, out): combined has one row per kept frame with the
        features laid side by side in ascending feature order; out holds the
        one-hot labels in the same (shuffled) row order.
        '''
        data, out = self.load_npz_data()

        # Sorting the keys makes the column order independent of dict ordering.
        combined = np.concatenate([data[key] for key in sorted(data)], axis=1)

        # Apply one permutation to both arrays so rows and labels stay aligned.
        p = np.random.permutation(combined.shape[0])
        combined = combined[p]
        out = out[p]

        if self.debug:
            print("IN SIZE: ", combined.shape)
            print("OUT SIZE: ", out.shape)
        return combined, out