This repository has been archived by the owner on Jul 25, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
example.py
186 lines (144 loc) · 6.91 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import tensorrt as trt
from tensorrt.parsers import uffparser
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from random import randint # generate a random test case
import time #import system tools
import os
import uff
import argparse
# Create the directory for TensorBoard logs and the model-checkpoint directory.
log_dir = "./logs"
save_dir = "./saves"


def check_dir(path):
    """Create *path* (including missing parents) if it does not already exist.

    Uses ``os.makedirs(exist_ok=True)`` instead of the original
    ``exists()`` check followed by ``mkdir()``, which was race-prone and
    failed when a parent directory was missing.
    """
    os.makedirs(path, exist_ok=True)


check_dir(log_dir)
check_dir(save_dir)
# --- Training hyper-parameters ---
MAX_ITERATION = 19000  # total number of optimization steps
BATCH_SIZE = 125       # examples per mini-batch

# --- TensorRT engine build options ---
MAX_BATCH_SIZE = 1            # largest batch the engine must support
MAX_WORKSPACE_SIZE = 1 << 20  # scratch workspace for the builder (1 MiB)
class model():
    """LeNet-style MNIST classifier.

    Builds the graph in ``__init__``, trains with ``fit``, converts the
    frozen graph to a TensorRT engine with ``mk_TensorRT_engine``, and runs
    a single-sample GPU inference with ``inference``.
    """

    def __init__(self):
        self.data_set = input_data.read_data_sets('MNIST_data', one_hot=True)
        # Test with LeNet.
        self.inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name="Placeholder")
        self.labels = tf.placeholder(tf.float32, [None, 10])
        # Dropout switch: defaults to False (inference); fed True during fit().
        # BUG FIX: the original referenced an undefined name `training`.
        self.training = tf.placeholder_with_default(False, shape=(), name="training")
        h_ = tf.layers.conv2d(self.inputs, filters=32, kernel_size=(5, 5), padding="SAME", name="conv_0")
        h_ = tf.nn.relu(h_)
        h_ = tf.layers.max_pooling2d(h_, pool_size=(2, 2), strides=(2, 2))
        h_ = tf.layers.conv2d(h_, filters=64, kernel_size=(5, 5), padding="SAME", name="conv_1")
        h_ = tf.nn.relu(h_)
        h_ = tf.layers.max_pooling2d(h_, pool_size=(2, 2), strides=(2, 2))
        # BUG FIX: the original chained undefined names (conv_layers, flatten,
        # dense0, dropout) — thread h_ through every layer instead.
        h_ = tf.reshape(h_, (-1, 7 * 7 * 64))
        h_ = tf.layers.dense(h_, 512, tf.nn.relu, name='dense_0')
        h_ = tf.layers.dropout(h_, rate=0.65, training=self.training, name='dropout_0')
        logits = tf.layers.dense(h_, 10, name='dense_1')
        # Wrap the output in a name_scope so the node is easy to identify.
        with tf.name_scope("inference"):
            # BUG FIX (x2): name the op "softmax" explicitly so the node is
            # "inference/softmax", matching the name registered with the UFF
            # parser below, and store it as self.outputs — the original
            # assigned it to self.inference, shadowing the inference() method.
            self.outputs = tf.nn.softmax(logits, name="softmax")
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.labels))
        with tf.name_scope("summary"):
            tf.summary.scalar("loss", self.loss)
        self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.0001).minimize(self.loss)

    def fit(self):
        """Train the network for MAX_ITERATION steps, saving checkpoints."""
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.log_device_placement = True
        with tf.Session(config=config) as sess:
            # Initialize the variables.
            tf.global_variables_initializer().run()
            # Saver for the model weights.
            saver = tf.train.Saver(tf.global_variables())
            # File writer for TensorBoard.
            # BUG FIX: keyword is `logdir` (was `logidir=logdir`, an undefined
            # name) and `sess.graph_def` is not callable — pass the Graph.
            s_writer = tf.summary.FileWriter(logdir=log_dir, graph=sess.graph)
            summary = tf.summary.merge_all()
            # BUG FIX: next_batch() returns a single (images, labels) pair,
            # not an iterable of batches, so it cannot be enumerate()d —
            # loop explicitly and fetch one batch per step.
            for itr in range(MAX_ITERATION):
                images_feed, labels_feed = self.data_set.train.next_batch(BATCH_SIZE)
                feed_dict = {
                    self.inputs: images_feed.reshape(-1, 28, 28, 1),
                    self.labels: labels_feed,
                    self.training: True,  # enable dropout while training
                }
                # Compute the loss and summaries, and apply one update step.
                loss_, summary_, _ = sess.run([self.loss, summary, self.optimizer], feed_dict)
                if itr % 10 == 0:
                    print(itr, ": ", loss_)
                    s_writer.add_summary(summary_, itr)
                if itr % 50 == 0:
                    saver.save(sess, os.path.join(save_dir, "model.ckpt"))

    def mk_TensorRT_engine(self):
        """Freeze the trained graph, convert it to UFF, and build a TRT engine."""
        # Train first if no checkpoint exists yet.
        # BUG FIX: get_checkpoint_state expects the checkpoint *directory*,
        # not the checkpoint file path.
        if not tf.train.get_checkpoint_state(save_dir):
            self.fit()
        # Load the trained weights.
        with tf.Session() as sess:
            saver = tf.train.Saver(tf.global_variables())
            # BUG FIX: restore from save_dir ("./saves") — the original used
            # a non-existent "save/" directory.
            saver.restore(sess, os.path.join(save_dir, "model.ckpt"))
            # BUG FIX: sess.graph_def is not callable; serialize the Graph.
            graph_def = sess.graph.as_graph_def()
            frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, ["inference/softmax"])
            tf_model = tf.graph_util.remove_training_nodes(frozen_graph)
        # Convert the TensorFlow model format to UFF.
        uff_model = uff.from_tensorflow(tf_model, ["inference/softmax"])
        # Logger for building the TensorRT engine.
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        # Create a UFF parser and register the model's input/output info.
        parser = uffparser.create_uff_parser()
        # Input is (channel, im_size, im_size).
        parser.register_input("Placeholder", (1, 28, 28), 0)
        parser.register_output("inference/softmax")
        # Build the engine with the utility function (last args are
        # max batch size and max workspace size).
        engine = trt.utils.uff_to_trt_engine(G_LOGGER, uff_model, parser, MAX_BATCH_SIZE, MAX_WORKSPACE_SIZE)
        parser.destroy()
        return engine

    def inference(self):
        """Run single-sample inference on the GPU through a TensorRT engine."""
        G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.ERROR)
        if not os.path.exists('./tf_mnist.engine'):
            # Build and persist the engine.
            engine = self.mk_TensorRT_engine()
            trt.utils.write_engine_to_file("./tf_mnist.engine", engine.serialize())
        else:
            # Load the previously-serialized engine.
            engine = trt.utils.load_engine(G_LOGGER, "./tf_mnist.engine")
        # Create the runtime and the engine's execution context.
        runtime = trt.infer.create_infer_runtime(G_LOGGER)
        context = engine.create_execution_context()
        # Fetch a single test example.
        img, label = self.data_set.test.next_batch(1)
        img = img[0].astype(np.float32)
        label = label[0]
        # Host buffer to receive the prediction.
        output = np.empty(10, dtype=np.float32)
        # Allocate device memory: input and expected output sizes * batch size.
        d_input = cuda.mem_alloc(1 * img.size * img.dtype.itemsize)
        d_output = cuda.mem_alloc(1 * output.size * output.dtype.itemsize)
        bindings = [int(d_input), int(d_output)]
        # CUDA stream for the inference.
        stream = cuda.Stream()
        # Copy the input to the GPU, run inference, and copy the result back.
        cuda.memcpy_htod_async(d_input, img, stream)
        context.enqueue(1, bindings, stream.handle, None)
        cuda.memcpy_dtoh_async(output, d_output, stream)
        # Wait for the stream to finish.
        stream.synchronize()
        print("Test Case: " + str(label))
        print("Prediction: " + str(np.argmax(output)))
        # BUG FIX: removed `new_engine.destroy()` — `new_engine` was never
        # defined; only the objects created here are destroyed.
        context.destroy()
        engine.destroy()
        runtime.destroy()
# Script entry point: train the model, then run TensorRT inference with it.
if __name__ == "__main__":
    model_ = model()
    # Train the model.
    model_.fit()
    # Run inference with the trained model.
    # BUG FIX: call the method on the instance — the original called
    # `model.inference()` on the class with no instance (TypeError).
    model_.inference()