dropout_lstm.py
import sys
import torch
from torch import nn
from torch.autograd import Variable
from view import *
from holder import *
from util import *
from join_table import *
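
# `view`, `holder`, `util`, and `join_table` are local project modules (not
# shown here). Only `JoinTable` is used below; a minimal stand-in, assuming
# it simply concatenates a list of tensors along a fixed dimension (an
# inference from how it is used in this file), would look like:
#
#   class JoinTable(torch.nn.Module):
#       def __init__(self, dim):
#           super(JoinTable, self).__init__()
#           self.dim = dim
#       def forward(self, tensors):
#           return torch.cat(tensors, self.dim)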

# Customized LSTM with variational (inter-timestep) dropout:
# the dropout masks are sampled once per forward pass and kept
# consistent across all time steps. Batch-first input is assumed.
class DropoutLSTMLayer(torch.nn.Module):
    def __init__(self, input_size, hidden_size, bias=True, dropout=0.0, bidirectional=False):
        super(DropoutLSTMLayer, self).__init__()
        is_cuda = torch.cuda.is_available()
        self.drop = dropout
        self.bidirectional = bidirectional
        self.cell_fw = nn.LSTMCell(input_size, hidden_size, bias)
        self.fw_joiner = JoinTable(1)
        self.h_fw_init = Variable(torch.zeros(1, self.cell_fw.hidden_size), requires_grad=False)
        self.c_fw_init = Variable(torch.zeros(1, self.cell_fw.hidden_size), requires_grad=False)
        if is_cuda:
            self.h_fw_init = self.h_fw_init.cuda()
            self.c_fw_init = self.c_fw_init.cuda()
        if bidirectional:
            self.cell_bw = nn.LSTMCell(input_size, hidden_size, bias)
            self.bw_joiner = JoinTable(1)
            self.fw_bw_joiner = JoinTable(2)
            self.h_bw_init = Variable(torch.zeros(1, self.cell_bw.hidden_size), requires_grad=False)
            self.c_bw_init = Variable(torch.zeros(1, self.cell_bw.hidden_size), requires_grad=False)
            if is_cuda:
                self.h_bw_init = self.h_bw_init.cuda()
                self.c_bw_init = self.c_bw_init.cuda()

    # change the dropout ratio
    def dropout(self, p):
        self.drop = p

    # Sample fresh dropout masks. The masks use inverted dropout: elements
    # are kept with probability (1 - p) and scaled by 1 / (1 - p), so the
    # expected activation is unchanged and no rescaling is needed at test time.
    def sample_dropout(self):
        is_cuda = torch.cuda.is_available()
        scale = 1.0 / (1.0 - self.drop)
        self.fw_x_mask = Variable(
            torch.bernoulli(torch.Tensor(1, 1, self.cell_fw.input_size).fill_(1.0 - self.drop)) * scale,
            requires_grad=False)
        self.fw_h_mask = Variable(
            torch.bernoulli(torch.Tensor(1, self.cell_fw.hidden_size).fill_(1.0 - self.drop)) * scale,
            requires_grad=False)
        if is_cuda:
            self.fw_x_mask = self.fw_x_mask.cuda()
            self.fw_h_mask = self.fw_h_mask.cuda()
        # only sample backward masks when the backward cell exists,
        # otherwise a unidirectional layer would crash here
        if self.bidirectional:
            self.bw_x_mask = Variable(
                torch.bernoulli(torch.Tensor(1, 1, self.cell_bw.input_size).fill_(1.0 - self.drop)) * scale,
                requires_grad=False)
            self.bw_h_mask = Variable(
                torch.bernoulli(torch.Tensor(1, self.cell_bw.hidden_size).fill_(1.0 - self.drop)) * scale,
                requires_grad=False)
            if is_cuda:
                self.bw_x_mask = self.bw_x_mask.cuda()
                self.bw_h_mask = self.bw_h_mask.cuda()

    # x must be of shape (batch_l, seq_l, input_size)
    # For simplicity, no hidden states are returned
    def forward(self, x):
        self.sample_dropout()
        batch_l, seq_l, input_size = x.shape
        # forward pass with the input and hidden masks applied at every step
        output_fw = [None for _ in range(seq_l)]
        h_fw = self.h_fw_init.expand(batch_l, -1)
        c_fw = self.c_fw_init.expand(batch_l, -1)
        x_fw = x * self.fw_x_mask
        for i in range(seq_l):
            h_fw1 = h_fw * self.fw_h_mask
            h_fw, c_fw = self.cell_fw(x_fw[:, i, :], (h_fw1, c_fw))
            output_fw[i] = h_fw.unsqueeze(1)  # (batch_l, 1, hidden_size)
        rs = self.fw_joiner(output_fw)  # (batch_l, seq_l, hidden_size)
        if self.bidirectional:
            # backward pass over the reversed sequence, with its own masks
            output_bw = [None for _ in range(seq_l)]
            h_bw = self.h_bw_init.expand(batch_l, -1)
            c_bw = self.c_bw_init.expand(batch_l, -1)
            x_bw = x * self.bw_x_mask
            for i in range(seq_l-1, -1, -1):
                h_bw1 = h_bw * self.bw_h_mask
                h_bw, c_bw = self.cell_bw(x_bw[:, i, :], (h_bw1, c_bw))
                output_bw[i] = h_bw.unsqueeze(1)  # (batch_l, 1, hidden_size)
            # concatenate directions: (batch_l, seq_l, hidden_size*2)
            rs = self.fw_bw_joiner([rs, self.bw_joiner(output_bw)])
        return rs, None  # no hidden states returned


class DropoutLSTM(torch.nn.Module):
    def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=True, dropout=0.0, bidirectional=False):
        super(DropoutLSTM, self).__init__()
        assert(batch_first is True)
        self.rnn_layers = []
        for i in range(num_layers):
            # layers after the first consume the previous layer's output
            in_size = input_size
            if i > 0:
                in_size = hidden_size*2 if bidirectional else hidden_size
            self.rnn_layers.append(DropoutLSTMLayer(in_size, hidden_size, bias, dropout, bidirectional))
        self.rnn_layers = nn.ModuleList(self.rnn_layers)
        self.num_layers = num_layers

    # change the dropout ratio for all layers
    def dropout(self, p):
        for i in range(self.num_layers):
            self.rnn_layers[i].dropout(p)

    # x must be of shape (batch_l, seq_l, input_size)
    # For simplicity, no hidden states are returned
    def forward(self, x):
        for i in range(self.num_layers):
            x, _ = self.rnn_layers[i](x)
        return x, None


if __name__ == '__main__':
    layer = DropoutLSTMLayer(10, 3, bidirectional=True)
    for n, p in layer.named_parameters():
        print(n)
    x = Variable(torch.ones(2, 4, 10))
    y, _ = layer(x)
    print(y)

    lstm = DropoutLSTM(10, 3, num_layers=2, bidirectional=True)
    y, _ = lstm(x)
    print(y)
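
    # A minimal sanity check (a sketch, not part of the original demo):
    # with dropout > 0 a fresh mask is sampled on every forward call, so two
    # passes over the same input should typically differ; with the ratio set
    # to 0.0 the masks are all ones and the module is deterministic.
    lstm.dropout(0.5)
    y1, _ = lstm(x)
    y2, _ = lstm(x)
    print((y1 - y2).abs().sum())  # typically nonzero under dropout
    lstm.dropout(0.0)
    y1, _ = lstm(x)
    y2, _ = lstm(x)
    print((y1 - y2).abs().sum())  # expected to be zero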