-
Notifications
You must be signed in to change notification settings - Fork 0
/
reward.pl
406 lines (330 loc) · 11.9 KB
/
reward.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
#!/usr/bin/perl
# Copyleft: R.Jaksa 2018, GNU General Public License version 3
# include "CONFIG.pl"
use v5.10; # for state
use IO::Handle qw( ); STDOUT->autoflush(1);
# ------------------------------------------------------------------------------- HELP
$HELP=<<EOF;
NAME
reward - reward simulator for contextual bandits
USAGE
reward [OPTIONS] [TIMESTAMP] ACTION [CONTEXT]
cat ACTIONS_FILE | reward [OPTIONS]
DESCRIPTION
Reward provides "most" simple simulation of stochastic reward for
contextual bandits.
Reward returns the simulated reward for supplied action.
It just chooses random value from the defined uniform distribution.
Means of rewards provided for particular actions are linearly distributed.
Context, if provided, defines further linear shift of these means.
ACTION is the ID number of action to be rewarded. Optional CONTEXT
is a space separated vector defining the context in which action was done.
TIMESTAMP can be also provided optionally.
ACTIONS_FILE
Actions file or a stream are just lines with space separated numbers.
Optional ISO 8601 timestamp is followed by mandatory action ID, followed
by optional context vector. Empty lines or hash comments are skipped.
OPTIONS
-h This help.
-v Verbose execution using CD(STDERR).
-vw Vowpal Wabbit output format.
-a=NUM Number of possible actions CK((default 2: action 1 and action 2).)
-r=NUM Number of possible rewards CK((default 2: 0 and 1).)
CC(-i=NUM,NUM) Interval of reward values CK((default [0,No_of_rewards-1]).)
-s=NUM Spread of rewards distribution CK((default 2).)
-c=NUM Length of the context vector CK((default 0).)
-cn=NUM Number of context states CK((default 2: 0 and 1).)
CC(-ci=NUM,NUM) Interval of context values CK((default [0,Context_states-1]).)
The mean of reward is shifted by the context. The shift is between none to
opposite (opposite distribution of means compare to no-context).
-cl Linear context with every dimension equally important CK((default).)
-cc Cascading context with every next dimension less important.
EXAMPLES
CW(reward 2 1 1)
CW(evgen | reward)
CW(evgen -c=3 20 | reward)
Full simulation loop:
CW(evgen -c=3 -f=log.dat | reward | context_bandit >> log.dat)
EOF
# ---------------------------------------------------------------------------- VERBOSE
sub error {
my $s=$_[0]; $s=~s/\n$//;
print STDERR "$CR_$s$CD_\n"; }
sub debug {
my $s=$_[1]; $s=~s/\n$//;
printf STDERR "%7s: %s\n",$_[0],$s if $DEBUG; }
sub verbn { print STDERR "\n"; }
sub verb2 { printf STDERR "$CK_%22s: %s$CD_\n",$_[0],$_[1]; }
sub verb3 { printf STDERR "$CK_%22s: %s %s$CD_\n",$_[0],$_[1],$_[2]; }
# ------------------------------------------------------------------------------- MATH
# just round the number
sub round {
return int($_[0] + $_[0]/abs($_[0]*2 || 1)); }
# print number with max two decimal places
sub dec2 {
my $r = sprintf("%.2f",$_[0]);
$r =~ s/0+$// if $r =~ /\./;
$r =~ s/\.$//;
$r = 0 if $r eq "-0";
return $r; }
# compare two arrays
sub areq {
my $a1=$_[0];
my $a2=$_[1];
return 0 if $#{$a1} != $#{$a2};
for(my $j=0; $j<=$#{$a1}; $j++) { return 0 if $a1->[$j] != $a2->[$j]; }
return 1; }
# ------------------------------------------------------------------- PARSE INPUT LINE
sub isnum {
my $s = $_[0];
return 1 if $s eq "NaN";
return 1 if $s =~ /^-?[0-9]+(\.[0-9]+)?$/;
return 1 if $s =~ /^-?\.[0-9]*$/;
return 0; }
sub parse {
my $line = $_[0];
my $n = 0; # in line word index
my $a; # parsed action
my @c; # parsed context
foreach my $s (split /\s+/,$line) {
$n++;
next if $n==1 and $s =~ /^[0-9]+-[0-9]+-[0-9]+$/; # date on 1st field
next if $n==1 and $s =~ /^[0-9]+:[0-9]+(:[0-9]+)?(\.[0-9]+)?$/; # time on 1st field
next if $n==2 and $s =~ /^[0-9]+:[0-9]+(:[0-9]+)?(\.[0-9]+)?$/; # time on 2nd field
$a=$s and next if not defined $a and isnum $s;
push @c,$s and next if isnum $s;
error "wrong field: $s"; }
verb2 "input line","$line" if $VERBOSE;
# verb2 "action",$a if $VERBOSE;
# verb2 "context","@c" if $VERBOSE;
return($a,@c); }
# ------------------------------------------------------------------------------ ARGVS
foreach(@ARGV) { if($_ eq "-h") { printhelp $HELP; exit 0; }}
foreach(@ARGV) { if($_ eq "-v") { $VERBOSE=1; $_=""; last; }}
foreach(@ARGV) { if($_ eq "-d") { $DEBUG=1; $_=""; last; }}
foreach(@ARGV) { if($_ eq "-cl") { $CTYPE=1; $_=""; last; }}
foreach(@ARGV) { if($_ eq "-cc") { $CTYPE=2; $_=""; last; }}
foreach(@ARGV) { if($_ eq "-vw") { $VW=1; $_=""; last; }}
our $ACTIONS = 2;
our $SPREAD = 2;
foreach(@ARGV) { if($_ =~ /^-a=([0-9]+)$/) { $ACTIONS=$1; $_=""; last; }}
foreach(@ARGV) { if($_ =~ /^-s=([0-9]*\.?[0-9]*)$/) { $SPREAD=sprintf("%f",$1); $_=""; last; }}
our $REWARDS;
our ($RMIN,$RMAX);
foreach(@ARGV) { if($_ =~ /^-r=([0-9]+)$/) { $REWARDS=$1; $_=""; last; }}
foreach(@ARGV) { if($_ =~ /^-i=([0-9]+),([0-9]+)$/) { $RMIN=$1; $RMAX=$2; $_=""; last; }}
our $CONTVEC;
foreach(@ARGV) { if($_ =~ /^-c=([0-9]+)$/) { $CONTVEC=$1; $_=""; last; }}
our $CONTEXTS;
foreach(@ARGV) { if($_ =~ /^-cn=([0-9]+)$/) { $CONTEXTS=$1; $_=""; last; }}
our ($CMIN,$CMAX);
foreach(@ARGV) { if($_ =~ /^-ci=([0-9]+),([0-9]+)$/) { $CMIN=$1; $CMAX=$2; $_=""; last; }}
our $LINE;
my $gotdate;
my $gottime;
foreach(@ARGV) {
next if $_ eq "";
$LINE.="$_ " and $gotdate=1 and $_="" if not defined $gotdate and $_ =~ /^[0-9]+-[0-9]+-[0-9]+$/;
$LINE.="$_ " and $gottime=1 and $_="" if not defined $gottime and $_ =~ /^[0-9]+:[0-9]+(:[0-9]+)?(\.[0-9]+)?$/;
$LINE.="$_ " and $_="" if isnum $_; }
# wrong arguments
my @wrong;
foreach(@ARGV) { push @wrong,$_ if $_ ne ""; }
if(@wrong) {
error;
foreach my $arg (@wrong) { error "wrong argument: $arg"; }
error; }
# ---------------------------------------------------------------------- REWARDS LOGIC
if(not defined $REWARDS) {
if(defined $RMIN and defined $RMAX) { $REWARDS = int($RMAX-$RMIN+0.5); }
else { $REWARDS = 2; }}
my ($rmax,$rmin);
if(not defined $RMIN) {
if(defined $RMAX) { $rmin = $RMAX - $REWARDS; }
else { $rmin = 0; }}
if(not defined $RMAX) {
if(defined $RMIN) { $rmax = $RMIN + $REWARDS; }
else { $rmax = $REWARDS-1; }}
$RMAX = $rmax if not defined $RMAX;
$RMIN = $rmin if not defined $RMIN;
# ---------------------------------------------------------------------- CONTEXT LOGIC
our $CDEF;
$CDEF = 1 if defined $CONTEXTS or defined $CMIN or defined $CMAX;
if(not defined $CONTEXTS) {
if(defined $CMIN and defined $CMAX) { $CONTEXTS = int($CMAX-$CMIN); }
else { $CONTEXTS = 2; }}
my ($cmax,$cmin);
if(not defined $CMIN) {
if(defined $CMAX) { $cmin = $CMAX - $CONTEXTS; }
else { $cmin = 1; }}
if(not defined $CMAX) {
if(defined $CMIN) { $cmax = $CMIN + $CONTEXTS; }
else { $cmax = $CONTEXTS; }}
$CMAX = $cmax if not defined $CMAX;
$CMIN = $cmin if not defined $CMIN;
$CTYPE = 1 if not defined $CTYPE;
# ------------------------------------------------------------------------------- CORE
# reward: 0 1 2 ...
my ($r0,$rn) = ($RMIN,$RMAX); my $rd = $rn-$r0; my $rs = $rd/($REWARDS-1);
# action: 1 2 3 ...
my ($a0,$an) = (1,$ACTIONS); my $ad = $an-$a0;
# context: 0 1 2 ...
my $cc = 0;
my @c_c0; my @c_cn; my @c_cd; my @c_cs;
our %C;
$C{dim} = 0; # context dimensionality
$C{n} = 0; # current context dimensionality
$C{c0} = \@c_c0; # columns min values
$C{cn} = \@c_cn; # columns max values
$C{cd} = \@c_cd; # columns ranges
$C{cs} = \@c_cs; # columns steps for unit
$C{dim} = $CONTVEC if defined $CONTVEC;
# return the context step
sub cstep {
my $j = $_[0];
my $cs;
# context steps, where every dimension is equal
if($CTYPE == 1) {
$cs = 1.0 / $C{dim} / ($C{cd}->[$j] + 1.0); }
# context steps, where every next dimension is equal to just one part of previous
if($CTYPE == 2) {
if($j==0) { $cs = 1.0 / ($C{cd}->[$j] + 1.0); }
else { $cs = $C{cs}->[$j-1] / ($C{cd}->[$j] + 1.0); }}
return $cs; }
# init
if(defined $CDEF) {
my $cs;
for(my $j=0; $j<$C{dim}; $j++) {
$C{c0}->[$j] = $CMIN;
$C{cn}->[$j] = $CMAX;
$C{cd}->[$j] = $C{cn}->[$j] - $C{c0}->[$j];
$C{cs}->[$j] = cstep $j;
$cs .= int($C{cs}->[$j]*100+0.5)."% "; }
verb2 "context steps",$cs if $VERBOSE; }
if($VERBOSE) {
verb2 "$ACTIONS actions","$a0..$an (range $ad)";
verb2 "$REWARDS rewards","$r0..$rn (range $rd, step $rs)";
verb2 "spread","$SPREAD"; }
# check whether the event is valid
sub check {
my $a = $_[0];
my $cp = $_[1];
# action
my $a2 = dec2($a);
verb2 "action",$a2 if $VERBOSE;
error "action $a2 is out of range [$a0,$an]" if $a<$a0 or $a>$an;
error "action is decimal" if $a2 =~ /\./;
# context type
state $ct = 0;
if($VERBOSE and not $ct) {
my $s;
$s = "linear" if $CTYPE == 1;
$s = "cascading" if $CTYPE == 2;
verb2 "context type",$s;
$ct = 1; }
# context dimensionality
if(not defined $CONTVEC) {
my $cd = scalar @{$cp};
if($cd != $C{dim}) {
$C{dim} = $cd;
verb2 "context dimensionality",$C{dim} if $VERBOSE; }}
else {
state $cdok = 0;
verb2 "context dimensionality",$C{dim} if $VERBOSE and not $cdok;
$cdok = 1; }
# required context dimensionality
my $cn = scalar @{$cp};
$C{n} = $C{dim};
$C{n} = $cn if $cn < $C{n};
# context ranges
my $c2;
if(not defined $CDEF) {
my ($cx,$cs);
my @cdo = @{$C{cd}};
for(my $j=0; $j<$C{n}; $j++) {
my $cj = $cp->[$j];
$c2 .= "$cj ";
$C{c0}->[$j] = $cj if $cj < $C{c0}->[$j] or not defined $C{c0}->[$j];
$C{cn}->[$j] = $cj if $cj > $C{cn}->[$j] or not defined $C{cn}->[$j];
$C{cd}->[$j] = $C{cn}->[$j] - $C{c0}->[$j];
$C{cs}->[$j] = cstep $j;
$cx .= "[$C{c0}->[$j],$C{cn}->[$j]] ";
$cs .= int($C{cs}->[$j]*100+0.5)."% "; }
# context ranges changed
if(not areq($C{cd},\@cdo)) {
verb2 "context intervals",$cx if $VERBOSE;
verb2 "context steps",$cs if $VERBOSE; }}
verb2 "context",$c2 if $VERBOSE and @{$cp}; }
# compute the mean reward for action
sub mean {
my $a = $_[0];
my $cp = $_[1];
check $a,$cp;
# context shift
my $cs;
if(@{$cp}) {
$cs = 0;
for(my $j=0; $j<=$C{n}; $j++) {
$cs += ($cp->[$j] - $C{c0}->[$j]) * $C{cs}->[$j]; }
verb2 "context shift",int($cs*100+0.5)."%" if $VERBOSE; }
# mean
my $m = (1.0-$cs*2.0)*$rd/$ad*($a-$a0) + $r0 + $rd*$cs;
# verbose
my $m0s;
if($VERBOSE and defined $cs) {
my $m0 = $rd/$ad*($a-$a0) + $r0; # without the context shift
$m0s = " (".dec2($m0)." without context)" if $m0 != $m; }
verb2 "linear mean",dec2($m).$m0s if $VERBOSE;
return $m; }
# compute the mean reward for action without considering context
sub mean_nocontext {
my $a = $_[0];
my $c = $_[1];
check $a,$c;
my $m = $rd/$ad*($a-$a0) + $r0;
verb2 "linear mean",$m if $VERBOSE;
return $m; }
# distribute the reward in distribution
sub dist {
my $a = $_[0];
my $s = $a + rand($SPREAD) - $SPREAD/2;
verb2 "uniformly distributed",dec2($s) if $VERBOSE;
return $s; }
# quantize the reward
sub quant {
my $r = $_[0];
my $q = ($r-$r0)*($REWARDS-1)/$rd + $r0;
my $n = round($q)*$rs;
$n = $RMAX if $n>$RMAX;
$n = $RMIN if $n<$RMIN;
verb2 "quantized reward",dec2($n) if $VERBOSE;
return $n; }
# compute the reward
sub reward {
return quant dist mean $_[0],$_[1]; }
# ---------------------------------------------------------------------- ARGUMENTS RUN
if(defined $LINE) {
my ($a,@c) = parse $LINE;
my $r = reward $a,\@c;
print "$r\n"; }
# ---------------------------------------------------------------------- STREAMING RUN
if(not defined $LINE) {
while(<STDIN>) {
my $s = $_;
$s =~ s/\n$//;
# skip empty lines
if($s =~ /^\h*$/) { next; }
# comments
elsif($s =~ /^\h*\#/) {
print "$s r1\n" if not $VW;
next; }
# data
else {
my ($a,@c) = parse $s;
my $r = reward $a,\@c;
if($VW) {
print "$a:$r:1 | @c\n"; }
else {
print "$s $r\n"; }}
print "\n" if not $_=~/\n$/; }}
# -------------------------------------------------------------------------------- END