forked from Zhangbeibei1991/safety_stock_optimisation_RL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
environment.py
82 lines (66 loc) · 2.69 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import numpy as np
class Environment:
    """Simulate a three-stage supply chain: supplier s0 -> supplier s1 -> retailer.

    The chain is described by a 2-D array ``state`` that is indexed as
    ``state[row, column]``.  Columns are the stages (0 = s0, 1 = s1,
    2 = retailer).  Rows, as used by the methods below:

    ===  ====================================================
    row  meaning
    ===  ====================================================
    0    inventory on hand
    1    reorder point (only the retailer's entry is read)
    2    pending delivery (quantity ordered, not yet received)
    3    days to delivery (0 means no delivery outstanding)
    4    accumulated stockout quantity
    5    processing time
    ===  ====================================================

    ``action`` is a 2-D array with the same column meaning and these rows:
    0 = service time, 1 = order quantity, 2 = next reorder point.

    Neither method mutates its ``state`` argument; each works on a copy
    obtained via ``state.copy()``.
    """

    def __init__(self, envParams):
        """Store the cost parameters.

        Args:
            envParams: Mapping read by :meth:`step`.  It must provide
                ``"stockoutCost"`` (cost per stocked-out unit) and
                ``"inventoryCost"`` (indexable by stage 0..2, holding cost
                per unit of inventory).
        """
        self.envParams = envParams

    def step(self, state, demand):
        """Apply the natural (one-period) state update.

        Deliveries in transit advance by one day and are booked when they
        arrive, then the retailer serves ``demand`` from its inventory.

        Args:
            state: Current state array (see the class docstring).
            demand: Customer demand hitting the retailer this period.

        Returns:
            Tuple ``(newState, actionTrigger, reward)`` where ``newState`` is
            the updated copy of ``state``, ``actionTrigger`` is truthy when
            the retailer's inventory is at or below its reorder point and it
            is not waiting for a delivery, and ``reward`` is the negated sum
            of stockout and inventory holding costs.
        """
        newState = state.copy()

        # Advance every stage's outstanding delivery by one day.
        # NOTE(review): the source this was restored from had lost its
        # indentation; the arrival check is nested under the "waiting"
        # guard so that only a countdown that reaches zero books a delivery.
        for i in range(3):
            # Only stages that are actually waiting for a delivery count down.
            if newState[3, i] > 0:
                newState[3, i] -= 1
                # Delivery arrives once the countdown hits zero.
                if newState[3, i] == 0:
                    # Receive the ordered quantity into this stage's inventory.
                    newState[0, i] += newState[2, i]
                    # The goods come out of the upstream stage's inventory;
                    # stage 0 has no upstream stage inside the model.
                    if i > 0:
                        newState[0, i - 1] -= newState[2, i]
                    # Nothing is pending any more.
                    newState[2, i] = 0

        # View (not a copy) of the retailer column, so writes below land in
        # newState.
        retailerState = newState[:, 2]
        if retailerState[0] >= demand:
            # Enough stock: consume inventory.
            retailerState[0] -= demand
        else:
            # Not enough stock: record the shortfall and empty the shelf.
            retailerState[4] += demand - retailerState[0]
            retailerState[0] = 0

        # Trigger for taking action: retailer's inventory <= reorder point
        # and not waiting for any delivery.
        actionTrigger = (retailerState[0] <= retailerState[1]) & (retailerState[3] == 0)

        # Reward is pure cost: accumulated retailer stockouts plus the
        # inventory held at each stage after demand has been served.
        reward = 0
        reward -= newState[4, 2] * self.envParams["stockoutCost"]
        reward -= (
            newState[0, 0] * self.envParams["inventoryCost"][0]
            + newState[0, 1] * self.envParams["inventoryCost"][1]
        )
        # Unused safety stock at the retailer is charged as well.
        reward -= newState[0, 2] * self.envParams["inventoryCost"][2]

        return (newState, actionTrigger, reward)

    def execute(self, state, action):
        """Apply an ordering action to the state.

        Args:
            state: Current state array (see the class docstring).
            action: Action array; row 0 holds service times, row 1 order
                quantities and row 2 reorder points, one column per stage.

        Returns:
            Updated copy of ``state`` with the new pending deliveries,
            delivery countdowns and retailer reorder point set, and the
            stockout row reset to zero.
        """
        newState = state.copy()

        # Column views into newState / action for the individual stages.
        retailerState = newState[:, 2]
        s1State = newState[:, 1]
        s1Action = action[:, 1]
        s0State = newState[:, 0]
        s0Action = action[:, 0]

        # pendingDelivery = ordered items
        newState[2] = action[1]

        # daysToDelivery = upstream service time + own processing time
        s0State[3] = 0 + s0State[5]  # s0's supplier has a service time of 0
        s1State[3] = s0Action[0] + s1State[5]
        # The retailer waits only for s1's service time; no processing time
        # is added here.
        retailerState[3] = s1Action[0]

        # Set the retailer's next reorder point.
        newState[1, 2] = action[2, 2]

        # Reset the accumulated stockouts for all stages.
        newState[4] = 0

        return newState