Inventory Optimization with a Markov Decision Process (MDP)

Import Libraries

In [ ]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 22})
In [ ]:
class Environment:
    
    # Initializes an Environment object
    def __init__(self, max_inventory):
        self.fixed_cost = 0
        self.max_inventory = max_inventory
        self.lost_revenue_factor = 0.3   # lost-revenue factor (defined but not used below)
    
    # Returns the initial state
    def reset(self):
        return self.max_inventory
    
    # Order cost: fixed cost + variable cost (2 per unit ordered)
    def order_cost(self, x):
        if x > 0:
            oc = self.fixed_cost + 2*x
        else: 
            oc = 0
        return oc
    
    # Holding cost: 1 per unit held; a negative input means backorders,
    # penalized at 2 per unit
    def holding_cost(self, x):
        
        if x < 0:
            return -2*x
        else:
            return 1*x
    
    # Revenue from selling x units (8 per unit; zero when x is negative)
    def revenue(self, x):
        if x < 0:
            return 0
        else:
            return 8*x
    
    # Demand probability mass function: uniform over {0, 1, ..., max_inventory},
    # independent of the demand value x
    def prob(self, x):
        p = 1/(self.max_inventory+1)       # ASSUMPTION: uniform demand distribution
        return p
    
    # Expected one-step revenue for taking action 'action' in state 'state'
    def expected_reward(self, state, action):
        a = 0
        p = 0
        # Demand j below the available stock (state + action): full revenue r(j),
        # weighted by its probability p(j)
        for j in range(state+action):
            a += self.revenue(j)*self.prob(j)
            p += self.prob(j)
            
        p = 1-p
        # Demand at or above the available stock: sales are capped at
        # state + action, which happens with the remaining probability 1 - p
        b = self.revenue(state+action)*p
        #print('expected reward:', a+b, a, b, p)
        return a+b
    
    # Calculate next state and reward (expected revenue - order cost - holding costs)
    def step(self, state, action, demand):
        
        next_state = state + action - demand
        reward = (self.expected_reward(state, action) - self.order_cost(action)
                  - self.holding_cost(state) - self.holding_cost(action))
        
        return next_state, reward
    
     

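What expected_reward computes is a newsvendor-style truncation of demand at the available stock s + a:

E[revenue(s, a)] = Σ_{j=0}^{s+a-1} r(j)·p(j) + r(s+a)·(1 − Σ_{j=0}^{s+a-1} p(j))

where r(j) = 8j is the revenue from selling j units and p(j) = 1/(max_inventory + 1) is the (assumed uniform) probability of demand j.
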
Create our agent with an (s, S) policy

In [ ]:
class Agent:
    
    def __init__(self, max_inventory, min_stock = 0):
        self.max_inventory = max_inventory
        self.min_stock = min_stock
        
    def get_action(self, state):
        # Possible actions are 0, 1, 2, ..., max_inventory - state:
        # order up to S = max_inventory whenever inventory drops below
        # s = min_stock; a negative state (backorders) increases the order size
        if state < self.min_stock:
            action = self.max_inventory - state
        else: 
            action = 0
        return action
    

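As a quick sanity check (a minimal sketch, using illustrative values max_inventory = 10 and min_stock = 4 rather than the values used in the run below):

In [ ]:
# The (s, S) policy orders nothing while stock >= s = min_stock,
# and orders up to S = max_inventory otherwise; backorders increase the order
demo_agent = Agent(max_inventory=10, min_stock=4)
for inventory in [8, 4, 3, 0, -2]:
    print('inventory {:>3} -> order {}'.format(inventory, demo_agent.get_action(inventory)))
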
Testing our policy

In [ ]:
max_inventory = 100
min_stock = 40
env = Environment(max_inventory = max_inventory)
agent = Agent(max_inventory = max_inventory, min_stock = min_stock)
max_episodes = 1
max_timesteps = 100  
net_cost = []    # per-step rewards (expected revenue minus costs)
STATES = []      # inventory level at each timestep
DEMAND = []      # sampled demand at each timestep
ACTIONS = []     # order quantity at each timestep
#random.seed(2804)
for episode in range(max_episodes):
    
    state = env.reset()
    #print('initial state:', state)
    
    for timestep in range(max_timesteps):
        #print('state:', state)
        STATES.append(state)
        
        # Sample demand uniformly from 1..max_inventory
        # (note: env.prob assumes demand is uniform over 0..max_inventory)
        demand = random.randint(1,max_inventory)
        #print('demand:', demand)
        DEMAND.append(demand)
        
        # Take action based on current state (inventory level)
        action = agent.get_action(state)
        #print('action:', action)
        ACTIONS.append(action)
        
        # Calculate reward and next state based on action taken
        next_state, reward = env.step(state, action, demand)
        #print('next_state:', next_state)
        #print('cost:', reward)
        #print('------------------------')
        
        if timestep == max_timesteps-1:
            reward = 0                            # reward at terminal state
        
        net_cost.append(reward)
        state = next_state
    
    # Show stats at end of episode   
    print("episode: {}/{}, cost: {}, timestep: {}".format(episode, max_episodes, np.mean(net_cost), timestep))
    print('----------------------------------------------')

    

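A few quick summary numbers help sanity-check the run above (a small added sketch; "stockout" here means negative on-hand inventory, i.e. backorders in this model):

In [ ]:
# Summarize the trajectory collected above
stockout_steps = sum(1 for s in STATES if s < 0)
print('mean reward per step :', np.mean(net_cost))
print('stockout timesteps   : {}/{}'.format(stockout_steps, len(STATES)))
print('mean order size      :', np.mean(ACTIONS))
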
Plot states (and optionally demand, actions, net cost) over timesteps

In [ ]:
plt.figure(figsize=(12,8))
#plt.plot(range(max_timesteps*max_episodes),net_cost, 'orange')   # per-step reward
plt.plot(range(max_timesteps*max_episodes),STATES, 'red')         # inventory level
#plt.plot(range(max_timesteps*max_episodes),DEMAND, 'green')      # demand
#plt.plot(range(max_timesteps*max_episodes),ACTIONS, 'blue')      # order quantity
plt.axhline(y = 0, color = 'black', linestyle = '--')             # stockout boundary
plt.axhline(y = min_stock, color = 'purple', linestyle = '-')     # reorder point s = min_stock
plt.xlabel('timesteps')
plt.ylabel('inventory level')
plt.show()
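
To compare reorder points, we can sweep min_stock and look at the mean reward per timestep. This is a minimal added sketch that reuses the Environment and Agent defined above, with the same uniform demand and the seed from the commented-out line earlier:

In [ ]:
# Sweep the reorder point s and report the mean reward per timestep
random.seed(2804)
for s_level in range(0, max_inventory + 1, 20):
    sweep_agent = Agent(max_inventory=max_inventory, min_stock=s_level)
    state = env.reset()
    rewards = []
    for _ in range(max_timesteps):
        demand = random.randint(1, max_inventory)
        action = sweep_agent.get_action(state)
        state, reward = env.step(state, action, demand)
        rewards.append(reward)
    print('min_stock = {:>3}: mean reward per step = {:.2f}'.format(s_level, np.mean(rewards)))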