import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection._search import ParameterGrid
import seaborn as sns
import copy


%matplotlib inline


# Set all of our base input values except demand
unit_cost = 7.50
selling_price = 10.00
unit_refund = 2.50
order_quantity = 200


def bookstore_profit(unit_cost, selling_price, unit_refund, order_quantity, demand):
    '''
    Compute profit in bookstore model
    '''
    order_cost = unit_cost * order_quantity
    sales_revenue = np.minimum(order_quantity, demand) * selling_price
    refund_revenue = np.maximum(0, order_quantity - demand) * unit_refund
    profit = sales_revenue + refund_revenue - order_cost
    return profit


# Demand parameters
demand_mean = 193
demand_sd = 40


print(f"Demand mean = {demand_mean}")
print(f"Demand sd = {demand_sd}")

Demand mean = 193
Demand sd = 40


deterministic_profit = bookstore_profit(unit_cost, selling_price, unit_refund, order_quantity, demand_mean)
deterministic_profit

447.5


from numpy.random import default_rng
rg = default_rng(4470)
rg.random() # Generate one just to see it work

0.45855804438027437


demand_sim = rg.normal(demand_mean, demand_sd, 1000)


demand_sim[:10]

array([217.03616307, 133.37168121, 231.54405167, 215.05711803,
       183.66669223, 268.28497816, 255.99191635, 188.58125604,
       164.62174798, 170.1384389 ])


demand_sim = np.around(rg.normal(demand_mean, demand_sd, 1000))
demand_sim[:10]

array([218., 203., 186., 203., 215., 151., 255., 176., 223., 252.])


print(f"Mean demand = {demand_sim.mean():.3f}, Std dev demand = {demand_sim.std():.3f}")

Mean demand = 192.056, Std dev demand = 40.170


from scipy.stats import norm


rv_normal = norm(loc=demand_mean, scale=demand_sd)
print(type(rv_normal))

<class 'scipy.stats._distn_infrastructure.rv_continuous_frozen'>


plt.title("Demand histogram")
plt.xlabel("Demand")
plt.ylabel("Density")
plt.hist(demand_sim, density=True);

x_normal = np.linspace(rv_normal.ppf(0.0001),
                rv_normal.ppf(0.999), 500)

plt.plot(x_normal, rv_normal.pdf(x_normal),
       'r-', lw=3, alpha=0.6, label='Normal pdf');


message = f"unit_cost: {unit_cost} \n" \
          f"selling_price: {selling_price} \n" \
          f"unit_refund: {unit_refund} \n" \
          f"order_quantity: {order_quantity}"

print(message)

unit_cost: 7.5 
selling_price: 10.0 
unit_refund: 2.5 
order_quantity: 200


profit_sim = pd.Series([(bookstore_profit(unit_cost, selling_price, unit_refund, order_quantity, d)) 
               for d in demand_sim])


profit_sim

0      500.0
1      500.0
2      395.0
3      500.0
4      500.0
       ...  
995    500.0
996    500.0
997    387.5
998    500.0
999    320.0
Length: 1000, dtype: float64


profit_sim.describe()

count    1000.000000
mean      347.832500
std       190.630397
min      -617.500000
25%       243.125000
50%       432.500000
75%       500.000000
max       500.000000
dtype: float64


print(f"Deterministic profit = {deterministic_profit}")
print(f"Simulation profit = {profit_sim.describe()['mean']}")

Deterministic profit = 447.5
Simulation profit = 347.8325


plt.title("Profit histogram")
plt.xlabel("Profit")
plt.ylabel("Num observations")
plt.ylim(0, 600)
plt.hist(profit_sim, density=False);


from scipy import stats


print(stats.percentileofscore(profit_sim, 0) / 100.0)

0.063


# Probability profit is between -200, 200


alpha = [1, 2, 3]
beta = [10, 20, 30]

for t in zip(alpha, beta):
    print(t)

(1, 10)
(2, 20)
(3, 30)


unit_cost_sim = rg.uniform(7.0, 8.0, 1000)
unit_refund_sim = rg.uniform(2.0, 3.0, 1000)

rv_uniform = stats.uniform(loc=7.0, scale=1.0)


plt.title("Unit Cost histogram")
plt.xlabel("Unit Cost")
plt.ylabel("Density")
plt.hist(unit_cost_sim, density=True);

x_uniform = np.linspace(7.0, 8.0, 100)

plt.plot(x_uniform, rv_uniform.pdf(x_uniform),
       'r-', lw=3, alpha=0.6, label='Uniform pdf');


random_inputs = zip(demand_sim, unit_cost_sim, unit_refund_sim)


profit_sim_2 = pd.Series([(bookstore_profit(uc, selling_price, uf, order_quantity, d)) 
               for (d, uc, uf) in random_inputs])


plt.title("Profit histogram for three uncertain inputs")
plt.xlabel("Profit")
plt.ylabel("Num observations")
plt.ylim(0, 600)
plt.hist(profit_sim_2, density=False);


# Create array of order quantities (the scenarios) 
order_quantity_range = np.arange(100, 400, 25)

# Create simulation data table 
sim_table_1 = [(oq, bookstore_profit(unit_cost, selling_price, unit_refund, oq, d)) 
                    for oq in order_quantity_range for d in demand_sim]

# Convert to dataframe
stbl_1_df = pd.DataFrame(sim_table_1, columns=['OrderQuantity', 'Profit'])

# Plot the results
profit_histo_g = sns.FacetGrid(stbl_1_df, col="OrderQuantity", sharey=True, col_wrap=3)
profit_histo_g = profit_histo_g.map(plt.hist, "Profit")


profit_box_g = sns.boxplot(x="OrderQuantity", y="Profit", data=stbl_1_df)


from whatif import BookstoreModel


unit_cost = 7.50
selling_price = 10.00
unit_refund = 2.50

order_quantity = 200

demand_mean = 193
demand_sd = 40

demand = demand_mean


model2 = BookstoreModel(unit_cost=unit_cost, 
                        selling_price=selling_price,
                        unit_refund=unit_refund,
                        order_quantity=order_quantity,
                        demand=demand)


num_reps = 100
demand_sim = rg.normal(demand_mean, demand_sd, num_reps)
unit_cost_sim = rg.uniform(7.0, 8.0, num_reps)
unit_refund_sim = rg.uniform(2.0, 3.0, num_reps)

random_inputs = {'demand': demand_sim,
                'unit_cost': unit_cost_sim,
                'unit_refund': unit_refund_sim}


random_inputs = {'demand': rg.normal(demand_mean, demand_sd, num_reps),
                'unit_cost': rg.uniform(7.0, 8.0, num_reps),
                'unit_refund': rg.uniform(2.0, 3.0, num_reps)}


scenario_inputs = {'order_quantity': np.arange(70, 321, 50)}
list(ParameterGrid(scenario_inputs))

[{'order_quantity': 70},
 {'order_quantity': 120},
 {'order_quantity': 170},
 {'order_quantity': 220},
 {'order_quantity': 270},
 {'order_quantity': 320}]


sim_outputs = ['profit']


def simulate(model, random_inputs, outputs, scenario_inputs=None, keep_random_inputs=False):
    '''Simulate model for one or more scenarios

    Parameters
    ----------
    model : object
        User defined object containing the appropriate methods and properties for computing outputs from inputs
    random_intputs : dict of str to sequence of random variates
        Keys are stochastic input variable names and values are sequence of $n$ random variates, where $n$ is the number of simulation replications
    outputs : list of str
        List of output variable names
    scenario_inputs : optional (default is None), dict of str to sequence
        Keys are deterministic input variable names and values are sequence of values for each scenario for this variable. Is consumed by
        scikit-learn ParameterGrid() function. See https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.ParameterGrid.html
    keep_random_inputs : optional (default is False), boolean
        If True, all the random input variates are included in the results dataframe

    Returns
    -------
    results_df : list of dict
        Values of all outputs for each simulation replication. If `scenario_inputs` is not None, then this is also for every combination of scenario inputs
    '''
    
    # Clone the model
    model_clone = copy.deepcopy(model)
    
    # Update clone with random_inputs
    model_clone.update(random_inputs)
    
    # Store raw simulation input values if desired
    if keep_random_inputs:
        scenario_base_vals = vars(model_clone)
    else:
        scenario_base_vals = vars(model)
    
    # Initialize output counters and containers
    scenario_num = 0
    scenario_results = []
    
    # Check if multiple scenarios
    if scenario_inputs is not None:
        # Create parameter grid for scenario inputs
        sim_param_grid = list(ParameterGrid(scenario_inputs))
        
        # Scenario loop
        for params in sim_param_grid:
            model_clone.update(params)
            # Initialize scenario related outputs
            result = {}
            scenario_vals = copy.copy(params)
            result['scenario_base_vals'] = scenario_base_vals
            result['scenario_num'] = scenario_num
            result['scenario_vals'] = scenario_vals
            raw_output = {}
            
            # Output measure loop
            for output_name in outputs:
                output_array = getattr(model_clone, output_name)()
                raw_output[output_name] = output_array
            
            # Gather results for this scenario
            result['output'] = raw_output
            scenario_results.append(result)
            scenario_num += 1
                
        return scenario_results

    else:
        # Similar logic to above, but only a single scenario
        results = []
        result = {}

        result['scenario_base_vals'] = scenario_base_vals
        result['scenario_num'] = scenario_num
        result['scenario_vals'] = {}
        
        raw_output = {}
        for output_name in outputs:
            output_array = getattr(model_clone, output_name)()
            raw_output[output_name] = output_array
            
        result['output'] = raw_output  
        results.append(result)

        return results


model2_results = simulate(model2, random_inputs, sim_outputs, scenario_inputs)


which_scenario = 4

# What are the keys in the output dictionaries
model2_results[which_scenario].keys()

dict_keys(['scenario_base_vals', 'scenario_num', 'scenario_vals', 'output'])


model2_results[which_scenario]['scenario_vals']

{'order_quantity': 270}


for scenario in model2_results:
    print(scenario['scenario_num'], scenario['scenario_vals'], scenario['output']['profit'].mean())

0 {'order_quantity': 70} 176.0191921065434
1 {'order_quantity': 120} 299.88460835700744
2 {'order_quantity': 170} 388.301729003981
3 {'order_quantity': 220} 329.96074377104713
4 {'order_quantity': 270} 123.62880894167007
5 {'order_quantity': 320} -126.44047555594678


model2_results[3]

{'scenario_base_vals': {'unit_cost': 7.5,
  'selling_price': 10.0,
  'unit_refund': 2.5,
  'order_quantity': 200,
  'demand': 193},
 'scenario_num': 3,
 'scenario_vals': {'order_quantity': 220},
 'output': {'profit': array([ 437.33849855,  362.33801054,  527.61319439,  -32.3279537 ,
          192.14065539,  651.46969433,  606.41222534,  -20.61877565,
           55.37130823,  348.683569  ,  556.60556558,   55.80330214,
          632.89588512,  338.12535262,  549.76294933,   81.75786067,
          592.26175912,  598.45394795,  281.65561129,  474.19900803,
         -430.70516881,  604.66037342,  145.25888343,  485.485362  ,
           79.80718479,  509.77492189,  474.11846441,  562.87239476,
          526.85786263,  242.04974183,  240.62026454,  514.01337326,
          449.59165548,  458.95455975,  395.70715053,  401.75601652,
          -13.958117  ,  160.7335364 ,  329.6733369 ,  336.37323543,
          581.36559412,  461.17053586,  149.00684799,  -64.64707145,
          593.08573741,  526.21426417,  654.11482303,  -77.38001029,
          215.67916855,  526.28526864,  376.14382412,  391.04766172,
          226.08350882,  336.12331556,  414.07318712, -297.35418839,
          480.57299913,  623.79006423,   62.61811128,  -24.30980311,
          456.48524773,  538.87892008,  294.20918401,  466.64746521,
          288.08604594,  161.89670337,  123.62260152,  607.28484597,
          577.96741337,  416.82069247,  496.88944225,  231.35986321,
           90.15878388,  547.69430656,  561.89709593,  474.61404907,
          170.00584462,  572.21045765,  551.4747969 ,  103.94906969,
          509.99439348,  511.74156091,   36.48006374,  539.96425365,
          144.65560568,  569.81973712,  150.42237779,  -23.28556053,
          164.29894871,  621.36091716,  137.34759389,  -68.92037257,
          166.81647504,  337.73999166,   49.34594687,   65.16093386,
          398.22177368,  522.88855275,   99.43671569,  413.16110017])}}


def get_sim_results_df(results):
    
    dfs = []    
    for r in results:
        df = pd.DataFrame(r['output'])
        df['scenario_num'] = r['scenario_num']
        for key, val in r['scenario_vals'].items():
            df[key] = val
            
        dfs.append(df)

    results_df = pd.concat(dfs)
    
    return results_df


model2_results_df = get_sim_results_df(model2_results)


model2_results_df


sns.boxplot(x="order_quantity", y="profit", data=model2_results_df);


profit_histo_g2 = sns.FacetGrid(model2_results_df, col='order_quantity', sharey=True, col_wrap=3)
profit_histo_g2 = profit_histo_g2.map(plt.hist, "profit")


model2_results_df.groupby(['scenario_num'])['profit'].describe()


# Probability profit is between -200, 200
print((stats.percentileofscore(profit_sim, 200) - stats.percentileofscore(profit_sim, -200)) / 100.0)

0.1895


# Create new random inputs dictionary with std dev of demand cut in half
random_inputs_2b = {'demand': rg.normal(demand_mean, demand_sd / 2, num_reps),
                'unit_cost': rg.uniform(7.0, 8.0, num_reps),
                'unit_refund': rg.uniform(2.0, 3.0, num_reps)}

# Rerun the simulation
model2b_results = simulate(model2, random_inputs_2b, sim_outputs, scenario_inputs)

# Gather results into dataframe
model2b_results_df = get_sim_results_df(model2b_results)

# Plot results
profit_histo_g2b = sns.FacetGrid(model2b_results_df, col='order_quantity', sharey=True, col_wrap=3)
profit_histo_g2b = profit_histo_g2b.map(plt.hist, "profit")


# Summary stats
model2b_results_df.groupby(['scenario_num'])['profit'].describe()


model2_results_df.groupby(['scenario_num'])['profit'].describe()

	count	mean	std	min	25%	50%	75%	max
scenario_num
0	100.0	176.019192	19.141648	141.097517	162.634144	174.243792	191.941489	209.399164
1	100.0	299.884608	37.420110	127.959514	278.066820	298.703644	329.042553	358.969995
2	100.0	388.301729	101.928015	-151.372827	358.996476	406.932804	456.672025	506.665625
3	100.0	329.960744	233.195915	-430.705169	148.069857	393.377406	526.428417	654.114823
4	100.0	123.628809	296.697983	-710.037510	-110.073725	126.840188	317.663371	762.579307
5	100.0	-126.440476	305.729840	-989.369852	-368.151247	-112.874911	84.724617	525.821376

	count	mean	std	min	25%	50%	75%	max
scenario_num
0	100.0	175.299523	21.428187	140.300003	158.869471	176.145203	194.921136	208.445339
1	100.0	300.513468	36.734036	240.514291	272.347664	301.963205	334.150519	357.334867
2	100.0	416.099680	59.046978	273.790126	364.677857	418.495280	468.053789	506.224395
3	100.0	327.434946	146.906044	-18.744937	241.750839	325.874946	427.901792	645.070979
4	100.0	82.386053	159.851003	-311.279999	-5.862679	83.628352	179.793912	461.199566
5	100.0	-167.571146	167.153030	-603.815062	-253.458905	-163.332479	-65.282083	234.865305

	count	mean	std	min	25%	50%	75%	max
scenario_num
0	100.0	176.019192	19.141648	141.097517	162.634144	174.243792	191.941489	209.399164
1	100.0	299.884608	37.420110	127.959514	278.066820	298.703644	329.042553	358.969995
2	100.0	388.301729	101.928015	-151.372827	358.996476	406.932804	456.672025	506.665625
3	100.0	329.960744	233.195915	-430.705169	148.069857	393.377406	526.428417	654.114823
4	100.0	123.628809	296.697983	-710.037510	-110.073725	126.840188	317.663371	762.579307
5	100.0	-126.440476	305.729840	-989.369852	-368.151247	-112.874911	84.724617	525.821376

Excel "What if?" analysis with Python - Part 3: Monte-carlo simulation¶

Bookstore model¶

A non-OO approach to Monte-Carlo simulation¶

Generating random demand values¶

Running the simulation¶

Analyzing the simulation results¶

Multiple Uncertain Inputs¶

Multiple scenarios: profit vs order quantity¶

Repeat simulation using OO approach¶

Wrap up and next steps¶

	profit	scenario_num	order_quantity
0	170.275117	0	70
1	164.192202	0	70
2	167.876925	0	70
3	160.018106	0	70
4	181.568184	0	70
...	...	...	...
95	-444.837562	5	320
96	-186.528891	5	320
97	68.017946	5	320
98	-433.013870	5	320
99	-25.215226	5	320