Skip to content

Commit 0a43fe7

Browse files
committed
chore(utility): renamed output_analysis -> rep_utility
1 parent 78cdc32 commit 0a43fe7

1 file changed

Lines changed: 393 additions & 0 deletions

File tree

rep_utility.py

Lines changed: 393 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,393 @@
1+
"""
2+
module: output_analysis
3+
4+
Provides methods for the confidence interval method for selecting
5+
the number of replications to run with a Discrete-Event Simulation.
6+
"""
7+
8+
import plotly.graph_objects as go
9+
import numpy as np
10+
import pandas as pd
11+
from scipy.stats import t
12+
import warnings
13+
14+
from typing import Protocol, runtime_checkable, Optional
15+
16+
OBSERVER_INTERFACE_ERROR = "Observers of OnlineStatistics must implement " \
17+
+ "ReplicationObserver interface. i.e. " \
18+
+ "update(results: OnlineStatistics) -> None"
19+
20+
21+
@runtime_checkable
22+
class ReplicationObserver(Protocol):
23+
"""
24+
Interface for an observer of an instance of the ReplicationsAnalyser
25+
"""
26+
def update(self, results) -> None:
27+
"""
28+
Add an observation of a replication
29+
30+
Parameters:
31+
-----------
32+
results: OnlineStatistic
33+
The current replication to observe.
34+
"""
35+
pass
36+
37+
38+
class OnlineStatistics:
39+
"""
40+
Welford’s algorithm for computing a running sample mean and
41+
variance. Allowing computation of CIs and half width % deviation
42+
from the mean.
43+
44+
This is a robust, accurate and old(ish) approach (1960s) that
45+
I first read about in Donald Knuth’s art of computer programming vol 2.
46+
"""
47+
48+
def __init__(
49+
self,
50+
data: Optional[np.ndarray] = None,
51+
alpha: Optional[float] = 0.1,
52+
observer: Optional[ReplicationObserver] = None
53+
) -> None:
54+
"""
55+
Initiaise Welford’s algorithm for computing a running sample mean and
56+
variance.
57+
58+
Parameters:
59+
-------
60+
data: array-like, optional (default = None)
61+
Contains an initial data sample.
62+
63+
alpha: float
64+
To compute 100(1 - alpha) confidence interval
65+
66+
observer: ReplicationObserver, optional (default=None)
67+
A user may optionally track the updates to the statistics using a
68+
ReplicationObserver (e.g. ReplicationTabuliser). This allows further
69+
tabular or visual analysis or saving results to file if required.
70+
71+
"""
72+
73+
self.n = 0
74+
self.x_i = None
75+
self.mean = None
76+
# sum of squares of differences from the current mean
77+
self._sq = None
78+
self.alpha = alpha
79+
self._observers = []
80+
if observer is not None:
81+
self.register_observer(observer)
82+
83+
if isinstance(data, np.ndarray):
84+
for x in data:
85+
self.update(x)
86+
87+
def register_observer(self, observer: ReplicationObserver) -> None:
88+
"""
89+
observer: ReplicationRecorder, optional (default = None)
90+
Include a method for recording the replication results at each
91+
update. Part of observer pattern. If None then no results are
92+
observed.
93+
94+
"""
95+
if not isinstance(observer, ReplicationObserver):
96+
raise ValueError(OBSERVER_INTERFACE_ERROR)
97+
98+
self._observers.append(observer)
99+
100+
@property
101+
def variance(self) -> float:
102+
"""
103+
Sample variance of data
104+
Sum of squares of differences from the current mean divided by n - 1
105+
"""
106+
107+
return self._sq / (self.n - 1)
108+
109+
@property
110+
def std(self) -> float:
111+
"""
112+
Standard deviation of data
113+
"""
114+
if self.n > 2:
115+
return np.sqrt(self.variance)
116+
else:
117+
return np.nan
118+
119+
@property
120+
def std_error(self) -> float:
121+
"""
122+
Standard error of the mean
123+
"""
124+
return self.std / np.sqrt(self.n)
125+
126+
@property
127+
def half_width(self) -> float:
128+
"""
129+
Confidence interval half width
130+
"""
131+
dof = self.n - 1
132+
t_value = t.ppf(1 - (self.alpha / 2), dof)
133+
return t_value * self.std_error
134+
135+
@property
136+
def lci(self) -> float:
137+
"""
138+
Lower confidence interval bound
139+
"""
140+
if self.n > 2:
141+
return self.mean - self.half_width
142+
else:
143+
return np.nan
144+
145+
@property
146+
def uci(self) -> float:
147+
"""
148+
Lower confidence interval bound
149+
"""
150+
if self.n > 2:
151+
return self.mean + self.half_width
152+
else:
153+
return np.nan
154+
155+
@property
156+
def deviation(self) -> float:
157+
"""
158+
Precision of the confidence interval expressed as the
159+
percentage deviation of the half width from the mean.
160+
"""
161+
if self.n > 2:
162+
return self.half_width / self.mean
163+
else:
164+
return np.nan
165+
166+
def update(self, x: float) -> None:
167+
"""
168+
Running update of mean and variance implemented using Welford's
169+
algorithm (1962).
170+
171+
See Knuth. D `The Art of Computer Programming` Vol 2. 2nd ed. Page 216.
172+
173+
Params:
174+
------
175+
x: float
176+
A new observation
177+
"""
178+
self.n += 1
179+
self.x_i = x
180+
181+
# init values
182+
if self.n == 1:
183+
self.mean = x
184+
self._sq = 0
185+
else:
186+
# compute the updated mean
187+
updated_mean = self.mean + ((x - self.mean) / self.n)
188+
189+
# update the sum of squares of differences from the current mean
190+
self._sq += (x - self.mean) * (x - updated_mean)
191+
192+
# update the tracked mean
193+
self.mean = updated_mean
194+
195+
self.notify()
196+
197+
def notify(self) -> None:
198+
"""
199+
Notify any observers that a update has taken place.
200+
"""
201+
for observer in self._observers:
202+
observer.update(self)
203+
204+
205+
class ReplicationTabulizer:
206+
"""
207+
Record the replication results from an instance of ReplicationsAlgorithm
208+
in a pandas DataFrame.
209+
210+
Implement as the part of observer pattern. Provides a summary frame
211+
equivalent to the output of a confidence_interval_method
212+
"""
213+
def __init__(self):
214+
# to track online stats
215+
self.stdev = []
216+
self.lower = []
217+
self.upper = []
218+
self.dev = []
219+
self.cumulative_mean = []
220+
self.x_i = []
221+
self.n = 0
222+
223+
def update(self, results: OnlineStatistics) -> None:
224+
"""
225+
Add an observation of a replication
226+
227+
Parameters:
228+
-----------
229+
results: OnlineStatistic
230+
The current replication to observe.
231+
"""
232+
self.x_i.append(results.x_i)
233+
self.cumulative_mean.append(results.mean)
234+
self.stdev.append(results.std)
235+
self.lower.append(results.lci)
236+
self.upper.append(results.uci)
237+
self.dev.append(results.deviation)
238+
self.n += 1
239+
240+
def summary_table(self) -> pd.DataFrame:
241+
"""
242+
Return a dataframe of results equivalent to the confidence interval
243+
method.
244+
"""
245+
# combine results into a single dataframe
246+
results = pd.DataFrame([self.x_i, self.cumulative_mean,
247+
self.stdev, self.lower, self.upper, self.dev]).T
248+
results.columns = ['Mean', 'Cumulative Mean', 'Standard Deviation',
249+
'Lower Interval', 'Upper Interval', '% deviation']
250+
results.index = np.arange(1, self.n+1)
251+
results.index.name = 'replications'
252+
253+
return results
254+
255+
256+
def confidence_interval_method(
257+
replications,
258+
alpha: Optional[float] = 0.05,
259+
desired_precision: Optional[float] = 0.05,
260+
min_rep: Optional[int] = 5,
261+
decimal_places: Optional[int] = 2
262+
):
263+
'''
264+
The confidence interval method for selecting the number of replications
265+
to run in a simulation.
266+
267+
Finds the smallest number of replications where the width of the confidence
268+
interval is less than the desired_precision.
269+
270+
Returns both the number of replications and the full results dataframe.
271+
272+
Parameters:
273+
----------
274+
replications: arraylike
275+
Array (e.g. np.ndarray or list) of replications of a performance metric
276+
277+
alpha: float, optional (default=0.05)
278+
procedure constructs a 100(1-alpha) confidence interval for the
279+
cumulative mean.
280+
281+
desired_precision: float, optional (default=0.05)
282+
Desired mean deviation from confidence interval.
283+
284+
min_rep: int, optional (default=5)
285+
set to a integer > 0 and ignore all of the replications prior to it
286+
when selecting the number of replications to run to achieve the desired
287+
precision. Useful when the number of replications returned does not
288+
provide a stable precision below target.
289+
290+
decimal_places: int, optional (default=2)
291+
sets the number of decimal places of the returned dataframe containing
292+
the results
293+
294+
Returns:
295+
--------
296+
tuple: int, pd.DataFrame
297+
298+
'''
299+
# welford's method to track cumulative mean and construct CIs at each rep
300+
# track the process and construct data table using ReplicationTabuliser
301+
observer = ReplicationTabulizer()
302+
stats = OnlineStatistics(alpha=alpha, data=replications[:2], observer=observer)
303+
304+
# iteratively update.
305+
for i in range(2, len(replications)):
306+
stats.update(replications[i])
307+
308+
results = observer.summary_table()
309+
310+
# get the smallest no. of reps where deviation is less than precision target
311+
312+
try:
313+
n_reps = results.iloc[min_rep:].loc[results['% deviation']
314+
<= desired_precision].iloc[0].name
315+
except IndexError:
316+
# no replications with desired precision
317+
message = 'WARNING: the replications do not reach desired precision'
318+
warnings.warn(message)
319+
n_reps = -1
320+
321+
return n_reps, results.round(decimal_places)
322+
323+
324+
325+
def plotly_confidence_interval_method(n_reps, conf_ints, metric_name,
326+
figsize=(1200, 400)):
327+
"""
328+
Interactive Plotly visualization with deviation hover information
329+
330+
Parameters:
331+
----------
332+
n_reps: int
333+
Minimum number of reps selected
334+
conf_ints: pandas.DataFrame
335+
Results from `confidence_interval_method` function
336+
metric_name: str
337+
Name of the performance measure
338+
figsize: tuple, optional (default=(1200,400))
339+
Plot dimensions in pixels (width, height)
340+
341+
Returns:
342+
-------
343+
plotly.graph_objects.Figure
344+
"""
345+
fig = go.Figure()
346+
347+
# Calculate relative deviations [1][4]
348+
deviation_pct = ((conf_ints['Upper Interval'] - conf_ints['Cumulative Mean']) /
349+
conf_ints['Cumulative Mean'] * 100).round(2)
350+
351+
# Confidence interval bands with hover info
352+
for col, color, dash in zip(['Lower Interval', 'Upper Interval'],
353+
['lightblue', 'lightblue'],
354+
['dot', 'dot']):
355+
fig.add_trace(go.Scatter(
356+
x=conf_ints.index,
357+
y=conf_ints[col],
358+
line=dict(color=color, dash=dash),
359+
name=col,
360+
text=[f'Deviation: {d}%' for d in deviation_pct],
361+
hoverinfo='x+y+name+text'
362+
))
363+
364+
# Cumulative mean line with enhanced hover
365+
fig.add_trace(go.Scatter(
366+
x=conf_ints.index,
367+
y=conf_ints['Cumulative Mean'],
368+
line=dict(color='blue', width=2),
369+
name='Cumulative Mean',
370+
hoverinfo='x+y+name'
371+
))
372+
373+
# Vertical threshold line
374+
fig.add_shape(
375+
type='line',
376+
x0=n_reps,
377+
x1=n_reps,
378+
y0=0,
379+
y1=1,
380+
yref='paper',
381+
line=dict(color='red', dash='dash')
382+
)
383+
384+
# Configure layout
385+
fig.update_layout(
386+
width=figsize[0],
387+
height=figsize[1],
388+
yaxis_title=f'Cumulative Mean: {metric_name}',
389+
hovermode='x unified',
390+
showlegend=True
391+
)
392+
393+
return fig

0 commit comments

Comments
 (0)