Lab 12 - Evaluation in Production¶
Goal: combine multiple metrics into a weighted rollout KPI and gate decisions.
Info: Weighted KPI
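Combine precision, SLA hit-rate, and override-rate (entered in the code as `override_inv`, where a higher value raises the KPI) into a single weighted score. Weight each metric by business priority (compliance > efficiency). Use the KPI to gate rollouts.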
Info: Rollback thresholds
Agree on rollback criteria before rollout. If the KPI delta (candidate minus baseline) falls to -0.01 or below, roll back. Honor the threshold; it is your safety net.
In [ ]:
Copied!
# Metric scores for the baseline variant, keyed by metric name.
baseline = {
    'precision': 0.88,
    'sla_hit': 0.91,
    'override_inv': 0.84,  # presumably the inverted override rate -- confirm
}

# Metric scores for the candidate variant, using the same keys as `baseline`.
candidate = {
    'precision': 0.90,
    'sla_hit': 0.89,
    'override_inv': 0.76,
}

# Weight applied to each metric when the scores are folded into one KPI.
weights = {
    'precision': 0.4,
    'sla_hit': 0.4,
    'override_inv': 0.2,
}
def weighted_kpi(metrics, weights):
    """Collapse per-metric scores into a single weighted KPI.

    Args:
        metrics: Mapping of metric name to score. Keys that do not appear
            in ``weights`` are ignored.
        weights: Mapping of metric name to weight. Every key here must
            also be present in ``metrics``.

    Returns:
        The sum of ``metrics[k] * weights[k]`` over the keys of
        ``weights`` (``0`` when ``weights`` is empty).

    Raises:
        KeyError: If ``weights`` names a metric missing from ``metrics``.
    """
    # Iterate over the weights, not the metrics, so that only metrics with
    # an assigned weight contribute to the score.
    return sum(metrics[k] * weights[k] for k in weights)
# Score both variants with the same weights, then print each KPI and the
# candidate-minus-baseline gap, all rounded to four decimals.
b = weighted_kpi(baseline, weights)
c = weighted_kpi(candidate, weights)
for label, value in (
    ('Baseline KPI:', b),
    ('Candidate KPI:', c),
    ('Delta:', c - b),
):
    print(label, round(value, 4))
# Metric scores for the baseline variant, keyed by metric name.
baseline = {
    'precision': 0.88,
    'sla_hit': 0.91,
    'override_inv': 0.84,  # presumably the inverted override rate -- confirm
}

# Metric scores for the candidate variant, using the same keys as `baseline`.
candidate = {
    'precision': 0.90,
    'sla_hit': 0.89,
    'override_inv': 0.76,
}

# Weight applied to each metric when the scores are folded into one KPI.
weights = {
    'precision': 0.4,
    'sla_hit': 0.4,
    'override_inv': 0.2,
}
def weighted_kpi(metrics, weights):
    """Collapse per-metric scores into a single weighted KPI.

    Args:
        metrics: Mapping of metric name to score. Keys that do not appear
            in ``weights`` are ignored.
        weights: Mapping of metric name to weight. Every key here must
            also be present in ``metrics``.

    Returns:
        The sum of ``metrics[k] * weights[k]`` over the keys of
        ``weights`` (``0`` when ``weights`` is empty).

    Raises:
        KeyError: If ``weights`` names a metric missing from ``metrics``.
    """
    # Iterate over the weights, not the metrics, so that only metrics with
    # an assigned weight contribute to the score.
    return sum(metrics[k] * weights[k] for k in weights)
# Score both variants with the same weights, then print each KPI and the
# candidate-minus-baseline gap, all rounded to four decimals.
b = weighted_kpi(baseline, weights)
c = weighted_kpi(candidate, weights)
for label, value in (
    ('Baseline KPI:', b),
    ('Candidate KPI:', c),
    ('Delta:', c - b),
):
    print(label, round(value, 4))
In [ ]:
Copied!
def rollout_gate(delta, go=0.01, hold=-0.01):
    """Translate a KPI delta into a rollout decision.

    Args:
        delta: KPI change to judge; the notebook passes candidate KPI
            minus baseline KPI.
        go: Inclusive lower bound at which the rollout proceeds.
        hold: Inclusive upper bound at which the rollout is rolled back.
            Despite its name, this is the rollback threshold; the
            'hold' decision covers the open interval between ``hold``
            and ``go``.

    Returns:
        ``'go'`` when ``delta >= go``, ``'rollback'`` when
        ``delta <= hold``, and ``'hold'`` otherwise.
    """
    # The 'go' test runs first, so it wins if the two thresholds overlap.
    if delta >= go:
        return 'go'
    if delta <= hold:
        return 'rollback'
    return 'hold'
# Run the candidate-minus-baseline KPI gap through the gate and report it.
decision = rollout_gate(c - b)
print('Decision:', decision)
def rollout_gate(delta, go=0.01, hold=-0.01):
    """Translate a KPI delta into a rollout decision.

    Args:
        delta: KPI change to judge; the notebook passes candidate KPI
            minus baseline KPI.
        go: Inclusive lower bound at which the rollout proceeds.
        hold: Inclusive upper bound at which the rollout is rolled back.
            Despite its name, this is the rollback threshold; the
            'hold' decision covers the open interval between ``hold``
            and ``go``.

    Returns:
        ``'go'`` when ``delta >= go``, ``'rollback'`` when
        ``delta <= hold``, and ``'hold'`` otherwise.
    """
    # The 'go' test runs first, so it wins if the two thresholds overlap.
    if delta >= go:
        return 'go'
    if delta <= hold:
        return 'rollback'
    return 'hold'
# Run the candidate-minus-baseline KPI gap through the gate and report it.
decision = rollout_gate(c - b)
print('Decision:', decision)
Visualization: weighted KPI comparison¶
This compares baseline vs candidate metrics and the resulting weighted KPI decision.
In [ ]:
Copied!
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Metric scores for the baseline variant, keyed by metric name.
baseline = {
    'precision': 0.88,
    'sla_hit': 0.91,
    'override_inv': 0.84,  # presumably the inverted override rate -- confirm
}

# Metric scores for the candidate variant, using the same keys as `baseline`.
candidate = {
    'precision': 0.90,
    'sla_hit': 0.89,
    'override_inv': 0.76,
}

# Weight applied to each metric when the scores are folded into one KPI.
weights = {
    'precision': 0.4,
    'sla_hit': 0.4,
    'override_inv': 0.2,
}
def weighted_kpi(m, w):
    """Collapse per-metric scores into a single weighted KPI.

    Args:
        m: Mapping of metric name to score. Keys that do not appear in
            ``w`` are ignored.
        w: Mapping of metric name to weight. Every key here must also be
            present in ``m``.

    Returns:
        The sum of ``m[k] * w[k]`` over the keys of ``w`` (``0`` when
        ``w`` is empty).

    Raises:
        KeyError: If ``w`` names a metric missing from ``m``.
    """
    # Iterate over the weights, not the metrics, so that only metrics with
    # an assigned weight contribute to the score.
    return sum(m[k] * w[k] for k in w)
# Score both variants with the same weights (baseline first, then candidate).
b, c = (weighted_kpi(variant, weights) for variant in (baseline, candidate))

# Plot the metrics in the order the weights dict lists them.
metrics = list(weights)
bvals = [baseline[name] for name in metrics]
cvals = [candidate[name] for name in metrics]

# One x slot per metric; `w` is the width of each bar within a slot.
x = range(len(metrics))
w = 0.35

fig, ax = plt.subplots(1, 2, figsize=(11, 4))
left, right = ax

# Left panel: paired bars per metric, offset half a bar width to either
# side of the slot centre so the two variants sit next to each other.
left.bar([pos - w / 2 for pos in x], bvals, width=w, label='Baseline')
left.bar([pos + w / 2 for pos in x], cvals, width=w, label='Candidate')
left.set_xticks(list(x), metrics)
left.set_ylim(0, 1)
left.legend()
left.set_title('Metric comparison')

# Right panel: the two weighted KPIs, with their difference in the title.
right.bar(
    ['Baseline KPI', 'Candidate KPI'],
    [b, c],
    color=['#4C78A8', '#F58518'],
)
right.set_title(f'KPI delta = {c - b:.3f}')

plt.tight_layout()
plt.show()
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Metric scores for the baseline variant, keyed by metric name.
baseline = {
    'precision': 0.88,
    'sla_hit': 0.91,
    'override_inv': 0.84,  # presumably the inverted override rate -- confirm
}

# Metric scores for the candidate variant, using the same keys as `baseline`.
candidate = {
    'precision': 0.90,
    'sla_hit': 0.89,
    'override_inv': 0.76,
}

# Weight applied to each metric when the scores are folded into one KPI.
weights = {
    'precision': 0.4,
    'sla_hit': 0.4,
    'override_inv': 0.2,
}
def weighted_kpi(m, w):
    """Collapse per-metric scores into a single weighted KPI.

    Args:
        m: Mapping of metric name to score. Keys that do not appear in
            ``w`` are ignored.
        w: Mapping of metric name to weight. Every key here must also be
            present in ``m``.

    Returns:
        The sum of ``m[k] * w[k]`` over the keys of ``w`` (``0`` when
        ``w`` is empty).

    Raises:
        KeyError: If ``w`` names a metric missing from ``m``.
    """
    # Iterate over the weights, not the metrics, so that only metrics with
    # an assigned weight contribute to the score.
    return sum(m[k] * w[k] for k in w)
# Score both variants with the same weights (baseline first, then candidate).
b, c = (weighted_kpi(variant, weights) for variant in (baseline, candidate))

# Plot the metrics in the order the weights dict lists them.
metrics = list(weights)
bvals = [baseline[name] for name in metrics]
cvals = [candidate[name] for name in metrics]

# One x slot per metric; `w` is the width of each bar within a slot.
x = range(len(metrics))
w = 0.35

fig, ax = plt.subplots(1, 2, figsize=(11, 4))
left, right = ax

# Left panel: paired bars per metric, offset half a bar width to either
# side of the slot centre so the two variants sit next to each other.
left.bar([pos - w / 2 for pos in x], bvals, width=w, label='Baseline')
left.bar([pos + w / 2 for pos in x], cvals, width=w, label='Candidate')
left.set_xticks(list(x), metrics)
left.set_ylim(0, 1)
left.legend()
left.set_title('Metric comparison')

# Right panel: the two weighted KPIs, with their difference in the title.
right.bar(
    ['Baseline KPI', 'Candidate KPI'],
    [b, c],
    color=['#4C78A8', '#F58518'],
)
right.set_title(f'KPI delta = {c - b:.3f}')

plt.tight_layout()
plt.show()