Lab 07 - Regression
Goal: fit a simple linear model for resolution-time planning.
Info: Forecasting
Predict numeric outcomes (resolution hours, queue depth) from features. Enables proactive staffing and SLA planning.
Info: Residuals reveal bias
Consistently positive residuals for high-severity tickets suggest that the model underestimates their complexity. Investigate and recalibrate.
In [ ]:
Copied!
# Observed data, paired by position: the i-th token count goes with the
# i-th resolution time (in hours).
ticket_tokens = [
    40, 55, 70, 85, 100, 120, 140,
]
resolution_hours = [
    1.2, 1.6, 2.1, 2.5, 3.0, 3.6, 4.1,
]
def mean(xs):
    """Return the arithmetic mean of the numeric sequence ``xs``.

    Raises:
        ZeroDivisionError: if ``xs`` is empty.
    """
    return sum(xs) / len(xs)


def fit_line(xs, ys):
    """Fit ``y_hat = b0 + b1 * x`` by ordinary least squares.

    Args:
        xs: Sequence of predictor values.
        ys: Sequence of observed responses, paired with ``xs`` by position.

    Returns:
        A ``(b0, b1)`` tuple: the intercept and the slope.

    Raises:
        ValueError: if ``xs`` and ``ys`` have different lengths.
        ZeroDivisionError: if the inputs are empty, or if every value in
            ``xs`` is the same (the slope is then undefined).
    """
    # zip() would silently drop the unpaired tail while the means below
    # still use the full sequences, yielding a wrong fit without any error.
    if len(xs) != len(ys):
        raise ValueError(
            "fit_line: xs and ys must have the same length "
            f"(got {len(xs)} and {len(ys)})"
        )

    x_bar = mean(xs)
    y_bar = mean(ys)

    # Slope = sum of co-deviations / sum of squared x deviations.
    num = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
    den = sum((x - x_bar) ** 2 for x in xs)
    if den == 0:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError(
            "fit_line: all x values are identical; slope is undefined"
        )

    b1 = num / den
    # The fitted line always passes through (x_bar, y_bar).
    b0 = y_bar - b1 * x_bar
    return b0, b1
# Fit the line on the observed data; b0 is the intercept, b1 the slope.
b0, b1 = fit_line(xs=ticket_tokens, ys=resolution_hours)
# Report the fitted equation with coefficients rounded to 4 decimals.
print(
    "model: y_hat =",
    round(b0, 4),
    "+",
    round(b1, 4),
    "* x",
)
# Observed data, paired by position: the i-th token count goes with the
# i-th resolution time (in hours).
ticket_tokens = [
    40, 55, 70, 85, 100, 120, 140,
]
resolution_hours = [
    1.2, 1.6, 2.1, 2.5, 3.0, 3.6, 4.1,
]
def mean(xs):
    """Return the arithmetic mean of the numeric sequence ``xs``.

    Raises:
        ZeroDivisionError: if ``xs`` is empty.
    """
    return sum(xs) / len(xs)


def fit_line(xs, ys):
    """Fit ``y_hat = b0 + b1 * x`` by ordinary least squares.

    Args:
        xs: Sequence of predictor values.
        ys: Sequence of observed responses, paired with ``xs`` by position.

    Returns:
        A ``(b0, b1)`` tuple: the intercept and the slope.

    Raises:
        ValueError: if ``xs`` and ``ys`` have different lengths.
        ZeroDivisionError: if the inputs are empty, or if every value in
            ``xs`` is the same (the slope is then undefined).
    """
    # zip() would silently drop the unpaired tail while the means below
    # still use the full sequences, yielding a wrong fit without any error.
    if len(xs) != len(ys):
        raise ValueError(
            "fit_line: xs and ys must have the same length "
            f"(got {len(xs)} and {len(ys)})"
        )

    x_bar = mean(xs)
    y_bar = mean(ys)

    # Slope = sum of co-deviations / sum of squared x deviations.
    num = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
    den = sum((x - x_bar) ** 2 for x in xs)
    if den == 0:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError(
            "fit_line: all x values are identical; slope is undefined"
        )

    b1 = num / den
    # The fitted line always passes through (x_bar, y_bar).
    b0 = y_bar - b1 * x_bar
    return b0, b1
# Fit the line on the observed data; b0 is the intercept, b1 the slope.
b0, b1 = fit_line(xs=ticket_tokens, ys=resolution_hours)
# Report the fitted equation with coefficients rounded to 4 decimals.
print(
    "model: y_hat =",
    round(b0, 4),
    "+",
    round(b1, 4),
    "* x",
)
In [ ]:
Copied!
import math
def predict(x, b0, b1):
    """Evaluate the fitted line at ``x``.

    Args:
        x: Predictor value.
        b0: Intercept of the line.
        b1: Slope of the line.

    Returns:
        ``b0 + b1 * x``.
    """
    slope_term = b1 * x
    return b0 + slope_term
# In-sample predictions: one fitted value per observed ticket.
preds = [predict(tokens, b0, b1) for tokens in ticket_tokens]
# Residual = observed - predicted; positive means the model under-predicted.
residuals = [
    observed - fitted
    for observed, fitted in zip(resolution_hours, preds)
]
# Mean absolute error over the residuals.
mae = sum(map(abs, residuals)) / len(residuals)
# Root mean squared error over the residuals.
rmse = math.sqrt(sum(r * r for r in residuals) / len(residuals))
print("MAE:", round(mae, 4))
print("RMSE:", round(rmse, 4))
# Point forecast for a ticket with 130 tokens.
print("Predicted hours for 130 tokens:", round(predict(130, b0, b1), 3))
import math
def predict(x, b0, b1):
    """Evaluate the fitted line at ``x``.

    Args:
        x: Predictor value.
        b0: Intercept of the line.
        b1: Slope of the line.

    Returns:
        ``b0 + b1 * x``.
    """
    slope_term = b1 * x
    return b0 + slope_term
# In-sample predictions: one fitted value per observed ticket.
preds = [predict(tokens, b0, b1) for tokens in ticket_tokens]
# Residual = observed - predicted; positive means the model under-predicted.
residuals = [
    observed - fitted
    for observed, fitted in zip(resolution_hours, preds)
]
# Mean absolute error over the residuals.
mae = sum(map(abs, residuals)) / len(residuals)
# Root mean squared error over the residuals.
rmse = math.sqrt(sum(r * r for r in residuals) / len(residuals))
print("MAE:", round(mae, 4))
print("RMSE:", round(rmse, 4))
# Point forecast for a ticket with 130 tokens.
print("Predicted hours for 130 tokens:", round(predict(130, b0, b1), 3))
Visualization: regression line and residuals
Dots are observed values. The red line is the fitted model. Vertical gaps are residual errors.
In [ ]:
Copied!
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Data to plot, paired by position: x is the token count and y the
# resolution hours (these become the axis labels of the figure).
x = [
    40, 55, 70, 85, 100, 120, 140,
]
y = [
    1.2, 1.6, 2.1, 2.5, 3.0, 3.6, 4.1,
]
def mean(xs):
    """Return the arithmetic mean of the numeric sequence ``xs``.

    Raises:
        ZeroDivisionError: if ``xs`` is empty.
    """
    return sum(xs) / len(xs)


def fit_line(xs, ys):
    """Fit ``y_hat = b0 + b1 * x`` by ordinary least squares.

    Args:
        xs: Sequence of predictor values.
        ys: Sequence of observed responses, paired with ``xs`` by position.

    Returns:
        A ``(b0, b1)`` tuple: the intercept and the slope.

    Raises:
        ValueError: if ``xs`` and ``ys`` have different lengths.
        ZeroDivisionError: if the inputs are empty, or if every value in
            ``xs`` is the same (the slope is then undefined).
    """
    # zip() would silently drop the unpaired tail while the means below
    # still use the full sequences, yielding a wrong fit without any error.
    if len(xs) != len(ys):
        raise ValueError(
            "fit_line: xs and ys must have the same length "
            f"(got {len(xs)} and {len(ys)})"
        )

    x_bar = mean(xs)
    y_bar = mean(ys)

    # Slope = sum of co-deviations / sum of squared x deviations.
    num = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
    den = sum((x - x_bar) ** 2 for x in xs)
    if den == 0:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError(
            "fit_line: all x values are identical; slope is undefined"
        )

    b1 = num / den
    # The fitted line always passes through (x_bar, y_bar).
    b0 = y_bar - b1 * x_bar
    return b0, b1
# Fit the line on the plotted data and evaluate it at every observed x.
b0, b1 = fit_line(xs=x, ys=y)
yhat = [b0 + b1 * x_val for x_val in x]

plt.figure(figsize=(8, 4))

# Observed points and the fitted line.
plt.scatter(x, y, label='Observed')
plt.plot(x, yhat, color='crimson', label='Fitted line')

# One faint vertical segment per point, from the observed value to the
# fitted value: its length is the size of that point's residual.
for xi, yi, yp in zip(x, y, yhat):
    plt.plot(
        [xi, xi],
        [yi, yp],
        color='gray',
        alpha=0.4,
    )

plt.title('Regression fit with residuals')
plt.xlabel('Token count')
plt.ylabel('Resolution hours')
plt.legend()
plt.tight_layout()
plt.show()
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Data to plot, paired by position: x is the token count and y the
# resolution hours (these become the axis labels of the figure).
x = [
    40, 55, 70, 85, 100, 120, 140,
]
y = [
    1.2, 1.6, 2.1, 2.5, 3.0, 3.6, 4.1,
]
def mean(xs):
    """Return the arithmetic mean of the numeric sequence ``xs``.

    Raises:
        ZeroDivisionError: if ``xs`` is empty.
    """
    return sum(xs) / len(xs)


def fit_line(xs, ys):
    """Fit ``y_hat = b0 + b1 * x`` by ordinary least squares.

    Args:
        xs: Sequence of predictor values.
        ys: Sequence of observed responses, paired with ``xs`` by position.

    Returns:
        A ``(b0, b1)`` tuple: the intercept and the slope.

    Raises:
        ValueError: if ``xs`` and ``ys`` have different lengths.
        ZeroDivisionError: if the inputs are empty, or if every value in
            ``xs`` is the same (the slope is then undefined).
    """
    # zip() would silently drop the unpaired tail while the means below
    # still use the full sequences, yielding a wrong fit without any error.
    if len(xs) != len(ys):
        raise ValueError(
            "fit_line: xs and ys must have the same length "
            f"(got {len(xs)} and {len(ys)})"
        )

    x_bar = mean(xs)
    y_bar = mean(ys)

    # Slope = sum of co-deviations / sum of squared x deviations.
    num = sum((x - x_bar) * (y - y_bar) for x, y in zip(xs, ys))
    den = sum((x - x_bar) ** 2 for x in xs)
    if den == 0:
        # Same exception type the bare division raised, with a usable message.
        raise ZeroDivisionError(
            "fit_line: all x values are identical; slope is undefined"
        )

    b1 = num / den
    # The fitted line always passes through (x_bar, y_bar).
    b0 = y_bar - b1 * x_bar
    return b0, b1
# Fit the line on the plotted data and evaluate it at every observed x.
b0, b1 = fit_line(xs=x, ys=y)
yhat = [b0 + b1 * x_val for x_val in x]

plt.figure(figsize=(8, 4))

# Observed points and the fitted line.
plt.scatter(x, y, label='Observed')
plt.plot(x, yhat, color='crimson', label='Fitted line')

# One faint vertical segment per point, from the observed value to the
# fitted value: its length is the size of that point's residual.
for xi, yi, yp in zip(x, y, yhat):
    plt.plot(
        [xi, xi],
        [yi, yp],
        color='gray',
        alpha=0.4,
    )

plt.title('Regression fit with residuals')
plt.xlabel('Token count')
plt.ylabel('Resolution hours')
plt.legend()
plt.tight_layout()
plt.show()