# Source code for linreg_ally.plotting

import altair as alt
import pandas as pd
import numpy as np
import scipy.stats as stats

def qq_and_residuals_plot(y_actual, y_predicted, concatenate=True):
    """
    Build regression diagnostic plots with Altair.

    Two charts are produced: a Q-Q plot (standardized residuals vs.
    theoretical normal quantiles) and a Residuals vs. Fitted Values plot.

    Parameters
    ----------
    y_actual : array-like
        Actual observed values. Must be numeric and contain the same number
        of elements as `y_predicted`. Multi-dimensional input (for example a
        column vector of shape ``(n, 1)``) is flattened to 1-D.
    y_predicted : array-like
        Predicted (fitted) values from the regression model. Must be numeric
        and contain the same number of elements as `y_actual`.
        Multi-dimensional input is flattened to 1-D.
    concatenate : bool, optional (default=True)
        If True, the two plots are placed side by side in a single
        horizontally concatenated chart with independent y scales.
        If False, the two plots are returned separately.

    Returns
    -------
    Altair chart or tuple of Altair charts
        - If `concatenate=True`, a single concatenated chart holding both
          plots.
        - If `concatenate=False`, a tuple ``(qq_plot, residuals_plot)``.

    Raises
    ------
    ValueError
        If either input is empty, the inputs have different numbers of
        elements, fewer than two data points are given, the residuals
        contain NaN or infinite values, or the residuals have zero variance
        (so they cannot be standardized).
    TypeError
        If `y_actual` or `y_predicted` are not numeric.

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> y_actual = np.random.normal(10, 2, 100)
    >>> y_predicted = y_actual + np.random.normal(0, 1, 100)
    >>> chart = qq_and_residuals_plot(y_actual, y_predicted, concatenate=True)
    >>> chart.display()
    """
    # Flatten to 1-D so that a column vector (n, 1) paired with a flat
    # vector (n,) cannot broadcast into an (n, n) residual matrix.
    y_actual = np.asarray(y_actual).ravel()
    y_predicted = np.asarray(y_predicted).ravel()

    # Validate inputs
    if y_actual.size == 0 or y_predicted.size == 0:
        raise ValueError("Inputs must not be empty")
    if y_actual.size != y_predicted.size:
        raise ValueError("Inputs must have the same length")
    if y_actual.size < 2:
        raise ValueError("Insufficient data points for plotting")
    if not (
        np.issubdtype(y_actual.dtype, np.number)
        and np.issubdtype(y_predicted.dtype, np.number)
    ):
        raise TypeError("Inputs must be numeric")

    # Calculate residuals
    residuals = y_actual - y_predicted

    # A single NaN/inf would turn every standardized residual into NaN and
    # silently produce an empty Q-Q plot, so reject it explicitly.
    if not np.all(np.isfinite(residuals)):
        raise ValueError("Inputs must not contain NaN or infinite values")

    # Population standard deviation (NumPy default, ddof=0). Identical
    # residuals give exactly zero here, which would be a division by zero.
    residual_std = np.std(residuals)
    if residual_std == 0:
        raise ValueError(
            "Residuals have zero variance; cannot compute standardized residuals"
        )
    standardized_residuals = (residuals - np.mean(residuals)) / residual_std

    # Create Q-Q plot data frame; fit=False returns only the
    # (theoretical quantiles, ordered sample values) pair.
    qq_theoretical, qq_sample = stats.probplot(
        standardized_residuals, dist="norm", fit=False
    )
    qq_df = pd.DataFrame({
        'Theoretical Quantiles': qq_theoretical,
        'Standardized Residuals': qq_sample,
    })

    # Create reference line for Q-Q plot (45-degree line)
    qq_reference_df = pd.DataFrame({
        'Theoretical Quantiles': [-3, 3],  # Typical range for normal quantiles
        'Standardized Residuals': [-3, 3],
    })
    qq_reference_line = alt.Chart(qq_reference_df).mark_line(
        color='red', strokeDash=[5, 5]
    ).encode(
        x='Theoretical Quantiles',
        y='Standardized Residuals',
    )

    # Q-Q Plot: scatter layer with the reference line layered on top
    qq_plot = alt.Chart(qq_df).mark_circle(size=60).encode(
        x=alt.X('Theoretical Quantiles', title='Theoretical Quantiles'),
        y=alt.Y('Standardized Residuals', title='Standardized Residuals'),
    ).properties(title="Q-Q Plot", width=300, height=300) + qq_reference_line

    # Residuals vs Fitted Values data frame
    residuals_df = pd.DataFrame({
        'Fitted Values': y_predicted,
        'Residuals': residuals,
    })

    # Create horizontal reference line at y = 0 for Residuals vs. Fitted
    # Values; it is padded by 2 units beyond the fitted range on each side.
    residuals_reference_df = pd.DataFrame({
        'Fitted Values': [y_predicted.min() - 2, y_predicted.max() + 2],
        'Residuals': [0, 0],
    })
    residuals_reference_line = alt.Chart(residuals_reference_df).mark_line(
        color='red', strokeDash=[5, 5]
    ).encode(
        x='Fitted Values',
        y='Residuals',
    )

    # Residuals vs Fitted Values Plot
    residuals_plot = alt.Chart(residuals_df).mark_circle(size=60).encode(
        x=alt.X('Fitted Values', title='Fitted Values'),
        y=alt.Y('Residuals', title='Residuals'),
    ).properties(
        title="Residuals vs. Fitted Values", width=300, height=300
    ) + residuals_reference_line

    # Combine or return separately
    if concatenate:
        # The two plots have unrelated y units, so keep their y scales apart.
        return alt.hconcat(qq_plot, residuals_plot).resolve_scale(y='independent')
    return qq_plot, residuals_plot