用Python从零构建Kriging模型代码驱动的空间预测数学解析在数据分析与地质统计领域Kriging模型以其优雅的数学结构和精准的空间预测能力成为处理空间相关性问题的重要工具。传统学习路径往往要求先掌握复杂的统计学理论让许多实践者望而生畏。本文将打破这一常规——我们不再从数学公式开始而是通过Python代码逆向拆解Kriging的核心原理让协方差矩阵、最大似然估计等抽象概念在Jupyter Notebook中变得可视、可调、可感知。1. 环境准备与数据建模1.1 基础工具链配置确保已安装以下Python科学计算套件推荐使用Anaconda环境# 核心依赖 import numpy as np import scipy.optimize as opt import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from scipy.spatial.distance import cdist1.2 构建实验数据集我们创建具有空间自相关性的模拟数据作为演示案例def generate_spatial_data(n50): np.random.seed(42) X np.random.uniform(0, 10, (n, 2)) # 真实曲面包含空间相关性 z (np.sin(X[:,0]/2) np.cos(X[:,1]/3) 0.3*np.random.normal(0, 1, n)) return X, z X_train, y_train generate_spatial_data()可视化训练数据分布fig plt.figure(figsize(10,6)) ax fig.add_subplot(111, projection3d) ax.scatter(X_train[:,0], X_train[:,1], y_train, cy_train, cmapviridis) ax.set_xlabel(X1); ax.set_ylabel(X2); ax.set_zlabel(Y) plt.title(空间训练数据分布) plt.show()2. Kriging核心组件实现2.1 相关函数建模Kriging的核心在于量化空间相关性。我们实现高斯相关函数def correlation_function(X1, X2, theta0.5, p2): 高斯相关函数实现 Args: X1: 点集矩阵 (n1 x d) X2: 点集矩阵 (n2 x d) theta: 范围参数 p: 幂次参数 Returns: R: 相关矩阵 (n1 x n2) dists cdist(X1, X2, minkowski, pp) return np.exp(-theta * dists**p)参数θ对相关性的影响可通过以下代码直观展示x np.linspace(0, 10, 100) X1 x.reshape(-1,1) theta_values [0.1, 0.5, 1.0] plt.figure(figsize(10,5)) for theta in theta_values: R correlation_function(X1, np.array([[5.0]]), theta) plt.plot(x, R, labelfθ{theta}) plt.legend(); plt.xlabel(距离); plt.ylabel(相关性) plt.title(θ参数对空间相关性的影响) plt.show()2.2 协方差矩阵构建基于相关函数构建完整的协方差系统def build_covariance_matrix(X, sigma21.0, theta0.5): 构建Kriging协方差矩阵 Args: X: 输入点集 (n x d) sigma2: 过程方差 theta: 相关参数 Returns: R: 协方差矩阵 (n x n) R correlation_function(X, X, theta) return sigma2 * R3. 参数估计与模型训练3.1 最大似然估计实现通过优化似然函数估计最优参数def neg_log_likelihood(params, X, y): 负对数似然函数 Args: params: [theta, sigma2, mu] X: 训练点集 y: 观测值 Returns: -log_likelihood值 theta, sigma2 params[0], params[1] n len(y) R build_covariance_matrix(X, sigma2, theta) try: L np.linalg.cholesky(R) except np.linalg.LinAlgError: return np.inf # 均值估计 ones np.ones(n) mu (ones.T np.linalg.solve(L.T, np.linalg.solve(L, y))) / \ (ones.T np.linalg.solve(L.T, np.linalg.solve(L, ones))) # 对数似然计算 y_centered y - mu log_det 2 * np.sum(np.log(np.diag(L))) likelihood (log_det y_centered.T np.linalg.solve(L.T, np.linalg.solve(L, y_centered))) / 2 return likelihood # 参数优化 initial_guess [0.5, 1.0] bounds [(1e-3, 10), (1e-3, None)] result opt.minimize(neg_log_likelihood, initial_guess, args(X_train, y_train), boundsbounds) theta_opt, sigma2_opt result.x3.2 参数优化可视化绘制似然函数曲面观察优化过程theta_grid np.linspace(0.1, 2, 50) sigma2_grid np.linspace(0.5, 3, 50) likelihoods np.zeros((len(theta_grid), len(sigma2_grid))) for i, theta in enumerate(theta_grid): for j, sigma2 in enumerate(sigma2_grid): likelihoods[i,j] neg_log_likelihood([theta, sigma2], X_train, y_train) plt.figure(figsize(10,6)) plt.contourf(theta_grid, sigma2_grid, likelihoods.T, levels20, cmapviridis) plt.colorbar(label负对数似然值) plt.scatter(theta_opt, sigma2_opt, cred, s100, label最优解) plt.xlabel(θ); plt.ylabel(σ²); plt.legend() plt.title(参数空间似然函数曲面) plt.show()4. 预测实现与结果分析4.1 Kriging预测器实现class KrigingPredictor: def __init__(self, theta0.5, sigma21.0, p2): self.theta theta self.sigma2 sigma2 self.p p def fit(self, X, y): self.X X self.y y self.n len(y) # 构建协方差矩阵 self.R build_covariance_matrix(X, self.sigma2, self.theta) self.L np.linalg.cholesky(self.R) # 估计全局均值 ones np.ones(self.n) self.mu (ones.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, y))) / \ (ones.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, ones))) def predict(self, X_new): # 计算新旧点间相关性 r correlation_function(self.X, X_new, self.theta, self.p) # 计算权重 weights np.linalg.solve(self.L.T, np.linalg.solve(self.L, r)) # 预测值 y_pred self.mu weights.T (self.y - self.mu) # 预测方差 sigma2_pred self.sigma2 * (1 - np.diag(r.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, r)))) return y_pred, sigma2_pred4.2 空间预测可视化在测试网格上进行预测# 创建预测网格 xx, yy np.meshgrid(np.linspace(0, 10, 30), np.linspace(0, 10, 30)) X_grid np.column_stack([xx.ravel(), yy.ravel()]) # 训练预测器 kriging KrigingPredictor(thetatheta_opt, sigma2sigma2_opt) kriging.fit(X_train, y_train) # 网格预测 y_pred, sigma2_pred kriging.predict(X_grid) y_pred y_pred.reshape(xx.shape) sigma2_pred sigma2_pred.reshape(xx.shape) # 可视化预测结果 fig plt.figure(figsize(15,5)) # 预测均值 ax1 fig.add_subplot(131, projection3d) ax1.plot_surface(xx, yy, y_pred, cmapviridis, alpha0.8) ax1.scatter(X_train[:,0], X_train[:,1], y_train, cred, s50) ax1.set_title(Kriging预测曲面) # 预测方差 ax2 fig.add_subplot(132, projection3d) ax2.plot_surface(xx, yy, sigma2_pred, cmapplasma) ax2.set_title(预测方差曲面) # 二维等高线 ax3 fig.add_subplot(133) contour ax3.contourf(xx, yy, y_pred, levels20, cmapviridis) plt.colorbar(contour) ax3.scatter(X_train[:,0], X_train[:,1], cred, s50) ax3.set_title(预测等高线与采样点) plt.tight_layout() plt.show()4.3 模型诊断与验证计算留一法交叉验证误差def loocv_error(X, y, theta, sigma2): n len(y) errors np.zeros(n) for i in range(n): # 留一数据集 X_loo np.delete(X, i, axis0) y_loo np.delete(y, i) # 训练临时模型 kriging KrigingPredictor(thetatheta, sigma2sigma2) kriging.fit(X_loo, y_loo) # 预测被剔除的点 y_pred, _ kriging.predict(X[i:i1]) errors[i] (y_pred - y[i])**2 return np.mean(errors) cv_error loocv_error(X_train, y_train, theta_opt, sigma2_opt) print(f留一法交叉验证MSE: {cv_error:.4f})通过参数敏感性分析理解模型行为theta_range np.linspace(0.1, 2, 20) errors [loocv_error(X_train, y_train, theta, sigma2_opt) for theta in theta_range] plt.figure(figsize(8,5)) plt.plot(theta_range, errors, o-) plt.axvline(theta_opt, colorred, linestyle--, labelf最优θ{theta_opt:.2f}) plt.xlabel(θ值); plt.ylabel(LOOCV误差); plt.legend() plt.title(θ参数与模型误差关系) plt.show()
别再死记硬背公式了!用Python从零推导Kriging模型,搞懂空间预测的数学之美
发布时间:2026/5/28 20:01:06
用Python从零构建Kriging模型代码驱动的空间预测数学解析在数据分析与地质统计领域Kriging模型以其优雅的数学结构和精准的空间预测能力成为处理空间相关性问题的重要工具。传统学习路径往往要求先掌握复杂的统计学理论让许多实践者望而生畏。本文将打破这一常规——我们不再从数学公式开始而是通过Python代码逆向拆解Kriging的核心原理让协方差矩阵、最大似然估计等抽象概念在Jupyter Notebook中变得可视、可调、可感知。1. 环境准备与数据建模1.1 基础工具链配置确保已安装以下Python科学计算套件推荐使用Anaconda环境# 核心依赖 import numpy as np import scipy.optimize as opt import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D from scipy.spatial.distance import cdist1.2 构建实验数据集我们创建具有空间自相关性的模拟数据作为演示案例def generate_spatial_data(n50): np.random.seed(42) X np.random.uniform(0, 10, (n, 2)) # 真实曲面包含空间相关性 z (np.sin(X[:,0]/2) np.cos(X[:,1]/3) 0.3*np.random.normal(0, 1, n)) return X, z X_train, y_train generate_spatial_data()可视化训练数据分布fig plt.figure(figsize(10,6)) ax fig.add_subplot(111, projection3d) ax.scatter(X_train[:,0], X_train[:,1], y_train, cy_train, cmapviridis) ax.set_xlabel(X1); ax.set_ylabel(X2); ax.set_zlabel(Y) plt.title(空间训练数据分布) plt.show()2. Kriging核心组件实现2.1 相关函数建模Kriging的核心在于量化空间相关性。我们实现高斯相关函数def correlation_function(X1, X2, theta0.5, p2): 高斯相关函数实现 Args: X1: 点集矩阵 (n1 x d) X2: 点集矩阵 (n2 x d) theta: 范围参数 p: 幂次参数 Returns: R: 相关矩阵 (n1 x n2) dists cdist(X1, X2, minkowski, pp) return np.exp(-theta * dists**p)参数θ对相关性的影响可通过以下代码直观展示x np.linspace(0, 10, 100) X1 x.reshape(-1,1) theta_values [0.1, 0.5, 1.0] plt.figure(figsize(10,5)) for theta in theta_values: R correlation_function(X1, np.array([[5.0]]), theta) plt.plot(x, R, labelfθ{theta}) plt.legend(); plt.xlabel(距离); plt.ylabel(相关性) plt.title(θ参数对空间相关性的影响) plt.show()2.2 协方差矩阵构建基于相关函数构建完整的协方差系统def build_covariance_matrix(X, sigma21.0, theta0.5): 构建Kriging协方差矩阵 Args: X: 输入点集 (n x d) sigma2: 过程方差 theta: 相关参数 Returns: R: 协方差矩阵 (n x n) R correlation_function(X, X, theta) return sigma2 * R3. 参数估计与模型训练3.1 最大似然估计实现通过优化似然函数估计最优参数def neg_log_likelihood(params, X, y): 负对数似然函数 Args: params: [theta, sigma2, mu] X: 训练点集 y: 观测值 Returns: -log_likelihood值 theta, sigma2 params[0], params[1] n len(y) R build_covariance_matrix(X, sigma2, theta) try: L np.linalg.cholesky(R) except np.linalg.LinAlgError: return np.inf # 均值估计 ones np.ones(n) mu (ones.T np.linalg.solve(L.T, np.linalg.solve(L, y))) / \ (ones.T np.linalg.solve(L.T, np.linalg.solve(L, ones))) # 对数似然计算 y_centered y - mu log_det 2 * np.sum(np.log(np.diag(L))) likelihood (log_det y_centered.T np.linalg.solve(L.T, np.linalg.solve(L, y_centered))) / 2 return likelihood # 参数优化 initial_guess [0.5, 1.0] bounds [(1e-3, 10), (1e-3, None)] result opt.minimize(neg_log_likelihood, initial_guess, args(X_train, y_train), boundsbounds) theta_opt, sigma2_opt result.x3.2 参数优化可视化绘制似然函数曲面观察优化过程theta_grid np.linspace(0.1, 2, 50) sigma2_grid np.linspace(0.5, 3, 50) likelihoods np.zeros((len(theta_grid), len(sigma2_grid))) for i, theta in enumerate(theta_grid): for j, sigma2 in enumerate(sigma2_grid): likelihoods[i,j] neg_log_likelihood([theta, sigma2], X_train, y_train) plt.figure(figsize(10,6)) plt.contourf(theta_grid, sigma2_grid, likelihoods.T, levels20, cmapviridis) plt.colorbar(label负对数似然值) plt.scatter(theta_opt, sigma2_opt, cred, s100, label最优解) plt.xlabel(θ); plt.ylabel(σ²); plt.legend() plt.title(参数空间似然函数曲面) plt.show()4. 预测实现与结果分析4.1 Kriging预测器实现class KrigingPredictor: def __init__(self, theta0.5, sigma21.0, p2): self.theta theta self.sigma2 sigma2 self.p p def fit(self, X, y): self.X X self.y y self.n len(y) # 构建协方差矩阵 self.R build_covariance_matrix(X, self.sigma2, self.theta) self.L np.linalg.cholesky(self.R) # 估计全局均值 ones np.ones(self.n) self.mu (ones.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, y))) / \ (ones.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, ones))) def predict(self, X_new): # 计算新旧点间相关性 r correlation_function(self.X, X_new, self.theta, self.p) # 计算权重 weights np.linalg.solve(self.L.T, np.linalg.solve(self.L, r)) # 预测值 y_pred self.mu weights.T (self.y - self.mu) # 预测方差 sigma2_pred self.sigma2 * (1 - np.diag(r.T np.linalg.solve(self.L.T, np.linalg.solve(self.L, r)))) return y_pred, sigma2_pred4.2 空间预测可视化在测试网格上进行预测# 创建预测网格 xx, yy np.meshgrid(np.linspace(0, 10, 30), np.linspace(0, 10, 30)) X_grid np.column_stack([xx.ravel(), yy.ravel()]) # 训练预测器 kriging KrigingPredictor(thetatheta_opt, sigma2sigma2_opt) kriging.fit(X_train, y_train) # 网格预测 y_pred, sigma2_pred kriging.predict(X_grid) y_pred y_pred.reshape(xx.shape) sigma2_pred sigma2_pred.reshape(xx.shape) # 可视化预测结果 fig plt.figure(figsize(15,5)) # 预测均值 ax1 fig.add_subplot(131, projection3d) ax1.plot_surface(xx, yy, y_pred, cmapviridis, alpha0.8) ax1.scatter(X_train[:,0], X_train[:,1], y_train, cred, s50) ax1.set_title(Kriging预测曲面) # 预测方差 ax2 fig.add_subplot(132, projection3d) ax2.plot_surface(xx, yy, sigma2_pred, cmapplasma) ax2.set_title(预测方差曲面) # 二维等高线 ax3 fig.add_subplot(133) contour ax3.contourf(xx, yy, y_pred, levels20, cmapviridis) plt.colorbar(contour) ax3.scatter(X_train[:,0], X_train[:,1], cred, s50) ax3.set_title(预测等高线与采样点) plt.tight_layout() plt.show()4.3 模型诊断与验证计算留一法交叉验证误差def loocv_error(X, y, theta, sigma2): n len(y) errors np.zeros(n) for i in range(n): # 留一数据集 X_loo np.delete(X, i, axis0) y_loo np.delete(y, i) # 训练临时模型 kriging KrigingPredictor(thetatheta, sigma2sigma2) kriging.fit(X_loo, y_loo) # 预测被剔除的点 y_pred, _ kriging.predict(X[i:i1]) errors[i] (y_pred - y[i])**2 return np.mean(errors) cv_error loocv_error(X_train, y_train, theta_opt, sigma2_opt) print(f留一法交叉验证MSE: {cv_error:.4f})通过参数敏感性分析理解模型行为theta_range np.linspace(0.1, 2, 20) errors [loocv_error(X_train, y_train, theta, sigma2_opt) for theta in theta_range] plt.figure(figsize(8,5)) plt.plot(theta_range, errors, o-) plt.axvline(theta_opt, colorred, linestyle--, labelf最优θ{theta_opt:.2f}) plt.xlabel(θ值); plt.ylabel(LOOCV误差); plt.legend() plt.title(θ参数与模型误差关系) plt.show()