航空客流预测实战LSTM与Transformer融合建模全流程解析时序预测一直是数据分析领域的核心挑战之一尤其是像航空乘客数据这样具有明显季节性、趋势性和随机波动性的复杂序列。传统的统计方法如ARIMA往往难以捕捉非线性关系而单一的深度学习模型又可能无法兼顾局部细节和全局依赖。本文将带你用PyTorch实现一个LSTM与Transformer的混合模型从数据预处理到模型部署手把手完成未来7天客流预测项目。1. 项目背景与数据准备航空乘客数据集AirPassengers是时间序列分析中的经典案例记录了1949年至1960年每月国际航线乘客数量。这个数据集完美展现了三种关键时序特征趋势性战后航空业快速发展乘客数量呈明显上升趋势季节性每年暑期6-8月出现客流高峰冬季相对低谷随机波动特殊事件如节假日、天气导致的短期波动import pandas as pd import matplotlib.pyplot as plt # 加载数据 data pd.read_csv(AirPassengers.csv, parse_dates[Month], index_colMonth) print(f数据时间跨度{data.index.min()} 至 {data.index.max()}) print(f总样本数{len(data)}) # 可视化原始序列 plt.figure(figsize(12,6)) plt.plot(data, labelMonthly Passengers) plt.title(AirPassengers Dataset (1949-1960)) plt.xlabel(Year) plt.ylabel(Passengers (thousands)) plt.grid(True) plt.show()注意原始数据通常需要检查缺失值但本例作为经典数据集已保证完整性。实际项目中建议先用data.isnull().sum()检查2. 数据预处理关键技术2.1 滑动窗口构造时序预测的核心是将连续序列转化为监督学习问题。我们采用滑动窗口方法构建样本输入窗口input_window过去30个月的数据输出窗口output_window未来7个月的预测import numpy as np def create_sliding_windows(data, input_size, output_size): X, y [], [] for i in range(len(data)-input_size-output_size1): X.append(data[i:iinput_size]) y.append(data[iinput_size:iinput_sizeoutput_size]) return np.array(X), np.array(y) # 数据标准化 from sklearn.preprocessing import MinMaxScaler scaler MinMaxScaler(feature_range(0, 1)) scaled_data scaler.fit_transform(data.values.reshape(-1,1)) # 创建窗口 X, y create_sliding_windows(scaled_data, input_size30, output_size7) print(f样本数量{X.shape[0]} | 输入维度{X.shape[1]} | 输出维度{y.shape[1]})2.2 数据集划分与加载不同于随机划分时序数据必须保持时间先后顺序数据集时间范围样本占比用途训练集1949-195880%模型训练验证集195910%超参数调优测试集196010%最终评估from torch.utils.data import TensorDataset, DataLoader import torch # 转换为PyTorch张量 X_tensor torch.FloatTensor(X).unsqueeze(-1) # 添加特征维度 y_tensor torch.FloatTensor(y) # 按时间顺序划分 split_idx [int(0.8*len(X)), int(0.9*len(X))] train_data TensorDataset(X_tensor[:split_idx[0]], y_tensor[:split_idx[0]]) val_data TensorDataset(X_tensor[split_idx[0]:split_idx[1]], y_tensor[split_idx[0]:split_idx[1]]) test_data 
TensorDataset(X_tensor[split_idx[1]:], y_tensor[split_idx[1]:]) # 创建DataLoader batch_size 16 train_loader DataLoader(train_data, batch_sizebatch_size, shuffleTrue) val_loader DataLoader(val_data, batch_sizebatch_size) test_loader DataLoader(test_data, batch_sizebatch_size)3. 混合模型架构设计3.1 LSTM分支捕捉局部时序模式LSTM通过门控机制解决长期依赖问题其核心单元结构如下import torch.nn as nn class LSTMBranch(nn.Module): def __init__(self, input_dim, hidden_dim, num_layers1): super().__init__() self.lstm nn.LSTM(input_dim, hidden_dim, num_layers, batch_firstTrue) def forward(self, x): # x形状: (batch, seq_len, input_dim) out, (h_n, c_n) self.lstm(x) return h_n[-1] # 取最后一层的最终隐藏状态关键参数说明input_dim输入特征维度本例为1hidden_dimLSTM隐藏单元数建议64-256num_layers堆叠层数深层LSTM可能引发梯度问题3.2 Transformer分支建模全局依赖Transformer通过自注意力机制捕获任意距离的依赖关系class TransformerBranch(nn.Module): def __init__(self, input_dim, model_dim, num_heads, num_layers): super().__init__() self.input_proj nn.Linear(input_dim, model_dim) self.pos_encoder PositionalEncoding(model_dim) encoder_layer nn.TransformerEncoderLayer(d_modelmodel_dim, nheadnum_heads) self.transformer nn.TransformerEncoder(encoder_layer, num_layers) def forward(self, x): # 输入投影 x self.input_proj(x) # (batch, seq_len, model_dim) # 位置编码 x self.pos_encoder(x) # Transformer处理 (需要调整维度顺序) x x.transpose(0,1) # (seq_len, batch, model_dim) out self.transformer(x) # 全局平均池化 return out.mean(dim0) # (batch, model_dim)提示位置编码(PositionalEncoding)实现参考原始Transformer论文代码较长此处省略3.3 模型融合与完整架构将两个分支的特征拼接后通过全连接层输出预测class HybridModel(nn.Module): def __init__(self, input_window, output_window, lstm_hidden64, trans_dim64, trans_heads4, trans_layers2): super().__init__() self.lstm_branch LSTMBranch(1, lstm_hidden) self.trans_branch TransformerBranch(1, trans_dim, trans_heads, trans_layers) self.fc nn.Linear(lstm_hidden trans_dim, output_window) def forward(self, x): lstm_feat self.lstm_branch(x) # (batch, lstm_hidden) trans_feat self.trans_branch(x) # (batch, trans_dim) combined torch.cat([lstm_feat, trans_feat], dim1) return 
self.fc(combined) # (batch, output_window)模型参数量对比模型类型参数量示例配置优势劣势纯LSTM~17K局部特征捕捉强长程依赖弱纯Transformer~25K全局关系建模好小数据易过拟合混合模型~42K优势互补计算量稍大4. 模型训练与优化4.1 训练配置与技巧from torch.optim import Adam from torch.nn import MSELoss device torch.device(cuda if torch.cuda.is_available() else cpu) model HybridModel(input_window30, output_window7).to(device) optimizer Adam(model.parameters(), lr0.001, weight_decay1e-5) criterion MSELoss() # 学习率调度器 scheduler torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, modemin, factor0.5, patience10)关键训练技巧梯度裁剪防止Transformer训练不稳定torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm1.0)早停机制验证损失连续不下降时终止训练混合精度训练使用torch.cuda.amp加速训练4.2 训练循环实现def train_epoch(model, loader, optimizer, criterion): model.train() total_loss 0 for inputs, targets in loader: inputs, targets inputs.to(device), targets.to(device) optimizer.zero_grad() with torch.cuda.amp.autocast(): # 混合精度 outputs model(inputs) loss criterion(outputs, targets) loss.backward() optimizer.step() total_loss loss.item() * inputs.size(0) return total_loss / len(loader.dataset) def evaluate(model, loader, criterion): model.eval() total_loss 0 with torch.no_grad(): for inputs, targets in loader: inputs, targets inputs.to(device), targets.to(device) outputs model(inputs) total_loss criterion(outputs, targets).item() * inputs.size(0) return total_loss / len(loader.dataset) # 训练过程 best_val_loss float(inf) for epoch in range(100): train_loss train_epoch(model, train_loader, optimizer, criterion) val_loss evaluate(model, val_loader, criterion) scheduler.step(val_loss) if val_loss best_val_loss: best_val_loss val_loss torch.save(model.state_dict(), best_model.pth) print(fEpoch {epoch1}: Train Loss{train_loss:.4f}, Val Loss{val_loss:.4f})5. 
结果分析与模型部署5.1 测试集评估# 加载最佳模型 model.load_state_dict(torch.load(best_model.pth)) test_loss evaluate(model, test_loader, criterion) print(fTest MSE Loss: {test_loss:.4f}) # 反标准化预测结果 def inverse_transform(predictions): return scaler.inverse_transform(predictions.cpu().numpy()) # 可视化测试样本 model.eval() with torch.no_grad(): test_input, test_target next(iter(test_loader)) test_output model(test_input.to(device)) # 反标准化 true_values inverse_transform(test_target) pred_values inverse_transform(test_output) # 绘制对比图 plt.figure(figsize(12,6)) plt.plot(true_values[0], g-, labelActual) plt.plot(pred_values[0], r--, labelPredicted) plt.title(7-Month Passenger Prediction on Test Set) plt.xlabel(Month) plt.ylabel(Passengers) plt.legend() plt.grid(True)5.2 未来预测实战# 使用最近30个月数据预测未来7个月 recent_data scaled_data[-30:] # 最后30个数据点 input_tensor torch.FloatTensor(recent_data).unsqueeze(0).unsqueeze(-1).to(device) model.eval() with torch.no_grad(): prediction model(input_tensor) future_passengers inverse_transform(prediction)[0] print(Next 7 Months Passenger Prediction:) for i, num in enumerate(future_passengers, 1): print(fMonth {i}: {int(num)} passengers) # 生成未来日期 last_date data.index[-1] future_dates pd.date_range(startlast_date, periods8, freqM)[1:] # 跳过第一个最后已知日期 # 可视化完整趋势 plt.figure(figsize(14,7)) plt.plot(data.index, data.values, b-, labelHistorical) plt.plot(future_dates, future_passengers, r--, markero, labelForecast) plt.title(AirPassengers Historical Data with 7-Month Forecast) plt.xlabel(Year) plt.ylabel(Passengers) plt.legend() plt.grid(True) plt.tight_layout()5.3 模型保存与部署# 保存完整模型包含架构 torch.save(model, hybrid_model_full.pth) # 保存为TorchScript格式生产环境推荐 scripted_model torch.jit.script(model) scripted_model.save(hybrid_model_scripted.pt) # 示例加载代码 loaded_model torch.jit.load(hybrid_model_scripted.pt)部署建议方案REST API服务使用FastAPI封装模型预测接口from fastapi import FastAPI app FastAPI() app.post(/predict) async def predict(past_data: list): input_tensor 
torch.FloatTensor(past_data).unsqueeze(0).unsqueeze(-1) with torch.no_grad(): prediction loaded_model(input_tensor) return {forecast: prediction.tolist()}批量预测脚本定期运行预测任务结果存入数据库Streamlit可视化工具构建交互式预测仪表盘6. 进阶优化方向6.1 特征工程扩展外部特征融合# 添加节假日标记、油价等外部变量 class EnhancedDataset(Dataset): def __init__(self, passenger_data, external_features): self.passengers passenger_data self.external external_features def __getitem__(self, idx): return ( torch.cat([ self.passengers[idx], self.external[idx] ], dim-1), self.passengers[idx] )多尺度特征提取同时使用30天、90天、365天滑动窗口通过1D CNN提取不同时间粒度的特征6.2 模型架构改进class ImprovedHybrid(nn.Module): def __init__(self): super().__init__() # 多尺度CNN特征提取 self.cnn nn.Sequential( nn.Conv1d(1, 32, kernel_size3, stride1), nn.ReLU(), nn.Conv1d(32, 64, kernel_size5, stride1), nn.ReLU(), nn.AdaptiveAvgPool1d(1) ) # 原混合架构 self.lstm_branch LSTMBranch(1, 64) self.trans_branch TransformerBranch(1, 64, 4, 2) # 特征融合 self.fc nn.Linear(646464, 7) # CNN LSTM Transformer def forward(self, x): # CNN路径 cnn_feat self.cnn(x.transpose(1,2)).squeeze(-1) # 原混合路径 lstm_feat self.lstm_branch(x) trans_feat self.trans_branch(x) # 特征拼接 combined torch.cat([cnn_feat, lstm_feat, trans_feat], dim1) return self.fc(combined)6.3 超参数优化策略# 使用Optuna进行自动化调参 import optuna def objective(trial): # 定义搜索空间 params { lstm_hidden: trial.suggest_categorical(lstm_hidden, [64, 128, 256]), trans_dim: trial.suggest_categorical(trans_dim, [64, 128]), lr: trial.suggest_float(lr, 1e-4, 1e-3, logTrue), batch_size: trial.suggest_categorical(batch_size, [16, 32, 64]) } # 创建模型和数据加载器 model HybridModel(input_window30, output_window7, lstm_hiddenparams[lstm_hidden], trans_dimparams[trans_dim]).to(device) optimizer Adam(model.parameters(), lrparams[lr]) # 训练和验证 for epoch in range(50): train_epoch(model, train_loader, optimizer, criterion) val_loss evaluate(model, val_loader, criterion) return val_loss study optuna.create_study(directionminimize) study.optimize(objective, n_trials30) print(Best parameters:, 
study.best_params)在实际项目中，这种混合架构相比单一模型通常能提升15-30%的预测准确率，特别是在同时存在局部波动和长期趋势的场景下。不过也要注意，计算资源消耗会增加约40%，需要根据业务需求权衡。
从航班乘客数据实战出发:用LSTM+Transformer预测未来7个月客流(Python/PyTorch保姆级教程)
发布时间:2026/5/24 21:14:26
航空客流预测实战LSTM与Transformer融合建模全流程解析时序预测一直是数据分析领域的核心挑战之一尤其是像航空乘客数据这样具有明显季节性、趋势性和随机波动性的复杂序列。传统的统计方法如ARIMA往往难以捕捉非线性关系而单一的深度学习模型又可能无法兼顾局部细节和全局依赖。本文将带你用PyTorch实现一个LSTM与Transformer的混合模型从数据预处理到模型部署手把手完成未来7天客流预测项目。1. 项目背景与数据准备航空乘客数据集AirPassengers是时间序列分析中的经典案例记录了1949年至1960年每月国际航线乘客数量。这个数据集完美展现了三种关键时序特征趋势性战后航空业快速发展乘客数量呈明显上升趋势季节性每年暑期6-8月出现客流高峰冬季相对低谷随机波动特殊事件如节假日、天气导致的短期波动import pandas as pd import matplotlib.pyplot as plt # 加载数据 data pd.read_csv(AirPassengers.csv, parse_dates[Month], index_colMonth) print(f数据时间跨度{data.index.min()} 至 {data.index.max()}) print(f总样本数{len(data)}) # 可视化原始序列 plt.figure(figsize(12,6)) plt.plot(data, labelMonthly Passengers) plt.title(AirPassengers Dataset (1949-1960)) plt.xlabel(Year) plt.ylabel(Passengers (thousands)) plt.grid(True) plt.show()注意原始数据通常需要检查缺失值但本例作为经典数据集已保证完整性。实际项目中建议先用data.isnull().sum()检查2. 数据预处理关键技术2.1 滑动窗口构造时序预测的核心是将连续序列转化为监督学习问题。我们采用滑动窗口方法构建样本输入窗口input_window过去30个月的数据输出窗口output_window未来7个月的预测import numpy as np def create_sliding_windows(data, input_size, output_size): X, y [], [] for i in range(len(data)-input_size-output_size1): X.append(data[i:iinput_size]) y.append(data[iinput_size:iinput_sizeoutput_size]) return np.array(X), np.array(y) # 数据标准化 from sklearn.preprocessing import MinMaxScaler scaler MinMaxScaler(feature_range(0, 1)) scaled_data scaler.fit_transform(data.values.reshape(-1,1)) # 创建窗口 X, y create_sliding_windows(scaled_data, input_size30, output_size7) print(f样本数量{X.shape[0]} | 输入维度{X.shape[1]} | 输出维度{y.shape[1]})2.2 数据集划分与加载不同于随机划分时序数据必须保持时间先后顺序数据集时间范围样本占比用途训练集1949-195880%模型训练验证集195910%超参数调优测试集196010%最终评估from torch.utils.data import TensorDataset, DataLoader import torch # 转换为PyTorch张量 X_tensor torch.FloatTensor(X).unsqueeze(-1) # 添加特征维度 y_tensor torch.FloatTensor(y) # 按时间顺序划分 split_idx [int(0.8*len(X)), int(0.9*len(X))] train_data TensorDataset(X_tensor[:split_idx[0]], y_tensor[:split_idx[0]]) val_data TensorDataset(X_tensor[split_idx[0]:split_idx[1]], y_tensor[split_idx[0]:split_idx[1]]) test_data 
TensorDataset(X_tensor[split_idx[1]:], y_tensor[split_idx[1]:]) # 创建DataLoader batch_size 16 train_loader DataLoader(train_data, batch_sizebatch_size, shuffleTrue) val_loader DataLoader(val_data, batch_sizebatch_size) test_loader DataLoader(test_data, batch_sizebatch_size)3. 混合模型架构设计3.1 LSTM分支捕捉局部时序模式LSTM通过门控机制解决长期依赖问题其核心单元结构如下import torch.nn as nn class LSTMBranch(nn.Module): def __init__(self, input_dim, hidden_dim, num_layers1): super().__init__() self.lstm nn.LSTM(input_dim, hidden_dim, num_layers, batch_firstTrue) def forward(self, x): # x形状: (batch, seq_len, input_dim) out, (h_n, c_n) self.lstm(x) return h_n[-1] # 取最后一层的最终隐藏状态关键参数说明input_dim输入特征维度本例为1hidden_dimLSTM隐藏单元数建议64-256num_layers堆叠层数深层LSTM可能引发梯度问题3.2 Transformer分支建模全局依赖Transformer通过自注意力机制捕获任意距离的依赖关系class TransformerBranch(nn.Module): def __init__(self, input_dim, model_dim, num_heads, num_layers): super().__init__() self.input_proj nn.Linear(input_dim, model_dim) self.pos_encoder PositionalEncoding(model_dim) encoder_layer nn.TransformerEncoderLayer(d_modelmodel_dim, nheadnum_heads) self.transformer nn.TransformerEncoder(encoder_layer, num_layers) def forward(self, x): # 输入投影 x self.input_proj(x) # (batch, seq_len, model_dim) # 位置编码 x self.pos_encoder(x) # Transformer处理 (需要调整维度顺序) x x.transpose(0,1) # (seq_len, batch, model_dim) out self.transformer(x) # 全局平均池化 return out.mean(dim0) # (batch, model_dim)提示位置编码(PositionalEncoding)实现参考原始Transformer论文代码较长此处省略3.3 模型融合与完整架构将两个分支的特征拼接后通过全连接层输出预测class HybridModel(nn.Module): def __init__(self, input_window, output_window, lstm_hidden64, trans_dim64, trans_heads4, trans_layers2): super().__init__() self.lstm_branch LSTMBranch(1, lstm_hidden) self.trans_branch TransformerBranch(1, trans_dim, trans_heads, trans_layers) self.fc nn.Linear(lstm_hidden trans_dim, output_window) def forward(self, x): lstm_feat self.lstm_branch(x) # (batch, lstm_hidden) trans_feat self.trans_branch(x) # (batch, trans_dim) combined torch.cat([lstm_feat, trans_feat], dim1) return 
self.fc(combined) # (batch, output_window)模型参数量对比模型类型参数量示例配置优势劣势纯LSTM~17K局部特征捕捉强长程依赖弱纯Transformer~25K全局关系建模好小数据易过拟合混合模型~42K优势互补计算量稍大4. 模型训练与优化4.1 训练配置与技巧from torch.optim import Adam from torch.nn import MSELoss device torch.device(cuda if torch.cuda.is_available() else cpu) model HybridModel(input_window30, output_window7).to(device) optimizer Adam(model.parameters(), lr0.001, weight_decay1e-5) criterion MSELoss() # 学习率调度器 scheduler torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer, modemin, factor0.5, patience10)关键训练技巧梯度裁剪防止Transformer训练不稳定torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm1.0)早停机制验证损失连续不下降时终止训练混合精度训练使用torch.cuda.amp加速训练4.2 训练循环实现def train_epoch(model, loader, optimizer, criterion): model.train() total_loss 0 for inputs, targets in loader: inputs, targets inputs.to(device), targets.to(device) optimizer.zero_grad() with torch.cuda.amp.autocast(): # 混合精度 outputs model(inputs) loss criterion(outputs, targets) loss.backward() optimizer.step() total_loss loss.item() * inputs.size(0) return total_loss / len(loader.dataset) def evaluate(model, loader, criterion): model.eval() total_loss 0 with torch.no_grad(): for inputs, targets in loader: inputs, targets inputs.to(device), targets.to(device) outputs model(inputs) total_loss criterion(outputs, targets).item() * inputs.size(0) return total_loss / len(loader.dataset) # 训练过程 best_val_loss float(inf) for epoch in range(100): train_loss train_epoch(model, train_loader, optimizer, criterion) val_loss evaluate(model, val_loader, criterion) scheduler.step(val_loss) if val_loss best_val_loss: best_val_loss val_loss torch.save(model.state_dict(), best_model.pth) print(fEpoch {epoch1}: Train Loss{train_loss:.4f}, Val Loss{val_loss:.4f})5. 
结果分析与模型部署5.1 测试集评估# 加载最佳模型 model.load_state_dict(torch.load(best_model.pth)) test_loss evaluate(model, test_loader, criterion) print(fTest MSE Loss: {test_loss:.4f}) # 反标准化预测结果 def inverse_transform(predictions): return scaler.inverse_transform(predictions.cpu().numpy()) # 可视化测试样本 model.eval() with torch.no_grad(): test_input, test_target next(iter(test_loader)) test_output model(test_input.to(device)) # 反标准化 true_values inverse_transform(test_target) pred_values inverse_transform(test_output) # 绘制对比图 plt.figure(figsize(12,6)) plt.plot(true_values[0], g-, labelActual) plt.plot(pred_values[0], r--, labelPredicted) plt.title(7-Month Passenger Prediction on Test Set) plt.xlabel(Month) plt.ylabel(Passengers) plt.legend() plt.grid(True)5.2 未来预测实战# 使用最近30个月数据预测未来7个月 recent_data scaled_data[-30:] # 最后30个数据点 input_tensor torch.FloatTensor(recent_data).unsqueeze(0).unsqueeze(-1).to(device) model.eval() with torch.no_grad(): prediction model(input_tensor) future_passengers inverse_transform(prediction)[0] print(Next 7 Months Passenger Prediction:) for i, num in enumerate(future_passengers, 1): print(fMonth {i}: {int(num)} passengers) # 生成未来日期 last_date data.index[-1] future_dates pd.date_range(startlast_date, periods8, freqM)[1:] # 跳过第一个最后已知日期 # 可视化完整趋势 plt.figure(figsize(14,7)) plt.plot(data.index, data.values, b-, labelHistorical) plt.plot(future_dates, future_passengers, r--, markero, labelForecast) plt.title(AirPassengers Historical Data with 7-Month Forecast) plt.xlabel(Year) plt.ylabel(Passengers) plt.legend() plt.grid(True) plt.tight_layout()5.3 模型保存与部署# 保存完整模型包含架构 torch.save(model, hybrid_model_full.pth) # 保存为TorchScript格式生产环境推荐 scripted_model torch.jit.script(model) scripted_model.save(hybrid_model_scripted.pt) # 示例加载代码 loaded_model torch.jit.load(hybrid_model_scripted.pt)部署建议方案REST API服务使用FastAPI封装模型预测接口from fastapi import FastAPI app FastAPI() app.post(/predict) async def predict(past_data: list): input_tensor 
torch.FloatTensor(past_data).unsqueeze(0).unsqueeze(-1) with torch.no_grad(): prediction loaded_model(input_tensor) return {forecast: prediction.tolist()}批量预测脚本定期运行预测任务结果存入数据库Streamlit可视化工具构建交互式预测仪表盘6. 进阶优化方向6.1 特征工程扩展外部特征融合# 添加节假日标记、油价等外部变量 class EnhancedDataset(Dataset): def __init__(self, passenger_data, external_features): self.passengers passenger_data self.external external_features def __getitem__(self, idx): return ( torch.cat([ self.passengers[idx], self.external[idx] ], dim-1), self.passengers[idx] )多尺度特征提取同时使用30天、90天、365天滑动窗口通过1D CNN提取不同时间粒度的特征6.2 模型架构改进class ImprovedHybrid(nn.Module): def __init__(self): super().__init__() # 多尺度CNN特征提取 self.cnn nn.Sequential( nn.Conv1d(1, 32, kernel_size3, stride1), nn.ReLU(), nn.Conv1d(32, 64, kernel_size5, stride1), nn.ReLU(), nn.AdaptiveAvgPool1d(1) ) # 原混合架构 self.lstm_branch LSTMBranch(1, 64) self.trans_branch TransformerBranch(1, 64, 4, 2) # 特征融合 self.fc nn.Linear(646464, 7) # CNN LSTM Transformer def forward(self, x): # CNN路径 cnn_feat self.cnn(x.transpose(1,2)).squeeze(-1) # 原混合路径 lstm_feat self.lstm_branch(x) trans_feat self.trans_branch(x) # 特征拼接 combined torch.cat([cnn_feat, lstm_feat, trans_feat], dim1) return self.fc(combined)6.3 超参数优化策略# 使用Optuna进行自动化调参 import optuna def objective(trial): # 定义搜索空间 params { lstm_hidden: trial.suggest_categorical(lstm_hidden, [64, 128, 256]), trans_dim: trial.suggest_categorical(trans_dim, [64, 128]), lr: trial.suggest_float(lr, 1e-4, 1e-3, logTrue), batch_size: trial.suggest_categorical(batch_size, [16, 32, 64]) } # 创建模型和数据加载器 model HybridModel(input_window30, output_window7, lstm_hiddenparams[lstm_hidden], trans_dimparams[trans_dim]).to(device) optimizer Adam(model.parameters(), lrparams[lr]) # 训练和验证 for epoch in range(50): train_epoch(model, train_loader, optimizer, criterion) val_loss evaluate(model, val_loader, criterion) return val_loss study optuna.create_study(directionminimize) study.optimize(objective, n_trials30) print(Best parameters:, 
study.best_params)在实际项目中，这种混合架构相比单一模型通常能提升15-30%的预测准确率，特别是在同时存在局部波动和长期趋势的场景下。不过也要注意，计算资源消耗会增加约40%，需要根据业务需求权衡。