ICode9

精准搜索请尝试: 精确搜索
首页 > 编程语言> 文章详细

python_经验模态分解EMD_长短期记忆模型LSTM_公交短时客流预测

2021-03-14 15:59:54  阅读:564  来源: 互联网

标签:EMD IMF python self ts train test LSTM data


1、摘要

本文主要讲解:python_经验模态分解EMD_长短期记忆模型LSTM_公交短时客流预测
主要思路:

  1. 整理特征:天气、风力、时间、工作日和非工作日、节假日和非节假日、温度等
  2. 对客流量进行经验模态分解EMD
  3. 构建LSTM网络,优化器选择Adam
  4. reshape训练集和测试集,适配LSTM网络的输入尺寸
  5. 设置 batch_size和epochs,开始训练
  6. 评估模型、保存模型、画出模型预测结果的图

2、数据介绍

公交车在高峰和平峰转换期间的调度
深圳公交2017年4月的数据

3、完整代码

import math
import os

import numpy as np
import pandas as pd
from PyEMD import EMD
from keras import Sequential
from keras.layers import Dense, LSTM
from keras.layers import Dropout
from sklearn import preprocessing
# 定义多通道特征组合模型
from sklearn.metrics import mean_squared_error


def build_model(timesteps=61, n_features=7, dropout=0.2):
    """Build and compile the stacked-LSTM regression network.

    Architecture: LSTM(128, returns sequences) -> Dropout -> LSTM(128) ->
    Dropout -> Dense(32, relu) -> Dense(1, linear), trained with MSE loss and
    the Adam optimizer.

    :param timesteps: number of rows in one input window (default 61)
    :param n_features: number of feature columns per row (default 7)
    :param dropout: dropout rate applied after each LSTM layer (default 0.2)
    :return: the compiled ``Sequential`` model
    """
    neurons = [128, 128, 32, 1]
    model_lstm = Sequential()
    # Only the first layer needs an input shape; later layers infer theirs.
    model_lstm.add(LSTM(neurons[0], input_shape=(timesteps, n_features), return_sequences=True))
    model_lstm.add(Dropout(dropout))
    model_lstm.add(LSTM(neurons[1], return_sequences=False))
    model_lstm.add(Dropout(dropout))
    model_lstm.add(Dense(neurons[2], kernel_initializer="uniform", activation='relu'))
    model_lstm.add(Dense(neurons[3], kernel_initializer="uniform", activation='linear'))
    # This is a regression model, so classification accuracy is meaningless.
    # Report mean absolute error instead. `metrics` stays a list so that
    # `model.evaluate` keeps returning `[loss, metric]`.
    model_lstm.compile(loss='mse', optimizer='adam', metrics=['mae'])
    model_lstm.summary()
    return model_lstm


class DataLoader:
    """Load, EMD-decompose, scale and window the data for the EMD-LSTM model.

    On construction the class reads the training and test spreadsheets,
    decomposes the ``card_id`` column of each into IMFs with ``EMD().emd``,
    selects one IMF per data set, and min-max scales the resulting tables.
    ``make_train_test_data`` then cuts the scaled tables into sliding windows.
    """

    # Number of leading rows duplicated in front of each table.
    _PAD_ROWS = 61

    def __init__(self, filename, cols, input_timesteps, seq_len,
                 test_filename=r'上车测试数据.xlsx'):
        """
        :param filename: path of the training spreadsheet (read with ``pd.read_excel``)
        :param cols: column names; ``cols[1:]`` are used as the feature columns
        :param input_timesteps: stored on the instance as ``input_timesteps``
        :param seq_len: stored on the instance as ``seq_len``
        :param test_filename: path of the test spreadsheet; columns 1..8 are read
        """
        self.dataframe = pd.read_excel(filename)
        test_frame = pd.read_excel(test_filename, usecols=range(1, 9))
        # ignore_index=True gives the padded frame unique row labels. Without
        # it the duplicated labels 0..60 make the later merge on 'index' pair
        # IMF values with the wrong feature rows.
        self.test_data = pd.concat(
            [test_frame[:self._PAD_ROWS], test_frame], axis=0, ignore_index=True)
        self.cols = cols
        self.len_train_windows = None
        self.input_timesteps = input_timesteps
        self.seq_len = seq_len
        print('the input cols are:', self.cols)
        self.Normalization()

    def _decompose(self, frame):
        """Run EMD on ``frame['card_id']`` and return ``(imfs, tables)``.

        ``tables`` maps ``'IMF<i>'`` to a DataFrame whose columns are
        ``index``, ``IMF<i>_card_id`` and then the feature columns.
        """
        feature_cols = self.cols[1:]
        imfs = EMD().emd(frame['card_id'].values)
        # Drop the frame's own labels first so the join key is purely positional.
        features = frame[feature_cols].reset_index(drop=True).reset_index()
        tables = {}
        for ind, imf in enumerate(imfs):
            imf_frame = pd.DataFrame(data={'IMF' + str(ind) + '_card_id': imf}).reset_index()
            tables['IMF' + str(ind)] = pd.merge(imf_frame, features, on='index')
        return imfs, tables

    def scale_EMD(self):
        """Decompose the train and test targets into IMFs.

        Sets ``IMFs`` / ``df_names_IMF`` for the training data and
        ``test_IMFs`` / ``test_names_IMF`` for the test data. Each table dict
        is a separate plain dict, so train and test entries that share a key
        such as ``'IMF0'`` cannot overwrite each other.
        """
        self.IMFs, self.df_names_IMF = self._decompose(self.dataframe)
        self.test_IMFs, self.test_names_IMF = self._decompose(self.test_data)

    @staticmethod
    def _make_windows(table, ts):
        """Cut ``table`` into ``(windows, labels)``.

        Window ``i`` holds columns ``2:`` of rows ``i .. i+ts-1``; its label is
        column 1 of row ``i+ts``.

        :raises ValueError: if ``ts`` is below 1 or ``table`` has fewer than ``ts`` rows
        """
        if ts < 1:
            raise ValueError('ts must be a positive integer, got %r' % (ts,))
        features = table[:, 2:]
        targets = table[:, 1]
        n_windows = features.shape[0] - ts
        if n_windows < 0:
            raise ValueError(
                'need at least %d rows to build a window, got %d' % (ts, features.shape[0]))
        if n_windows == 0:
            return np.empty((0, ts, features.shape[1])), np.empty(0)
        windows = np.stack([features[i:i + ts] for i in range(n_windows)])
        return windows.astype(np.float64), targets[ts:].astype(np.float64)

    def make_train_test_data(self, ts=61):
        """Build sliding-window inputs and next-step labels for train and test.

        :param ts: window length in rows (default 61)
        :return: ``(train_x, train_y, test_x, test_y)`` where the ``x`` arrays
            have shape ``(rows - ts, ts, n_features)`` and the ``y`` arrays are
            1-D with ``rows - ts`` entries
        :raises ValueError: if either table has fewer than ``ts`` rows
        """
        print('训练数据的原始shape:', self.data_train[:, 2:].shape)
        ts_train_x, ts_train_y = self._make_windows(self.data_train, ts)

        print('预测数据的原始shape:', self.test_data[:, 2:].shape)
        ts_test_x, ts_test_y = self._make_windows(self.test_data, ts)

        return ts_train_x, ts_train_y, ts_test_x, ts_test_y

    @staticmethod
    def _select_imf(tables, imfs, number):
        """Return the table for IMF ``number`` or raise ``ValueError`` if out of range."""
        if number not in range(imfs.shape[0]):
            raise ValueError(
                "Oops!IMF_number was no valid number. it must between 0 and "
                + str(imfs.shape[0] - 1))
        return tables['IMF' + str(number)]

    def Normalization(self, IMF_number=8, IMF_test_number=6):
        """Select one IMF table per data set and min-max scale it.

        :param IMF_number: index of the training IMF to use (default 8)
        :param IMF_test_number: index of the test IMF to use (default 6)
        :raises ValueError: if either index is outside the available IMFs

        After this call ``data_train`` and ``test_data`` are scaled numpy
        arrays; ``test_data`` no longer holds the DataFrame set in ``__init__``.
        """
        self.scale_EMD()

        print('processing the data of IM' + str(IMF_number))

        selected = self._select_imf(self.df_names_IMF, self.IMFs, IMF_number)
        # Duplicate the first rows in front of the training table.
        self.data_train_original = pd.concat([selected[:self._PAD_ROWS], selected], axis=0)
        self.train_scaler = preprocessing.MinMaxScaler()
        self.data_train = self.train_scaler.fit_transform(self.data_train_original.values)
        self.len_train = len(self.data_train_original)

        self.test_original = self._select_imf(
            self.test_names_IMF, self.test_IMFs, IMF_test_number)
        # NOTE(review): the test table is fitted with its own scaler rather
        # than transformed with the training one - confirm this is intended.
        self.min_max_scaler = preprocessing.MinMaxScaler()
        self.test_data = self.min_max_scaler.fit_transform(self.test_original.values)
        self.len_test = len(self.test_original)


def _first_loss(score):
    """Return the loss from a ``model.evaluate`` result.

    ``evaluate`` returns a list ``[loss, metric, ...]`` when the model was
    compiled with metrics and a bare scalar otherwise; both are accepted.
    """
    try:
        return float(score[0])
    except (TypeError, IndexError):
        return float(score)


def model_score(model, X_train, y_train, X_test, y_test):
    """Print the MSE and RMSE of ``model`` on the train and test sets.

    :param model: object exposing ``evaluate(x, y, verbose=0)``
    :param X_train: training inputs passed to ``model.evaluate``
    :param y_train: training targets passed to ``model.evaluate``
    :param X_test: test inputs passed to ``model.evaluate``
    :param y_test: test targets passed to ``model.evaluate``
    :return: ``(train_mse, test_mse)`` as floats
    """
    train_mse = _first_loss(model.evaluate(X_train, y_train, verbose=0))
    print('Train Score: %.5f MSE (%.2f RMSE)' % (train_mse, math.sqrt(train_mse)))
    test_mse = _first_loss(model.evaluate(X_test, y_test, verbose=0))
    print('Test Score: %.5f MSE (%.2f RMSE)' % (test_mse, math.sqrt(test_mse)))
    return train_mse, test_mse


def model_test_score(model, X_test, y_test):
    """Predict on ``X_test`` and report RMSE and MSE against ``y_test``.

    :param model: object exposing ``predict(x)``
    :param X_test: inputs passed to ``model.predict``
    :param y_test: true target values, one per prediction
    :return: ``(rmse, mse)``
    :raises ValueError: if predictions and targets differ in length
    """
    # Flatten both sides so an (n, 1) prediction lines up with a 1-D target.
    y_hat = np.asarray(model.predict(X_test), dtype=float).reshape(-1)
    y_true = np.asarray(y_test, dtype=float).reshape(-1)
    if y_hat.shape != y_true.shape:
        raise ValueError(
            'prediction and target lengths differ: %d vs %d' % (y_hat.size, y_true.size))

    # Compute the MSE once and derive the RMSE from it.
    temp_mse = np.mean(np.square(y_true - y_hat))
    temp_rmse = np.sqrt(temp_mse)
    print('test RMSE: %.3f' % temp_rmse)
    print('test MSE: %.3f' % temp_mse)
    return temp_rmse, temp_mse


if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal. The previous non-raw
    # literal relied on invalid escape sequences such as '\e'.
    data_dir = r'E:\项目文件\基于改进的LSTM短时客流预测\数据'
    save_dir = r'E:\项目文件\基于改进的LSTM短时客流预测\emd-lstm模型'

    # DataLoader opens the test spreadsheet by a relative name, so run from
    # the data directory.
    os.chdir(data_dir)

    # Number of rows in each time-series window.
    seq_len = 61
    data = DataLoader(
        filename=os.path.join(data_dir, r'上车训练数据.xlsx'),
        cols=['card_id', 'feng_1.0', 'feng_2.0', 'feng_3.0', 'work_1', 'work_2', 'tianqi_average_scale',
              'temperature_average_scale'],
        input_timesteps=seq_len,
        seq_len=seq_len,
    )

    train_x, train_y, val_x, val_y = data.make_train_test_data()

    # Create the output directory before the long training run so a missing
    # folder cannot lose the trained model.
    os.makedirs(save_dir, exist_ok=True)
    save_fname = os.path.join(save_dir, 'multi_lstm.h5')

    print('Starting training Model')
    model = build_model()

    model.fit(train_x, train_y, epochs=512, batch_size=32)

    model.save(save_fname)
    model_score(model, train_x, train_y, val_x, val_y)
    model_test_score(model, val_x, val_y)

标签:EMD,IMF,python,self,ts,train,test,LSTM,data
来源: https://blog.csdn.net/qq_30803353/article/details/114790863

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有