Project: Data Analysis and Visualizations and Predicting Future Energy Consumption using LSTM Predicting Values 2 month Later Accurately RNN


Hourly Energy Consumption

Step 1:

Import Library

In [146]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pprint
%matplotlib inline
In [81]:
df = pd.read_csv("AEP_hourly.csv")
print("First Five Rows ","\n")

print("Information About Dataset","\n")

print("Describe the Dataset ","\n")

print("Null Values t ","\n")
First Five Rows  

              Datetime   AEP_MW
0  2004-12-31 01:00:00  13478.0
1  2004-12-31 02:00:00  12865.0 

Information About Dataset 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121273 entries, 0 to 121272
Data columns (total 2 columns):
Datetime    121273 non-null object
AEP_MW      121273 non-null float64
dtypes: float64(1), object(1)
memory usage: 1.9+ MB

Describe the Dataset  

count  121273.000000
mean    15499.513717
std      2591.399065
min      9581.000000
25%     13630.000000
50%     15310.000000
75%     17200.000000
max     25695.000000 

Null Values t  

Datetime    0
AEP_MW      0
dtype: int64 

Step 2:

Reformat the Date Time Columns

In [82]:
# Extract all Data Like Year MOnth Day Time etc
dataset = df
dataset["Month"] = pd.to_datetime(df["Datetime"]).dt.month
dataset["Year"] = pd.to_datetime(df["Datetime"]).dt.year
dataset["Date"] = pd.to_datetime(df["Datetime"])
dataset["Time"] = pd.to_datetime(df["Datetime"]).dt.time
dataset["Week"] = pd.to_datetime(df["Datetime"]).dt.week
dataset["Day"] = pd.to_datetime(df["Datetime"]).dt.day_name()
dataset = df.set_index("Datetime")
dataset.index = pd.to_datetime(dataset.index)
AEP_MW Month Year Date Time Week Day
2004-12-31 01:00:00 13478.0 12 2004 2004-12-31 01:00:00 53 Friday

Step 3:

In [96]:
# How many Unique Year do we Have in Dataset 
print("Total Number of Unique Year", df.Year.nunique(), "\n")
[2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017

Total Number of Unique Year 15 

Lets us see the energy consumption Each Year

In [304]:
from matplotlib import style

fig = plt.figure()
ax1 = plt.subplot2grid((1,1), (0,0))


sns.lineplot(x=dataset["Year"], y=dataset["AEP_MW"], data=df)

plt.title("Energy consumptionnin Year 2004")
plt.ylabel("Energy in MW")

for label in ax1.xaxis.get_ticklabels():

plt.title("Energy Consumption According to Year")
No handles with labels found to put in legend.
Text(0.5, 1.0, 'Energy Consumption According to Year')
In [339]:
from matplotlib import style

fig = plt.figure()

ax1= fig.add_subplot(311)
ax2= fig.add_subplot(312)
ax3= fig.add_subplot(313)


y_2004 = dataset["2004"]["AEP_MW"].to_list()
x_2004 = dataset["2004"]["Date"].to_list()
ax1.plot(x_2004,y_2004, color="green", linewidth=1.7)

y_2005 = dataset["2005"]["AEP_MW"].to_list()
x_2005 = dataset["2005"]["Date"].to_list()
ax2.plot(x_2005, y_2005, color="green", linewidth=1)

y_2006 = dataset["2006"]["AEP_MW"].to_list()
x_2006 = dataset["2006"]["Date"].to_list()
ax3.plot(x_2006, y_2006, color="green", linewidth=1)

plt.rcParams["figure.figsize"] = (18,8)
plt.title("Energy consumptionnin")
plt.ylabel("Energy in MW")
plt.grid(True, alpha=1)

for label in ax1.xaxis.get_ticklabels():
No handles with labels found to put in legend.

Energy Distribution

In [341]:
plt.title("Ennergy Distribution")
Text(0.5, 1.0, 'Ennergy Distribution')

Energy with Respect to Time

In [356]:
fig = plt.figure()
ax1= fig.add_subplot(111)

sns.lineplot(x=dataset["Time"],y=dataset["AEP_MW"], data=df)
plt.title("Energy Consumption vs Time ")
plt.grid(True, alpha=1)

for label in ax1.xaxis.get_ticklabels():
No handles with labels found to put in legend.

Resampleing Data

In [415]:
NewDataSet = dataset.resample('D').mean()
In [416]:
print("Old Dataset ",dataset.shape )
print("New  Dataset ",NewDataSet.shape )
Old Dataset  (121273, 7)
New  Dataset  (5055, 4)
In [417]:
TestData = NewDataSet.tail(100)

Training_Set = NewDataSet.iloc[:,0:1]

Training_Set = Training_Set[:-60]
In [418]:
print("Training Set Shape ", Training_Set.shape)
print("Test Set Shape ", TestData.shape)
Training Set Shape  (4995, 1)
Test Set Shape  (100, 4)
In [419]:
Training_Set = Training_Set.values
sc = MinMaxScaler(feature_range=(0, 1))
Train = sc.fit_transform(Training_Set)
In [420]:
X_Train = []
Y_Train = []

# Range should be fromm 60 Values to END 
for i in range(60, Train.shape[0]):
    # X_Train 0-59 
    # Y Would be 60 th Value based on past 60 Values 

# Convert into Numpy Array
X_Train = np.array(X_Train)
Y_Train = np.array(Y_Train)

(4935, 60, 1)
(4935, 1)
In [421]:
# Shape should be Number of [Datapoints , Steps , 1 )
# we convert into 3-d Vector or #rd Dimesnsion
X_Train = np.reshape(X_Train, newshape=(X_Train.shape[0], X_Train.shape[1], 1))
(4935, 60, 1)


In [422]:
regressor = Sequential()

# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_Train.shape[1], 1)))

# Adding a second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))

# Adding a third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))

# Adding a fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50))

# Adding the output layer
regressor.add(Dense(units = 1))

# Compiling the RNN
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
In [423]:, Y_Train, epochs = 50, batch_size = 32)
Epoch 1/50
4935/4935 [==============================] - 33s 7ms/step - loss: 0.0237
Epoch 2/50
4935/4935 [==============================] - 33s 7ms/step - loss: 0.0183
Epoch 3/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0173
Epoch 4/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0164
Epoch 5/50
4935/4935 [==============================] - 35s 7ms/step - loss: 0.0157
Epoch 6/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0160
Epoch 7/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0151
Epoch 8/50
4935/4935 [==============================] - 35s 7ms/step - loss: 0.0125
Epoch 9/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0099
Epoch 10/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0089
Epoch 11/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0085
Epoch 12/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0083
Epoch 13/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0078
Epoch 14/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0079
Epoch 15/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0073
Epoch 16/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0075
Epoch 17/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0072
Epoch 18/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0070
Epoch 19/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0066
Epoch 20/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0063
Epoch 21/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0061
Epoch 22/50
4935/4935 [==============================] - 32s 6ms/step - loss: 0.0058
Epoch 23/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0056
Epoch 24/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0055
Epoch 25/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0053
Epoch 26/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0054
Epoch 27/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0053
Epoch 28/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0051
Epoch 29/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0050
Epoch 30/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0051
Epoch 31/50
4935/4935 [==============================] - 32s 6ms/step - loss: 0.0050
Epoch 32/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0049
Epoch 33/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0048
Epoch 34/50
4935/4935 [==============================] - 32s 7ms/step - loss: 0.0048
Epoch 35/50
4935/4935 [==============================] - 2283s 463ms/step - loss: 0.0048
Epoch 36/50
4935/4935 [==============================] - 3475s 704ms/step - loss: 0.0047
Epoch 37/50
4935/4935 [==============================] - 32s 6ms/step - loss: 0.0047
Epoch 38/50
4935/4935 [==============================] - 29s 6ms/step - loss: 0.0047
Epoch 39/50
4935/4935 [==============================] - 30s 6ms/step - loss: 0.0046
Epoch 40/50
4935/4935 [==============================] - 31s 6ms/step - loss: 0.0046
Epoch 41/50
4935/4935 [==============================] - 33s 7ms/step - loss: 0.0045
Epoch 42/50
4935/4935 [==============================] - 37s 7ms/step - loss: 0.0045
Epoch 43/50
4935/4935 [==============================] - 38s 8ms/step - loss: 0.0047
Epoch 44/50
4935/4935 [==============================] - 36s 7ms/step - loss: 0.0045
Epoch 45/50
4935/4935 [==============================] - 35s 7ms/step - loss: 0.0044
Epoch 46/50
4935/4935 [==============================] - 38s 8ms/step - loss: 0.0044
Epoch 47/50
4935/4935 [==============================] - 34s 7ms/step - loss: 0.0043
Epoch 48/50
4935/4935 [==============================] - 42s 9ms/step - loss: 0.0043
Epoch 49/50
4935/4935 [==============================] - 37s 7ms/step - loss: 0.0044
Epoch 50/50
4935/4935 [==============================] - 37s 8ms/step - loss: 0.0044
<keras.callbacks.History at 0x1a36d8f898>

Test Data

In [462]:
AEP_MW Month Year Week
2018-04-26 13157.791667 4 2018 17
2018-04-27 12964.000000 4 2018 17
In [463]:
(100, 4)
In [464]:
(5055, 4)
In [465]:
Df_Total = pd.concat((NewDataSet[["AEP_MW"]], TestData[["AEP_MW"]]), axis=0)
In [466]:
(5155, 1)
In [467]:
inputs = Df_Total[len(Df_Total) - len(TestData) - 60:].values
(160, 1)
In [468]:
inputs = Df_Total[len(Df_Total) - len(TestData) - 60:].values

# We need to Reshape
inputs = inputs.reshape(-1,1)

# Normalize the Dataset
inputs = sc.transform(inputs)

X_test = []
for i in range(60, 160):
# Convert into Numpy Array
X_test = np.array(X_test)

# Reshape before Passing to Network
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Pass to Model 
predicted_stock_price = regressor.predict(X_test)

# Do inverse Transformation to get Values 
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
In [469]:
True_MegaWatt = TestData["AEP_MW"].to_list()
Predicted_MegaWatt  = predicted_stock_price
dates = TestData.index.to_list()
In [471]:
Machine_Df = pd.DataFrame(data={
    "TrueMegaWatt": True_MegaWatt,
    "PredictedMeagWatt":[x[0] for x in Predicted_MegaWatt ]

Future Predicted

In [474]:
Date TrueMegaWatt PredictedMeagWatt
0 2018-04-26 13157.791667 13671.706055
1 2018-04-27 12964.000000 12991.945312
2 2018-04-28 12237.583333 14521.591797
3 2018-04-29 12156.791667 13211.944336
4 2018-04-30 13443.500000 12788.455078
5 2018-05-01 13251.875000 13789.046875
6 2018-05-02 13641.166667 12804.154297
7 2018-05-03 14217.250000 12709.704102
8 2018-05-04 13725.625000 14261.728516
9 2018-05-05 11902.166667 14472.195312
10 2018-05-06 11680.083333 12677.794922
11 2018-05-07 12972.500000 12127.531250
12 2018-05-08 13295.083333 12887.196289
13 2018-05-09 13688.750000 12743.552734
14 2018-05-10 13993.250000 12747.035156
15 2018-05-11 13525.166667 13814.033203
16 2018-05-12 12942.916667 13970.200195
17 2018-05-13 12832.541667 13168.587891
18 2018-05-14 15004.750000 12955.161133
19 2018-05-15 15171.791667 15169.067383
20 2018-05-16 13925.416667 14419.253906
21 2018-05-17 14465.666667 12913.649414
22 2018-05-18 13684.333333 14998.011719
23 2018-05-19 13044.166667 14174.238281
24 2018-05-20 13169.125000 13413.721680
25 2018-05-21 14728.666667 13382.070312
26 2018-05-22 14857.125000 14739.416992
27 2018-05-23 14489.583333 14121.821289
28 2018-05-24 14656.250000 13763.244141
29 2018-05-25 15137.125000 15047.317383
... ... ... ...
70 2018-07-05 17609.000000 17120.591797
71 2018-07-06 15742.916667 17615.269531
72 2018-07-07 13610.333333 14689.130859
73 2018-07-08 13768.708333 13816.837891
74 2018-07-09 16427.333333 15385.699219
75 2018-07-10 17489.333333 16932.236328
76 2018-07-11 16714.125000 17681.707031
77 2018-07-12 16330.833333 16694.558594
78 2018-07-13 16911.291667 15885.130859
79 2018-07-14 16488.375000 16239.578125
80 2018-07-15 16296.208333 16572.927734
81 2018-07-16 17400.041667 17885.480469
82 2018-07-17 17311.125000 17595.656250
83 2018-07-18 15814.041667 17368.632812
84 2018-07-19 15889.916667 15917.466797
85 2018-07-20 15332.500000 15957.360352
86 2018-07-21 13795.250000 14366.544922
87 2018-07-22 13479.333333 13657.029297
88 2018-07-23 15410.083333 15275.373047
89 2018-07-24 15890.541667 15779.814453
90 2018-07-25 16503.333333 16030.302734
91 2018-07-26 16474.250000 16809.560547
92 2018-07-27 15816.625000 16138.321289
93 2018-07-28 14113.083333 14586.478516
94 2018-07-29 13658.000000 13875.068359
95 2018-07-30 15368.083333 15294.772461
96 2018-07-31 15180.291667 15672.427734
97 2018-08-01 15151.166667 15329.677734
98 2018-08-02 15687.666667 15497.061523
99 2018-08-03 14809.000000 15975.358398

100 rows × 3 columns

In [476]:
True_MegaWatt = TestData["AEP_MW"].to_list()
Predicted_MegaWatt  = [x[0] for x in Predicted_MegaWatt ]
dates = TestData.index.to_list()
In [487]:
fig = plt.figure()

ax1= fig.add_subplot(111)

x = dates
y = True_MegaWatt

y1 = Predicted_MegaWatt

plt.plot(x,y, color="green")
plt.plot(x,y1, color="red")
# beautify the x-labels
plt.ylabel("Power in MW")
plt.title("Machine Learned the Pattern Predicting Future Values ")
No handles with labels found to put in legend.
<matplotlib.legend.Legend at 0x1a4984b780>


