Hourly Energy Consumption¶
Step 1:¶
Import Library¶
In [146]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pprint
%matplotlib inline
In [81]:
# Load the hourly AEP load data and print a quick textual overview of it.
df = pd.read_csv("AEP_hourly.csv")

print("="*50)
print("First Five Rows ","\n")
# head() defaults to five rows, which matches the heading printed above
# (the original call showed only two rows under a "First Five Rows" banner).
print(df.head(),"\n")

print("="*50)
print("Information About Dataset","\n")
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() emitted a stray "None" line after the report.
df.info()
print("\n")

print("="*50)
print("Describe the Dataset ","\n")
print(df.describe(),"\n")

print("="*50)
print("Null Values t ","\n")
print(df.isnull().sum(),"\n")
In [82]:
# Derive calendar features (month, year, date, time, ISO week, weekday name)
# from the "Datetime" column and add them to df, then build `dataset`: the same
# frame indexed by a DatetimeIndex.
# The column is parsed once instead of once per derived feature.
timestamps = pd.to_datetime(df["Datetime"])

df["Month"] = timestamps.dt.month
df["Year"] = timestamps.dt.year
df["Date"] = timestamps.dt.date
df["Time"] = timestamps.dt.time
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# isocalendar().week is the supported replacement (ISO week number, UInt32).
df["Week"] = timestamps.dt.isocalendar().week
df["Day"] = timestamps.dt.day_name()

# set_index returns a new frame, so df keeps "Datetime" as a regular column
# while dataset is indexed by it.
dataset = df.set_index("Datetime")
dataset.index = pd.to_datetime(dataset.index)
dataset.head(1)
Out[82]:
Step 3:¶
In [96]:
# List the distinct years present in the data and how many there are.
year_column = df["Year"]
print(year_column.unique(), "\n")
print("Total Number of Unique Year", year_column.nunique(), "\n")
Let us see the energy consumption each year¶
In [304]:
# Line plot of energy consumption aggregated per calendar year.
#
# Theme and figure size are applied BEFORE the figure is created; setting them
# after plotting (as the original did) has no effect on the current figure.
sns.set(rc={'figure.figsize': (15, 6)})
plt.style.use('ggplot')

fig, ax1 = plt.subplots()

# Take both variables from the same frame instead of mixing Series from
# `dataset` with data=df.
sns.lineplot(x="Year", y="AEP_MW", data=dataset, ax=ax1)

# A single title: the original set a "Year 2004" title first and then
# immediately overwrote it with this one.
ax1.set_title("Energy Consumption According to Year")
# The x axis carries the Year column, not dates.
ax1.set_xlabel("Year")
ax1.set_ylabel("Energy in MW")
ax1.grid(True)
ax1.tick_params(axis="x", labelrotation=90)
# No legend call: nothing on this axes carries a label, so plt.legend() only
# produced an empty box and a "no handles" warning.
Out[304]:
In [339]:
# One stacked subplot per year (2004, 2005, 2006) of hourly load against date.
#
# The default figure size is set BEFORE the figure is created so that it
# applies to this figure as well as to later ones.
plt.rcParams["figure.figsize"] = (18, 8)
plt.style.use('ggplot')

fig, (ax1, ax2, ax3) = plt.subplots(3, 1)

# (axes, year, line width) - line widths are kept as in the original cells.
for ax, year, width in ((ax1, "2004", 1.7), (ax2, "2005", 1), (ax3, "2006", 1)):
    # Partial-string row selection must go through .loc; dataset["2004"] was
    # deprecated in pandas 1.x and removed in pandas 2.0.
    yearly = dataset.loc[year]
    ax.plot(yearly["Date"].to_list(), yearly["AEP_MW"].to_list(),
            color="green", linewidth=width)
    # Decorate every axes explicitly: the pyplot calls used originally only
    # reached the last subplot, and tick rotation only reached the first.
    ax.set_title(f"Energy consumption in {year}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Energy in MW")
    ax.grid(True, alpha=1)
    ax.tick_params(axis="x", labelrotation=90)

# Keep the three panels' titles and rotated tick labels from overlapping.
fig.tight_layout()
Energy Distribution¶
In [341]:
# Distribution of the hourly load values; distplot returns the Axes it drew on,
# so the title is attached to that Axes directly.
distribution_ax = sns.distplot(dataset["AEP_MW"])
distribution_ax.set_title("Ennergy Distribution")
Out[341]:
Energy with Respect to Time¶
In [356]:
# Average load by time of day.
fig = plt.figure()
ax1 = fig.add_subplot(111)

# The "Time" column holds datetime.time objects, which seaborn/matplotlib
# cannot convert to numbers (TypeError). Casting to "HH:MM:SS" strings gives a
# categorical axis whose lexical order is also chronological order.
sns.lineplot(x=dataset["Time"].astype(str), y=dataset["AEP_MW"], ax=ax1)

plt.title("Energy Consumption vs Time ")
plt.xlabel("Time")
plt.grid(True, alpha=1)
ax1.tick_params(axis="x", labelrotation=90)
# No legend call: the plot has no labelled artists, so plt.legend() only
# produced an empty box and a warning.
Resampling Data¶
In [415]:
# Downsample the hourly frame to daily means.
# Only numeric columns are averaged: the frame also carries object columns
# (Date, Time, Day), and since pandas 2.0 mean() raises on those instead of
# silently dropping them. Column order is preserved, so AEP_MW stays first.
NewDataSet = dataset.select_dtypes(include="number").resample('D').mean()
In [416]:
# Compare the row/column counts before and after daily resampling.
for caption, frame in (("Old Dataset ", dataset), ("New Dataset ", NewDataSet)):
    print(caption, frame.shape)
In [417]:
# Chronological split: the last 100 daily rows are the test set and everything
# before them is the training set (first column only, i.e. the load series).
TestData = NewDataSet.tail(100)
# Cut the training data at the start of the test window. The original dropped
# only the last 60 rows, so 40 of the 100 test days were also trained on.
Training_Set = NewDataSet.iloc[:-len(TestData), 0:1]
In [418]:
# Report the sizes of the training and test partitions.
for caption, part in (("Training Set Shape ", Training_Set), ("Test Set Shape ", TestData)):
    print(caption, part.shape)
In [419]:
# Scale the training values into [0, 1] for the network.
# np.asarray makes this cell safe to re-run: the original used
# Training_Set.values, which raises AttributeError on a second run because
# Training_Set has already been rebound to a NumPy array.
Training_Set = np.asarray(Training_Set)
sc = MinMaxScaler(feature_range=(0, 1))
Train = sc.fit_transform(Training_Set)
In [420]:
# Build supervised samples from the scaled series: each input is a run of 60
# consecutive values and its target is the value immediately after that run.
X_Train = np.array([Train[end - 60:end] for end in range(60, Train.shape[0])])
Y_Train = np.array([Train[end] for end in range(60, Train.shape[0])])

print(X_Train.shape)
print(Y_Train.shape)
In [421]:
# Reshape to (samples, timesteps, 1) so that each window has an explicit
# trailing feature axis of size one.
# ndarray.reshape is used because the `newshape=` keyword of np.reshape is
# deprecated as of NumPy 2.1.
X_Train = X_Train.reshape(X_Train.shape[0], X_Train.shape[1], 1)
X_Train.shape
Out[421]:
Model¶
In [422]:
# Stacked LSTM regressor: four LSTM layers of 50 units, each followed by
# Dropout(0.2), and a single-unit Dense output layer.
regressor = Sequential([
    # The first layer declares the input shape: (timesteps, 1 feature).
    LSTM(units=50, return_sequences=True, input_shape=(X_Train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # The last recurrent layer returns only its final output, not a sequence.
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

# Train with Adam on mean squared error.
regressor.compile(loss='mean_squared_error', optimizer='adam')
In [423]:
# Fit the network on the windowed training data: 50 epochs, mini-batches of 32.
regressor.fit(x=X_Train, y=Y_Train, batch_size=32, epochs=50)
Out[423]:
Test Data¶
In [462]:
# Peek at the first two rows of TestData (the last 100 rows of NewDataSet).
TestData.head(2)
Out[462]:
In [463]:
# (rows, columns) of the test frame.
TestData.shape
Out[463]:
In [464]:
# (rows, columns) of the full daily-resampled frame, for comparison with TestData.
NewDataSet.shape
Out[464]:
In [465]:
# Full chronological load series: the pre-test history followed by the test rows.
# TestData is the tail of NewDataSet, so concatenating the WHOLE of NewDataSet
# with TestData (as the original did) duplicated the test rows. The 60-day
# history sliced off the end of Df_Total then consisted of the final 60 days of
# the series rather than the 60 days preceding the first test day.
Df_Total = pd.concat(
    (NewDataSet[["AEP_MW"]].iloc[:-len(TestData)], TestData[["AEP_MW"]]),
    axis=0,
)
In [466]:
# (rows, columns) of the concatenated load series.
Df_Total.shape
Out[466]:
In [467]:
# Take the rows from position len(Df_Total) - len(TestData) - 60 to the end
# (the test rows plus the 60 rows before them) as a NumPy array, and show its shape.
inputs = Df_Total.iloc[len(Df_Total) - len(TestData) - 60:].to_numpy()
inputs.shape
Out[467]:
In [468]:
# Build one 60-step input window per test row, run the model on them and map
# the predictions back to megawatts.
inputs = Df_Total[len(Df_Total) - len(TestData) - 60:].values
# Single feature column, as expected by the scaler.
inputs = inputs.reshape(-1,1)
# Reuse the scaler fitted on the training data (transform only, no re-fit).
inputs = sc.transform(inputs)

# One window per row after the first 60. The upper bound is derived from the
# data instead of the hard-coded 160, which silently assumed exactly 100 test rows.
X_test = [inputs[i-60:i] for i in range(60, inputs.shape[0])]
X_test = np.array(X_test)
# (samples, timesteps, 1), matching the shape used for training.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Predict in scaled space, then invert the scaling to get values in MW.
# The variable name is kept because later cells read it.
predicted_stock_price = regressor.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
In [469]:
# Gather the pieces of the comparison table: test dates, observed load and the
# model's predictions (still the 2-D prediction array at this point).
dates = TestData.index.tolist()
True_MegaWatt = TestData["AEP_MW"].tolist()
Predicted_MegaWatt = predicted_stock_price
In [471]:
# Side-by-side table of observed and predicted load per test date.
Machine_Df = pd.DataFrame(
    data={
        "Date": dates,
        "TrueMegaWatt": True_MegaWatt,
        # Each prediction row holds a single value; take it out of its row.
        "PredictedMeagWatt": [prediction_row[0] for prediction_row in Predicted_MegaWatt],
    }
)
Future Predicted¶
In [474]:
# Display the table of dates, true values and predicted values.
Machine_Df
Out[474]:
In [476]:
# Flat lists for plotting: observed load, predicted load and the matching dates.
True_MegaWatt = TestData["AEP_MW"].to_list()
# Flatten from the model output rather than from Predicted_MegaWatt itself.
# The original self-referential comprehension fails on a second run of this
# cell, because the list it produced holds scalars that cannot be indexed.
Predicted_MegaWatt = [x[0] for x in predicted_stock_price]
dates = TestData.index.to_list()
In [487]:
# Overlay the observed (green) and predicted (red) daily load for the test period.
fig = plt.figure()
ax1 = fig.add_subplot(111)

x = dates
y = True_MegaWatt
y1 = Predicted_MegaWatt

# Label both lines so that plt.legend() has something to show; without labels
# the legend was empty and matplotlib warned about missing handles.
plt.plot(x, y, color="green", label="True")
plt.plot(x, y1, color="red", label="Predicted")

# Rotate and align the date tick labels.
plt.gcf().autofmt_xdate()
plt.xlabel('Dates')
plt.ylabel("Power in MW")
plt.title("Machine Learned the Pattern Predicting Future Values ")
plt.legend()
Out[487]:
This comment has been removed by the author.
ReplyDeletePythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download Now
Delete>>>>> Download Full
Pythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download LINK
>>>>> Download Now
Pythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download Full
>>>>> Download LINK R9
This comment has been removed by the author.
ReplyDeleteHii Soumil, I am following your tutorial and facing some issue with Prediction value , I am very new to this. I completely stuck with this problem so need your guidance over this. It would be very helpful if you enlighten me.
ReplyDeletePlease look into this link :https://stackoverflow.com/questions/59775085/getting-constant-prediction-values-using-lstm-keras-syntax
If You are going to use Google Colab: Instead of Jupyter notebbok. For last part of code named as "MODEL"
ReplyDeleteYou need to import theese libraries/moduls,: ;)
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
this problem aries while running this code in google lab please help
ReplyDelete-------------------------------
TypeError Traceback (most recent call last)
in ()
2 ax1= fig.add_subplot(111)
3
----> 4 sns.lineplot(x=dataset["Time"],y=dataset["AEP_MW"], data=df)
5 plt.title("Energy Consumption vs Time ")
6 plt.xlabel("Time")
8 frames
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
TypeError: float() argument must be a string or a number, not 'datetime.time
hi, do you have the solution for this? can you pls email me if you have the solution
Deleteainafaqihah7@gmail.com
This comment has been removed by the author.
Deletereplace x=dataset["Time"] with x=dataset["Time"].astype(str)
Deletei have same error....
Deletehow can i solve this?
Hi, would you mind updating this code as it doesn't seem to work anymore!
ReplyDeleteHi, i hope you are safe
Deletethis is the entire project
https://github.com/drwiiche/electricity-consumption/blob/master/electricity-consumption-project.ipynb
Apologies for the late reply, the losses that I'm getting are Nans. Any fix for this ?
Deletecan i get the dataset link
ReplyDeleteyou can get it here :
Deletehttps://github.com/drwiiche/electricity-consumption
Thanks for the blog loaded with so many information. Stopping by your blog helped me to get what I was looking for. hemp oil wellness
ReplyDeleteHi where did u Predict Values of 2 month???
ReplyDeleteThanks for providing recent updates regarding the concern, I look forward to read more. aktieanalys
ReplyDeleteHi, I am a python beginner, how could I determine rms and mae comparing true data and predicted data with using Sklearn?
ReplyDeleteSeems the value is not correct if I input like this comparing y and y1 in the model:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
rmse = sqrt(mean_squared_error(y, y1))
mae = mean_absolute_error(y, y1)
Plus Xnergy Edge - AIOT/Energy IOT for smart energy IOT, provides ecosystem that turns building energy insights into savings & business intelligence building ems
ReplyDeleteHi,
ReplyDeleteThank you for a good description. I managed to run the code and overcome all error. Now I would like to predict future value and not substract "60 days data" from my excel file. Where do i change in the code in order to tell the machine to predict the energy consumption in the coming 60 days (ex 2021-06-07)?
hey i am new to python and ml and i am not getting this code ...what actually does this code do ? I thought this is going to predict future energy consumption
DeleteThis comment has been removed by the author.
ReplyDeletePositive site, where did u come up with the information on this posting?I have read a few of the articles on your website now, and I really like your style. Thanks a million and please keep up the effective work. amazon product research tool free
ReplyDeleteThis comment has been removed by the author.
ReplyDeleteI have been impressed after read this because of some quality work and informative thoughts. I just want to say thanks for the writer and wish you all the best for coming! Your exuberance is refreshing. The Best Remote Team Management Tool
ReplyDeleteCan you please provide dataset of this problem
ReplyDeleteHi
ReplyDeleteFirst of all thanks for the presentation regarding time series prediction analysis. By the way, I am a Civil Engineer. I prefer to forecast events regarding to hydrology based on CTS-LSMT. Can you please help me to build up coding for that?
Getting following error
ReplyDeleteCannot convert a symbolic Tensor (lstm_2/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported
Please help
how can you write a Python program that analyzes the consumption for a month from CSV file, number of days in the month, average consumption per hour,time of the highest consumption.(link adress for the CSV file)
ReplyDeletehttps://cdn.fbsbx.com/v/t59.2708-21/246526620_199119609030507_8304275238008157976_n.csv/meteringvalues-mp-xxxxx-consumption-202012.csv?_nc_cat=106&ccb=1-5&_nc_sid=0cab14&_nc_ohc=_XCmvNzqBBsAX9rtULi&_nc_ht=cdn.fbsbx.com&oh=b9bf11f8afd69c6d4bb34c5a3c8bc46c&oe=61733F1E&dl=1
hello , please you can give me your private contact i need your help
ReplyDeletePythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download Now
ReplyDelete>>>>> Download Full
Pythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download LINK
>>>>> Download Now
Pythonist: Project: Data Analysis And Visualizations And Predicting Future Energy Consumption Using Lstm Predicting Values 2 Month Later Accurately Rnn >>>>> Download Full
>>>>> Download LINK
Hello Soumil, I wanted to know how did you manage to get the data set of power consumption of the company. Basically, I am also working on the same project just don't know how to or from where I can get tha data set. Urgently required please help.
ReplyDeleteI have problem in Energy with Respect to Time part ---------------------------------------------------------------------------
ReplyDeleteTypeError Traceback (most recent call last)
/usr/local/lib/python3.9/dist-packages/pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric()
TypeError: Invalid object type
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
6 frames
in
2 ax1= fig.add_subplot(111)
3
----> 4 sns.lineplot(x=dataset["Time"],y=dataset["AEP_MW"], data=df)
5 plt.title("Energy Consumption vs Time ")
6 plt.xlabel("Time")
/usr/local/lib/python3.9/dist-packages/seaborn/relational.py in lineplot(data, x, y, hue, size, style, units, palette, hue_order, hue_norm, sizes, size_order, size_norm, dashes, markers, style_order, estimator, errorbar, n_boot, seed, orient, sort, err_style, err_kws, legend, ci, ax, **kwargs)
643 kwargs["color"] = _default_color(ax.plot, hue, color, kwargs)
644
--> 645 p.plot(ax, kwargs)
646 return ax
647
/usr/local/lib/python3.9/dist-packages/seaborn/relational.py in plot(self, ax, kws)
421 # Loop over the semantic subsets and add to the plot
422 grouping_vars = "hue", "size", "style"
--> 423 for sub_vars, sub_data in self.iter_data(grouping_vars, from_comp_data=True):
424
425 if self.sort:
/usr/local/lib/python3.9/dist-packages/seaborn/_oldcore.py in iter_data(self, grouping_vars, reverse, from_comp_data, by_facet, allow_empty, dropna)
1026
1027 if from_comp_data:
-> 1028 data = self.comp_data
1029 else:
1030 data = self.plot_data
/usr/local/lib/python3.9/dist-packages/seaborn/_oldcore.py in comp_data(self)
1124 # supporting `order` in categorical plots is tricky
1125 orig = orig[orig.isin(self.var_levels[var])]
-> 1126 comp = pd.to_numeric(converter.convert_units(orig))
1127 if converter.get_scale() == "log":
1128 comp = np.log10(comp)
/usr/local/lib/python3.9/dist-packages/pandas/core/tools/numeric.py in to_numeric(arg, errors, downcast)
182 coerce_numeric = errors not in ("ignore", "raise")
183 try:
--> 184 values, _ = lib.maybe_convert_numeric(
185 values, set(), coerce_numeric=coerce_numeric
186 )
/usr/local/lib/python3.9/dist-packages/pandas/_libs/lib.pyx in pandas._libs.lib.maybe_convert_numeric()
TypeError: Invalid object type at position 0
can someone help me 1 Training_Set = Training_Set.values
ReplyDelete2 sc = MinMaxScaler(feature_range=(0, 1))
3 Train = sc.fit_transform(Training_Set)
AttributeError: 'numpy.ndarray' object has no attribute 'values'