import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
# Ticker symbol and the date range that bounds the training data.
company = 'FB'
start, end = dt.datetime(2017, 1, 1), dt.datetime(2020, 3, 26)

# Download the price history for the training period.
# NOTE(review): the ticker and the 'yahoo' source are hard-coded -- confirm
# both are still served through pandas_datareader.
data = web.DataReader(company, data_source='yahoo', start=start, end=end)

# Preview of the first rows; the result is only displayed in a notebook and
# is discarded when this runs as a plain script.
data.head()
Let us now prepare the data for the neural network. To do this, we first create a scaler that scales all the values down so that they fit between 0 and 1.
# Scaler that maps values into the [0, 1] range. It is fitted on the training
# closing prices and reused later to scale the test inputs and to convert the
# model's predictions back to prices.
scaler = MinMaxScaler(feature_range=(0,1))
We are not going to transform the whole data frame; we are only interested in the closing price, because that is what we want to predict, not the opening price.
# Only the 'Close' column is used. It is reshaped into a single-feature column
# (n_rows, 1) before the scaler is fitted and applied in one step.
close_column = data['Close'].values.reshape(-1, 1)
scaled_data = scaler.fit_transform(close_column)
Now we define the prediction days, which is just a number: how many past days the prediction is based on when deciding what the price will be the next day. In this case, I'm going with 60 days.
# Length of the look-back window: each sample consists of this many
# consecutive scaled closing prices, and the target is the price that follows.
prediction_days = 60
Let us now prepare the training data.
# Build the supervised training samples with a sliding window over the scaled
# closing prices. For every position i from `prediction_days` to the end:
#   x_train[k] = the `prediction_days` values immediately before position i
#   y_train[k] = the value at position i (the "next day" to be predicted)
# (The loop body had lost its indentation, which made the script unparsable;
# the windows are now built with comprehensions.)
window_ends = range(prediction_days, len(scaled_data))
x_train = [scaled_data[i - prediction_days:i, 0] for i in window_ends]
y_train = [scaled_data[i, 0] for i in window_ends]
Now let us convert them into NumPy arrays and then reshape x_train so that it works with the neural network.
# Turn the Python lists of samples and targets into NumPy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)

# Append a trailing feature axis of size 1 so each sample has the
# (timesteps, 1) layout declared as the first LSTM layer's input_shape.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
Now we initialize the RNN and assign it to the variable `model`.
# Start from an empty Sequential model; the layers are added to it one by one.
model = Sequential()
#Training Neural Network
In this stage, the data is fed to the neural network, which starts from randomly assigned weights and biases and is trained to make predictions.
# Network architecture: three stacked LSTM layers of 50 units, each followed by
# 20% dropout for regularisation, and a single-unit Dense layer that outputs
# the predicted (scaled) price.
# The first two LSTM layers return full sequences so that the next LSTM layer
# receives one vector per timestep; the last LSTM layer returns only its final
# output, which feeds the Dense layer.
lstm_units = 50
dropout_rate = 0.2
for layer in (
    LSTM(units=lstm_units, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(dropout_rate),
    LSTM(units=lstm_units, return_sequences=True),
    Dropout(dropout_rate),
    LSTM(units=lstm_units),
    Dropout(dropout_rate),
    Dense(units=1),
):
    model.add(layer)
Next, we are going to compile the RNN. Here we are going to use an optimizer. An optimizer is one of the two arguments that are required for compiling a Keras model (the other is the loss function). The type of optimizer used can greatly affect how fast the algorithm converges to the minimum value. Here we have chosen to use the Adam optimizer, which combines the perks of two other optimizers: AdaGrad and RMSprop.
What is a Recurrent Neural Network?
A Recurrent Neural Network is a type of neural network that contains loops, allowing information to be stored within the network. In short, Recurrent Neural Networks use their reasoning from previous experiences to inform the upcoming events. Recurrent models are valuable in their ability to sequence vectors, which opens up the API to performing more complicated tasks.
How do Recurrent Neural Networks work?
Recurrent Neural Networks can be thought of as a series of networks linked together. They often have a chain-like architecture, making them applicable for tasks such as speech recognition, language translation, etc. An RNN can be designed to operate across sequences of vectors in the input, output, or both. For example, a sequenced input may take a sentence as an input and output a positive or negative sentiment value. Alternatively, a sequenced output may take an image as an input, and produce a sentence as an output.
Let's imagine training an RNN to spell the word "happy," given the letters "h, a, p, y." The RNN will be trained on four separate examples, each corresponding to the likelihood that letters will fall into an intended sequence. For example, the network will be trained to understand the probability that the letter "a" should follow in the context of "h." Similarly, the letter "p" should appear after the sequence "ha." Again, a probability will be calculated for the letter "p" following the sequence "hap." The process will continue until probabilities are calculated to determine the likelihood of letters falling into the intended sequence. So, as the network receives each input, it will determine the probability of the subsequent letter based on the probability of the previous letter or sequence. Over time, the network can be updated to more accurately produce results.
# Configure training: mean squared error between predicted and actual scaled
# prices is the quantity minimised, using the Adam optimizer.
model.compile(loss='mean_squared_error', optimizer='adam')

# Train on the windowed samples for 25 passes over the data, in batches of 32.
model.fit(x=x_train, y=y_train, batch_size=32, epochs=25)
#Output Generation
In this step, the output value generated by the output layer of the RNN is compared with the target value. The error, i.e. the difference between the target and the obtained output value, is minimized using the backpropagation algorithm.
In the next step, we test the model's accuracy on existing data; for this, we prepare some test data.
# Load the test period: from the end of the training range up to today.
# NOTE(review): test_start equals the training `end` date, so that day may be
# present in both frames -- confirm whether the reader's bounds are inclusive.
test_start = dt.datetime(2020, 3, 26)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)

# Unscaled closing prices of the test period, kept for the comparison plot.
actual_prices = test_data['Close'].values

# Concatenate training and test closing prices, then keep the test rows plus
# the `prediction_days` rows immediately before them, so that the first test
# day also has a full look-back window.
total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)
window_start = len(total_dataset) - len(test_data) - prediction_days
model_inputs = total_dataset[window_start:].values.reshape(-1, 1)

# Scale with the scaler already fitted on the training data (no re-fitting).
model_inputs = scaler.transform(model_inputs)
And now let us make some predictions on the test data.
# Make predictions on the test data.
# Each test sample is the `prediction_days` scaled prices preceding position i
# in model_inputs, built the same way as the training windows.
# (The loop body had lost its indentation, which made the script unparsable;
# the windows are now built with a comprehension.)
x_test = np.array([
    model_inputs[i - prediction_days:i, 0]
    for i in range(prediction_days, len(model_inputs))
])

# Add the trailing feature axis of size 1, matching the training input layout.
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

# Predict in scaled space, then convert back to actual prices.
predicted_prices = model.predict(x_test)
predicted_prices = scaler.inverse_transform(predicted_prices)
The next step is to visualize the prediction data.
#Visualization
A rolling analysis of a time series model is often used to assess the model's stability over time. When analyzing financial time series data using a statistical model, a key assumption is that the parameters of the model are constant over time.
# Plot actual vs. predicted closing prices for the test period on the current
# axes, then display the figure.
axes = plt.gca()
axes.plot(actual_prices, color="blue", label=f"Actual {company} Price")
axes.plot(predicted_prices, color="black", label=f"predicted {company} Price")
axes.set_title(f"{company} Share Price")
axes.set_xlabel('Time')
axes.set_ylabel(f'{company} Share Price')
axes.legend()
plt.show()
The final part is predicting the future, i.e. the next stock market day. For this, we are going to create a real-data list.
# Predict the next day: the value that follows the last row of model_inputs.
# The input window must contain exactly `prediction_days` rows, the same length
# as every training sample. The previous slice started at
# len(model_inputs) + 1 - prediction_days and therefore held one row too few.
real_data = np.array([model_inputs[-prediction_days:, 0]])

# Shape (1, prediction_days, 1): one sample, one feature per timestep.
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1], 1))

# Predict in scaled space and convert the result back to an actual price.
prediction = model.predict(real_data)
prediction = scaler.inverse_transform(prediction)
print(f"prediction: {prediction}")