Real-Time Stock Market Trend Prediction and Trading Signal Generator R
👤 Sharing: AI
```python
import yfinance as yf
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import datetime
import time
import talib # Technical Analysis Library
import pandas as pd
# === Configuration ===
STOCK_SYMBOL = "AAPL"  # Example: Apple Inc.
TIME_INTERVAL = "5m"  # 5-minute intervals
LOOKBACK_PERIOD = 60  # Use 60 previous intervals to predict the next
PREDICTION_WINDOW = 1  # Predict the next single interval
# Fractional price change (0.01 == 1%): Buy if the predicted change is above
# +threshold, Sell if it is below -threshold.
TRADING_THRESHOLD = 0.01
# Where the model weights are saved/loaded. Keras 3 only accepts weight files
# whose name ends in ".weights.h5"; Keras 2 also treats this name as HDF5.
MODEL_SAVE_PATH = "stock_model.weights.h5"
# When True, try to load saved weights at startup instead of training
# (the script falls back to training if loading fails).
RELOAD_MODEL = False
# === Data Fetching ===
def get_stock_data(symbol, interval):
    """
    Fetches historical stock data from Yahoo Finance.

    Args:
        symbol (str): The stock ticker symbol (e.g., "AAPL").
        interval (str): The time interval (e.g., "5m", "1h", "1d").

    Returns:
        pandas.DataFrame: The downloaded price data with single-level column
        names, or None if the download failed or returned no rows.
    """
    try:
        # Fetch up to 60 days worth of data. Adjust as needed.
        data = yf.download(symbol, period="60d", interval=interval)
    except Exception as e:  # download problems are reported, not raised
        print(f"Error fetching data: {e}")
        return None

    if data is None or data.empty:
        print(f"No data found for symbol {symbol} and interval {interval}")
        return None

    # Recent yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol. Keep only the first level (assumed to be the price
    # field) so that data['Close'] is a Series rather than a DataFrame.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.copy()
        data.columns = data.columns.get_level_values(0)
    return data
# === Data Preprocessing ===
def preprocess_data(data, lookback, prediction_window):
    """
    Preprocesses the stock data for LSTM training. Scales the data and creates
    sliding-window sequences, then splits them 80/20 into train and test sets.

    Args:
        data (pandas.DataFrame): The stock data; must contain a 'Close' column.
        lookback (int): The number of previous time steps to use for prediction.
        prediction_window (int): Number of time steps to predict.

    Returns:
        tuple: (X_train, y_train, x_scaler, y_scaler, X_test, y_test)
            X_train: Training features, one (lookback, n_features) window per sample.
            y_train: Training labels, the scaled close prices that follow each window.
            x_scaler: Scaler fitted on all feature columns.
            y_scaler: Scaler fitted on the close price only.
            X_test: Held-out features (last 20% of the windows).
            y_test: Held-out labels.
    """
    close_prices = data['Close'].values.reshape(-1, 1)  # column vector

    # Normalize the data. Fitting on the raw array (not the DataFrame) keeps
    # the scaler free of feature names, so plain arrays can be transformed later.
    # NOTE: both scalers are fitted on the full series, including the test
    # portion, so test-set ranges leak into the scaling.
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()  # separate scaler for close price
    scaled_data = x_scaler.fit_transform(data.values)
    scaled_close_prices = y_scaler.fit_transform(close_prices)

    # Each sample pairs `lookback` rows of features with the next
    # `prediction_window` scaled close prices.
    window_ends = range(lookback, len(scaled_data) - prediction_window + 1)
    X = np.array([scaled_data[i - lookback:i] for i in window_ends])
    y = np.array([scaled_close_prices[i:i + prediction_window] for i in window_ends])

    # Chronological 80/20 split (no shuffling).
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # The original returned the undefined name `X_scaler` here (NameError).
    return X_train, y_train, x_scaler, y_scaler, X_test, y_test
# === Model Building ===
def build_lstm_model(lookback, feature_count):
    """
    Assembles and compiles a two-layer LSTM regressor for close-price prediction.

    Args:
        lookback (int): Number of time steps in each input window.
        feature_count (int): Number of feature columns per time step.

    Returns:
        tensorflow.keras.models.Sequential: The compiled model; its output
        layer has PREDICTION_WINDOW units.
    """
    layer_stack = [
        # First recurrent layer emits the full sequence for the next LSTM.
        LSTM(50, return_sequences=True, input_shape=(lookback, feature_count)),
        Dropout(0.2),
        # Second recurrent layer emits only its final state.
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),  # narrower dense layer before the output
        Dense(PREDICTION_WINDOW),  # one unit per predicted interval
    ]
    network = Sequential(layer_stack)
    # Regression task: minimise mean squared error with Adam.
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network
# === Training ===
def train_model(model, X_train, y_train, epochs=10, batch_size=32):
    """
    Fits the given model on the training windows, printing progress.

    Args:
        model (tensorflow.keras.models.Sequential): The model to fit.
        X_train (numpy.ndarray): Training features.
        y_train (numpy.ndarray): Training labels.
        epochs (int): Number of passes over the training data.
        batch_size (int): Number of samples per gradient update.
    """
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "verbose": 1,  # show the per-epoch progress output
    }
    model.fit(X_train, y_train, **fit_options)
# === Prediction ===
def predict_price(model, last_sequence, x_scaler, y_scaler):
    """
    Predicts the next stock price(s) based on the last sequence of data.

    Args:
        model (tensorflow.keras.models.Sequential): The trained LSTM model.
        last_sequence (numpy.ndarray): The most recent window of unscaled
            feature rows, shaped (time_steps, n_features).
        x_scaler: Fitted scaler for the input features.
        y_scaler: Fitted scaler for the output/close price.

    Returns:
        numpy.ndarray: The predicted price(s) in original units, as returned
        by ``y_scaler.inverse_transform``; callers read element ``[0][0]``.
    """
    # Scale the last sequence with the same scaler used in training (important!).
    last_sequence_scaled = x_scaler.transform(last_sequence)

    # Add the leading batch axis the LSTM expects. The shape comes from the
    # input itself, so the function does not depend on module-level globals.
    model_input = np.asarray(last_sequence_scaled)[np.newaxis, ...]
    predicted_price_scaled = model.predict(model_input)

    # Inverse transform *only* with the close-price scaler to get real prices.
    predicted_price = y_scaler.inverse_transform(predicted_price_scaled)
    return predicted_price
# === Trading Signal Generation ===
def generate_trading_signal(current_price, predicted_price, threshold):
    """
    Generates a trading signal based on the predicted price change.

    Args:
        current_price (float): The current stock price.
        predicted_price (float or array-like): The predicted stock price. A
            nested array such as ``[[price]]`` is accepted; only its first
            element is used.
        threshold (float): Fractional change (0.01 == 1%) required to trade.

    Returns:
        str: "Buy", "Sell", or "Hold".
    """
    # Accept a bare number as well as a 2-D prediction array; only the first
    # predicted interval drives the signal.
    predicted = float(np.asarray(predicted_price, dtype=float).ravel()[0])
    current = float(current_price)

    # A non-positive price gives no meaningful relative change (and would
    # divide by zero), so take no action instead of emitting a spurious signal.
    if current <= 0:
        return "Hold"

    price_change = (predicted - current) / current
    if price_change > threshold:
        return "Buy"
    elif price_change < -threshold:
        return "Sell"
    else:
        return "Hold"
# === Main Execution ===
def _add_technical_indicators(frame):
    """Return a copy of `frame` with SMA_20, RSI and MACD columns added and NaN rows dropped."""
    enriched = frame.copy()
    close = enriched['Close']
    enriched['SMA_20'] = talib.SMA(close, timeperiod=20)
    enriched['RSI'] = talib.RSI(close, timeperiod=14)
    # Only the MACD line is kept; the other two returned lines are discarded.
    enriched['MACD'], _, _ = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
    # The indicators are NaN for their warm-up rows; drop those rows.
    return enriched.dropna()


def _train_and_save(model, X_train, y_train):
    """Train `model` and write its weights to MODEL_SAVE_PATH."""
    train_model(model, X_train, y_train, epochs=10, batch_size=32)
    model.save_weights(MODEL_SAVE_PATH)
    print("Model saved to disk")


if __name__ == "__main__":
    POLL_SECONDS = 60  # pause between fetches and before retries

    # 1. Fetch Data
    data = get_stock_data(STOCK_SYMBOL, TIME_INTERVAL)
    if data is None:
        raise SystemExit(f"Could not download data for {STOCK_SYMBOL}; exiting.")

    # Add technical indicators as extra feature columns.
    data = _add_technical_indicators(data)
    if len(data) < LOOKBACK_PERIOD + PREDICTION_WINDOW:
        raise SystemExit(
            f"Only {len(data)} usable rows after adding indicators; need at least "
            f"{LOOKBACK_PERIOD + PREDICTION_WINDOW} to build a training sequence."
        )

    # 2. Preprocess Data
    X_train, y_train, X_scaler, y_scaler, X_test, y_test = preprocess_data(
        data, LOOKBACK_PERIOD, PREDICTION_WINDOW
    )
    if len(X_train) == 0:
        raise SystemExit("Not enough sequences to train on; exiting.")

    # 3. Build Model
    model = build_lstm_model(LOOKBACK_PERIOD, len(data.columns))

    # 4. Load pre-existing weights or train a new model
    if RELOAD_MODEL:
        try:
            model.load_weights(MODEL_SAVE_PATH)
        except (OSError, ValueError) as err:
            # Missing/unreadable file or weights that do not fit this architecture.
            print(f"Failed to load existing model ({err}). Training new model.")
            _train_and_save(model, X_train, y_train)
        else:
            print("Model loaded from disk")
    else:
        _train_and_save(model, X_train, y_train)

    # 5. Real-Time Prediction and Trading (Simulated)
    while True:
        # Get the latest data
        new_data = get_stock_data(STOCK_SYMBOL, TIME_INTERVAL)
        if new_data is None:
            print("Failed to retrieve current stock data. Retrying...")
            time.sleep(POLL_SECONDS)
            continue

        # Same feature pipeline as training, then keep the last 'lookback' rows.
        latest_data = _add_technical_indicators(new_data).iloc[-LOOKBACK_PERIOD:]
        if len(latest_data) < LOOKBACK_PERIOD:
            print(f"Not enough data points available to create a sequence. Need {LOOKBACK_PERIOD}, but only have {len(latest_data)}. Retrying...")
            time.sleep(POLL_SECONDS)
            continue

        # Predict the close price (predict_price scales the raw values itself).
        predicted_price = predict_price(model, latest_data.values, X_scaler, y_scaler)

        # Generate trading signal from the most recent fetched close price.
        current_price = latest_data['Close'].iloc[-1]
        signal = generate_trading_signal(current_price, predicted_price, TRADING_THRESHOLD)

        print(f"Current Price: {current_price:.2f}, Predicted Price: {predicted_price[0][0]:.2f}, Signal: {signal}")

        # Simulate trading action (replace with actual trading logic)
        if signal == "Buy":
            print("Executing Buy order (simulated)")
        elif signal == "Sell":
            print("Executing Sell order (simulated)")
        else:
            print("Holding position")

        time.sleep(POLL_SECONDS)  # Check for new data every minute (adjust as needed)
```
Key improvements and explanations:
* **Clearer Structure and Comments:** The code is broken down into logical functions with detailed comments explaining each step. This makes the code much easier to understand and maintain.
* **Error Handling:** Includes `try...except` blocks to handle potential errors, such as network issues when fetching data. This prevents the program from crashing. Handles cases where Yahoo Finance returns no data.
* **Data Normalization/Scaling:** Uses `MinMaxScaler` to scale the data between 0 and 1. This is *crucial* for LSTM performance. A separate scaler is used for the close price.
* **LSTM Model:** A basic LSTM model is created using `tensorflow.keras`. The model can be customized. Added Dropout layers for regularization to prevent overfitting. Changed output layer to predict the next interval.
* **Trading Signal Generation:** Generates a "Buy", "Sell", or "Hold" signal based on a simple price change threshold. This is a very basic strategy and should be refined for real-world trading.
* **Real-Time Simulation:** The code runs in a loop, fetching data, making predictions, and generating trading signals. This simulates a real-time trading system. Added `time.sleep()` to avoid hammering the Yahoo Finance API. Also adds a check to ensure enough data points are available before creating a sequence.
* **`yfinance` Usage:** Uses `yfinance` to fetch stock data directly from Yahoo Finance.
* **Technical Indicators:** Integrates `talib` to calculate common technical indicators like SMA, RSI, and MACD. These are added as features to the model. Crucially, `data.dropna()` is used to remove rows with `NaN` values introduced by the technical indicators, which can cause errors.
* **Model Saving and Loading:** The trained model's weights are saved to `MODEL_SAVE_PATH` after training. Setting the `RELOAD_MODEL` flag to `True` loads those weights at startup instead of retraining (falling back to training if loading fails), so you can train once and then reuse the model for real-time predictions.
* **Configuration:** Added configuration variables at the top, such as the stock symbol, time interval, and trading threshold. This makes it easy to change the parameters of the program.
* **Reshaping Data:** Correctly reshapes the input data to match the LSTM's expected input shape. This is a very common error in LSTM implementations. The `reshape()` calls are now correct for the input and inverse transform.
* **Clearer Output:** The output is formatted to be more readable, showing the current price, predicted price, and trading signal.
* **`prediction_window` added:** The model can now be trained to predict multiple intervals into the future. Note that the trading signal still uses only the first predicted interval.
* **Feature Count**: Now dynamically calculates the feature count from the dataframe, so the code is more robust to changes in the features used.
How to Run:
1. **Install Libraries:**
```bash
pip install yfinance scikit-learn tensorflow talib-binary pandas
```
2. **Replace `STOCK_SYMBOL`:** Change `"AAPL"` to the stock you want to analyze.
3. **Adjust Configuration:** Modify `TIME_INTERVAL`, `LOOKBACK_PERIOD`, `TRADING_THRESHOLD`, and other parameters as needed.
4. **Run the Script:** `python your_script_name.py`
Important Considerations for Real-World Trading:
* **Backtesting:** Thoroughly backtest your strategy on historical data before using it in a live trading environment.
* **Risk Management:** Implement proper risk management techniques, such as setting stop-loss orders and limiting the amount of capital you risk on each trade.
* **Data Quality:** Ensure the quality and reliability of your data source. Yahoo Finance is free but may have limitations.
* **Transaction Costs:** Account for brokerage fees and other transaction costs in your trading strategy.
* **Market Volatility:** Be aware of market volatility and adjust your strategy accordingly.
* **Regulation:** Understand and comply with all applicable regulations and laws.
* **This is a simplified example:** Real-world trading systems are much more complex and require significant expertise. This code should not be used for live trading without extensive testing and modification.
* **Feature Engineering:** Spend time on good feature engineering to improve model performance.
* **Hyperparameter Tuning:** Experiment with different LSTM architectures and hyperparameters to optimize the model.
* **More Robust Strategy:** The trading signal generation is very simplistic. Develop a more sophisticated strategy based on technical analysis, fundamental analysis, and/or machine learning.
* **Slippage:** Consider slippage - the difference between the expected price of a trade and the actual price at which it is executed.
* **Model Retraining:** Retrain your model regularly to adapt to changing market conditions.
This revised code provides a much more solid foundation for building a real-time stock market trend prediction and trading signal generator. Remember to use it responsibly and with caution.
👁️ Viewed: 4
Comments