AI-Powered Interest Rate Prediction (Python, AI, DeFi Data)
```python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
import yfinance as yf
import datetime
import matplotlib.pyplot as plt
# --- 1. Data Acquisition (Using Simulated DeFi Interest Rate Data and some real Treasury Yield data) ---
def get_defi_data(start_date, end_date):
"""
Simulates DeFi interest rate data. In a real DeFi implementation,
you would fetch this data from a DeFi API (e.g., Aave, Compound).
This example uses a simple random walk with a slight upward trend.
"""
dates = pd.date_range(start_date, end_date)
    steps = np.random.normal(0.03, 0.005, len(dates))  # small positive daily increments
    rates = np.cumsum(steps * 0.001) + 0.03  # random walk with a slight upward drift, starting near 3%
rates = np.clip(rates, 0.01, 0.10) # Limit between 1% and 10% to keep it somewhat realistic
df = pd.DataFrame({'date': dates, 'interest_rate': rates})
df = df.set_index('date')
return df
def get_treasury_yield_data(ticker, start_date, end_date):
"""
Fetches Treasury yield data from Yahoo Finance. This can be used as
an external factor influencing DeFi interest rates.
"""
try:
        # auto_adjust defaults to True in newer yfinance releases, which
        # drops the 'Adj Close' column -- request the raw columns explicitly.
        data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)
        if data.empty:
            print("No treasury yield data returned.")
            return None
        # Newer yfinance versions may return MultiIndex columns; flatten them.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)
        # Use the Adj Close price as the yield. It's not technically the yield
        # itself, but it tracks it closely enough for illustrative purposes --
        # a proper analysis needs actual yield data.
        df = data[['Adj Close']].rename(columns={'Adj Close': 'treasury_yield'})
        return df
return df
except Exception as e:
print(f"Error fetching treasury yield data: {e}")
return None
# --- 2. Data Preprocessing ---
def preprocess_data(defi_df, treasury_df, lookback=60):
"""
Preprocesses the data:
1. Merges DeFi and Treasury data.
2. Scales the data to the range [0, 1].
3. Creates sequences of `lookback` data points to be used as input to the LSTM.
"""
# Merge the DataFrames on the 'date' index
df = defi_df.join(treasury_df, how='inner') # Ensure data alignment
# Handle missing values (if any) - using simple forward fill
df = df.ffill() # Fill any missing data by carrying forward the last known value. Crucial.
    # Scale the data to [0, 1]. Note: fitting the scaler on the full series
    # leaks test-set statistics; a leak-free variant is sketched after the listing.
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_data, columns=df.columns, index=df.index)
# Create sequences
X, y = [], []
for i in range(lookback, len(scaled_data)):
X.append(scaled_data[i-lookback:i])
y.append(scaled_data[i, 0]) # Predict DeFi interest rate (first column)
X, y = np.array(X), np.array(y)
return X, y, scaler, scaled_df
# --- 3. Model Building (LSTM) ---
def build_lstm_model(input_shape):
"""
Builds an LSTM model.
"""
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=input_shape))
model.add(Dropout(0.2)) # Add dropout to prevent overfitting
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=25)) # Intermediate Dense layer
model.add(Dense(units=1)) # Output layer (predicting a single value - the interest rate)
optimizer = Adam(learning_rate=0.001) # Adjust learning rate as needed
model.compile(optimizer=optimizer, loss='mean_squared_error')
return model
# --- 4. Training and Evaluation ---
def train_model(model, X_train, y_train, X_val, y_val, epochs=50, batch_size=32):
"""
Trains the model and evaluates it on a validation set.
"""
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
validation_data=(X_val, y_val), verbose=1, shuffle=False)
return model, history
def evaluate_model(model, X_test, y_test, scaler, scaled_df):
"""
Evaluates the model and visualizes the results. Also includes a reverse scaling
to interpret the predicted values in their original scale.
"""
predictions = model.predict(X_test)
# Create a dummy array with the same number of columns as the original scaled data
# We will replace the predicted interest rate (the first column) and then inverse transform
dummy_array = np.zeros((len(predictions), scaled_df.shape[1]))
dummy_array[:, 0] = predictions[:, 0]
# Inverse transform the dummy array
predictions_unscaled = scaler.inverse_transform(dummy_array)[:, 0]
    y_test_padded = np.concatenate(
        (y_test.reshape(-1, 1), np.zeros((len(y_test), scaled_df.shape[1] - 1))), axis=1)
    y_test_unscaled = scaler.inverse_transform(y_test_padded)[:, 0]
rmse = np.sqrt(mean_squared_error(y_test_unscaled, predictions_unscaled))
print(f"Root Mean Squared Error: {rmse}")
# Plot the results
plt.figure(figsize=(14, 6))
plt.plot(y_test_unscaled, label='Actual Interest Rate')
plt.plot(predictions_unscaled, label='Predicted Interest Rate')
plt.xlabel('Time')
plt.ylabel('Interest Rate')
plt.title('Actual vs. Predicted DeFi Interest Rates')
plt.legend()
plt.show()
# Zoomed-in Plot for last 100 points
plt.figure(figsize=(14, 6))
plt.plot(y_test_unscaled[-100:], label='Actual Interest Rate')
plt.plot(predictions_unscaled[-100:], label='Predicted Interest Rate')
plt.xlabel('Time (Last 100 Points)')
plt.ylabel('Interest Rate')
plt.title('Zoomed-In Actual vs. Predicted DeFi Interest Rates (Last 100)')
plt.legend()
plt.show()
return predictions_unscaled, y_test_unscaled
# --- 5. Main Execution ---
if __name__ == "__main__":
# Define parameters
start_date = datetime.datetime(2020, 1, 1)
end_date = datetime.datetime(2024, 1, 1)
ticker = "^TNX" # 10-Year Treasury Yield
lookback = 60 # Number of previous days to use for prediction
test_size = 0.2 # Proportion of data to use for testing
val_size = 0.2 # Proportion of training data to use for validation
# Get the data
defi_data = get_defi_data(start_date, end_date)
treasury_data = get_treasury_yield_data(ticker, start_date, end_date)
    if treasury_data is None:
        # Exit with a nonzero status on failure; avoids relying on the
        # site-injected exit() builtin.
        raise SystemExit("Exiting due to failure to retrieve treasury data.")
# Preprocess the data
X, y, scaler, scaled_df = preprocess_data(defi_data, treasury_data, lookback)
# Split into train, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, shuffle=False) # Very important shuffle=False for time series
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_size, shuffle=False)
# Build the model
    model = build_lstm_model(input_shape=(X_train.shape[1], X_train.shape[2]))  # input_shape = (timesteps, n_features)
model.summary() # Print model summary
# Train the model
model, history = train_model(model, X_train, y_train, X_val, y_val, epochs=50, batch_size=32)
# Evaluate the model
    predictions, actual = evaluate_model(model, X_test, y_test, scaler, scaled_df)
```
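One caveat about the listing above: `preprocess_data` fits the `MinMaxScaler` on the full series *before* the train/test split, so the scaler's min/max statistics leak information from the test period. Below is a minimal leak-free sketch that fits the scaler on the training slice only; the function name and the `train_frac` default are illustrative, not part of the original script.
```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def preprocess_data_no_leak(df: pd.DataFrame, lookback: int = 60, train_frac: float = 0.8):
    """Like preprocess_data, but the scaler only ever sees training data."""
    df = df.ffill()
    split = int(len(df) * train_frac)

    scaler = MinMaxScaler()
    scaler.fit(df.iloc[:split])       # statistics from the training slice only
    scaled = scaler.transform(df)     # same transform applied to the full series

    X, y = [], []
    for i in range(lookback, len(scaled)):
        X.append(scaled[i - lookback:i])
        y.append(scaled[i, 0])        # first column = DeFi interest rate

    X, y = np.array(X), np.array(y)
    n_train = split - lookback        # sequences whose target falls in the training slice
    return (X[:n_train], y[:n_train]), (X[n_train:], y[n_train:]), scaler
```
Test sequences near the boundary still consume a few training-period inputs, which is normal for sliding-window splits; only the scaler statistics needed protecting here.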
Key design points and explanations:
* **Clearer Structure:** The code is divided into well-defined functions, making it more readable and maintainable.
* **Data Acquisition:**
    * `get_defi_data()`: *Simulates* DeFi interest rate data as a random walk starting near 3% and clipped between 1% and 10%. In a real-world scenario, you'd replace this with actual data fetched from DeFi APIs (e.g., Aave, Compound).
    * `get_treasury_yield_data()`: Fetches real Treasury yield data from Yahoo Finance using the `yfinance` library (with `auto_adjust=False`, since newer yfinance releases otherwise drop the `Adj Close` column). This serves as an *external* indicator that can influence DeFi rates (e.g., higher Treasury yields might pull DeFi rates up). Basic error handling is included. **Important:** The `Adj Close` price from yfinance is *not* the actual Treasury yield; for true accuracy, fetch yield data directly. It is good enough for a demonstration.
* **Data Preprocessing:**
    * **Merging:** Uses `defi_df.join(treasury_df, how='inner')` to merge the DeFi and Treasury data on the date index. The `inner` join ensures that only dates present in *both* datasets are used, preventing `NaN` rows from non-overlapping dates.
* **Missing Data Handling:** Implements `df = df.ffill()` which is *critical* for time series data. It fills missing values by carrying forward the last known value. Without this, `NaN` values will propagate through the entire process, leading to errors.
    * **Scaling:** Uses `MinMaxScaler` to scale the data to the range [0, 1], which is *essential* for fast, stable LSTM training. For simplicity the script fits the scaler on the full series, which mildly leaks test-set statistics; a leak-free variant is sketched immediately after the code listing above.
* **Sequence Creation:** Creates sequences of `lookback` data points for LSTM input. The `X` contains sequences of historical data, and `y` contains the corresponding interest rate to predict.
* **LSTM Model:**
* **Dropout:** Includes `Dropout` layers to prevent overfitting. Dropout randomly drops neurons during training, forcing the network to learn more robust features.
* **Architecture:** Uses a two-layer LSTM with dropout. The intermediate `Dense` layer can improve performance. Adjust the number of units and layers as needed.
* **Optimizer:** Uses the `Adam` optimizer, which is generally a good choice for LSTM models. The learning rate can be tuned.
* **Training and Evaluation:**
    * **Splitting Data:** Uses `train_test_split` to divide the data into training, validation, and testing sets. `shuffle=False` is *crucial* for time series; otherwise future observations leak into the training set. A separate validation set is used to monitor performance during training and catch overfitting.
    * **Reverse Scaling:** The `evaluate_model` function inverse transforms the predictions using the `scaler`: it creates a dummy array, places the predictions in the correct column, and then calls `inverse_transform` to recover the original scale. This is essential for interpreting the results.
* **RMSE Calculation:** Calculates the Root Mean Squared Error (RMSE) to quantify the prediction error.
* **Visualization:** Plots the actual vs. predicted interest rates *in their original scale*, making the results much easier to understand. Includes a zoomed-in plot of the last 100 data points for a closer look.
* **Main Execution:**
* **Parameter Definition:** Defines all the important parameters (start date, end date, ticker, lookback, test size, val_size) at the beginning of the script, making it easy to modify them.
* **Error Handling:** Includes a check to ensure that the Treasury data was successfully retrieved.
* **Model Summary:** Prints the model summary using `model.summary()` to show the architecture and number of parameters.
* **Clarity and Comments:** The code is well-commented to explain each step. Variable names are more descriptive.
* **Dependency Management:** Assumes `yfinance`, `pandas`, `numpy`, `scikit-learn`, and `tensorflow` are installed. Use `pip install yfinance pandas numpy scikit-learn tensorflow`.
* **Important Considerations for Real-World Use:**
    * **Data Source:** Replace the simulated DeFi data with a real-time feed from DeFi protocols (e.g., via their APIs or subgraphs); a hedged fetching sketch follows this list.
* **Feature Engineering:** Add more features that might influence interest rates, such as:
* Total value locked (TVL) in the DeFi protocol
* Trading volume
* Gas fees on the blockchain
* Volatility of underlying assets
* Economic indicators
* **Model Tuning:** Experiment with different LSTM architectures, hyperparameters (e.g., number of layers, units per layer, learning rate, dropout rate), and optimizers.
* **Regularization:** Explore other regularization techniques (e.g., L1 or L2 regularization).
    * **Backtesting:** Thoroughly backtest the model on historical data to evaluate its performance across market conditions; a minimal walk-forward sketch appears at the end of this post.
* **Risk Management:** Implement robust risk management strategies to mitigate the risks associated with automated trading or lending based on the model's predictions.
* **Dynamic Retraining:** Retrain the model periodically with new data to adapt to changing market dynamics.
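To make the "Data Source" point concrete, here is a hedged sketch of pulling a live supply rate from an Aave subgraph. The endpoint URL is a placeholder (hosted-service URLs have been retired in favor of The Graph's decentralized network), and the field names (`reserves`, `symbol`, `liquidityRate`) should be verified against the current schema before use.
```python
import requests

# Placeholder endpoint -- check the Aave docs or The Graph explorer for the
# current subgraph URL and authentication requirements.
SUBGRAPH_URL = "https://api.thegraph.com/subgraphs/name/aave/protocol-v3"

# Field names follow the Aave subgraph schema as commonly documented;
# verify them against the live schema before relying on this query.
QUERY = """
{
  reserves(where: {symbol: "USDC"}) {
    symbol
    liquidityRate
  }
}
"""

def get_aave_supply_rates():
    resp = requests.post(SUBGRAPH_URL, json={"query": QUERY}, timeout=10)
    resp.raise_for_status()
    reserves = resp.json()["data"]["reserves"]
    # Aave expresses rates in "ray" units (27 decimals); dividing by 1e27
    # yields a plain fraction, e.g. 0.03 for a 3% APR.
    return {r["symbol"]: int(r["liquidityRate"]) / 1e27 for r in reserves}
```
Snapshotting a call like this on a schedule (or querying the subgraph's historical entities) would give you a DataFrame shaped like the one `get_defi_data` returns, ready to drop into the rest of the pipeline.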
This example provides a functional, well-structured walkthrough of building a DeFi interest rate prediction model while highlighting the key considerations for taking such a model to production. The mix of simulated DeFi data and real Treasury yield data keeps it practical without requiring protocol API access. Remember to install the necessary libraries before running the code.
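To make the backtesting and dynamic-retraining points concrete, here is a minimal walk-forward sketch built on `build_lstm_model` and the `X`, `y` arrays from the script above; the window sizes and epoch count are illustrative placeholders, not tuned values.
```python
import numpy as np

def walk_forward_backtest(X, y, build_fn, initial_train=500, step=50, epochs=10):
    """Retrain on an expanding window, then predict the next `step` points."""
    preds, actuals = [], []
    for start in range(initial_train, len(X) - step, step):
        model = build_fn(input_shape=(X.shape[1], X.shape[2]))
        # Expanding window: train on everything observed so far, in order.
        model.fit(X[:start], y[:start], epochs=epochs, batch_size=32,
                  verbose=0, shuffle=False)
        preds.append(model.predict(X[start:start + step], verbose=0).ravel())
        actuals.append(y[start:start + step])
    # Outputs are in scaled space; inverse-transform them the same way
    # evaluate_model does before computing error metrics.
    return np.concatenate(preds), np.concatenate(actuals)

# Example usage with X, y from preprocess_data:
# preds, actuals = walk_forward_backtest(X, y, build_lstm_model)
```
Retraining from scratch at every step is slow but avoids state leaking between folds; in practice you might warm-start from the previous weights and retrain less often.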