In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/playing-the-stock-market-is-a-fools-game/sample_submission.csv
/kaggle/input/playing-the-stock-market-is-a-fools-game/train.csv

Preliminary¶

In [2]:
!pip install optuna
!pip install captum
Requirement already satisfied: optuna in /usr/local/lib/python3.10/dist-packages (4.2.1)
Requirement already satisfied: alembic>=1.5.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (1.14.1)
Requirement already satisfied: colorlog in /usr/local/lib/python3.10/dist-packages (from optuna) (6.9.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from optuna) (1.26.4)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from optuna) (24.2)
Requirement already satisfied: sqlalchemy>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from optuna) (2.0.36)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from optuna) (4.67.1)
Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from optuna) (6.0.2)
Requirement already satisfied: Mako in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (1.3.9)
Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.10/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)
Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy>=1.4.2->optuna) (3.1.1)
Requirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (1.3.8)
Requirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (1.2.4)
Requirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (0.1.1)
Requirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2025.0.1)
Requirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2022.0.0)
Requirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy->optuna) (2.4.1)
Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.10/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.2)
Requirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->optuna) (2024.2.0)
Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy->optuna) (2022.0.0)
Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy->optuna) (1.2.0)
Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy->optuna) (2024.2.0)
Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy->optuna) (2024.2.0)
Collecting captum
  Downloading captum-0.8.0-py3-none-any.whl.metadata (26 kB)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from captum) (3.7.5)
Requirement already satisfied: numpy<2.0 in /usr/local/lib/python3.10/dist-packages (from captum) (1.26.4)
Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from captum) (24.2)
Requirement already satisfied: torch>=1.10 in /usr/local/lib/python3.10/dist-packages (from captum) (2.5.1+cu121)
Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from captum) (4.67.1)
Requirement already satisfied: mkl_fft in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (1.3.8)
Requirement already satisfied: mkl_random in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (1.2.4)
Requirement already satisfied: mkl_umath in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (0.1.1)
Requirement already satisfied: mkl in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (2025.0.1)
Requirement already satisfied: tbb4py in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (2022.0.0)
Requirement already satisfied: mkl-service in /usr/local/lib/python3.10/dist-packages (from numpy<2.0->captum) (2.4.1)
Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (3.17.0)
Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (4.12.2)
Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (3.4.2)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (3.1.4)
Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (2024.12.0)
Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10->captum) (1.13.1)
Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10->captum) (1.3.0)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (1.3.1)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (4.55.3)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (1.4.7)
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (11.0.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (3.2.0)
Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib->captum) (2.9.0.post0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib->captum) (1.17.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10->captum) (3.0.2)
Requirement already satisfied: intel-openmp>=2024 in /usr/local/lib/python3.10/dist-packages (from mkl->numpy<2.0->captum) (2024.2.0)
Requirement already satisfied: tbb==2022.* in /usr/local/lib/python3.10/dist-packages (from mkl->numpy<2.0->captum) (2022.0.0)
Requirement already satisfied: tcmlib==1.* in /usr/local/lib/python3.10/dist-packages (from tbb==2022.*->mkl->numpy<2.0->captum) (1.2.0)
Requirement already satisfied: intel-cmplr-lib-rt in /usr/local/lib/python3.10/dist-packages (from mkl_umath->numpy<2.0->captum) (2024.2.0)
Requirement already satisfied: intel-cmplr-lib-ur==2024.2.0 in /usr/local/lib/python3.10/dist-packages (from intel-openmp>=2024->mkl->numpy<2.0->captum) (2024.2.0)
Downloading captum-0.8.0-py3-none-any.whl (1.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.4/1.4 MB 35.1 MB/s eta 0:00:00
Installing collected packages: captum
Successfully installed captum-0.8.0

Library¶

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils import clip_grad_norm_ as clip

import optuna
from optuna.visualization import plot_param_importances

from captum.attr import IntegratedGradients

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

1. Preprocess¶

Preprocessing is a crucial stage because it helps us understand the dataset. This section explores, cleans, and transforms the data to prepare it for further analysis. First, we import the data provided by the Kaggle competition and reformat it so that rows represent dates ("Date") and columns represent companies. We also check for missing values and summarize the first 10 companies.

The chart below displays the daily returns for 442 companies from 05/04/2010 to 31/03/2022, indicating that most fluctuate within a band of approximately -20% to +20%. Such bounded variability suggests these returns are likely stationary, with no clear trend over time. We also observe a negative shock in the first quarter of 2020, consistent with the effect of the COVID-19 pandemic.

In this project, I use MinMaxScaler for normalization to mitigate the effect of extreme values, since the data contain true zero values. Afterwards, I split the dataset chronologically into an in-sample (training) set and an out-of-sample (test) set, using 80% of the days for training and 20% for testing, separated by a threshold date; rolling windows are then built within each set (Section 2).
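
As a quick illustration (a toy sketch, not part of the pipeline), MinMaxScaler rescales each column to [0, 1] via x' = (x - min) / (max - min), and the same fitted scaler can later invert predictions back to the original percentage scale, which is what Section 7 relies on:

# Toy example: one hypothetical column of daily returns (in %)
toy = np.array([[-20.0], [0.0], [5.0], [20.0]])

scaler_demo = MinMaxScaler()                       # x' = (x - min) / (max - min)
scaled = scaler_demo.fit_transform(toy)            # [[0.0], [0.5], [0.625], [1.0]]
restored = scaler_demo.inverse_transform(scaled)   # back to the original % scale
print(scaled.ravel(), restored.ravel())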

In [4]:
# | --------------------------|
# | 1.1 Import data           |
# | --------------------------|
train = pd.read_csv('/kaggle/input/playing-the-stock-market-is-a-fools-game/train.csv', 
                    index_col='ID')

print('Overview:\n'+'-'*30)
print(f'  train.shape: \t{train.shape}')
print(f'  Total companies: \t{len(train.index.unique())}')
print('='*30)

# | --------------------------|
# | 1.2 Format data           |
# | --------------------------|
train = train.T
train.index.name = 'Date'
train.index = pd.to_datetime(train.index, format = '%d/%m/%Y')

print('\nTotal NAs:\n'+'-'*30+f'\n{train.isna().sum()}')
print('='*30)

# | --------------------------|
# | 1.3 Summary data          |
# | --------------------------|
summary_stats = train.describe()
print("\nSummary Statistics for First 10 Companies\n"+'-'*70)
print(summary_stats.iloc[:, :10])
print('='*70)

# | --------------------------|
# | 1.4 Visualization         |
# | --------------------------|
plt.figure(figsize=(15, 8))
for company in train.columns:
    plt.plot(train.index, train[company], alpha=0.5)
plt.xlabel("Date")
plt.ylabel("Daily percentage change")
plt.title("Return of 442 companies from 05/04/2010 to 31/03/2022")
plt.tight_layout()
plt.show()

# | --------------------------|
# | 1.5 Normalization         |
# | --------------------------|
scaler = MinMaxScaler()
data = scaler.fit_transform(train.values)

# | --------------------------|
# | 1.6 Split ratio           |
# | --------------------------|
thres_date = int(len(data) * 0.8)
train_set = data[:thres_date, :]
test_set = data[thres_date:, :]

print("\nData includes:\n"+'-'*30)
print(f'Train shape: \t{train_set.shape}')
print(f'Test shape: \t{test_set.shape}')
print('='*30)
Overview:
------------------------------
  train.shape: 	(442, 3021)
  Total companies: 	442
==============================

Total NAs:
------------------------------
ID
company_0      0
company_1      0
company_2      0
company_3      0
company_4      0
              ..
company_437    0
company_438    0
company_439    0
company_440    0
company_441    0
Length: 442, dtype: int64
==============================

Summary Statistics for First 10 Companies
----------------------------------------------------------------------
ID       company_0    company_1    company_2    company_3    company_4  \
count  3021.000000  3021.000000  3021.000000  3021.000000  3021.000000   
mean      0.021781    -0.019361     0.029437     0.048183     0.043363   
std       1.695075     3.130242     1.755380     1.984768     1.853193   
min     -19.350000   -43.450000   -15.030000   -19.270000   -25.860000   
25%      -0.660000    -1.360000    -0.780000    -0.830000    -0.800000   
50%       0.050000     0.030000     0.080000     0.110000     0.060000   
75%       0.800000     1.400000     0.900000     1.000000     0.940000   
max      13.760000    20.450000    15.370000    15.050000    17.410000   

ID       company_5    company_6    company_7    company_8    company_9  
count  3021.000000  3021.000000  3021.000000  3021.000000  3021.000000  
mean     -0.025650    -0.039649     0.032939    -0.034052     0.014386  
std       2.638759     3.042905     2.122935     3.312811     2.277523  
min     -41.380000   -59.740000   -19.130000   -88.150000   -31.320000  
25%      -1.060000    -1.360000    -0.920000    -1.390000    -0.880000  
50%       0.080000     0.000000     0.080000     0.030000     0.070000  
75%       1.120000     1.360000     1.110000     1.450000     1.000000  
max      22.890000    17.400000    13.460000    18.860000    19.560000  
======================================================================
[Figure: Return of 442 companies from 05/04/2010 to 31/03/2022]
Data includes:
------------------------------
Train shape: 	(2416, 442)
Test shape: 	(605, 442)
==============================

2. Data loader¶

I created a rolling-windows function and wrapped it in a custom Dataset class that serves the time series as tensors; a quick shape check follows the cell below. Early stopping is used during training to prevent overfitting and reduce computation time.

In [5]:
# | --------------------------|
# | 2.1 Rolling windows       |
# | --------------------------|
def rolling_windows(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i : (i + seq_length)])
        y.append(data[i + seq_length])
    return np.array(X), np.array(y)
    
# | --------------------------|
# | 2.2 Custom dataset        |
# | --------------------------|
class TimeSeries(Dataset):
    def __init__(self, data, seq_length):
        self.X, self.y = rolling_windows(data, seq_length)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return (torch.tensor(self.X[idx], dtype=torch.float32),
                torch.tensor(self.y[idx], dtype=torch.float32))

# | --------------------------|
# | 2.3 Early stopping        |
# | --------------------------|
class EarlyStopping:
    def __init__(self, patience=20, min_delta=1e-4):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = None
        self.counter = 0

    def step(self, current_loss):
        if self.best_loss is None or current_loss < (self.best_loss - self.min_delta):
            self.best_loss = current_loss
            self.counter = 0
            return False  # Don't stop
        else:
            self.counter += 1
            return self.counter >= self.patience
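
As a sanity check (a small sketch, not part of the pipeline; random numbers stand in for train_set), the rolling windows and dataset yield the shapes below. With 2416 training days and seq_length = 24 we expect 2416 - 24 = 2392 windows, which matches the "2392 samples" reported in the final training cell:

# Illustrative shape check for the rolling-window dataset
_demo = np.random.rand(2416, 442)          # stand-in for train_set
_demo_ds = TimeSeries(_demo, seq_length=24)
_X0, _y0 = _demo_ds[0]
print(len(_demo_ds))   # 2392 windows
print(_X0.shape)       # torch.Size([24, 442])  -> (seq_length, n_companies)
print(_y0.shape)       # torch.Size([442])      -> next-day return for every company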

3. Model architecture¶

This is my final LSTM model, built with the best parameters from Optuna's hyperparameter tuning, which I describe in the next part. Basically, it is a simple LSTM with a configurable number of layers and hidden dimensions, plus a dropout layer for deep LSTMs (more than one layer). With a single layer the dropout ratio is set to 0, so dropout does not apply in that case. Some hyperparameters are fixed at the values I believe are best (see the commented-out trial suggestions).

After tuning the hyperparameters, I found that the LSTM model with 2 layers performed better than other configurations. Therefore, I chose to use 2 layers in the final model setup.

In [6]:
def my_LSTM(best_params, input_dim, output_dim):
    num_layers = 2 # best_params["num_layers"]
    hidden_dim = best_params["hidden_dim"]
    dropout = best_params["dropout"]

    class LSTM(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
            super(LSTM, self).__init__()
            self.input_dim = input_dim
            self.hidden_dim = hidden_dim
            self.num_layers = num_layers

            self.rnn = nn.LSTM(
                input_size=input_dim,
                hidden_size=hidden_dim,
                num_layers=num_layers,
                batch_first=False,
                dropout = dropout if num_layers > 1 else 0.0
            )
            self.linear = nn.Linear(hidden_dim, output_dim)

        def init_hidden(self, batch_size, device):
            return (torch.zeros(self.num_layers, 
                                batch_size, 
                                self.hidden_dim, 
                                device = device),
                    torch.zeros(self.num_layers, 
                                batch_size, 
                                self.hidden_dim, 
                                device = device))

        def forward(self, x):
            batch_size = x.size(1)
            hidden = self.init_hidden(batch_size, x.device)
            y, hidden = self.rnn(x, hidden)
            y = self.linear(y)
            return y, hidden

    model = LSTM(input_dim, hidden_dim, output_dim, num_layers, dropout)
    return model
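
Because the LSTM is built with batch_first=False, it expects input shaped (seq_length, batch_size, input_dim); this is why the training loop below transposes each DataLoader batch. A minimal shape sketch, using placeholder hyperparameter values (the real ones come from the Optuna study):

# Shape-convention sketch (placeholder hyperparameters, zeros as dummy input)
_params = {"hidden_dim": 311, "dropout": 0.16}
_m = my_LSTM(_params, input_dim=442, output_dim=442)
_x = torch.zeros(24, 64, 442)              # (seq_length, batch_size, n_companies)
_y, (_h, _c) = _m(_x)
print(_y.shape)   # torch.Size([24, 64, 442]); _y[-1] is the next-day prediction
print(_h.shape)   # torch.Size([2, 64, 311]) -> (num_layers, batch_size, hidden_dim)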

4. Hyperparameters tuning with Optuna¶

Regarding the model structure, the following hyperparameters are tuned:

  • Number of layers
  • Number of features in hidden state
  • Dropout layer

In terms of the objective function and window size:

  • Sequence length
  • Weight decay (L2)
  • Gradient clipping

Some fixed parameters I used:

  • Batch size (no shuffling, given the time-series nature of the data)
  • Early stop
  • Epoch
  • Optimizer is Adam

I also add Gaussian noise ~ N(0, 0.0001) (i.e., standard deviation 0.01) to the inputs during training.

In [7]:
def define_lstm_model(trial, input_dim, output_dim):
    num_layers = 2 # trial.suggest_int("num_layers", 1, 3)
    hidden_dim = trial.suggest_int("hidden_dim", 300, 350)
    dropout = trial.suggest_float("dropout", 0.15, 0.18)

    class LSTM(nn.Module):
        def __init__(self, input_dim, hidden_dim, output_dim, num_layers, dropout):
            super(LSTM, self).__init__()
            self.rnn = nn.LSTM(
                input_size=input_dim,
                hidden_size=hidden_dim,
                num_layers=num_layers,
                batch_first=False,
                dropout = dropout if num_layers > 1 else 0.0
            )
            self.linear = nn.Linear(hidden_dim, output_dim)

        def forward(self, x):
            out, _ = self.rnn(x)
            out = self.linear(out) 
            return out

    return LSTM(input_dim, hidden_dim, output_dim, num_layers, dropout)
In [8]:
def objective(trial):
# | --------------------------|
# | 4.1 Hyperparameters       |
# | --------------------------|
    seq_length = 24 # trial.suggest_int("seq_length", 23, 31)
    
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-4, log=True)
    lr = trial.suggest_float("lr", 1e-5, 1e-3, log=True)
    clip_value = trial.suggest_float("clip_value", 1.5, 2.0, log=True)

    batch_size = 64
    early_stopper = EarlyStopping(patience=20, min_delta=1e-4)

# | --------------------------|
# | 4.2 Define the model      |
# | --------------------------|
    input_dim = train_set.shape[1]  # 442 companies
    output_dim = train_set.shape[1]
    model = define_lstm_model(trial, input_dim, output_dim).to(DEVICE)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.MSELoss()
    EPOCHS = 100  

# | --------------------------|
# | 4.3 Window for train      |
# | --------------------------|
    train_dataset = TimeSeries(train_set, seq_length)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

# | --------------------------|
# | 4.4 Training loops        |
# | --------------------------|
    for epoch in range(EPOCHS):
        model.train()
        epoch_loss = 0.0
        for X_batch, y_batch in train_loader:
            # X_batch: (batch_size, seq_length, input_dim)
            # Transpose to (seq_length, batch_size, input_dim)
            X_batch = X_batch.transpose(0, 1).to(DEVICE)
            y_batch = y_batch.to(DEVICE)
        
            optimizer.zero_grad()

            # Add white noise epsilon ~ N(0,0.0001)
            X_noisy = X_batch + torch.randn_like(X_batch) * 0.01
        
            outputs = model(X_noisy)
      
            # Compare final time step to y
            loss = criterion(outputs[-1], y_batch)

            loss.backward()
        
            # Gradient clipping
            clip(model.parameters(), clip_value)

            optimizer.step()
            epoch_loss += loss.item()
        avg_loss = epoch_loss / len(train_loader)
        trial.report(avg_loss, epoch)
        if early_stopper.step(avg_loss):
            print(f"Early stopping at epoch {epoch+1}")
            break
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

# | --------------------------|
# | 4.5 Evaluate on test set  |
# | --------------------------|
    test_dataset = TimeSeries(test_set, seq_length)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.transpose(0, 1).to(DEVICE)
            y_batch = y_batch.to(DEVICE)
            outputs = model(X_batch)
            loss = criterion(outputs[-1], y_batch)
            total_loss += loss.item()
    avg_test_loss = total_loss / len(test_loader)
    return avg_test_loss

I used many trials because several hyperparameters are tuned at once. The early stopper saves computational time where possible.
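
One detail worth noting: trial.report(...) together with trial.should_prune() only prunes when the study has a pruner, and optuna.create_study falls back to a MedianPruner by default, which is what produces the "Trial N pruned." lines in the log below. To make the sampler and pruner explicit, an equivalent setup would look like this sketch (the actual cell simply relies on Optuna's defaults):

# Explicit sampler/pruner configuration (sketch only; the next cell uses the defaults)
study_explicit = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),           # reproducible parameter suggestions
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5),  # start pruning after 5 reported epochs
)
# study_explicit.optimize(objective, n_trials=50, timeout=600)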

In [9]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=600)

print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)
print("Best value (MSE):", study.best_trial.value)
[I 2025-04-11 14:00:30,504] A new study created in memory with name: no-name-5c359c16-4e72-4fa2-8b1c-da00e9e45394
[I 2025-04-11 14:00:50,465] Trial 0 finished with value: 0.005437457200605423 and parameters: {'weight_decay': 9.010696366541017e-05, 'lr': 3.2631921523822867e-05, 'clip_value': 1.866483325115156, 'hidden_dim': 329, 'dropout': 0.1754962395750958}. Best is trial 0 with value: 0.005437457200605423.
Early stopping at epoch 42
[I 2025-04-11 14:01:02,646] Trial 1 finished with value: 0.00541713695274666 and parameters: {'weight_decay': 1.6820759117508032e-06, 'lr': 0.00033567838742119805, 'clip_value': 1.8276086428463114, 'hidden_dim': 348, 'dropout': 0.15032483610439398}. Best is trial 1 with value: 0.00541713695274666.
Early stopping at epoch 30
[I 2025-04-11 14:01:15,396] Trial 2 finished with value: 0.005414817866403609 and parameters: {'weight_decay': 1.913899886289534e-06, 'lr': 7.877108245309796e-05, 'clip_value': 1.8749800376757997, 'hidden_dim': 311, 'dropout': 0.1556428663624871}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 33
[I 2025-04-11 14:01:28,776] Trial 3 finished with value: 0.005433881666976959 and parameters: {'weight_decay': 3.0946875752567993e-06, 'lr': 1.1342870077167936e-05, 'clip_value': 1.8135418545573978, 'hidden_dim': 303, 'dropout': 0.16711117775015494}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 35
[I 2025-04-11 14:01:40,502] Trial 4 finished with value: 0.005442619521636516 and parameters: {'weight_decay': 1.006123553176289e-05, 'lr': 0.0009483162432304677, 'clip_value': 1.6389921555239382, 'hidden_dim': 329, 'dropout': 0.16492601675200816}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 29
[I 2025-04-11 14:01:40,958] Trial 5 pruned. 
[I 2025-04-11 14:01:51,021] Trial 6 finished with value: 0.005460354976821691 and parameters: {'weight_decay': 8.382687383884459e-05, 'lr': 0.0004026499915548599, 'clip_value': 1.715257939513186, 'hidden_dim': 309, 'dropout': 0.17611276292926648}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 26
[I 2025-04-11 14:01:54,606] Trial 7 pruned. 
[I 2025-04-11 14:01:55,045] Trial 8 pruned. 
[I 2025-04-11 14:01:55,494] Trial 9 pruned. 
[I 2025-04-11 14:01:57,190] Trial 10 pruned. 
[I 2025-04-11 14:02:09,374] Trial 11 finished with value: 0.0054453269694931805 and parameters: {'weight_decay': 3.0016444449495126e-05, 'lr': 0.00030132863816897135, 'clip_value': 1.9587820715261755, 'hidden_dim': 348, 'dropout': 0.1503370159585747}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 30
[I 2025-04-11 14:02:09,861] Trial 12 pruned. 
[I 2025-04-11 14:02:21,940] Trial 13 finished with value: 0.005440033378545195 and parameters: {'weight_decay': 1.8626673272768084e-05, 'lr': 0.0007836118097393293, 'clip_value': 1.7940476517086086, 'hidden_dim': 315, 'dropout': 0.16100015593572392}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 31
[I 2025-04-11 14:02:22,438] Trial 14 pruned. 
[I 2025-04-11 14:02:22,913] Trial 15 pruned. 
[I 2025-04-11 14:02:23,411] Trial 16 pruned. 
[I 2025-04-11 14:02:24,647] Trial 17 pruned. 
[I 2025-04-11 14:02:25,136] Trial 18 pruned. 
[I 2025-04-11 14:02:25,627] Trial 19 pruned. 
[I 2025-04-11 14:02:26,082] Trial 20 pruned. 
[I 2025-04-11 14:02:26,536] Trial 21 pruned. 
[I 2025-04-11 14:02:27,001] Trial 22 pruned. 
[I 2025-04-11 14:02:27,472] Trial 23 pruned. 
[I 2025-04-11 14:02:27,956] Trial 24 pruned. 
[I 2025-04-11 14:02:28,449] Trial 25 pruned. 
[I 2025-04-11 14:02:28,916] Trial 26 pruned. 
[I 2025-04-11 14:02:40,335] Trial 27 finished with value: 0.0054313832661136985 and parameters: {'weight_decay': 4.012516584798842e-06, 'lr': 0.0006195392971702806, 'clip_value': 1.6569226926301694, 'hidden_dim': 303, 'dropout': 0.1527784251870268}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 30
[I 2025-04-11 14:02:52,435] Trial 28 finished with value: 0.005447977152653038 and parameters: {'weight_decay': 4.424736427518117e-06, 'lr': 0.0006145175066730719, 'clip_value': 1.6563144546147344, 'hidden_dim': 313, 'dropout': 0.15372055261308065}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 31
[I 2025-04-11 14:02:53,678] Trial 29 pruned. 
[I 2025-04-11 14:02:54,170] Trial 30 pruned. 
[I 2025-04-11 14:03:05,644] Trial 31 finished with value: 0.005459379591047764 and parameters: {'weight_decay': 7.437705227276176e-06, 'lr': 0.0006314143097492449, 'clip_value': 1.6087132706826734, 'hidden_dim': 303, 'dropout': 0.15503312609732145}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 30
[I 2025-04-11 14:03:17,516] Trial 32 finished with value: 0.00545856540556997 and parameters: {'weight_decay': 3.65541440057519e-06, 'lr': 0.0008642340947939282, 'clip_value': 1.736418574215152, 'hidden_dim': 308, 'dropout': 0.16717370398748027}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 31
[I 2025-04-11 14:03:18,000] Trial 33 pruned. 
[I 2025-04-11 14:03:18,462] Trial 34 pruned. 
[I 2025-04-11 14:03:30,108] Trial 35 finished with value: 0.00546645465074107 and parameters: {'weight_decay': 1.8229481749002745e-06, 'lr': 0.000997039294911953, 'clip_value': 1.7338512746237096, 'hidden_dim': 309, 'dropout': 0.17278615138964415}. Best is trial 2 with value: 0.005414817866403609.
Early stopping at epoch 30
[I 2025-04-11 14:03:30,617] Trial 36 pruned. 
[I 2025-04-11 14:03:31,083] Trial 37 pruned. 
[I 2025-04-11 14:03:31,548] Trial 38 pruned. 
[I 2025-04-11 14:03:32,032] Trial 39 pruned. 
[I 2025-04-11 14:03:32,503] Trial 40 pruned. 
[I 2025-04-11 14:03:33,026] Trial 41 pruned. 
[I 2025-04-11 14:03:33,535] Trial 42 pruned. 
[I 2025-04-11 14:03:34,024] Trial 43 pruned. 
[I 2025-04-11 14:03:34,498] Trial 44 pruned. 
[I 2025-04-11 14:03:34,962] Trial 45 pruned. 
[I 2025-04-11 14:03:35,432] Trial 46 pruned. 
[I 2025-04-11 14:03:35,935] Trial 47 pruned. 
[I 2025-04-11 14:03:36,430] Trial 48 pruned. 
[I 2025-04-11 14:03:44,356] Trial 49 pruned. 
Number of finished trials: 50
Best trial: {'weight_decay': 1.913899886289534e-06, 'lr': 7.877108245309796e-05, 'clip_value': 1.8749800376757997, 'hidden_dim': 311, 'dropout': 0.1556428663624871}
Best value (MSE): 0.005414817866403609

This step reports the importance of each hyperparameter. The top hyperparameters in the chart motivate narrowing their search ranges, because the chart shows how strongly each hyperparameter affects the objective.

In [10]:
fig = plot_param_importances(study)
fig.show()

5. Training phase¶

Using the best parameters from Optuna, we train the final model on the training data.

In [11]:
# | --------------------------|
# | 5.1 Define hyperparameters|
# | --------------------------|
best_params = study.best_trial.params
seq_length = 24 # best_params["seq_length"]

input_dim = train_set.shape[1]
output_dim = train_set.shape[1]

# Build final model using my_LSTM
final_model = my_LSTM(best_params, input_dim, output_dim).to(DEVICE)

lr = best_params["lr"]
weight_decay = best_params["weight_decay"]
clip_value = best_params["clip_value"]

final_optimizer = optim.Adam(final_model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = nn.MSELoss()
EPOCHS_FINAL = 1000
batch_size = 64
early_stopper_final = EarlyStopping(patience=20, min_delta=1e-4)

# | --------------------------|
# | 5.2 Full training data    |
# | --------------------------|
train_dataset_full = TimeSeries(train_set, seq_length)
train_loader_full = DataLoader(train_dataset_full, batch_size=batch_size, shuffle=False)

print(f"Final Training Dataset: {len(train_dataset_full)} samples")

# | --------------------------|
# | 5.3 Training loops        |
# | --------------------------|
for epoch in range(EPOCHS_FINAL):
    final_model.train()
    epoch_loss = 0.0
    for X_batch, y_batch in train_loader_full:
        X_batch = X_batch.transpose(0, 1).to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        final_optimizer.zero_grad()
        X_noisy = X_batch + torch.randn_like(X_batch) * 0.01

        outputs, _ = final_model(X_noisy)  
        loss = criterion(outputs[-1], y_batch)
        loss.backward()
    
        # Gradient clipping
        clip(final_model.parameters(), clip_value)
    
        final_optimizer.step()
        epoch_loss += loss.item()
    avg_loss = epoch_loss / len(train_loader_full)
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}/{EPOCHS_FINAL} => Loss: {avg_loss:.6f}")
        
    if early_stopper_final.step(avg_loss):
        print(f"Early stopping at epoch {epoch+1}")
        print(f"Epoch {epoch+1}/{EPOCHS_FINAL} => Loss: {avg_loss:.6f}")
        break
Final Training Dataset: 2392 samples
Early stopping at epoch 34
Epoch 34/1000 => Loss: 0.002531

6. Evaluate on test set¶

Using the trained model, we evaluate it on the test set.

In [12]:
# | --------------------------|
# | 6.1 Full test data        |
# | --------------------------|
test_dataset_final = TimeSeries(test_set, seq_length)
test_loader_final = DataLoader(test_dataset_final, batch_size=batch_size, shuffle=False)

# | --------------------------|
# | 6.2 Evaluate on test set  |
# | --------------------------|
final_model.eval()
total_test_loss = 0.0
with torch.no_grad():
    for X_batch, y_batch in test_loader_final:
        X_batch = X_batch.transpose(0, 1).to(DEVICE)
        y_batch = y_batch.to(DEVICE)
        y_test_pred, _ = final_model(X_batch)
        loss = criterion(y_test_pred[-1], y_batch)
        total_test_loss += loss.item()
avg_test_loss = total_test_loss / len(test_loader_final)
print("Final Test MSE:", avg_test_loss)
Final Test MSE: 0.005414343986194581

7. Forecasting¶

In [13]:
# | --------------------------|
# | 7.1 Define last sequence  |
# | --------------------------|
last_window = test_set[-seq_length:]
print("Last window shape:", last_window.shape)  
last_window_t = torch.tensor(last_window, dtype=torch.float32).unsqueeze(1).to(DEVICE)

final_model.eval()
with torch.no_grad():
    out_seq, _ = final_model(last_window_t)
    next_day_pred_norm = out_seq[-1, 0].cpu().numpy()

# | --------------------------|
# | 7.2 Inverse to real values|
# | --------------------------|
next_day_pred = scaler.inverse_transform(next_day_pred_norm.reshape(1, -1)).flatten()
Last window shape: (24, 442)

8. Interpretation with Captum¶

This step takes the LSTM's output at the final time step and applies Integrated Gradients to assess each feature's contribution relative to a zero baseline, using the last window of data to identify which lags most influenced the forecast. The chart below displays the average Integrated Gradients attribution for each time step in the final window: negative values suggest that the corresponding time steps push the prediction down, while positive values indicate a contribution with the same sign as the output.
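
For reference, Integrated Gradients attributes the i-th input feature by integrating the gradient of the model output F along the straight-line path from the baseline x' (here all zeros) to the input x:

\mathrm{IG}_i(x) = (x_i - x'_i)\int_{0}^{1} \frac{\partial F\bigl(x' + \alpha\,(x - x')\bigr)}{\partial x_i}\, d\alpha

Captum approximates this integral with a finite sum; the returned delta reports the convergence error, i.e. the gap between the summed attributions and F(x) - F(x').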

In [14]:
# | --------------------------|
# | 8.1 Define forward pass   |
# | --------------------------|
class CaptumModel(nn.Module):
    def __init__(self, lstm_model, target_index=1):
        super().__init__()
        self.lstm_model = lstm_model
        self.target_index = target_index

    def forward(self, x):
        outputs, _ = self.lstm_model(x)    
        last_output = outputs[-1]         
        return last_output[:, self.target_index]
        
md = CaptumModel(final_model, target_index=1).to(DEVICE)
last_window_t.requires_grad_()
md.train()

# | --------------------------|
# | 8.2 Integrated Gradients  |
# | --------------------------|
ig = IntegratedGradients(md)
baseline = torch.zeros_like(last_window_t).to(DEVICE)

# | --------------------------|
# | 8.3 Compute attributions  |
# | --------------------------|
attributions, delta = ig.attribute(last_window_t, 
                                   baseline, 
                                   return_convergence_delta=True)
attr = attributions.squeeze(1).detach().cpu().numpy()
avg_attr_per_timestep = attr.mean(axis=1)

print("Average attributions per time step:\n", avg_attr_per_timestep)

# | --------------------------|
# | 8.4 Plot time-step        |
# | --------------------------|
plt.figure(figsize=(12, 6))
plt.plot(range(len(avg_attr_per_timestep)), avg_attr_per_timestep, marker='o')
plt.xlabel("Time Step")
plt.ylabel("Average Attribution")
plt.title("Integrated Gradients Attributions over the Last Window (Time Steps)")
plt.grid(True)
plt.show()
Average attributions per time step:
 [8.4618076e-11 9.7116891e-11 1.3332961e-10 1.6506448e-10 2.1382948e-10
 2.5399199e-10 3.3404243e-10 4.1872006e-10 5.5158889e-10 7.0463962e-10
 9.3649077e-10 1.2263686e-09 1.6076981e-09 1.9979736e-09 2.6368880e-09
 3.4501841e-09 4.6910014e-09 6.3431669e-09 8.2293745e-09 1.0982817e-08
 1.3993827e-08 2.0980094e-08 2.9321937e-08 3.3955605e-08]
[Figure: Integrated Gradients attributions over the last window (time steps)]

9. Submission¶

In [15]:
temp = pd.read_csv("/kaggle/input/playing-the-stock-market-is-a-fools-game/sample_submission.csv")
temp["value"] = next_day_pred
temp.to_csv("submission.csv", index=False)
print("Done")
Done