Transformer model¶
In [1]:
Copied!
#!pip install deepts_forecasting
#!pip install deepts_forecasting
Import libraries¶
In [2]:
Copied!
import numpy as np
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from deepts_forecasting.utils.data import TimeSeriesDataSet
from deepts_forecasting.utils.data.encoders import TorchNormalizer
from deepts_forecasting.datasets import AirPassengersDataset
from deepts_forecasting.models.transformer import TransformerModel
import numpy as np
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
from deepts_forecasting.utils.data import TimeSeriesDataSet
from deepts_forecasting.utils.data.encoders import TorchNormalizer
from deepts_forecasting.datasets import AirPassengersDataset
from deepts_forecasting.models.transformer import TransformerModel
Dataset¶
In [3]:
Copied!
data = AirPassengersDataset().load()
data['year'] = data['Month'].dt.year
data['month'] = data['Month'].dt.month
data['group'] = '0'
data['time_idx'] = np.arange(len(data))
data['Passengers'] = data['Passengers'].astype(float)
data['month'] = data['month'].astype('str')
data.head()
data = AirPassengersDataset().load()
data['year'] = data['Month'].dt.year
data['month'] = data['Month'].dt.month
data['group'] = '0'
data['time_idx'] = np.arange(len(data))
data['Passengers'] = data['Passengers'].astype(float)
data['month'] = data['month'].astype('str')
data.head()
Out[3]:
Month | Passengers | year | month | group | time_idx | |
---|---|---|---|---|---|---|
0 | 1949-01-01 | 112.0 | 1949 | 1 | 0 | 0 |
1 | 1949-02-01 | 118.0 | 1949 | 2 | 0 | 1 |
2 | 1949-03-01 | 132.0 | 1949 | 3 | 0 | 2 |
3 | 1949-04-01 | 129.0 | 1949 | 4 | 0 | 3 |
4 | 1949-05-01 | 121.0 | 1949 | 5 | 0 | 4 |
Split train/test sets¶
In [4]:
Copied!
max_encoder_length = 18
max_prediction_length = 12
training_cutoff = data["time_idx"].max() - max_encoder_length - max_prediction_length
training = TimeSeriesDataSet(
data[lambda x: x.time_idx <= training_cutoff],
max_encoder_length= max_encoder_length,
min_encoder_length=max_encoder_length,
max_prediction_length=max_prediction_length,
min_prediction_length=max_prediction_length,
time_idx="time_idx",
target="Passengers",
group_ids=["group"],
static_categoricals=[],
static_reals=[],
time_varying_known_categoricals=['month'],
time_varying_known_reals=[],
time_varying_unknown_reals=["Passengers"],
time_varying_unknown_categoricals=[],
target_normalizer=TorchNormalizer(method="standard",
transformation=None),
)
training.get_parameters()
validation = TimeSeriesDataSet.from_dataset(training,
data[lambda x: x.time_idx > training_cutoff])
batch_size = 16
train_dataloader = DataLoader(training, batch_size=batch_size, shuffle=False, drop_last=False)
val_dataloader = DataLoader(validation, batch_size=batch_size, shuffle=False, drop_last=False)
max_encoder_length = 18
max_prediction_length = 12
training_cutoff = data["time_idx"].max() - max_encoder_length - max_prediction_length
training = TimeSeriesDataSet(
data[lambda x: x.time_idx <= training_cutoff],
max_encoder_length= max_encoder_length,
min_encoder_length=max_encoder_length,
max_prediction_length=max_prediction_length,
min_prediction_length=max_prediction_length,
time_idx="time_idx",
target="Passengers",
group_ids=["group"],
static_categoricals=[],
static_reals=[],
time_varying_known_categoricals=['month'],
time_varying_known_reals=[],
time_varying_unknown_reals=["Passengers"],
time_varying_unknown_categoricals=[],
target_normalizer=TorchNormalizer(method="standard",
transformation=None),
)
training.get_parameters()
validation = TimeSeriesDataSet.from_dataset(training,
data[lambda x: x.time_idx > training_cutoff])
batch_size = 16
train_dataloader = DataLoader(training, batch_size=batch_size, shuffle=False, drop_last=False)
val_dataloader = DataLoader(validation, batch_size=batch_size, shuffle=False, drop_last=False)
Define model¶
In [7]:
Copied!
pl.seed_everything(123)
# create PyTorch Lighning Trainer with early stopping
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4,
patience=60, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
max_epochs=300,
gpus=0, # run on CPU, if on multiple GPUs, use accelerator="ddp"
gradient_clip_val=0.1,
limit_train_batches=30, # 30 batches per epoch
callbacks=[lr_logger, early_stop_callback],
logger=TensorBoardLogger("lightning_logs")
)
model = TransformerModel.from_dataset(training,
dim_feedforward=32,
n_head=1,
n_layers=2,
d_model=16,
)
model.summarize
pl.seed_everything(123)
# create PyTorch Lighning Trainer with early stopping
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4,
patience=60, verbose=False, mode="min")
lr_logger = LearningRateMonitor()
trainer = pl.Trainer(
max_epochs=300,
gpus=0, # run on CPU, if on multiple GPUs, use accelerator="ddp"
gradient_clip_val=0.1,
limit_train_batches=30, # 30 batches per epoch
callbacks=[lr_logger, early_stop_callback],
logger=TensorBoardLogger("lightning_logs")
)
model = TransformerModel.from_dataset(training,
dim_feedforward=32,
n_head=1,
n_layers=2,
d_model=16,
)
model.summarize
Global seed set to 123 GPU available: False, used: False TPU available: False, using: 0 TPU cores IPU available: False, using: 0 IPUs
Out[7]:
<bound method LightningModule.summarize of TransformerModel( (loss): L1Loss() (logging_metrics): ModuleList() (encoder_input_linear): Linear(in_features=7, out_features=16, bias=True) (decoder_input_linear): Linear(in_features=6, out_features=16, bias=True) (encoder_positional_encoding): PositionalEncoding( (dropout): Dropout(p=0.1, inplace=False) ) (decoder_positional_encoding): PositionalEncoding( (dropout): Dropout(p=0.1, inplace=False) ) (transformer_encoder): TransformerEncoder( (layers): ModuleList( (0): TransformerEncoderLayer( (self_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (linear1): Linear(in_features=16, out_features=32, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=32, out_features=16, bias=True) (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (dropout1): Dropout(p=0.1, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) ) (1): TransformerEncoderLayer( (self_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (linear1): Linear(in_features=16, out_features=32, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=32, out_features=16, bias=True) (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (dropout1): Dropout(p=0.1, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) ) ) (norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True) ) (transformer_decoder): TransformerDecoder( (layers): ModuleList( (0): TransformerDecoderLayer( (self_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (multihead_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (linear1): Linear(in_features=16, out_features=32, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=32, out_features=16, bias=True) (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm3): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (dropout1): Dropout(p=0.1, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.1, inplace=False) ) (1): TransformerDecoderLayer( (self_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (multihead_attn): MultiheadAttention( (out_proj): _LinearWithBias(in_features=16, out_features=16, bias=True) ) (linear1): Linear(in_features=16, out_features=32, bias=True) (dropout): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=32, out_features=16, bias=True) (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (norm3): LayerNorm((16,), eps=1e-05, elementwise_affine=True) (dropout1): Dropout(p=0.1, inplace=False) (dropout2): Dropout(p=0.1, inplace=False) (dropout3): Dropout(p=0.1, inplace=False) ) ) (norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True) ) (out_linear): Linear(in_features=16, out_features=1, bias=True) (embeddings): ModuleDict( (month): Embedding(12, 6) ) )>
In [8]:
Copied!
model.hparams
model.hparams
Out[8]:
"activation": relu "categorical_groups": {} "d_model": 16 "dim_feedforward": 32 "dropout": 0.1 "embedding_labels": {'month': array(['1', '10', '11', '12', '2', '3', '4', '5', '6', '7', '8', '9'], dtype=object)} "embedding_paddings": [] "embedding_sizes": {'month': [12, 6]} "learning_rate": 0.001 "log_interval": -1 "log_val_interval": None "logging_metrics": ModuleList() "loss": L1Loss() "max_encoder_length": 18 "max_prediction_length": 12 "monotone_constaints": {} "n_head": 1 "n_layers": 2 "output_size": 1 "output_transformer": TorchNormalizer() "static_categoricals": [] "static_reals": [] "time_varying_categoricals_decoder": ['month'] "time_varying_categoricals_encoder": ['month'] "time_varying_reals_decoder": [] "time_varying_reals_encoder": ['Passengers'] "x_categoricals": ['month'] "x_reals": ['Passengers']
Train model with early stopping¶
In [10]:
Copied!
trainer.fit(
model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader,
)
# (given that we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_model = TransformerModel.load_from_checkpoint(best_model_path)
# calcualte mean absolute error on validation set
actuals = torch.cat([model.transform_output(prediction=y, target_scale=x['target_scale'])
for x, y in iter(val_dataloader)])
predictions, x_index = best_model.predict(val_dataloader)
mae = (actuals - predictions).abs().mean()
# print('predictions shape is:', predictions.shape)
# print('actuals shape is:', actuals.shape)
print(torch.cat([actuals, predictions]))
print('MAE is:', mae)
trainer.fit(
model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader,
)
# (given that we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_model = TransformerModel.load_from_checkpoint(best_model_path)
# calcualte mean absolute error on validation set
actuals = torch.cat([model.transform_output(prediction=y, target_scale=x['target_scale'])
for x, y in iter(val_dataloader)])
predictions, x_index = best_model.predict(val_dataloader)
mae = (actuals - predictions).abs().mean()
# print('predictions shape is:', predictions.shape)
# print('actuals shape is:', actuals.shape)
print(torch.cat([actuals, predictions]))
print('MAE is:', mae)
| Name | Type | Params ------------------------------------------------------------------- 0 | loss | L1Loss | 0 1 | logging_metrics | ModuleList | 0 2 | encoder_input_linear | Linear | 128 3 | decoder_input_linear | Linear | 112 4 | encoder_positional_encoding | PositionalEncoding | 0 5 | decoder_positional_encoding | PositionalEncoding | 0 6 | transformer_encoder | TransformerEncoder | 4.5 K 7 | transformer_decoder | TransformerDecoder | 6.7 K 8 | out_linear | Linear | 17 9 | embeddings | ModuleDict | 72 ------------------------------------------------------------------- 11.5 K Trainable params 0 Non-trainable params 11.5 K Total params 0.046 Total estimated model params size (MB) D:\Anaconda3\envs\DeepTS_Forecasting\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:631: UserWarning: Checkpoint directory lightning_logs\default\version_17\checkpoints exists and is not empty. rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
Global seed set to 123
tensor([[[417.0000], [391.0000], [419.0000], [461.0000], [472.0000], [535.0000], [622.0000], [606.0000], [508.0000], [461.0000], [390.0000], [432.0000]], [[350.6286], [331.8389], [368.7483], [367.6466], [376.8046], [437.5001], [456.6611], [470.3661], [442.6349], [358.7311], [334.2704], [360.6251]]], dtype=torch.float64) MAE is: tensor(88.1287, dtype=torch.float64)