import torch
import torch.nn as nn

from .activation import GELU


class FeedForward(nn.Module):
    """Two-layer feed-forward sub-network with a GELU non-linearity.

    The network widens the embedding to four times its size, applies the
    project's ``GELU`` activation, and then projects back down to the
    original embedding size, so the output's last dimension matches the
    input's.

    Role relative to attention (as described by the original author):
    - Multi-head self-attention captures relationships between tokens.
    - This feed-forward network (FFN) processes each token independently
      after attention.

    Args:
        cfg: Configuration object indexed by string keys; only
            ``cfg["emb_dim"]`` (the embedding width) is read here.
    """

    def __init__(self, cfg):
        super().__init__()
        emb_dim = cfg["emb_dim"]
        hidden_dim = 4 * emb_dim  # inner width is fixed at 4x the embedding

        # Build the sub-modules in execution order so parameter creation
        # (and therefore random initialisation) follows the same sequence
        # as the layers inside the container.
        expand = nn.Linear(emb_dim, hidden_dim)
        activation = GELU()
        project = nn.Linear(hidden_dim, emb_dim)

        # Attribute name and positional order determine the state-dict
        # keys (layers.0.*, layers.2.*), so both are kept stable.
        self.layers = nn.Sequential(expand, activation, project)

    def forward(self, x):
        """Run ``x`` through expand -> GELU -> project and return the result.

        NOTE(review): ``x`` is presumably a tensor whose last dimension is
        ``emb_dim`` (required by the first ``nn.Linear``) — confirm with
        the caller.
        """
        out = self.layers(x)
        return out