
disabling torch.jit.script here for massive performance boost when using torch.compile, our default. see issue #11. thanks @vgoklani for flagging
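For context, torch.compile (the default here) compiles the whole model at once, and a @torch.jit.script-decorated helper inside it interferes with that, hence the decorator is commented out in the diff below. A minimal sketch of the intended setup, assuming the GPT and GPTConfig classes from nanoGPT's model.py (the surrounding calls are illustrative only, not the repo's train.py):

import torch
from model import GPT, GPTConfig  # nanoGPT's model.py

model = GPT(GPTConfig())

# default path: compile the entire model; with the TorchScript decorator
# removed, new_gelu is compiled together with the rest of the graph rather
# than being handled separately by torch.jit
model = torch.compile(model)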

Andrej Karpathy 2023-01-02 23:05:01 +00:00
parent ea4de192e0
commit 177d5f7dc5

model.py

@@ -14,8 +14,8 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
 
-@torch.jit.script
-def fused_gelu(x):
+# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
+def new_gelu(x):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
@@ -71,7 +71,7 @@ class MLP(nn.Module):
 
     def forward(self, x):
         x = self.c_fc(x)
-        x = fused_gelu(x)
+        x = new_gelu(x)
         x = self.c_proj(x)
         x = self.dropout(x)
         return x
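The hunks above cut off before the function body. For reference, a sketch of new_gelu as it appears in nanoGPT's model.py around this commit (the tanh approximation of GELU), with the decorator left commented out as this commit intends:

import math
import torch

# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
def new_gelu(x):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
    Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
    """
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))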