mirror of https://github.com/osmarks/nanogpt-experiments.git synced 2024-11-10 20:09:58 +00:00

disabling torch.jit.script here for massive performance boost when using torch.compile, our default. see issue #11. thanks @vgoklani for flagging

Andrej Karpathy 2023-01-02 23:05:01 +00:00
parent ea4de192e0
commit 177d5f7dc5


@@ -14,8 +14,8 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
 
-@torch.jit.script
-def fused_gelu(x):
+# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
+def new_gelu(x):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
@@ -71,7 +71,7 @@ class MLP(nn.Module):
 
     def forward(self, x):
         x = self.c_fc(x)
-        x = fused_gelu(x)
+        x = new_gelu(x)
         x = self.c_proj(x)
         x = self.dropout(x)
         return x
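
To make the change easier to follow outside the diff, here is a minimal, self-contained sketch of how the undecorated new_gelu and torch.compile fit together. The GELU body is the standard tanh approximation the docstring cites (unchanged by this commit); the simplified MLP constructor and the compiled_mlp name are placeholders for illustration, not code from this repository.

import math
import torch
import torch.nn as nn

def new_gelu(x):
    # Tanh approximation of GELU used in the BERT/GPT codebases (see the paper linked in the docstring).
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

class MLP(nn.Module):
    # Simplified stand-in for the repo's MLP block; sizes are placeholders.
    def __init__(self, n_embd=768, dropout=0.1):
        super().__init__()
        self.c_fc = nn.Linear(n_embd, 4 * n_embd)
        self.c_proj = nn.Linear(4 * n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.c_fc(x)
        x = new_gelu(x)
        x = self.c_proj(x)
        x = self.dropout(x)
        return x

mlp = MLP()

# Default path in this repo: torch.compile traces and fuses new_gelu together with the
# rest of the module, so the TorchScript decorator is redundant (and, per issue #11, slower).
compiled_mlp = torch.compile(mlp)

# Only when torch.compile is NOT used is it worth re-enabling TorchScript fusion, e.g.:
# new_gelu = torch.jit.script(new_gelu)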