From 177d5f7dc5f44d6f373cd7767c2a9259d740436e Mon Sep 17 00:00:00 2001
From: Andrej Karpathy
Date: Mon, 2 Jan 2023 23:05:01 +0000
Subject: [PATCH] disabling torch.jit.script here for massive performance
 boost when using torch.compile, our default. see issue #11. thanks @vgoklani
 for flagging

---
 model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/model.py b/model.py
index 799eb71..236117c 100644
--- a/model.py
+++ b/model.py
@@ -14,8 +14,8 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
 
-@torch.jit.script
-def fused_gelu(x):
+# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
+def new_gelu(x):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
@@ -71,7 +71,7 @@ class MLP(nn.Module):
 
     def forward(self, x):
         x = self.c_fc(x)
-        x = fused_gelu(x)
+        x = new_gelu(x)
         x = self.c_proj(x)
         x = self.dropout(x)
         return x
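
Note: the body of new_gelu is outside the hunk above. A minimal sketch, assuming the standard tanh approximation of GELU from the paper cited in the docstring (the exact body in model.py may differ):

import math
import torch

# Tanh approximation of the GELU activation (Gaussian Error Linear Unit),
# as described in https://arxiv.org/abs/1606.08415 and used in Google BERT.
# Sketch only; the decorator is left off so torch.compile can handle the
# plain Python function directly.
def new_gelu(x):
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

The commit message attributes the performance boost to dropping @torch.jit.script when torch.compile (the default here) is in use; a plausible reading is that torch.compile optimizes the plain Python function better than a TorchScript-wrapped one, while @torch.jit.script remains worth enabling when torch.compile is not used, as the new comment in the diff notes.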