Mirror of https://github.com/osmarks/nanogpt-experiments.git, synced 2024-12-18 14:10:28 +00:00
disabling torch.jit.script here for massive performance boost when using torch.compile, our default. see issue #11. thanks @vgoklani for flagging
This commit is contained in: parent ea4de192e0, commit 177d5f7dc5
model.py (6 changed lines: 3 additions, 3 deletions)
@@ -14,8 +14,8 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
 
-@torch.jit.script
-def fused_gelu(x):
+# @torch.jit.script # good to enable when not using torch.compile, disable when using (our default)
+def new_gelu(x):
     """
     Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT).
     Reference: Gaussian Error Linear Units (GELU) paper: https://arxiv.org/abs/1606.08415
@@ -71,7 +71,7 @@ class MLP(nn.Module):
 
     def forward(self, x):
         x = self.c_fc(x)
-        x = fused_gelu(x)
+        x = new_gelu(x)
         x = self.c_proj(x)
         x = self.dropout(x)
         return x
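For context, the body of the renamed function is not shown in this hunk. The sketch below assumes it matches the tanh-approximation GELU from the Google BERT repo that the docstring references, as used in nanoGPT's model.py:

import math
import torch

# Tanh approximation of GELU (Hendrycks & Gimpel, https://arxiv.org/abs/1606.08415),
# assumed here to be the body of new_gelu in model.py.
def new_gelu(x):
    return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))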
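The commit message says the @torch.jit.script decorator hurts performance when the model runs under torch.compile, this repo's default. A minimal sketch of that compile path, using a stand-in module rather than nanoGPT's actual GPT class:

import torch
import torch.nn as nn

# Stand-in module for illustration; in nanoGPT the real model is GPT from model.py.
model = nn.Sequential(nn.Linear(16, 64), nn.GELU(), nn.Linear(64, 16))

# torch.compile (PyTorch >= 2.0) optimizes the whole module, so a per-function
# @torch.jit.script decorator is unnecessary and, per this commit and issue #11,
# slows things down when left enabled.
model = torch.compile(model)

x = torch.randn(4, 16)
y = model(x)  # first call triggers compilation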