Mirror of https://github.com/osmarks/nanogpt-experiments.git, synced 2025-10-31 15:23:01 +00:00
	Merge pull request #220 from python273/patch-1
Fix GPT.crop_block_size when flash attention is available
1 changed file: model.py
@@ -207,6 +207,7 @@ class GPT(nn.Module):
         self.config.block_size = block_size
         self.transformer.wpe.weight = nn.Parameter(self.transformer.wpe.weight[:block_size])
         for block in self.transformer.h:
+            if hasattr(block.attn, 'bias'):
                 block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]

     @classmethod
Author: Andrej
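
For context, a minimal illustrative sketch of why the hasattr guard is needed (names follow nanoGPT's model.py, but this is a condensed stand-in, not the real file): CausalSelfAttention only registers the causal-mask buffer 'bias' when PyTorch's flash attention kernel (scaled_dot_product_attention) is unavailable, so on the flash path the attribute does not exist and the previously unguarded slice in GPT.crop_block_size would fail with an AttributeError.

# Illustrative sketch only; follows nanoGPT's naming but is not the actual model.py.
import torch
import torch.nn as nn

class CausalSelfAttention(nn.Module):
    def __init__(self, block_size):
        super().__init__()
        # nanoGPT uses PyTorch's flash attention kernel when it is available;
        # the causal-mask buffer 'bias' is only registered on the fallback path.
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            mask = torch.tril(torch.ones(block_size, block_size))
            self.register_buffer("bias", mask.view(1, 1, block_size, block_size))

class Block(nn.Module):
    def __init__(self, block_size):
        super().__init__()
        self.attn = CausalSelfAttention(block_size)

# The patched loop from GPT.crop_block_size: the hasattr guard makes the slice
# a no-op for attention modules on the flash path, which have no 'bias' buffer.
blocks = nn.ModuleList(Block(1024) for _ in range(2))
new_block_size = 256
for block in blocks:
    if hasattr(block.attn, 'bias'):
        block.attn.bias = block.attn.bias[:, :, :new_block_size, :new_block_size]

When flash attention is available, cropping the position embeddings alone is enough, since the flash kernel builds its causal mask internally (is_causal=True) and there is no per-block mask buffer to shrink.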