This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export CHECKPOINT_PATH=../../../checkpoints # path to checkpoints folder | |
# README EVALUATIONS | |
export MODEL_REPO=meta-llama/Llama-2-7b-chat-hf | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth #12.212 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8dq --compile #12.262 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int8wo #12.204 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization fp6 --compile --precision float16 #12.369 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64-hqq #12.825717540084083 | |
python eval.py --checkpoint_path $CHECKPOINT_PATH/$MODEL_REPO/model.pth --quantization int4wo-64 #12.87233037343588 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from torch.utils._pytree import tree_flatten, tree_unflatten | |
import gc | |
class MultiTensor(torch.Tensor): | |
@staticmethod | |
def __new__(cls, input, **kwargs): | |
if isinstance(input, (list, tuple)): | |
input = input[0] | |
kwargs["dtype"]=kwargs.get("dtype", input.dtype) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from lm_eval.models.huggingface import HFLM | |
from lm_eval.evaluator import evaluate | |
from lm_eval.tasks import get_task_dict | |
path_to_hf_checkpoint = "/home/cdhernandez/local/gpt-fast/checkpoints/meta-llama/Meta-Llama-3-8B" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
from torch.utils._pytree import tree_flatten, tree_unflatten | |
class MultiTensor(torch.Tensor): | |
@staticmethod | |
def __new__(cls, input, **kwargs): | |
if isinstance(input, (list, tuple)): | |
input = input[0] | |
kwargs["dtype"]=kwargs.get("dtype", input.dtype) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
import triton | |
import triton.language as tl | |
from triton import Config | |
from torch._inductor import config | |
from torch import _dynamo | |
aten = torch.ops.aten | |
def get_configs_io_bound(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn.functional as F | |
import triton | |
import triton.language as tl | |
from triton.ops.matmul import matmul as triton_matmul | |
from triton.ops.matmul import _kernel | |
from triton import Config | |
from torch._inductor import config | |
from torch import _dynamo | |
torch._inductor.config.coordinate_descent_tuning = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###################################################################### | |
# Comparing Torchao # | |
# and BitsandBytes # | |
###################################################################### | |
# Set up Your Environment | |
# -------------------------------- | |
# | |
# First, let's configure your environment. This guide requires you to use CUDA 12.1. | |
# We ran this tutorial on an A100-PG509-200 power-limited to 330.00 W. If you | |
# are using different hardware, you might see different performance numbers. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/home/cdhernandez/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/transformers/utils/generic.py:441: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_torch_pytree._register_pytree_node( | |
/home/cdhernandez/.conda/envs/pytorch-3.10/lib/python3.10/site-packages/transformers/utils/generic.py:309: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
_torch_pytree._register_pytree_node( | |
/home/cdhernandez/local/diffusers/src/diffusers/utils/outputs.py:63: UserWarning: torch.utils._pytree._register_pytree_node is deprecated. Please use torch.utils._pytree.register_pytree_node instead. | |
torch.utils._pytree._register_pytree_node( | |
Namespace(no_bf16=False, no_sdpa=False, batch_size=1, num_inference_steps=30, enable_fused_projections=True, upcast_vae=False, compile_unet=True, compile_vae=True, compile_mode='max-autotune', change_comp_config=True, do_quan |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Full definition of a LLaMA Language Model, all of it in this single file. | |
Based on the nanoGPT implementation: https://github.com/karpathy/nanoGPT. | |
""" | |
# mypy: ignore-errors | |
import math | |
from dataclasses import dataclass | |
from typing import List, Optional, Tuple, Union | |
import torch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@triton.jit | |
def matmul_kernel_with_block_pointers( | |
# Pointers to matrices | |
a_ptr, b_ptr, c_ptr, s1_ptr, s2_ptr, | |
# Matrix dimensions | |
M, N, K, | |
# The stride variables represent how much to increase the ptr by when moving by 1 | |
# element in a particular dimension. E.g. `stride_am` is how much to increase `a_ptr` | |
# by to get the element one row down (A has M rows). | |
stride_am, stride_ak, |
NewerOlder