Skip to content

Instantly share code, notes, and snippets.

View vwxyzjn's full-sized avatar
😃

Costa Huang vwxyzjn

😃
View GitHub Profile
[project]
name = "open-instruct"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
# core dependencies
"torch>=2.4.0",
"datasets>=2.21.0",
from typing import Dict, Optional
from transformers import AutoTokenizer
from vllm import LLM, EngineArgs, SamplingParams
from vllm.engine.llm_engine import LLMEngine
from vllm.engine.metrics import StatLoggerBase
from vllm.executor.gpu_executor import GPUExecutor
from vllm.usage.usage_lib import UsageContext
from vllm.utils import Counter
Step 1: install pyenv on NFS
```bash
export PYENV_ROOT="/net/nfs.cirrascale/allennlp/$(whoami)/pyenv"
curl https://pyenv.run | bash
```
Step 2: add the following to your `~/.bashrc`. Your Python environments will then be managed by pyenv.
from typing import Dict, List
from rich.console import Console
from rich.panel import Panel
from datasets import load_dataset
def print_hf_messages(messages: List[Dict[str, str]]):
console = Console()
colors = ["red", "green"]
color_idx = 0
console.rule(f"[bold yellow]The number of turns is {len(messages)}")
import os
import pandas as pd
def process_files(directory):
# Loop through all Excel files in the given directory
for filename in os.listdir(directory):
if filename.endswith(".xlsx"):
file_path = os.path.join(directory, filename)
@vwxyzjn
vwxyzjn / bf16_logprobs.py
Last active June 5, 2024 03:15
The generation logprobs and forward logprobs are different under bf16
import argparse
import torch
import torch.nn.functional as F
import transformers
torch.set_printoptions(precision=4, sci_mode=False)
parser = argparse.ArgumentParser()
parser.add_argument("--bf16", action="store_true")
```
## r.sbatch
#!/bin/bash
#SBATCH --job-name=trl
#SBATCH --partition=hopper-prod
#SBATCH --gpus-per-task=8
#SBATCH --cpus-per-gpu=12
#SBATCH --ntasks=1
#SBATCH --output=slurm/logs/%x_%j.out
module load cuda/12.1
from typing import Tuple
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
def first_true_indices(bools: torch.Tensor, dtype=torch.long):
"""
Takes an N-dimensional bool tensor and returns an (N-1)-dimensional tensor of integers giving
data = [{'content': 'The wages earned by Robin is 30% more than that earned by Erica. The wages earned by Charles is 60% more than that earned by Erica. How much percent is the wages earned by Charles more than that earned by Robin?', 'role': 'user'}, {'content': "Let's assume Erica earns a wage of $E.\n\nRobin earns 30% more than Erica, so Robin's wage is:\nRobin's wage = E + 30% of E\nRobin's wage = E + 0.30E\nRobin's wage = 1.30E\n\nCharles earns 60% more than Erica, so Charles's wage is:\nCharles's wage = E + 60% of E\nCharles's wage = E + 0.60E\nCharles's wage = 1.60E\n\nNow, we want to find out how much percent the wages earned by Charles is more than that earned by Robin. To do this, we calculate the difference between Charles's and Robin's wages and then find out what percentage this difference is of Robin's wages.\n\nDifference in wages = Charles's wage - Robin's wage\nDifference in wages = 1.60E - 1.30E\nDifference in wages = 0.30E\n\nNow, we find out what percentage this difference is of Robin's wa
# flake8: noqa
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software