Created
March 1, 2024 18:26
-
-
Save philschmid/0ed1531fe28a11cb7f5d6bce4cb96f61 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Configuration and command-line parsing for the Nous benchmark runner.
#
# Options:
#   --model-id <id>           Hugging Face model id to evaluate (required)
#   --save-directory <dir>    directory for the checkout and results (default: nous)
#   --benchmark "<a b ...>"   whitespace-separated benchmarks to run (default: all)
#   --trust-remote-code       boolean flag; an explicit true/false value right
#                             after it is also accepted

# Wall-clock start, in seconds since the epoch.
start=$(date +%s)

# Defaults; each can be overridden by the options parsed below.
RESULT_DIRECTORY="nous"
HF_MODEL_ID=""
TRUST_REMOTE_CODE="False"
# Working directory at start-up, captured before any `cd`.
CURRENT_DIR=$(pwd)

# Whitespace-separated list of benchmarking tasks.
BENCMARK="agieval gpt4all truthfulqa bigbench"

# Set to 1 once --model-id has been seen on the command line.
HF_MODEL_ID_PROVIDED=0

# Report an option that was given without its value and abort.
# Arguments: $1 - option name
missing_value() {
  echo "Missing value for $1" >&2
  exit 1
}

# Parse command line arguments. The `shift` at the bottom of the loop drops the
# option itself; value-taking options shift once more to drop their value.
while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --model-id)
      [[ "$#" -ge 2 ]] || missing_value "$1"
      HF_MODEL_ID="$2"
      HF_MODEL_ID_PROVIDED=1
      shift
      ;;
    --save-directory)
      [[ "$#" -ge 2 ]] || missing_value "$1"
      RESULT_DIRECTORY="$2"
      shift
      ;;
    --benchmark)
      [[ "$#" -ge 2 ]] || missing_value "$1"
      BENCMARK="$2"
      shift
      ;;
    --trust-remote-code)
      # Boolean flag: it must not consume the next option. An explicit boolean
      # right after the flag is still accepted and honoured.
      TRUST_REMOTE_CODE="True"
      case "${2-}" in
        [Tt]rue | TRUE | 1)
          shift
          ;;
        [Ff]alse | FALSE | 0)
          TRUST_REMOTE_CODE="False"
          shift
          ;;
      esac
      ;;
    *)
      echo "Unknown parameter: $1" >&2
      exit 1
      ;;
  esac
  shift # Move to next argument
done

# --model-id is mandatory and must not be empty.
if [[ "$HF_MODEL_ID_PROVIDED" -eq 0 || -z "$HF_MODEL_ID" ]]; then
  echo "--model-id parameter is required." >&2
  exit 1
fi
# Detect the number of NVIDIA GPUs and create a device string.
# Only lines of the form "GPU <n>: ..." are counted, so error text or other
# non-GPU lines printed by nvidia-smi are not mistaken for devices.
# grep -c prints 0 when nothing matches (including when nvidia-smi is missing).
gpu_count=$(nvidia-smi -L | grep -c '^GPU [0-9]')
if ((gpu_count == 0)); then
  echo "No NVIDIA GPUs detected. Exiting." >&2
  exit 1
fi

# Construct the CUDA device string: comma-separated indices 0..gpu_count-1.
cuda_devices=""
for ((i = 0; i < gpu_count; i++)); do
  # ${cuda_devices:+,} yields a comma only once the string is non-empty.
  cuda_devices+="${cuda_devices:+,}${i}"
done
#### ECHO PARAMETERS ####
echo "HF_MODEL_ID: $HF_MODEL_ID"
echo "RESULT_DIRECTORY: $RESULT_DIRECTORY"
echo "TRUST_REMOTE_CODE: $TRUST_REMOTE_CODE"
echo "CUDA_DEVICES: $cuda_devices"
echo "BENCHMARK: $BENCMARK"

# Clone repository and install requirements for Benchmarking.
# Every step is checked: continuing after a failed mkdir/cd/clone would run
# `pip install -e .` in whatever directory the shell happens to be in.
mkdir -p -- "$RESULT_DIRECTORY" || {
  echo "Cannot create directory: $RESULT_DIRECTORY" >&2
  exit 1
}
cd -- "$RESULT_DIRECTORY" || {
  echo "Cannot enter directory: $RESULT_DIRECTORY" >&2
  exit 1
}

# Reuse an existing checkout so the script can be re-run with the same
# save directory; git refuses to clone into a non-empty directory.
if [[ ! -d lm-evaluation-harness ]]; then
  git clone -b add-agieval https://github.com/dmahan93/lm-evaluation-harness || {
    echo "Failed to clone lm-evaluation-harness" >&2
    exit 1
  }
fi
cd lm-evaluation-harness || {
  echo "Cannot enter directory: lm-evaluation-harness" >&2
  exit 1
}

pip install -e . || {
  echo "Failed to install lm-evaluation-harness" >&2
  exit 1
}
pip install -q requests accelerate sentencepiece pytablewriter einops protobuf || {
  echo "Failed to install benchmark requirements" >&2
  exit 1
}
# #################### Run Nous Benchmarking ####################

#######################################
# Print the comma-separated harness task list for one benchmark suite.
# Arguments: $1 - suite name (agieval | gpt4all | truthfulqa | bigbench)
# Outputs:   the task list on stdout
# Returns:   0 for a known suite, 1 otherwise (nothing is printed)
#######################################
tasks_for_benchmark() {
  case "$1" in
    agieval)
      printf '%s\n' "agieval_aqua_rat,agieval_logiqa_en,agieval_lsat_ar,agieval_lsat_lr,agieval_lsat_rc,agieval_sat_en,agieval_sat_en_without_passage,agieval_sat_math"
      ;;
    gpt4all)
      printf '%s\n' "hellaswag,openbookqa,winogrande,arc_easy,arc_challenge,boolq,piqa"
      ;;
    truthfulqa)
      printf '%s\n' "truthfulqa_mc"
      ;;
    bigbench)
      printf '%s\n' "bigbench_causal_judgement,bigbench_date_understanding,bigbench_disambiguation_qa,bigbench_geometric_shapes,bigbench_logical_deduction_five_objects,bigbench_logical_deduction_seven_objects,bigbench_logical_deduction_three_objects,bigbench_movie_recommendation,bigbench_navigate,bigbench_reasoning_about_colored_objects,bigbench_ruin_names,bigbench_salient_translation_error_detection,bigbench_snarks,bigbench_sports_understanding,bigbench_temporal_sequences,bigbench_tracking_shuffled_objects_five_objects,bigbench_tracking_shuffled_objects_seven_objects,bigbench_tracking_shuffled_objects_three_objects"
      ;;
    *)
      return 1
      ;;
  esac
}

#######################################
# Print a directory as an absolute path.
# Arguments: $1 - directory (absolute or relative)
#            $2 - base directory that a relative $1 is resolved against
# Outputs:   the absolute path on stdout
#######################################
resolve_output_dir() {
  local dir=$1 base=$2
  if [[ "$dir" == /* ]]; then
    printf '%s\n' "$dir"
  else
    # ${base%/} avoids a double slash when the base is "/".
    printf '%s\n' "${base%/}/${dir}"
  fi
}

# By this point the script has changed directory into the harness checkout, so
# a relative RESULT_DIRECTORY must be resolved against the directory the script
# was started from; otherwise results land inside the checkout.
output_dir=$(resolve_output_dir "$RESULT_DIRECTORY" "${CURRENT_DIR:-$PWD}")

# Split the whitespace-separated benchmark list into an array without glob
# expansion. read returns non-zero at end of input, which is expected here.
benchmarks=()
read -r -d '' -a benchmarks <<< "${BENCMARK}" || true

failed_benchmarks=0
for bench in "${benchmarks[@]}"; do
  echo "Running $bench benchmark"
  if ! tasks=$(tasks_for_benchmark "$bench"); then
    echo "Unknown benchmark: $bench"
    exit 1
  fi
  echo "Tasks: $tasks"

  # Run the evaluation harness. A failing suite is recorded and the remaining
  # suites still run.
  if ! python main.py \
    --model hf-causal \
    --model_args "pretrained=${HF_MODEL_ID},trust_remote_code=${TRUST_REMOTE_CODE}" \
    --tasks "$tasks" \
    --device "cuda:${cuda_devices}" \
    --batch_size auto \
    --output_path "${output_dir}/${bench}.json"; then
    echo "Benchmark $bench failed" >&2
    failed_benchmarks=$((failed_benchmarks + 1))
  fi
done

end=$(date +%s)
echo "Elapsed Time: $((end - start)) seconds"

# Surface harness failures through the exit status.
if ((failed_benchmarks > 0)); then
  exit 1
fi
# Usage examples
# **Single Benchmark**
# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your/results --benchmark truthfulqa
# **All Benchmarks**
# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your/results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment