philschmid · March 1, 2024 18:26
diff --git a/run_nous b/run_nous
 #!/bin/bash
 # Start timestamp in epoch seconds; the elapsed-time report at the end of the
 # script subtracts it from the finish time.
 start="$(date +%s)"

 # Directory the script was launched from, captured before any `cd`.
 CURRENT_DIR="$(pwd)"

 # Defaults for values that command line options may override.
 HF_MODEL_ID=''               # model id; no default
 HF_MODEL_ID_PROVIDED=0       # flips to 1 once --model-id has been seen
 RESULT_DIRECTORY='nous'      # where the run is set up and results are written
 TRUST_REMOTE_CODE='False'    # forwarded to the harness as trust_remote_code=<value>

 # Whitespace-separated list of benchmark groups to run (all four by default).
 BENCMARK='agieval gpt4all truthfulqa bigbench'


 # Parse command line arguments.
 #   --model-id <id>          required; stored in HF_MODEL_ID
 #   --save-directory <dir>   stored in RESULT_DIRECTORY
 #   --benchmark <list>       stored in BENCMARK (whitespace-separated names)
 #   --trust-remote-code      flag without a value; sets TRUST_REMOTE_CODE to "True"
 # Any other argument aborts the script with exit status 1.
 while [[ "$#" -gt 0 ]]; do
    case "$1" in
        --model-id|--save-directory|--benchmark)
            # These options consume the following argument. Refuse to continue
            # when it is missing instead of silently storing an empty string.
            if [[ "$#" -lt 2 ]]; then
                echo "Missing value for parameter: $1"
                exit 1
            fi
            case "$1" in
                --model-id) HF_MODEL_ID="$2"; HF_MODEL_ID_PROVIDED=1 ;;
                --save-directory) RESULT_DIRECTORY="$2" ;;
                --benchmark) BENCMARK="$2" ;;
            esac
            shift # drop the option name; the shift below drops its value
            ;;
        --trust-remote-code)
            # Boolean flag: there is no value to skip, so no extra shift here.
            # An extra shift would swallow whichever option follows the flag.
            TRUST_REMOTE_CODE="True"
            ;;
        *)
            echo "Unknown parameter: $1"
            exit 1
            ;;
    esac
    shift # Move to next argument
 done

 # --model-id is mandatory; name the real flag in the message.
 if [[ "${HF_MODEL_ID_PROVIDED:-0}" -eq 0 ]]; then
    echo "--model-id parameter is required."
    exit 1
 fi

 # Detect the number of NVIDIA GPUs and create a device string.
 # Only lines starting with "GPU " are counted, so any other output of
 # `nvidia-smi -L` (for example a one-line error message when the driver is
 # unavailable) is not mistaken for a device. grep -c prints 0 when nothing
 # matches, including when nvidia-smi is not installed.
 gpu_count=$(nvidia-smi -L | grep -c '^GPU ')
 if [[ "$gpu_count" -eq 0 ]]; then
    echo "No NVIDIA GPUs detected. Exiting."
    exit 1
 fi
 # Construct the CUDA device string: the indices 0..gpu_count-1 joined by
 # commas, e.g. "0,1,2" for three GPUs.
 cuda_devices=""
 for ((i = 0; i < gpu_count; i++)); do
    if ((i > 0)); then
        cuda_devices+=","
    fi
    cuda_devices+="$i"
 done

 #### ECHO PARAMETERS ####
 # Print the effective configuration, one "NAME: value" line per setting.
 printf '%s\n' \
    "HF_MODEL_ID: $HF_MODEL_ID" \
    "RESULT_DIRECTORY: $RESULT_DIRECTORY" \
    "TRUST_REMOTE_CODE: $TRUST_REMOTE_CODE" \
    "CUDA_DEVICES: $cuda_devices" \
    "BENCHMARK: $BENCMARK"


 # Clone repository and install requirements for benchmarking.
 # Paths are quoted so a save directory containing spaces works, and every
 # step that later commands depend on aborts the script when it fails;
 # otherwise `pip install -e .` would run in whatever directory we ended up in.
 mkdir -p -- "$RESULT_DIRECTORY" || exit 1
 cd -- "$RESULT_DIRECTORY" || exit 1
 # Reuse an existing checkout on re-runs: git refuses to clone into an
 # existing non-empty directory.
 if [[ ! -d lm-evaluation-harness ]]; then
    git clone -b add-agieval https://github.com/dmahan93/lm-evaluation-harness || exit 1
 fi
 cd lm-evaluation-harness || exit 1
 pip install -e .
 pip install -q requests accelerate sentencepiece pytablewriter einops protobuf


 # #################### Run Nous Benchmarking ####################

 # Print the comma-separated harness task list for the benchmark group named
 # in $1. Prints nothing and returns 1 when the group is unknown.
 _nous_tasks_for() {
    case "$1" in
        agieval)
            echo "agieval_aqua_rat,agieval_logiqa_en,agieval_lsat_ar,agieval_lsat_lr,agieval_lsat_rc,agieval_sat_en,agieval_sat_en_without_passage,agieval_sat_math"
            ;;
        gpt4all)
            echo "hellaswag,openbookqa,winogrande,arc_easy,arc_challenge,boolq,piqa"
            ;;
        truthfulqa)
            echo "truthfulqa_mc"
            ;;
        bigbench)
            echo "bigbench_causal_judgement,bigbench_date_understanding,bigbench_disambiguation_qa,bigbench_geometric_shapes,bigbench_logical_deduction_five_objects,bigbench_logical_deduction_seven_objects,bigbench_logical_deduction_three_objects,bigbench_movie_recommendation,bigbench_navigate,bigbench_reasoning_about_colored_objects,bigbench_ruin_names,bigbench_salient_translation_error_detection,bigbench_snarks,bigbench_sports_understanding,bigbench_temporal_sequences,bigbench_tracking_shuffled_objects_five_objects,bigbench_tracking_shuffled_objects_seven_objects,bigbench_tracking_shuffled_objects_three_objects"
            ;;
        *)
            return 1
            ;;
    esac
 }

 # The script has already changed into the harness checkout, so a relative
 # RESULT_DIRECTORY would resolve inside that checkout. Anchor it to the
 # directory the script was started from so results land in the requested
 # save directory.
 case "$RESULT_DIRECTORY" in
    /*) output_dir="$RESULT_DIRECTORY" ;;
    *) output_dir="$CURRENT_DIR/$RESULT_DIRECTORY" ;;
 esac

 # $BENCMARK is deliberately unquoted: it is a whitespace-separated list.
 for bench in $BENCMARK; do
    echo "Running $bench benchmark"
    if ! tasks=$(_nous_tasks_for "$bench"); then
        echo "Unknown benchmark: $bench"
        exit 1
    fi
    echo "Tasks: $tasks"

    # Run the evaluation harness; one JSON result file per benchmark group.
    # NOTE(review): with several GPUs the device argument becomes e.g.
    # "cuda:0,1" - confirm the harness accepts a multi-index device string.
    python main.py \
        --model hf-causal \
        --model_args "pretrained=$HF_MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE" \
        --tasks "$tasks" \
        --device "cuda:$cuda_devices" \
        --batch_size auto \
        --output_path "${output_dir}/${bench}.json"
 done

 # Report the total runtime: finish time minus the recorded start time, both
 # in epoch seconds.
 end="$(date +%s)"
 elapsed=$((end - start))
 printf 'Elapsed Time: %s seconds\n' "$elapsed"


 # Options to run 
 # **Single Benchmark** 
 # ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your --benchmark truthfulqa
 # **All Benchmarks** 
 # ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your
	#!/bin/bash
	# Start timestamp in epoch seconds; the elapsed-time report at the end of the
	# script subtracts it from the finish time.
	start="$(date +%s)"

	# Directory the script was launched from, captured before any `cd`.
	CURRENT_DIR="$(pwd)"

	# Defaults for values that command line options may override.
	HF_MODEL_ID=''               # model id; no default
	HF_MODEL_ID_PROVIDED=0       # flips to 1 once --model-id has been seen
	RESULT_DIRECTORY='nous'      # where the run is set up and results are written
	TRUST_REMOTE_CODE='False'    # forwarded to the harness as trust_remote_code=<value>

	# Whitespace-separated list of benchmark groups to run (all four by default).
	BENCMARK='agieval gpt4all truthfulqa bigbench'


	# Parse command line arguments.
	#   --model-id <id>          required; stored in HF_MODEL_ID
	#   --save-directory <dir>   stored in RESULT_DIRECTORY
	#   --benchmark <list>       stored in BENCMARK (whitespace-separated names)
	#   --trust-remote-code      flag without a value; sets TRUST_REMOTE_CODE to "True"
	# Any other argument aborts the script with exit status 1.
	while [[ "$#" -gt 0 ]]; do
		case "$1" in
			--model-id|--save-directory|--benchmark)
				# These options consume the following argument. Refuse to continue
				# when it is missing instead of silently storing an empty string.
				if [[ "$#" -lt 2 ]]; then
					echo "Missing value for parameter: $1"
					exit 1
				fi
				case "$1" in
					--model-id) HF_MODEL_ID="$2"; HF_MODEL_ID_PROVIDED=1 ;;
					--save-directory) RESULT_DIRECTORY="$2" ;;
					--benchmark) BENCMARK="$2" ;;
				esac
				shift # drop the option name; the shift below drops its value
				;;
			--trust-remote-code)
				# Boolean flag: there is no value to skip, so no extra shift here.
				# An extra shift would swallow whichever option follows the flag.
				TRUST_REMOTE_CODE="True"
				;;
			*)
				echo "Unknown parameter: $1"
				exit 1
				;;
		esac
		shift # Move to next argument
	done

	# --model-id is mandatory; name the real flag in the message.
	if [[ "${HF_MODEL_ID_PROVIDED:-0}" -eq 0 ]]; then
		echo "--model-id parameter is required."
		exit 1
	fi

	# Detect the number of NVIDIA GPUs and create a device string.
	# The pipe must be a real `|`: an escaped `\|` hands "|", "wc" and "-l" to
	# nvidia-smi as arguments instead of building a pipeline.
	# Only lines starting with "GPU " are counted, so any other output of
	# `nvidia-smi -L` (for example a one-line error message when the driver is
	# unavailable) is not mistaken for a device. grep -c prints 0 when nothing
	# matches, including when nvidia-smi is not installed.
	gpu_count=$(nvidia-smi -L | grep -c '^GPU ')
	if [[ "$gpu_count" -eq 0 ]]; then
		echo "No NVIDIA GPUs detected. Exiting."
		exit 1
	fi
	# Construct the CUDA device string: the indices 0..gpu_count-1 joined by
	# commas, e.g. "0,1,2" for three GPUs.
	cuda_devices=""
	for ((i = 0; i < gpu_count; i++)); do
		if ((i > 0)); then
			cuda_devices+=","
		fi
		cuda_devices+="$i"
	done

	#### ECHO PARAMETERS ####
	# Print the effective configuration, one "NAME: value" line per setting.
	printf '%s\n' \
		"HF_MODEL_ID: $HF_MODEL_ID" \
		"RESULT_DIRECTORY: $RESULT_DIRECTORY" \
		"TRUST_REMOTE_CODE: $TRUST_REMOTE_CODE" \
		"CUDA_DEVICES: $cuda_devices" \
		"BENCHMARK: $BENCMARK"


	# Clone repository and install requirements for benchmarking.
	# Paths are quoted so a save directory containing spaces works, and every
	# step that later commands depend on aborts the script when it fails;
	# otherwise `pip install -e .` would run in whatever directory we ended up in.
	mkdir -p -- "$RESULT_DIRECTORY" || exit 1
	cd -- "$RESULT_DIRECTORY" || exit 1
	# Reuse an existing checkout on re-runs: git refuses to clone into an
	# existing non-empty directory.
	if [[ ! -d lm-evaluation-harness ]]; then
		git clone -b add-agieval https://github.com/dmahan93/lm-evaluation-harness || exit 1
	fi
	cd lm-evaluation-harness || exit 1
	pip install -e .
	pip install -q requests accelerate sentencepiece pytablewriter einops protobuf


	# #################### Run Nous Benchmarking ####################

	# Print the comma-separated harness task list for the benchmark group named
	# in $1. Prints nothing and returns 1 when the group is unknown.
	_nous_tasks_for() {
		case "$1" in
			agieval)
				echo "agieval_aqua_rat,agieval_logiqa_en,agieval_lsat_ar,agieval_lsat_lr,agieval_lsat_rc,agieval_sat_en,agieval_sat_en_without_passage,agieval_sat_math"
				;;
			gpt4all)
				echo "hellaswag,openbookqa,winogrande,arc_easy,arc_challenge,boolq,piqa"
				;;
			truthfulqa)
				echo "truthfulqa_mc"
				;;
			bigbench)
				echo "bigbench_causal_judgement,bigbench_date_understanding,bigbench_disambiguation_qa,bigbench_geometric_shapes,bigbench_logical_deduction_five_objects,bigbench_logical_deduction_seven_objects,bigbench_logical_deduction_three_objects,bigbench_movie_recommendation,bigbench_navigate,bigbench_reasoning_about_colored_objects,bigbench_ruin_names,bigbench_salient_translation_error_detection,bigbench_snarks,bigbench_sports_understanding,bigbench_temporal_sequences,bigbench_tracking_shuffled_objects_five_objects,bigbench_tracking_shuffled_objects_seven_objects,bigbench_tracking_shuffled_objects_three_objects"
				;;
			*)
				return 1
				;;
		esac
	}

	# The script has already changed into the harness checkout, so a relative
	# RESULT_DIRECTORY would resolve inside that checkout. Anchor it to the
	# directory the script was started from so results land in the requested
	# save directory.
	case "$RESULT_DIRECTORY" in
		/*) output_dir="$RESULT_DIRECTORY" ;;
		*) output_dir="$CURRENT_DIR/$RESULT_DIRECTORY" ;;
	esac

	# $BENCMARK is deliberately unquoted: it is a whitespace-separated list.
	for bench in $BENCMARK; do
		echo "Running $bench benchmark"
		if ! tasks=$(_nous_tasks_for "$bench"); then
			echo "Unknown benchmark: $bench"
			exit 1
		fi
		echo "Tasks: $tasks"

		# Run the evaluation harness; one JSON result file per benchmark group.
		# NOTE(review): with several GPUs the device argument becomes e.g.
		# "cuda:0,1" - confirm the harness accepts a multi-index device string.
		python main.py \
			--model hf-causal \
			--model_args "pretrained=$HF_MODEL_ID,trust_remote_code=$TRUST_REMOTE_CODE" \
			--tasks "$tasks" \
			--device "cuda:$cuda_devices" \
			--batch_size auto \
			--output_path "${output_dir}/${bench}.json"
	done

	# Report the total runtime: finish time minus the recorded start time, both
	# in epoch seconds.
	end="$(date +%s)"
	elapsed=$((end - start))
	printf 'Elapsed Time: %s seconds\n' "$elapsed"


	# Options to run
	# Single Benchmark
	# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your --benchmark truthfulqa
	# All Benchmarks
	# ./run_nous.sh --model-id HuggingFaceH4/zephyr-7b-gemma-v0.1 --save-directory /path/to/your