Safetensors
English
qwen2
Qwen2.5-7B-Instruct-RLVR / launch_reward.sh
virtuoussy's picture
Upload 3 files
1680393 verified
# Launch a vLLM server and the reward server (server.py) as background jobs.
#
# Usage: launch_reward.sh MODEL_PATH ANSWER_PATH [METRIC]
#   MODEL_PATH   passed to `vllm serve` and to server.py as --tokenizer_path
#   ANSWER_PATH  passed to server.py as --answer_path
#   METRIC       "prob" adds --prob_reward to server.py; any other value
#                (or no value) omits it
#
# Output: vllm_server.log and reward_server.log in the current directory.
# Exit status: 2 if MODEL_PATH or ANSWER_PATH is missing.
set -x

MODEL_PATH=${1:-}
ANSWER_PATH=${2:-}
METRIC=${3:-}
PORT=8800        # passed to server.py as --port
VLLM_PORT=8000   # passed to `vllm serve --port` and embedded in --vllm_url

# Fail fast: without these, both background jobs would only fail later,
# inside their log files, while this script appeared to succeed.
if [[ -z "${MODEL_PATH}" || -z "${ANSWER_PATH}" ]]; then
  printf 'Usage: %s MODEL_PATH ANSWER_PATH [METRIC]\n' "${0##*/}" >&2
  exit 2
fi

export VLLM_ENGINE_ITERATION_TIMEOUT_S=60

# stderr is sent to the log explicitly: nohup only redirects it on its own
# when stderr is a terminal.
nohup vllm serve "${MODEL_PATH}" \
  --trust-remote-code \
  --served-model-name server_model \
  --max-num-seqs 256 \
  --max-model-len 4096 \
  --port "${VLLM_PORT}" \
  > vllm_server.log 2>&1 &

# sleep 60
# NOTE(review): the reward server is started without waiting for vLLM to come
# up. If server.py needs the vLLM endpoint at startup, add a wait here.

# Build the server.py argument list once; only --prob_reward is conditional.
# Argument order matches the original two invocations.
reward_args=(
  --tokenizer_path "${MODEL_PATH}"
  --answer_path "${ANSWER_PATH}"
  --normalize_reward
  --port "${PORT}"
)
if [[ "${METRIC}" == "prob" ]]; then
  reward_args+=(--prob_reward)
fi
reward_args+=(
  --vllm_url "http://localhost:${VLLM_PORT}/v1"
  --vllm_model server_model
)

nohup python server.py "${reward_args[@]}" > reward_server.log 2>&1 &