-
Notifications
You must be signed in to change notification settings - Fork 830
Open
Description
@hjh0119 我4卡A800训练也会卡死,每次都是卡在第228个step
# Launch `swift rollout` for Qwen2.5-VL-3B-Instruct with only GPUs 2 and 3
# visible to the process. Requires bash (the flags are kept in an array).
export \
  MODELSCOPE_CACHE='/slow_disk2/ccl/datasets/多模态数据' \
  CUDA_VISIBLE_DEVICES=2,3 \
  MAX_PIXELS=200704

rollout_args=(
  --model Qwen/Qwen2.5-VL-3B-Instruct
  --vllm_data_parallel_size 2
)
swift rollout "${rollout_args[@]}"
# GRPO training launch via `swift rlhf`, with GPUs 0 and 1 visible,
# NPROC_PER_NODE=2 and Weights & Biases set to offline mode.
# Requires bash (the flags are kept in an array so they can be annotated).
#
# NOTE(review): MAX_PIXELS is 401408 here but 200704 for the `swift rollout`
# command earlier in this file -- confirm the difference is intentional.
#
# Disabled proxy override, kept for reference:
# export NO_PROXY=localhost,127.0.0.1,192.168.2.158
export \
  MODELSCOPE_CACHE='/slow_disk2/ccl/datasets/多模态数据' \
  WANDB_API_KEY='************************************' \
  WANDB_MODE=offline \
  CUDA_VISIBLE_DEVICES=0,1 \
  MAX_PIXELS=401408 \
  NPROC_PER_NODE=2

grpo_args=(
  # Algorithm, base model and reward functions (one comes from the plugin file).
  --rlhf_type grpo
  --model Qwen/Qwen2.5-VL-3B-Instruct
  --external_plugins ./ms-swift/examples/train/grpo/plugin/plugin.py
  --reward_funcs external_r1v_acc format

  # vLLM in server mode, addressed at 127.0.0.1:8000.
  --use_vllm true
  --vllm_mode server
  --vllm_server_host 127.0.0.1
  --vllm_server_port 8000

  # LoRA training in bfloat16 with freeze_vit enabled.
  --train_type lora
  --freeze_vit true
  --torch_dtype bfloat16

  # Dataset, batch sizes and optimizer schedule.
  --dataset 'AI-ModelScope/GEOQA_R1V_Train_8K'
  --max_completion_length 1024
  --num_train_epochs 1
  --per_device_train_batch_size 8
  --per_device_eval_batch_size 8
  --learning_rate 1e-6
  --gradient_accumulation_steps 2

  # Checkpointing, evaluation and logging cadence.
  --save_strategy 'steps'
  --eval_strategy 'steps'
  --eval_steps 400
  --save_steps 400
  --save_total_limit 10
  --logging_steps 1
  --output_dir output/GRPO_GEOQA
  --warmup_ratio 0.05
  --dataloader_num_workers 4

  # Sampling settings for the generated completions.
  --num_generations 8
  --temperature 1.0
  --repetition_penalty 1.1

  # System prompt file, DeepSpeed preset and experiment reporting.
  --system './ms-swift/examples/train/grpo/prompt.txt'
  --deepspeed zero3
  --log_completions true
  --report_to wandb

  # Remaining GRPO / optimisation knobs.
  --num_iterations 2
  --async_generate false
  --beta 0.001
  --max_grad_norm 0.5
)
swift rlhf "${grpo_args[@]}"
Metadata
Metadata
Assignees
Labels
No labels