import os

# Process-wide environment settings. They are applied at module load, i.e.
# before the training entry point further down is imported and executed.
os.environ.update({
    # Expose only CUDA device indices 0 and 1 to this process.
    'CUDA_VISIBLE_DEVICES': '0,1',
    # NOTE(review): presumably a per-image pixel cap read by the multimodal
    # preprocessing; this script only sets it -- confirm where it is consumed.
    'MAX_PIXELS': '602112',
})

if __name__ == '__main__':
    # Imported inside the guard, so the swift dependency is only loaded when
    # this file is executed as a script (and after the environment variables
    # above have been set).
    from swift.megatron import MegatronRLHFArguments, megatron_rlhf_main

    # The keyword arguments are split into thematic groups purely for
    # readability; every key/value is forwarded unchanged to
    # MegatronRLHFArguments. Group headings are a reading aid, not a statement
    # about how the library itself categorises these options.

    # Model, fine-tuning mode, weight format and numerics.
    model_cfg = {
        'rlhf_type': 'grpo',
        'model': 'Qwen/Qwen2.5-VL-3B-Instruct',
        'train_type': 'lora',
        'load_safetensors': True,
        'save_safetensors': True,
        'finetune': True,
        'bf16': True,
        'attention_backend': 'flash',
    }

    # Parallel layout (tensor parallel size matches the two visible devices).
    parallel_cfg = {
        'tensor_model_parallel_size': 2,
        'context_parallel_size': 1,
    }

    # Dataset, prompt and data-loading options.
    data_cfg = {
        'dataset': ['AI-ModelScope/clevr_cogen_a_train#10000'],
        'system': 'examples/train/grpo/prompt.txt',
        'max_length': 8192,
        'padding_free': True,
        'num_workers': 4,
        'dataset_num_proc': 4,
    }

    # Training length, batch sizes and learning rate.
    schedule_cfg = {
        'max_epochs': 1,
        'train_iters': 100,
        'global_batch_size': 128,
        'micro_batch_size': 4,
        'lr': 1e-4,
    }

    # GRPO sampling, reward and loss options.
    grpo_cfg = {
        'steps_per_generation': 4,
        'num_generations': 8,
        'external_plugins': ['examples/train/grpo/plugin/plugin.py'],
        'reward_funcs': ['external_r1v_acc', 'format'],
        'max_completion_length': 2048,
        'temperature': 1,
        'beta': 0.001,
        'importance_sampling_level': 'token',
        'epsilon': 0.2,
        'epsilon_high': 0.2,
        'dynamic_sample': False,
        'overlong_filter': True,
        'loss_type': 'grpo',
    }

    # vLLM rollout options.
    # NOTE(review): sleep_level is grouped here on the assumption that it
    # controls the vLLM engine's sleep mode -- confirm against swift docs.
    vllm_cfg = {
        'use_vllm': True,
        'vllm_mode': 'colocate',
        'vllm_gpu_memory_utilization': 0.5,
        'vllm_max_model_len': 8192,
        'sleep_level': 2,
    }

    # Offloading and activation-recompute options.
    memory_cfg = {
        'offload_model': True,
        'offload_bridge': False,
        'offload_optimizer': True,
        'recompute_granularity': 'full',
        'recompute_method': 'uniform',
        'recompute_num_layers': 1,
    }

    # Logging, evaluation and checkpoint options.
    logging_cfg = {
        'log_interval': 1,
        'log_completions': True,
        'eval_interval': 1000,
        'save_interval': 1000,
        'no_save_optim': True,
        'no_save_rng': True,
    }

    # Merge the groups (their key sets are disjoint) into one kwargs mapping.
    rlhf_kwargs = {}
    for group in (
            model_cfg,
            parallel_cfg,
            data_cfg,
            schedule_cfg,
            grpo_cfg,
            vllm_cfg,
            memory_cfg,
            logging_cfg,
    ):
        rlhf_kwargs.update(group)

    rlhf_args = MegatronRLHFArguments(**rlhf_kwargs)
    megatron_rlhf_main(rlhf_args)
