import argparse
import json
import os

def get_args():
    """Parse this script's command-line options from ``sys.argv``.

    Options:
        --model       Required string; must be one of the model names
                      listed in ``supported_models`` below.
        --img_path    String, defaults to ``"images"``.
        --total_part  Optional integer; ``None`` when omitted.
        --this_part   Optional integer; ``None`` when omitted.

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``. argparse
        itself exits the process on a missing/invalid ``--model``.
    """
    # Closed set of accepted values for --model.
    supported_models = [
        "instructblip-vicuna-13b",
        "llava-1.5-13b-hf",
        "blip2-flan-t5-xxl",
        "fuyu-8b",
        "Qwen-VL-Chat",
    ]

    cli = argparse.ArgumentParser()
    cli.add_argument("--model", type=str, required=True, choices=supported_models)
    cli.add_argument("--img_path", type=str, default="images")

    # NOTE(review): presumably these two select one shard of the workload
    # (part `this_part` out of `total_part`) -- confirm against the caller.
    # Both are plain optional ints with no default.
    for part_flag in ("--total_part", "--this_part"):
        cli.add_argument(part_flag, type=int)

    return cli.parse_args()

# Prompt table keyed by the puzzle region being counted ('question',
# 'answer', 'overall'). Every prompt is the same puzzle-layout preamble
# followed by a region-specific "How many grids ..." question.
#
# NOTE(review): the preamble contains the typos "bottome" and "conatin".
# They are part of the prompt text and are kept verbatim here; correcting
# them would change the exact prompt string, so do that only deliberately.
perception_low_level_dict = {
    region: (
        "You are given a puzzle. "
        "The puzzle consists of a question part on the top and the choices part in the bottom. "
        "The question part on the top contains some grids with the last missing blank grid to be completed. "
        "The choices part on the bottome conatin a sequence of grids representing the possible choices. "
        + counting_question
    )
    for region, counting_question in (
        ("question", "How many grids, including the blank grid, are there in the question part?"),
        ("answer", "How many grids are there in the choice part?"),
        ("overall", "How many grids, including the blank grid, are there in the whole puzzle?"),
    )
}