# model settings
# Only the registered model type is given here; no further model options
# are set in this file.
model = {'type': 'VA_SF'}

# dataset settings
dataset_type = 'KSoundDataset'
data_root = 'data/ksound/'

# The archive paths below are derived from data_root so that relocating the
# dataset only requires editing a single line.
# The train pipeline collects no meta keys; the test pipeline additionally
# keeps 'filename'.
train_pipeline = [
    dict(
        type='LoadKSoundFromZip',
        video_zip_name=data_root + 'train-frame.zip',
        audio_zip_name=data_root + 'train-audio-npy.zip'),
    # dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'audio'], meta_keys=[]),
]
# NOTE(review): the test pipeline loads the *train* archives -- confirm this
# is intended and not a placeholder for a held-out split.
test_pipeline = [
    dict(
        type='LoadKSoundFromZip',
        video_zip_name=data_root + 'train-frame.zip',
        audio_zip_name=data_root + 'train-audio-npy.zip'),
    dict(type='Collect', keys=['img', 'audio'], meta_keys=['filename']),
]
# Data loading: per-GPU batch size and worker count, followed by the train
# and test dataset definitions.
# NOTE(review): both splits use the same ann_file (the train-frame archive);
# confirm that the test split is meant to reuse the training data.
data = {
    'imgs_per_gpu': 32,
    'workers_per_gpu': 4,
    'train': {
        'type': dataset_type,
        'ann_file': data_root + 'train-frame.zip',
        'img_prefix': '',
        'pipeline': train_pipeline,
    },
    'test': {
        'type': dataset_type,
        'ann_file': data_root + 'train-frame.zip',
        'img_prefix': '',
        'pipeline': test_pipeline,
    },
}
# No extra train-time or test-time options are configured.
train_cfg = {}
test_cfg = {}
# NOTE(review): metric 'bbox' looks inherited from a detection config;
# confirm that the dataset's evaluation actually supports it.
evaluation = {'interval': 1, 'metric': 'bbox'}
# optimizer
# No `momentum` key is set: presumably 'Adam' resolves to torch.optim.Adam,
# which does not accept one (verify against the optimizer builder).
optimizer = {'type': 'Adam', 'lr': 0.0003, 'weight_decay': 0.0001}
# Gradient clipping settings: max norm 35 with norm type 2.
optimizer_config = {'grad_clip': {'max_norm': 35, 'norm_type': 2}}
# learning policy
# Step schedule with a linear warmup: 500 warmup iterations at a warmup
# ratio of 1/3, with steps listed at 60 and 80 (presumably epoch indices,
# given total_epochs elsewhere in this file -- confirm against the runner).
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 500,
    'warmup_ratio': 1.0 / 3,
    'step': [60, 80],
}
# Checkpointing: interval of 1, with symlink creation turned off.
checkpoint_config = {'interval': 1, 'create_symlink': False}
# yapf:disable
# Logging: interval of 10 with the text logger hook only; the TensorBoard
# hook is left commented out.
log_config = {
    'interval': 10,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}
# yapf:enable
# runtime settings
# Schedule: the workflow lists a single ('train', 1) entry; 90 epochs total.
workflow = [('train', 1)]
total_epochs = 90
# Distributed backend and log verbosity.
dist_params = {'backend': 'nccl'}
log_level = 'INFO'
# Output directory; no checkpoint is loaded or resumed by default.
work_dir = './work_dirs/ksound_sf'
resume_from = None
load_from = None
