119 lines
3.0 KiB
Python
119 lines
3.0 KiB
Python
"""Default configuration parameters.
|
|
|
|
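Attributes:
    default_kwargs (dict): Default hyperparameters for the training process.
    deit_kwargs (dict): Overrides merged on top of ``default_kwargs`` when the ``"deit"`` setting is requested.
    slurm_defaults (dict): Default values for SLURM batch job settings.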
|
|
|
|
"""
|
|
|
|
from paths_config import user
|
|
|
|
# Baseline training hyperparameters. get_default_kwargs() uses this mapping for
# the "deitiii" setting and as the base that deit_kwargs overrides for "deit".
# Keys are kept in alphabetical order.
default_kwargs = dict(
    amp=True,
    aug_color_jitter_factor=0.3,
    aug_crop=True,
    aug_cutmix_alpha=1.0,
    aug_flip=True,
    aug_gauss_blur=True,
    aug_grayscale=True,
    aug_mixup_alpha=0.0,
    aug_normalize=True,
    aug_rand_rot=0,
    aug_random_erase_count=1,
    aug_random_erase_mode="pixel",
    aug_random_erase_prob=0.0,
    aug_repeated_augment_repeats=1,
    aug_resize=True,
    aug_solarize=True,
    augment_engine="torchvision",
    augment_strategy="3-augment",
    auto_augment_strategy="rand-m9-mstd0.5-inc1",
    batch_size=2048,
    compile_model=False,
    cuda=True,
    custom_dataset_path=None,
    debug=False,
    drop_path_rate=0.05,
    dropout=0.0,
    eval_amp=True,
    experiment_name="none",
    fused_attn=True,
    gather_stats_during_training=True,
    imsize=224,
    input_dim=None,
    keep_interm_states=2,
    label_smoothing=0.1,
    layer_scale=True,
    layer_scale_init_values=1e-4,
    log_level="info",
    loss="ce",
    loss_weight="none",
    lr=3e-3,
    max_grad_norm=1.0,
    max_seq_len=None,
    min_lr=1e-5,
    momentum=0.0,
    num_heads=None,
    num_workers=44,
    opt="fusedlamb",
    opt_eps=1e-7,
    pin_memory=False,
    pre_norm=False,
    prefetch_factor=2,
    qkv_bias=True,
    run_name=None,
    save_epochs=10,
    sched="cosine",
    seed=None,
    shuffle=True,
    tqdm=True,
    wandb=True,
    warmup_epochs=5,
    warmup_lr=1e-6,
    warmup_sched="linear",
    weight_decay=0.02,
    weighted_sampler=False,
)
|
|
# Not part of the defaults above (kept for reference): 'model_ema': True, 'model_ema_decay': 0.99996
|
|
|
|
|
|
# Values that replace the matching default_kwargs entries when
# get_default_kwargs() is called with the "deit" setting.
deit_kwargs = dict(
    aug_mixup_alpha=0.8,
    aug_repeated_augment_repeats=3,
    augment_strategy="deit",
    aug_random_erase_prob=0.25,
    batch_size=1024,
    lr=1e-3,
    max_grad_norm=0.0,
    num_workers=10,
    opt="adamw",
    opt_eps=1e-8,
    weight_decay=0.05,
)
|
|
|
|
|
|
def get_default_kwargs(settings="deitiii"):
    """Return the default training hyperparameters for a named setting.

    Args:
        settings (str): Name of the setting, matched case-insensitively.
            ``"deitiii"`` selects ``default_kwargs``; ``"deit"`` selects
            ``default_kwargs`` with the entries of ``deit_kwargs`` taking
            precedence.

    Returns:
        dict: A new dictionary on every call, so the result can be modified
        without changing the module-level defaults.

    Raises:
        NotImplementedError: If ``settings`` does not name a known setting.
    """
    preset = settings.lower()
    if preset == "deitiii":
        # Return a copy rather than the module-level dict itself: otherwise a
        # caller updating the result would silently change the defaults seen
        # by every later call (the "deit" branch already builds a fresh dict).
        return dict(default_kwargs)
    if preset == "deit":
        return {**default_kwargs, **deit_kwargs}
    raise NotImplementedError(f"No such defaults setting: {settings}")
|
|
|
|
|
|
# Default values for SLURM batch job settings.
# NOTE(review): how each entry is turned into a scheduler option is decided by
# the code that consumes this dict, which is not visible here — confirm there.
slurm_defaults = {
    "after_job": None,
    # Placeholder text; presumably to be replaced with the real image path.
    "container_image": "PATH/TO/ENROOT/IMAGE",
    # The leading token is placeholder text; the backtick `pwd` parts are kept
    # literally in the string (presumably expanded later by a shell — TODO confirm).
    "container_mounts": 'MOUNT_ALL_IMPORTANT_STORAGE_SERVERS_HERE,"`pwd`":"`pwd`"',
    "container_workdir": '"`pwd`"',
    "cpus_per_task": 24,
    "exclude": None,
    "export": "ALL,TQDM_DISABLE=1",
    "job_name": None,
    "mem_per_gpu": 90,  # unit is not stated here — TODO confirm
    "nodes": 1,
    "ntasks": 4,
    "partition": ["A100", "H100", "H200"],
    "task_prolog": None,
    "time": "1-0",
}
|