% !TeX root = ../supplementary.tex
\section{Training Setup}
\label{sec:training_setup}
\begin{table}
    \centering
    \begin{tabular}{lc}
        \toprule
        Parameter & Value \\
        \midrule
        Image Resolution & $224 \times 224$ \\
        Epochs & 300 \\
        Learning Rate & 3e-3 \\
        Learning Rate Schedule & cosine decay \\
        Batch Size & 2048 \\
        Warmup Schedule & linear \\
        Warmup Epochs & 3 \\
        Weight Decay & 0.02 \\
        Label Smoothing & 0.1 \\
        Optimizer & Lamb~\cite{You2020} \\
        Data Augmentation Policy & 3-Augment~\cite{Touvron2022} \\
        \bottomrule
    \end{tabular}
    \caption{Training setup used for ImageNet and \name.}
    \label{tab:in-setup}
\end{table}
On ImageNet, we use the same training setup as~\cite{Nauen2023} and~\cite{Touvron2022} without pretraining.
Since our focus is on evaluating the changes in accuracy due to \schemename/\name, we follow~\cite{Nauen2023} and use a single set of hyperparameters for all models.
We list the settings used for training on ImageNet and \name in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}.
\begin{table}
    \centering
    \begin{tabular}{lccc}
        \toprule
        Dataset & Batch Size & Epochs & Learning Rate \\
        \midrule
        Aircraft & 512 & 500 & 3e-4 \\
        Cars & 1024 & 500 & 3e-4 \\
        Flowers & 256 & 500 & 3e-4 \\
        Food & 2048 & 100 & 3e-4 \\
        Pets & 512 & 500 & 3e-4 \\
        \bottomrule
    \end{tabular}
    \caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}.}
    \label{tab:downstream-setup}
\end{table}
\section{Infill Model Comparison}
\begin{table}[h!]
    \centering
    \resizebox{\textwidth}{!}{\begin{tabular}{cc@{\hskip 0.3in}cc}
        \toprule
        LaMa & Att. Eraser & LaMa & Att.
Eraser \\ \midrule \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\ \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\ \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\ \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\ \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\ 
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\
        \bottomrule
    \end{tabular}}
    \caption{Example infills of LaMa and Attentive Eraser.}
\end{table}
\section{Images with High Infill Ratio}
\begin{table}[h]
    \centering
    \begin{tabular}{ccc}
        \toprule
        Infill Ratio & LaMa & Att. Eraser \\
        \midrule
        93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\
        \\
        95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} \\
        \\
        83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\
        \\
        88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} \\
        \bottomrule
    \end{tabular}
    \caption{Example infills for images in which a large relative foreground area is infilled (high infill ratio).}
    \label{tbl:high-rat}
\end{table}