Files
ForAug/sec/appendix.tex
Tobias Christian Nauen 78765791be iccv 2025 submission
2026-02-24 12:08:38 +01:00

88 lines
7.1 KiB
TeX

% !TeX root = ../supplementary.tex
\section{Training Setup}
\label{sec:training_setup}
\begin{table}
\centering
\begin{tabular}{lc}
\toprule
Parameter & Value \\
\midrule
Image Resolution & $224 \times 224$ \\
Epochs & 300 \\
Learning Rate & 3e-3 \\
Learning Rate Schedule & cosine decay \\
Batch Size & 2048 \\
Warmup Schedule & linear \\
Warmup Epochs & 3 \\
Weight Decay & 0.02 \\
Label Smoothing & 0.1 \\
Optimizer & Lamb \cite{You2020} \\
Data Augmentation Policy & 3-Augment \cite{Touvron2022} \\
\bottomrule
\end{tabular}
\caption{Training setup for our ImageNet and \name training.}
\label{tab:in-setup}
\end{table}
On ImageNet, we use the same training setup as~\cite{Nauen2023} and~\cite{Touvron2022}, but without pretraining.
Since our focus is on evaluating the changes in accuracy due to \schemename/\name, we follow~\cite{Nauen2023} and use a single set of hyperparameters for all models.
We list the settings used for training on ImageNet and \name in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}.
\begin{table}
\centering
\begin{tabular}{lccc}
\toprule
Dataset & Batch Size & Epochs & Learning Rate \\
\midrule
Aircraft & 512 & 500 & 3e-4 \\
Cars & 1024 & 500 & 3e-4 \\
Flowers & 256 & 500 & 3e-4 \\
Food & 2048 & 100 & 3e-4 \\
Pets & 512 & 500 & 3e-4 \\
\bottomrule
\end{tabular}
\caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}.}
\label{tab:downstream-setup}
\end{table}
\section{Infill Model Comparison}
\begin{table}[htbp]
\centering
\resizebox{\linewidth}{!}{\begin{tabular}{cc@{\hskip 0.3in}cc}
\toprule
LaMa & Att. Eraser & LaMa & Att. Eraser \\
\midrule
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} &
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} &
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\
\bottomrule
\end{tabular}}
\caption{Example infills of LaMa and Attentive Eraser.}
\end{table}
\section{Images with High Infill Ratio}
\begin{table}[htbp]
\centering
\begin{tabular}{ccc}
\toprule
Infill Ratio & LaMa & Att. Eraser \\
\midrule
93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\ \\
95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} \\ \\
83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\ \\
88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} \\
\bottomrule
\end{tabular}
\caption{Example infills for images with a high infill ratio, i.e., a large relative foreground area that has to be infilled.}
\label{tbl:high-rat}
\end{table}