arxiv V1
This commit is contained in:
88
arXiv version_arXiv/sec/appendix.tex
Normal file
88
arXiv version_arXiv/sec/appendix.tex
Normal file
@@ -0,0 +1,88 @@
|
||||
|
||||
\section{Training Setup}
|
||||
\label{sec:training_setup}
|
||||
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\begin{tabular}{lc}
|
||||
\toprule
|
||||
Parameter & Value \\
|
||||
\midrule
|
||||
Image Resolution & $224 \times 224$ \\
|
||||
Epochs & 300 \\
|
||||
Learning Rate & 3e-3 \\
|
||||
Learning Rate Schedule & cosine decay \\
|
||||
Batch Size & 2048 \\
|
||||
Warmup Schedule & linear \\
|
||||
Warmup Epochs & 3 \\
|
||||
Weight Decay & 0.02 \\
|
||||
Label Smoothing & 0.1 \\
|
||||
Optimizer & Lamb \cite{You2020} \\
|
||||
Data Augmentation Policy & 3-Augment \cite{Touvron2022} \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\caption{Training setup for our ImageNet and \name training.}
|
||||
\label{tab:in-setup}
|
||||
\end{table}
|
||||
\begin{table}[h]
|
||||
\centering
|
||||
\begin{tabular}{lccc}
|
||||
\toprule
|
||||
Dataset & Batch Size & Epochs & Learning Rate \\
|
||||
\midrule
|
||||
Aircraft & 512 & 500 & 3e-4 \\
|
||||
Cars & 1024 & 500 & 3e-4 \\
|
||||
Flowers & 256 & 500 & 3e-4 \\
|
||||
Food & 2048 & 100 & 3e-4 \\
|
||||
Pets & 512 & 500 & 3e-4 \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}.}
|
||||
\label{tab:downstream-setup}
|
||||
\end{table}
|
||||
|
||||
On ImageNet we use the same training setup as~\cite{Nauen2023} and~\cite{Touvron2022} without pretraining.
|
||||
As our focus is on evaluating the changes in accuracy due to \schemename/\name, we follow~\cite{Nauen2023} and stick to one set of hyperparameters for all models.
|
||||
We list the settings used for training on ImageNet and \name in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}.
|
||||
|
||||
\newpage
|
||||
\section{Infill Model Comparison}
|
||||
\label{sec:infill-model-comparison}
|
||||
\begin{table}[h!]
|
||||
\centering
|
||||
\resizebox{\textwidth}{!}{\begin{tabular}{cc@{\hskip 0.3in}cc}
|
||||
\toprule
|
||||
LaMa & Att. Eraser & LaMa & Att. Eraser \\
|
||||
\midrule
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} &
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} &
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\
|
||||
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\
|
||||
\bottomrule
|
||||
\end{tabular}}
|
||||
\caption{Example infills of LaMa and Attentive Eraser.}
|
||||
\end{table}
|
||||
|
||||
\section{Images with High Infill Ratio}
|
||||
\label{sec:high-infill-ratio}
|
||||
\begin{table}[h!]
|
||||
\centering
|
||||
\begin{tabular}{ccc}
|
||||
\toprule
|
||||
Infill Ratio & LaMa & Att. Eraser \\
|
||||
\midrule
|
||||
93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\ \\
|
||||
95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} \\ \\
|
||||
83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\ \\
|
||||
88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} \\
\bottomrule
|
||||
\end{tabular}
|
||||
\caption{Example infills for images with a large relative foreground area that has to be infilled (infill ratio).}
|
||||
\label{tbl:high-rat}
|
||||
\end{table}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user