%%%%%%%% ICML 2024 EXAMPLE LATEX SUBMISSION FILE %%%%%%%%%%%%%%%%%
|
|
|
|
\documentclass{article}
|
|
|
|
% Recommended, but optional, packages for figures and better typesetting:
|
|
\usepackage{microtype}
|
|
\usepackage{graphicx}
|
|
\usepackage{subfigure}
|
|
\usepackage{booktabs} % for professional tables
|
|
|
|
% hyperref makes hyperlinks in the resulting PDF.
|
|
% If your build breaks (sometimes temporarily if a hyperlink spans a page)
|
|
% please comment out the following usepackage line and replace
|
|
% \usepackage{icml2024} with \usepackage[nohyperref]{icml2024} above.
|
|
\usepackage{hyperref}
|
|
|
|
|
|
% Attempt to make hyperref and algorithmic work together better:
|
|
\newcommand{\theHalgorithm}{\arabic{algorithm}}
|
|
|
|
% Use the following line for the initial blind version submitted for review:
|
|
% \usepackage{icml2024}
|
|
|
|
% If accepted, instead use the following line for the camera-ready submission:
|
|
\usepackage[accepted]{icml2024}
|
|
|
|
% For theorems and such
|
|
\usepackage{amsmath}
|
|
\usepackage{amssymb}
|
|
\usepackage{mathtools}
|
|
\usepackage{amsthm}
|
|
|
|
% if you use cleveref..
|
|
\usepackage[capitalize,noabbrev]{cleveref}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% THEOREMS
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\theoremstyle{plain}
|
|
\newtheorem{theorem}{Theorem}[section]
|
|
\newtheorem{proposition}[theorem]{Proposition}
|
|
\newtheorem{lemma}[theorem]{Lemma}
|
|
\newtheorem{corollary}[theorem]{Corollary}
|
|
\theoremstyle{definition}
|
|
\newtheorem{definition}[theorem]{Definition}
|
|
\newtheorem{assumption}[theorem]{Assumption}
|
|
\theoremstyle{remark}
|
|
\newtheorem{remark}[theorem]{Remark}
|
|
|
|
\input{../packages}
|
|
|
|
|
|
% The \icmltitle you define below is probably too long as a header.
|
|
% Therefore, a short form for the running title is supplied here:
|
|
\icmltitlerunning{Segment \& Recombine}
|
|
|
|
\begin{document}
|
|
|
|
\twocolumn[
|
|
\icmltitle{RecombiNet: A dataset for better ImageNet}
|
|
|
|
% It is OKAY to include author information, even for blind
|
|
% submissions: the style file will automatically remove it for you
|
|
% unless you've provided the [accepted] option to the icml2024
|
|
% package.
|
|
|
|
% List of affiliations: The first argument should be a (short)
|
|
% identifier you will use later to specify author affiliations
|
|
% Academic affiliations should list Department, University, City, Region, Country
|
|
% Industry affiliations should list Company, City, Region, Country
|
|
|
|
% You can specify symbols, otherwise they are numbered in order.
|
|
% Ideally, you should not use this facility. Affiliations will be numbered
|
|
% in order of appearance and this is the preferred way.
|
|
\icmlsetsymbol{equal}{*}
|
|
|
|
\begin{icmlauthorlist}
|
|
\icmlauthor{Tobias Christian Nauen}{rptu,dfki}
|
|
\icmlauthor{Brian Moser}{dfki}
|
|
\icmlauthor{Federico Raue}{dfki}
|
|
\icmlauthor{Stanislav Frolov}{dfki}
|
|
\icmlauthor{Andreas Dengel}{rptu,dfki}
|
|
\end{icmlauthorlist}
|
|
|
|
\icmlaffiliation{rptu}{Department of Computer Science, RPTU Kaiserslautern-Landau, Kaiserslautern, Germany}
|
|
\icmlaffiliation{dfki}{German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany}
|
|
|
|
\icmlcorrespondingauthor{Tobias Christian Nauen}{tobias\_christian.nauen@dfki.de}
|
|
|
|
% You may provide any keywords that you
|
|
% find helpful for describing your paper; these are used to populate
|
|
% the "keywords" metadata in the PDF but will not be shown in the document
|
|
\icmlkeywords{Machine Learning, ICML}
|
|
|
|
\vskip 0.3in
|
|
]
|
|
|
|
% this must go after the closing bracket ] following \twocolumn[ ...
|
|
|
|
% This command actually creates the footnote in the first column
|
|
% listing the affiliations and the copyright notice.
|
|
% The command takes one argument, which is text to display at the start of the footnote.
|
|
% The \icmlEqualContribution command is standard text for equal contribution.
|
|
% Remove it (just {}) if you do not need this facility.
|
|
|
|
\printAffiliationsAndNotice{} % leave blank if no need to mention equal contribution
|
|
% \printAffiliationsAndNotice{\icmlEqualContribution} % otherwise use the standard text.
|
|
|
|
\begin{abstract}
|
|
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
|
|
|
|
Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet.
|
|
\end{abstract}
|
|
|
|
ImageNet~\cite{Deng2009}
|
|
|
|
\section{Experiments}
|
|
\begin{itemize}
|
|
\item Train on all variants, then evaluate on all variants.
|
|
\item Train different models... ViT, ResNet, Swin, ? at multiple sizes.
|
|
\item Finetune for downstream tasks (classification).
|
|
\item Finetune for dense tasks / semantic segmentation.
|
|
\item Evaluate Diversity and Affinity from~\cite{GontijoLopes2021}.
|
|
\item Intermediate bg-fg combinations between only \emph{same} and \emph{all}.
|
|
\item Train with different data augmentation setups on top. 3-Augment, AutoAugment, Real Guidance. With special focus on operations like Mixup \& CutMix.
|
|
\item Look into filtering foregrounds and backgrounds based on segmentation quality + size...
|
|
\end{itemize}
|
|
|
|
\section{Training on RecombinationNet}
|
|
\begin{table*}
|
|
\centering
|
|
\begin{tabular}{cccccccc}
|
|
\toprule
|
|
Model & Mode & \makecell{Foreground \\ Rotation} & \makecell{ImageNet \\ Accuracy [\%]} & \makecell{Recombine same \\ Accuracy [\%]} & \makecell{Recombine all \\ Accuracy [\%]} & \makecell{Backgrounds \\ Accuracy [\%]} \\
|
|
\midrule
|
|
ViT-S/16 & ImageNet & & 79.73 & 68.2 & 50.6 & 15.7 \\
|
|
\midrule
|
|
ViT-S/16 & same & $\pm 0$ & 82.3 & 82.0 & 67.9 & 27.2 \\
|
|
ViT-S/16 & same & $\pm 10$ & 82.2 & 81.9 & 67.9 & 27.3 \\
|
|
ViT-S/16 & same & $\pm 20$ & 82.2 & 82.0 & 67.9 & 27.0 \\
|
|
ViT-S/16 & same & $\pm 30$ & 82.2 & 82.1 & 67.8 & 27.0 \\
|
|
ViT-S/16 & all & $\pm 0$ & 76.8 & 76.4 & 76.5 & 03.1 \\
|
|
ViT-S/16 & all & $\pm 20$ & \\
|
|
ViT-S/16 & same & & \\
|
|
ViT-S/16 & same & & \\
|
|
ViT-S/16 & same & & \\
|
|
ViT-S/16 & same & & \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on RecombinationNet v24-10 (300 ep), evaluating on ImageNet.}
|
|
\end{table*}
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\begin{tabular}{ccccccc}
|
|
\toprule
|
|
Model & Train DS & \makecell{ImageNet \\ Accuracy [\%]} & \makecell{Recombine same \\ Accuracy [\%]} & \makecell{Recombine all \\ Accuracy [\%]} & \makecell{Backgrounds \\ Accuracy [\%]} \\
|
|
\midrule
|
|
ViT-Ti/16 & IN1k & 76.1 & 64.5/67.3 & 47.3 & 12.8 \\
|
|
ViT-Ti/16 & RN same & 64.7 & 75.4 & 59.4 & 19.5 \\
|
|
ViT-Ti/16 & RN all & 53.5 & 70.6 & 70.6 & 03.2 \\
|
|
\midrule
|
|
ViT-S/16 & IN1k & 79.6 \\
|
|
ViT-S/16 & RN same & 69.7 & 82.0 & 67.9 & 27.0 \\
|
|
ViT-S/16 & RN all & 50.1 & 76.6 & 76.4 & 02.9 \\
|
|
\midrule
|
|
ViT-B/16 & IN1k & 78.0 & 65.9/68.8 & 48.2 & 16.1 \\
|
|
ViT-B/16 & RN same & 70.1 & 81.5 & 70.7 & 30.9 \\
|
|
ViT-B/16 & RN all & 44.9 & 75.9 & 76.0 & 02.5 \\
|
|
\midrule
|
|
Swin-Ti & IN1k & 77.9 & 66.5/69.2 & 48.9 & 15.6 \\
|
|
Swin-Ti & RN same & 63.6 & 81.4 & 70.1 & 29.1 \\
|
|
Swin-Ti & RN all & 09.4 & 76.9 & 75.7 & 00.6 \\
|
|
\midrule
|
|
Swin-S & IN1k & 79.7 & 67.9/70.9 & 50.5 & 16.8 \\
|
|
Swin-S & RN same & 65.0 & 82.5 & 72.4 & 34.1 \\
|
|
Swin-S & RN all & FAIL \\
|
|
\midrule
|
|
ResNet-34 & IN1k & 75.3 & 65.9/67.8 & 51.7 & 11.8 \\
|
|
ResNet-34 & RN same & 63.5 & 75.7 & 63.9 & 17.9 \\
|
|
ResNet-34 & RN all & 37.2 & 72.0 & 71.4 & 02.0 \\
|
|
\midrule
|
|
ResNet-50 & IN1k & 78.4 & 69.2/71.2 & 55.9 & 13.9 \\
|
|
ResNet-50 & RN same & 66.8 & 80.3 & 70.4 & 23.5 \\
|
|
ResNet-50 & RN all & 21.4 & 75.6 & 75.6 & 01.1 \\
|
|
\midrule
|
|
ResNet-101 & IN1k & 79.5 & 70.2/72.1 & 57.1 & 14.8 \\
|
|
ResNet-101 & RN same & 68.9 & 81.8 & 72.7 & 27.3 \\
|
|
ResNet-101 & RN all & 24.0 & 76.9 & 76.8 & 01.5 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on RecombinationNet v24-10? (300 ep), evaluating on ImageNet.}
|
|
\end{table*}
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\small
|
|
\begin{tabular}{llcccccc}
|
|
\toprule
|
|
& & \multicolumn{3}{c}{ViT-Ti} & \multicolumn{3}{c}{ResNet34} \\
|
|
\cmidrule(r){3-5} \cmidrule(l){6-8}
|
|
Version & Version ID & \makecell{TIN \\ Acc. [\%]} & \makecell{TRN same \\ max \\ Acc. [\%]} & \makecell{TRN same \\ max pt=.6 \\ Acc. [\%]} & \makecell{TIN \\ Acc. [\%]} & \makecell{TRN same \\ max \\ Acc. [\%]} & \makecell{TRN same \\ max pt=.6 \\ Acc. [\%]} \\
|
|
\midrule
|
|
24-01-10 & TINS & 66.7 & 60.1 & 60.7 & 77.9 & 71.4 & 73.2 \\
|
|
25-01-13 & TINS\_v2 & 66.7 & 61.0 & 62.4 & 77.9 & 72.2 & 74.2 \\
|
|
25-01-17 man & TINS\_v3\_f1 & 66.7 & 61.8 & 61.9 & 77.9 & 73.5 & 74.2 \\
|
|
25-01-17 auto & TINS\_v3\_f2 & 66.7 & 61.4 & 61.7 & 77.9 & 73.2 & 74.1 \\
|
|
25-01-24 first & TINS\_v4\_f1 & 66.7 & 62.7 & 62.7 & 77.9 & 73.6 & 74.9 \\
|
|
25-01-24 all man & TINS\_v5\_f1 & 66.7 & 61.9 & 62.3 & 77.9 & 73.7 & 74.5 \\
|
|
25-01-24 all auto & TINS\_v5\_f2 & 66.7 & 61.5 & 62.2 & 77.9 & 73.8 & 74.4 \\
|
|
25-02-03 all man & TINS\_v6\_f1 & 66.7 & 61.3 & 62.1 & 77.9 & 73.1 & 75.0 \\
|
|
25-02-04 all man & TINS\_v7\_f1 & 66.7 & & & 77.9 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on TinyImageNet and evaluating on different TinyRecombNet versions.}
|
|
\end{table*}
|
|
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\small
|
|
\begin{tabular}{clcccccccccc}
|
|
\toprule
|
|
& & ViT-Ti/16 & ViT-S/16 & ResNet34 & ResNet50 & ResNet101 \\
|
|
\cmidrule(r){3-3} \cmidrule(lr){4-4} \cmidrule(l){5-5} \cmidrule(l){6-6} \cmidrule(l){7-7}
|
|
Version & Train DS & \makecell{TIN \\ Acc. [\%]} & \makecell{TIN \\ Acc. [\%]} & \makecell{TIN \\ Acc. [\%]} & \makecell{TRN same \\ max \\ Acc. [\%]} & \makecell{TRN same \\ max pt=.6 \\ Acc. [\%]} \\
|
|
\midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-10 \\ TINS}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & \textbf{67.1} & \textbf{72.6} & 75.9 \\
|
|
% & TRN same/range pt=1.0 & 66.8 & \underline{\textbf{73.5}} & \underline{76.2} \\
|
|
% & TRN same/range pt=0.8 & 65.8 & & 75.7 \\
|
|
% \midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-13 \\ TINS\_v2}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & 65.0 & & \\
|
|
% & TRN same/range pt=1.0 & 65.9 & & 73.1 \\
|
|
% & TRN same/range pt=0.8 & 65.3 & & 73.9 \\
|
|
% \midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-17 \\ man \\ TINS\_v3\_f1}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & 65.9 & & 75.3 \\
|
|
% & TRN same/range pt=1.0 & 65.0 & & 75.6 \\
|
|
% & TRN same/range pt=0.8 & 64.2 & & 75.0 \\
|
|
% \midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-17 \\ auto \\ TINS\_v3\_f2}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & \textbf{67.0} & \textbf{73.0} & 76.1 \\
|
|
% & TRN same/range pt=1.0 & 65.4 & \textbf{72.1} & 75.9 \\
|
|
% & TRN same/range pt=0.8 & 65.4 & & 75.3 \\
|
|
% \midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-24 \\ first \\ TINS\_v4\_f1}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & 65.3 & \textbf{70.9} & 75.1 \\
|
|
% & TRN same/range pt=1.0 & 65.3 & \textbf{72.0} & 75.3 \\
|
|
% & TRN same/range pt=0.8 & 66.0 & \textbf{70.5} & 75.5 \\
|
|
% \midrule
|
|
\multirow{13}{*}{\rot{\makecell{v25-01-24 \\ all-man \\ TINS\_v5\_f1}}} & TIN & 66.7/65.7/65.9 & 68.9 & 77.9/77.9/78.1 & 79.1 & 79.8 \\
|
|
& TRN same/range & \textbf{66.7}/67.2/66.9 & \textbf{72.3} & 75.4 \\
|
|
& TRN same/range pt=1.0 & \textbf{67.5}/66.6 & \textbf{71.9} & 76.1 \\
|
|
& TRN same/range pt=0.8 & 65.8(?)/66.2 & \textbf{70.8} & 75.9 \\
|
|
\cmidrule{2-2}
|
|
& TRN same/range p$\to$t & \textbf{66.7}/66.6 & \textbf{72.1}/71.6/71.8 & 75.6 \\
|
|
& TRN same/range pt=1.0 p$\to$t & \textbf{66.9}/66.5/66.5 & \textbf{72.7}/73.1 & 75.8 \\
|
|
& TRN same/range pt=0.9 p$\to$t & \textbf{66.9} & \textbf{73.2} \\
|
|
& TRN same/range pt=0.8 p$\to$t & \makecell{\textbf{67.8}/64.7/67.1 \\ 66.6/66.3} & \makecell{\textbf{73.5}/70.1/72.2 \\ 71.7} & 75.9 \\
|
|
& TRN same/range pt=0.7 p$\to$t & \textbf{66.9} & \textbf{71.1} & 76.0 \\
|
|
& TRN same/range pt=0.6 p$\to$t & 66.4 & \textbf{72.0} & 76.3 \\
|
|
& TRN same/range pt=0.5 p$\to$t & 66.6 & \textbf{69.3} & 75.3 \\
|
|
\cmidrule{2-2}
|
|
& TRN all/range & 56.6 & 62.2 & 63.3 \\
|
|
\midrule
|
|
% \multirow{4}{*}{\rot{\makecell{v25-01-24 \\ all-auto \\ TINS\_v5\_f2}}} & TIN & 66.7 & 68.9 & 77.9 & \\
|
|
% & TRN same/range & 66.0(?) & \textbf{71.7} & 75.6 \\
|
|
% & TRN same/range pt=1.0 & 66.5 & \textbf{71.1} & 75.7 \\
|
|
% & TRN same/range pt=0.8 & 66.3 & \textbf{71.2} & 75.8 \\
|
|
% \midrule
|
|
\multirow{12}{*}{\rot{\makecell{v25-02-03 \\ all-man \\ TINS\_v6\_f1}}} & TIN & 66.7 & 68.9 & 77.9 \\
|
|
& TRN same/range & \textbf{67.3} & \textbf{72.9} & 76.0 \\
|
|
& TRN same/range pt=1.0 & \textbf{67.2} & \textbf{73.6} & 76.2 \\
|
|
& TRN same/range pt=0.8 & 66.6 & \textbf{73.1} & 75.5 \\
|
|
\cmidrule{2-2}
|
|
& TRN same/range p$\to$t & & \textbf{73.6/74.2} & 75.8 \\
|
|
& TRN same/range pt=1.0 p$\to$t & \textbf{67.4} & \underline{\textbf{74.2}} & 76.0 \\
|
|
& TRN same/range pt=0.9 p$\to$t & \textbf{67.4} & \textbf{74.1} & 76.0 \\
|
|
& TRN same/range pt=0.8 p$\to$t & \underline{\textbf{68.4}} & \textbf{72.4} & 76.0 \\
|
|
& TRN same/range pt=0.7 p$\to$t & & \textbf{72.2} & 75.7 \\
|
|
\cmidrule{2-2}
|
|
& TRN same/range p$\to$t ed blr 2 & 66.3 & \textbf{72.3} & 72.5 \\
|
|
& TRN same/range p$\to$t ed blr 4 & 65.3 & \textbf{69.8} & 74.5 \\
|
|
\midrule
|
|
% \multirow{10}{*}{\rot{\makecell{v25-02-04 \\ all-man \\ TINS\_v7\_f1}}} & TIN & 66.7 & 68.9 & 77.9 \\
|
|
% & TRN same/range & \textbf{68.2/66.8/67.0} & \textbf{/72.2/73.5} & 75.8 \\
|
|
% & TRN same/range pt=1.0 & \textbf{67.4} & \textbf{73.3} & 75.5 \\
|
|
% & TRN same/range pt=0.8 & \textbf{67.0} & \textbf{73.3} & 75.4 \\
|
|
% \cmidrule{2-2}
|
|
% & TRN same/range p$\to$t & \textbf{67.1/67.8/ } & \textbf{73.7/ /73.8} & \underline{76.2} \\
|
|
% & TRN same/range pt=1.0 p$\to$t & \textbf{68.0} & \textbf{73.5} & 75.5 \\
|
|
% & TRN same/range pt=0.9 p$\to$t & \textbf{67.8} & \textbf{73.2} & 75.8 \\
|
|
% & TRN same/range pt=0.8 p$\to$t & 66.4/\textbf{67.4/67.7} & \textbf{72.8/73.6} & 75.7 \\
|
|
% & TRN same/range pt=0.7 p$\to$t & \textbf{67.7} & \textbf{71.4} & 76.0 \\
|
|
% \midrule
|
|
\multirow{28}{*}{\rot{\makecell{v25-02-04 \\ all-man RETEST \\ TINS\_v7\_f1}}} & TIN & 66.7 & 68.9 & 77.9 \\
|
|
& TRN same/range & 68.7/66.4/67.5 & 72.1/73.0/72.2 \\
|
|
& TRN same/range & $67.5 \pm 1.2$ & $72.4 \pm 0.5$ \\
|
|
& TRN same/range pt=1.0 & 67.1/66.5/66.6 & 72.8/72.4/72.5 \\
|
|
& TRN same/range pt=1.0 & $66.7 \pm 0.2$ & $72.6 \pm 0.1$ \\
|
|
& TRN same/range pt=0.8 & 66.1/66.1/66.3 & 72.6/71.4/71.5 \\
|
|
& TRN same/range pt=0.8 & $66.2 \pm 0.1$ & $71.8 \pm 0.9$ \\
|
|
\cmidrule{2-2}
|
|
& TRN same/range p$\to$t & 68.5/66.1/66.8 & 73.0/72.3/73.3 \\
|
|
& TRN same/range p$\to$t & $67.1 \pm 1.2$ & $72.9 \pm 0.5$ \\
|
|
& TRN same/range pt=1.0 p$\to$t & 67.6/66.1/67.2 & 72.5/73.1/73.3 \\
|
|
& TRN same/range pt=1.0 p$\to$t & $67.0 \pm 1.2$ & $73.0 \pm 0.3$ \\
|
|
& TRN same/range pt=0.8 p$\to$t & 67.5/67.5/66.5 & 72.2/73.3/73.3 \\
|
|
& TRN same/range pt=0.8 p$\to$t & $67.2 \pm 0.7$ & $72.9 \pm 0.8$ \\
|
|
\cmidrule{2-2}
|
|
& TRN s/r p$\to$t pt=0.8 edgb=2 & 67.6 \\
|
|
& TRN s/r p$\to$t pt=0.8 edgb=4 & 65.8 \\
|
|
\cmidrule{2-2}
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.1 & 69.4 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.2 & 70.4 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.33 & 70.0 & 74.1 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.5 & 70.0 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=lin & 69.5 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=invlin & 67.5 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=cos & 71.3 \\
|
|
\cmidrule{2-2}
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.5 edgb=2 & 69.5 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=0.5 edgb=4 & 70.6 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=cos edgb=2 \\
|
|
& TRN s/r p$\to$t pt=0.8 orig p=cos edgb=4 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on TinyRecombinationNet (300 ep), evaluating on ImageNet. TIN=TinyImageNet, TRN=TinyRecombNet, TBN=TinyBackgroundNet. Versions are for train and test sets. v25-02-03 is comparable to v25-01-17, v25-02-04 is comparable to v25-01-24.}
|
|
\end{table*}
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\small
|
|
\begin{tabular}{llcccccccc}
|
|
\toprule
|
|
\multirow{2}{*}{Model} & Test Dataset $\rightarrow$ & \multicolumn{4}{c}{TinyRecombNet-5-1/same/range} & \multicolumn{4}{c}{TinyRecombNet-5-1/all/range} \\
|
|
\cmidrule(rl){3-6} \cmidrule(l){7-10}
|
|
& Train Dataset $\downarrow$ & IG & GradCAM & GradCAM++ & Attn & IG & GradCAM & GradCAM++ & Attn \\
|
|
\midrule
|
|
\multirow{3}{*}{ViT-Ti/16} & TIN & 1.07 & 1.56 & 1.83 & 1.72 & 1.12 & 2.20 & 2.21 & 1.78 \\
|
|
& TRN-5-1/same & 1.16 & 2.00 & 2.17 & 1.61 & 1.31 & 2.76 & 2.37 & 1.62 \\
|
|
& TRN-5-1/all & 1.51 & 2.90 & 2.69 & 2.37 & 1.56 & 3.00 & 2.74 & 2.33 \\
|
|
\midrule
|
|
\multirow{3}{*}{ViT-S/16} & TIN & 1.10 & 1.59 & 1.77 & 1.47 & 1.15 & 2.25 & 2.26 & 1.52 \\
|
|
& TRN-5-1/same & 1.18 & 1.67 & 1.69 & 1.38 & 1.40 & 2.68 & 2.46 & 1.49 \\
|
|
& TRN-5-1/all & 1.40 & 2.56 & 2.70 & 1.81 & 1.40 & 2.65 & 2.76 & 1.68 \\
|
|
\midrule
|
|
\multirow{3}{*}{ResNet34} & TIN & 1.40 & 1.89 & 1.71 & & 1.49 & 2.25 & 1.83 \\
|
|
& TRN-5-1/same & 1.41 & 1.97 & 1.86 & & 1.70 & 2.52 & 2.07 \\
|
|
& TRN-5-1/all & 2.19 & 2.67 & 2.64 & & 1.98 & 2.64 & 2.60 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Relative foreground importance ratio for different models and training datasets. We calculate the per-pixel importance for the foreground label class and aggregate it over the foreground and background regions. The relative foreground importance is the share of the total importance that falls into the foreground region, divided by the share of the image area covered by the foreground: $\text{Rel fg importance} := \frac{\text{importance in fg region}}{\text{total importance}} / \frac{\text{fg size}}{\text{total image size}}$. We average the results over the whole validation set. Training and evaluation were done without background pruning and with the range fg insertion mode, using TinyRecombNet v25-01-24.
|
|
\\
|
|
\tldr Our dataset/data augmentation improves the focus on the foreground class. Maybe it does not really work with ResNet, because that already has a good focus on the foreground object.}
|
|
\end{table*}
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\begin{tabular}{llccccccccc}
|
|
\toprule
|
|
\multirow{2}{*}{Model} & Test Dataset $\rightarrow$ & TIN & \multicolumn{4}{c}{RecombNet-5-1/same/range} & \multicolumn{4}{c}{RecombNet-5-1/all/range} \\
|
|
\cmidrule(rl){4-7} \cmidrule(l){8-11}
|
|
& Train Dataset $\downarrow$ & & $\eta = 1$ & $\eta = 2$ & $\eta = 3$ & $\eta = 4$ & $\eta = 1$ & $\eta = 2$ & $\eta = 3$ & $\eta = 4$ \\
|
|
\midrule
|
|
\multirow{6}{*}{ViT-Ti/16} & TIN & 66.7 & 58.7 & 59.6 & 60.1 & 60.2 & 34.3 & 34.4 & 35.3 & 35.4 \\
|
|
& TRN-5-1/same $\eta = -3$ & 66.0 \\
|
|
& TRN-5-1/same $\eta = -2$ & 66.8 & \\
|
|
& TRN-5-1/same $\eta = 1$ & 66.7 & 74.0 & 74.7 & 74.8 & 75.3 & 49.4 & 49.1 & 49.8 & 49.9 \\
|
|
& TRN-5-1/same $\eta = 2$ & 66.0 & 74.0 & 74.9 & 74.8 & 74.6 & 49.6 & 50.3 & 50.6 & 51.4 \\
|
|
& TRN-5-1/same $\eta = 3$ & 64.5 & 71.8 & 74.0 & 74.3 & 74.3 & 48.0 & 48.9 & 48.7 & 50.1 \\
|
|
\midrule
|
|
\multirow{6}{*}{ViT-S/16} & TIN & 68.9 & 60.8 & 62.6 & 63.1 & 62.5 & 36.6 & 37.9 & 38.7 & 39.1 \\
|
|
& TRN-5-1/same $\eta = -3$ & 71.3 \\
|
|
& TRN-5-1/same $\eta = -2$ & 71.5 & \\
|
|
& TRN-5-1/same $\eta = 1$ & 72.3 & 80.2 & 79.7 & 80.4 & 80.1 & 56.4 & 57.5 & 57.2 & 57.2 \\
|
|
& TRN-5-1/same $\eta = 2$ & 71.3 & 79.1 & 79.6 & 79.9 & 80.1 & 58.2 & 57.3 & 58.2 & 58.2 \\
|
|
& TRN-5-1/same $\eta = 3$ & 71.4 & 78.6 & 79.6 & 79.7 & 80.1 & 55.6 & 57.5 & 57.9 & 57.9 \\
|
|
\midrule
|
|
\multirow{4}{*}{ResNet34} & TIN & 77.9 & 72.2 & 72.7 & 72.7 & 73.2 & 54.2 & 54.6 & 54.2 & 54.7 \\
|
|
& TRN-5-1/same $\eta = 1$ & 75.4 & 83.4 & 83.7 & 83.2 & 83.4 & 69.2 & 69.6 & 68.6 & 68.81 \\
|
|
& TRN-5-1/same $\eta = 2$ & 76.0 & 83.1 & 83.3 & 83.4 & 83.7 & 68.5 & 69.1 & 69.2 & 69.3 \\
|
|
& TRN-5-1/same $\eta = 3$ & 75.8 & 83.1 & 83.1 & 84.0 & 83.5 & 67.3 & 67.8 & 69.2 & 69.0 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Importance of foreground centering via the Bates distribution. $\eta$ is the parameter of the Bates distribution; $\eta = 1$ is the uniform distribution. Training and evaluation were done without background pruning and with the range fg insertion mode. \\
|
|
\tldr Focussing the foreground object in the center makes the task easier (increasing performance left to right), but this then does not aid training (decreasing performance top to bottom) $\Rightarrow \eta = 1$ is the optimum (for training).}
|
|
\end{table*}
|
|
|
|
\begin{table*}
|
|
\centering
|
|
\begin{tabular}{lccc}
|
|
\toprule
|
|
\makecell[l]{Augmentation \\ Policy} & Dataset & \multicolumn{2}{c}{\makecell{TinyImageNet \\ Accuracy [\%]}} \\
|
|
& & ViT-Ti & ResNet34 \\
|
|
\cmidrule(r){1-2} \cmidrule(l){3-4}
|
|
\multirow{5}{*}{\makecell{minimal \\ w/o cutmix}} & TinyImageNet & 37.7 & 69.1 \\
|
|
& TinyRecombNet-3-2 & 50.7 & 69.1 \\
|
|
& TinyRecombNet-3-1 & 51.0 & 69.2 \\
|
|
& TinyDoublecombNet-3-2 & 44.9 & 68.6 + \\
|
|
& TinyDoublecombNet-3-1 & 46.7 & 68.6 \\
|
|
\cmidrule(r){1-2} \cmidrule(l){3-4}
|
|
\multirow{3}{*}{\makecell{minimal \\ w/ cutmix}} & TinyImageNet & 57.7 & \textbf{73.0} \\
|
|
& TinyRecombNet-3-2 & \textbf{60.6} & 70.3 \\
|
|
& TinyRecombNet-3-1 & 59.8 & 70.2 \\
|
|
\cmidrule(r){1-2} \cmidrule(l){3-4}
|
|
\multirow{3}{*}{\makecell{3-augment \\ w/o cutmix}} & TinyImageNet & 45.3 & 71.6 \\
|
|
& TinyRecombNet-3-2 & \textbf{59.9} & \textbf{74.4} \\
|
|
& TinyRecombNet-3-1 & 59.8 & 74.3 \\
|
|
\cmidrule(r){1-2} \cmidrule(l){3-4}
|
|
\multirow{3}{*}{\makecell{3-augment \\ w/ cutmix}} & TinyImageNet & \textbf{65.9} & \textbf{78.0} \\
|
|
& TinyRecombNet-3-2 & 65.4 & 76.0 \\
|
|
& TinyRecombNet-3-1 & \textbf{65.9} & 75.3 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on TinyRecombinationNet (300 ep), evaluating on ImageNet with different data augmentation setups. TinyRecombNet version is v25-01-17-man/auto.}
|
|
\end{table*}
|
|
|
|
\section{Evaluating on RecombinationNet}
|
|
\begin{table*}
|
|
\centering
|
|
\begin{tabular}{llcccccccc}
|
|
\toprule
|
|
Model & Eval DS & IN1k Baseline & No pruning & $pt = 1.0$ & $pt = 0.8$ & $pt = 0.6$ & $pt = 0.4$ & $pt = 0.3$ & $pt = 0.2$ \\
|
|
\midrule
|
|
ViT-B/16 & RN same & 78.0 & 68.8 & 68.9 & 69.4 & 70.2 & 70.7 & 71.2 & 71.2 \\
|
|
ViT-B/16 & Backgrounds & & 16.1 & 16.1 & 17.9 & 13.1 & 15.1 \\
|
|
\midrule
|
|
ResNet-101 & RN same & 79.5 & 72.2 & 72.3 & 73.0 & 73.1 & 73.8 & 73.8 & 74.4 \\
|
|
ResNet-101 & Backgrounds & & 14.8 & 14.8 & 16.4 & 11.6 & 13.4 \\
|
|
\bottomrule
|
|
\end{tabular}
|
|
\caption{Training on ImageNet and evaluating with different background prune thresholds. \textbf{RecombNet v25-01-10}.}
|
|
\end{table*}
|
|
|
|
|
|
|
|
\bibliography{../JabRef/main_bib}
|
|
\bibliographystyle{icml2024}
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
% APPENDIX
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\newpage
|
|
\appendix
|
|
\onecolumn
|
|
|
|
\end{document}
|
|
|