2 Commits

Author SHA1 Message Date
Tobias Christian Nauen
e8cc0ee8a6 arxiv V2 2026-02-24 11:57:25 +01:00
Tobias Christian Nauen
7e66c96a60 arxiv V1 2026-02-24 11:52:26 +01:00
466 changed files with 19637 additions and 3246 deletions

View File

@@ -1,79 +0,0 @@
% ALGORITHM STYLE -- Released 8 April 1996
% for LaTeX-2e
% Copyright -- 1994 Peter Williams
% E-mail Peter.Williams@dsto.defence.gov.au
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithm}
\typeout{Document Style `algorithm' - floating environment}
\RequirePackage{float}
\RequirePackage{ifthen}
\newcommand{\ALG@within}{nothing}
\newboolean{ALG@within}
\setboolean{ALG@within}{false}
\newcommand{\ALG@floatstyle}{ruled}
\newcommand{\ALG@name}{Algorithm}
\newcommand{\listalgorithmname}{List of \ALG@name s}
% Declare Options
% first appearance
\DeclareOption{plain}{
\renewcommand{\ALG@floatstyle}{plain}
}
\DeclareOption{ruled}{
\renewcommand{\ALG@floatstyle}{ruled}
}
\DeclareOption{boxed}{
\renewcommand{\ALG@floatstyle}{boxed}
}
% then numbering convention
\DeclareOption{part}{
\renewcommand{\ALG@within}{part}
\setboolean{ALG@within}{true}
}
\DeclareOption{chapter}{
\renewcommand{\ALG@within}{chapter}
\setboolean{ALG@within}{true}
}
\DeclareOption{section}{
\renewcommand{\ALG@within}{section}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsection}{
\renewcommand{\ALG@within}{subsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{subsubsection}{
\renewcommand{\ALG@within}{subsubsection}
\setboolean{ALG@within}{true}
}
\DeclareOption{nothing}{
\renewcommand{\ALG@within}{nothing}
\setboolean{ALG@within}{true}
}
\DeclareOption*{\edef\ALG@name{\CurrentOption}}
% ALGORITHM
%
\ProcessOptions
\floatstyle{\ALG@floatstyle}
\ifthenelse{\boolean{ALG@within}}{
\ifthenelse{\equal{\ALG@within}{part}}
{\newfloat{algorithm}{htbp}{loa}[part]}{}
\ifthenelse{\equal{\ALG@within}{chapter}}
{\newfloat{algorithm}{htbp}{loa}[chapter]}{}
\ifthenelse{\equal{\ALG@within}{section}}
{\newfloat{algorithm}{htbp}{loa}[section]}{}
\ifthenelse{\equal{\ALG@within}{subsection}}
{\newfloat{algorithm}{htbp}{loa}[subsection]}{}
\ifthenelse{\equal{\ALG@within}{subsubsection}}
{\newfloat{algorithm}{htbp}{loa}[subsubsection]}{}
\ifthenelse{\equal{\ALG@within}{nothing}}
{\newfloat{algorithm}{htbp}{loa}}{}
}{
\newfloat{algorithm}{htbp}{loa}
}
\floatname{algorithm}{\ALG@name}
\newcommand{\listofalgorithms}{\listof{algorithm}{\listalgorithmname}}

View File

@@ -1,201 +0,0 @@
% ALGORITHMIC STYLE -- Released 8 APRIL 1996
% for LaTeX version 2e
% Copyright -- 1994 Peter Williams
% E-mail PeterWilliams@dsto.defence.gov.au
%
% Modified by Alex Smola (08/2000)
% E-mail Alex.Smola@anu.edu.au
%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{algorithmic}
\typeout{Document Style `algorithmic' - environment}
%
\RequirePackage{ifthen}
\RequirePackage{calc}
\newboolean{ALC@noend}
\setboolean{ALC@noend}{false}
\newcounter{ALC@line}
\newcounter{ALC@rem}
\newlength{\ALC@tlm}
%
\DeclareOption{noend}{\setboolean{ALC@noend}{true}}
%
\ProcessOptions
%
% ALGORITHMIC
\newcommand{\algorithmicrequire}{\textbf{Require:}}
\newcommand{\algorithmicensure}{\textbf{Ensure:}}
\newcommand{\algorithmiccomment}[1]{\{#1\}}
\newcommand{\algorithmicend}{\textbf{end}}
\newcommand{\algorithmicif}{\textbf{if}}
\newcommand{\algorithmicthen}{\textbf{then}}
\newcommand{\algorithmicelse}{\textbf{else}}
\newcommand{\algorithmicelsif}{\algorithmicelse\ \algorithmicif}
\newcommand{\algorithmicendif}{\algorithmicend\ \algorithmicif}
\newcommand{\algorithmicfor}{\textbf{for}}
\newcommand{\algorithmicforall}{\textbf{for all}}
\newcommand{\algorithmicdo}{\textbf{do}}
\newcommand{\algorithmicendfor}{\algorithmicend\ \algorithmicfor}
\newcommand{\algorithmicwhile}{\textbf{while}}
\newcommand{\algorithmicendwhile}{\algorithmicend\ \algorithmicwhile}
\newcommand{\algorithmicloop}{\textbf{loop}}
\newcommand{\algorithmicendloop}{\algorithmicend\ \algorithmicloop}
\newcommand{\algorithmicrepeat}{\textbf{repeat}}
\newcommand{\algorithmicuntil}{\textbf{until}}
%changed by alex smola
\newcommand{\algorithmicinput}{\textbf{input}}
\newcommand{\algorithmicoutput}{\textbf{output}}
\newcommand{\algorithmicset}{\textbf{set}}
\newcommand{\algorithmictrue}{\textbf{true}}
\newcommand{\algorithmicfalse}{\textbf{false}}
\newcommand{\algorithmicand}{\textbf{and\ }}
\newcommand{\algorithmicor}{\textbf{or\ }}
\newcommand{\algorithmicfunction}{\textbf{function}}
\newcommand{\algorithmicendfunction}{\algorithmicend\ \algorithmicfunction}
\newcommand{\algorithmicmain}{\textbf{main}}
\newcommand{\algorithmicendmain}{\algorithmicend\ \algorithmicmain}
%end changed by alex smola
\def\ALC@item[#1]{%
\if@noparitem \@donoparitem
\else \if@inlabel \indent \par \fi
\ifhmode \unskip\unskip \par \fi
\if@newlist \if@nobreak \@nbitem \else
\addpenalty\@beginparpenalty
\addvspace\@topsep \addvspace{-\parskip}\fi
\else \addpenalty\@itempenalty \addvspace\itemsep
\fi
\global\@inlabeltrue
\fi
\everypar{\global\@minipagefalse\global\@newlistfalse
\if@inlabel\global\@inlabelfalse \hskip -\parindent \box\@labels
\penalty\z@ \fi
\everypar{}}\global\@nobreakfalse
\if@noitemarg \@noitemargfalse \if@nmbrlist \refstepcounter{\@listctr}\fi \fi
\sbox\@tempboxa{\makelabel{#1}}%
\global\setbox\@labels
\hbox{\unhbox\@labels \hskip \itemindent
\hskip -\labelwidth \hskip -\ALC@tlm
\ifdim \wd\@tempboxa >\labelwidth
\box\@tempboxa
\else \hbox to\labelwidth {\unhbox\@tempboxa}\fi
\hskip \ALC@tlm}\ignorespaces}
%
\newenvironment{algorithmic}[1][0]{
\let\@item\ALC@item
\newcommand{\ALC@lno}{%
\ifthenelse{\equal{\arabic{ALC@rem}}{0}}
{{\footnotesize \arabic{ALC@line}:}}{}%
}
\let\@listii\@listi
\let\@listiii\@listi
\let\@listiv\@listi
\let\@listv\@listi
\let\@listvi\@listi
\let\@listvii\@listi
\newenvironment{ALC@g}{
\begin{list}{\ALC@lno}{ \itemsep\z@ \itemindent\z@
\listparindent\z@ \rightmargin\z@
\topsep\z@ \partopsep\z@ \parskip\z@\parsep\z@
\leftmargin 1em
\addtolength{\ALC@tlm}{\leftmargin}
}
}
{\end{list}}
\newcommand{\ALC@it}{\addtocounter{ALC@line}{1}\addtocounter{ALC@rem}{1}\ifthenelse{\equal{\arabic{ALC@rem}}{#1}}{\setcounter{ALC@rem}{0}}{}\item}
\newcommand{\ALC@com}[1]{\ifthenelse{\equal{##1}{default}}%
{}{\ \algorithmiccomment{##1}}}
\newcommand{\REQUIRE}{\item[\algorithmicrequire]}
\newcommand{\ENSURE}{\item[\algorithmicensure]}
\newcommand{\STATE}{\ALC@it}
\newcommand{\COMMENT}[1]{\algorithmiccomment{##1}}
%changes by alex smola
\newcommand{\INPUT}{\item[\algorithmicinput]}
\newcommand{\OUTPUT}{\item[\algorithmicoutput]}
\newcommand{\SET}{\item[\algorithmicset]}
% \newcommand{\TRUE}{\algorithmictrue}
% \newcommand{\FALSE}{\algorithmicfalse}
\newcommand{\AND}{\algorithmicand}
\newcommand{\OR}{\algorithmicor}
\newenvironment{ALC@func}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@main}{\begin{ALC@g}}{\end{ALC@g}}
%end changes by alex smola
\newenvironment{ALC@if}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@for}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@whl}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@loop}{\begin{ALC@g}}{\end{ALC@g}}
\newenvironment{ALC@rpt}{\begin{ALC@g}}{\end{ALC@g}}
\renewcommand{\\}{\@centercr}
\newcommand{\IF}[2][default]{\ALC@it\algorithmicif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\SHORTIF}[2]{\ALC@it\algorithmicif\ ##1\
\algorithmicthen\ {##2}}
\newcommand{\ELSE}[1][default]{\end{ALC@if}\ALC@it\algorithmicelse%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\ELSIF}[2][default]%
{\end{ALC@if}\ALC@it\algorithmicelsif\ ##2\ \algorithmicthen%
\ALC@com{##1}\begin{ALC@if}}
\newcommand{\FOR}[2][default]{\ALC@it\algorithmicfor\ ##2\ \algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\FORALL}[2][default]{\ALC@it\algorithmicforall\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@for}}
\newcommand{\SHORTFORALL}[2]{\ALC@it\algorithmicforall\ ##1\ %
\algorithmicdo\ {##2}}
\newcommand{\WHILE}[2][default]{\ALC@it\algorithmicwhile\ ##2\ %
\algorithmicdo%
\ALC@com{##1}\begin{ALC@whl}}
\newcommand{\LOOP}[1][default]{\ALC@it\algorithmicloop%
\ALC@com{##1}\begin{ALC@loop}}
%changed by alex smola
\newcommand{\FUNCTION}[2][default]{\ALC@it\algorithmicfunction\ ##2\ %
\ALC@com{##1}\begin{ALC@func}}
\newcommand{\MAIN}[2][default]{\ALC@it\algorithmicmain\ ##2\ %
\ALC@com{##1}\begin{ALC@main}}
%end changed by alex smola
\newcommand{\REPEAT}[1][default]{\ALC@it\algorithmicrepeat%
\ALC@com{##1}\begin{ALC@rpt}}
\newcommand{\UNTIL}[1]{\end{ALC@rpt}\ALC@it\algorithmicuntil\ ##1}
\ifthenelse{\boolean{ALC@noend}}{
\newcommand{\ENDIF}{\end{ALC@if}}
\newcommand{\ENDFOR}{\end{ALC@for}}
\newcommand{\ENDWHILE}{\end{ALC@whl}}
\newcommand{\ENDLOOP}{\end{ALC@loop}}
\newcommand{\ENDFUNCTION}{\end{ALC@func}}
\newcommand{\ENDMAIN}{\end{ALC@main}}
}{
\newcommand{\ENDIF}{\end{ALC@if}\ALC@it\algorithmicendif}
\newcommand{\ENDFOR}{\end{ALC@for}\ALC@it\algorithmicendfor}
\newcommand{\ENDWHILE}{\end{ALC@whl}\ALC@it\algorithmicendwhile}
\newcommand{\ENDLOOP}{\end{ALC@loop}\ALC@it\algorithmicendloop}
\newcommand{\ENDFUNCTION}{\end{ALC@func}\ALC@it\algorithmicendfunction}
\newcommand{\ENDMAIN}{\end{ALC@main}\ALC@it\algorithmicendmain}
}
\renewcommand{\@toodeep}{}
\begin{list}{\ALC@lno}{\setcounter{ALC@line}{0}\setcounter{ALC@rem}{0}%
\itemsep\z@ \itemindent\z@ \listparindent\z@%
\partopsep\z@ \parskip\z@ \parsep\z@%
\labelsep 0.5em \topsep 0.2em%
\ifthenelse{\equal{#1}{0}}
{\labelwidth 0.5em }
{\labelwidth 1.2em }
\leftmargin\labelwidth \addtolength{\leftmargin}{\labelsep}
\ALC@tlm\labelsep
}
}
{\end{list}}

Binary file not shown.

View File

@@ -0,0 +1,508 @@
% ---------------------------------------------------------------
%
% No guarantee is given that the format corresponds perfectly to
% IEEE 8.5" x 11" Proceedings, but most features should be ok.
%
% ---------------------------------------------------------------
% with LaTeX2e:
% =============
%
% use as
% \documentclass[times,10pt,twocolumn]{article}
% \usepackage[options]{ICCV}
% \usepackage{times}
%
% "options" should be replaced by
% * "review" for submitting a paper for review,
% * "final" for the camera ready, and
% * "rebuttal" for the author rebuttal.
%
% specify references as
% {\small
% \bibliographystyle{ieee}
% \bibliography{...your files...}
% }
% ---------------------------------------------------------------
\NeedsTeXFormat{LaTeX2e}[1999/12/01]
\ProvidesPackage{iccv}[2025 LaTeX class for IEEE ICCV]
\RequirePackage{times} % Integrate Times for here
\RequirePackage{xspace}
\RequirePackage[dvipsnames]{xcolor}
\RequirePackage{graphicx}
\RequirePackage{amsmath}
\RequirePackage{amssymb}
\RequirePackage{booktabs}
\RequirePackage[numbers,sort&compress]{natbib}
\setlength{\bibsep}{1pt plus 1pt minus 1pt}
\RequirePackage{silence} % Suppress unwanted warnings
\hbadness=10000 \vbadness=10000 \vfuzz=30pt \hfuzz=30pt
\WarningFilter{latexfont}{Font shape declaration}
\WarningFilter{latex}{Font shape}
\WarningFilter[rebuttal]{latex}{No \author given}
\RequirePackage{etoolbox}
% Use modern caption package to allow for sub-figures etc.
% Reproduces the original CVPR/ICCV style as closely as possible.
\RequirePackage[format=plain,labelformat=simple,labelsep=period,font=small,compatibility=false]{caption}
\RequirePackage[font=footnotesize,skip=3pt,subrefformat=parens]{subcaption}
% Mode toggles (etoolbox).  The package defaults to the camera-ready
% ("final") configuration; the options below switch modes.
\newtoggle{iccvfinal}       % Camera-ready version
\newtoggle{iccvrebuttal}    % Rebuttal
\newtoggle{iccvpagenumbers} % Force page numbers (in camera ready)
\toggletrue{iccvfinal}
\togglefalse{iccvrebuttal}
\togglefalse{iccvpagenumbers}
% "review": anonymous submission with page numbers; "rebuttal": rebuttal
% layout; "pagenumbers" may be combined with any mode to force printed
% page numbers.  Both review and rebuttal disable final mode.
\DeclareOption{review}{\togglefalse{iccvfinal}\toggletrue{iccvpagenumbers}}
\DeclareOption{rebuttal}{\togglefalse{iccvfinal}\toggletrue{iccvrebuttal}}
\DeclareOption{pagenumbers}{\toggletrue{iccvpagenumbers}}
% Warn (rather than error) on unrecognized options.
% Fix: corrected misspelled warning text "Unkown" -> "Unknown".
\DeclareOption*{\PackageWarning{iccv}{Unknown option `\CurrentOption'}}
\ProcessOptions\relax
% Don't warn about missing author for rebuttal
\iftoggle{iccvrebuttal}{%
\ActivateWarningFilters[rebuttal]
}{}
% Breaking lines for URLs in the bib
\RequirePackage[hyphens]{url}
\Urlmuskip=0mu plus 1mu\relax
% ---------------------------------------------------------------
% Inlined version of the obsolete "everyshi-2001-05-15" package.
\newcommand{\@EveryShipout@Hook}{}
\newcommand{\@EveryShipout@AtNextHook}{}
\newcommand*{\EveryShipout}[1]
{\g@addto@macro\@EveryShipout@Hook{#1}}
\newcommand*{\AtNextShipout}[1]
{\g@addto@macro\@EveryShipout@AtNextHook{#1}}
\newcommand{\@EveryShipout@Shipout}{%
\afterassignment\@EveryShipout@Test
\global\setbox\@cclv= %
}
\newcommand{\@EveryShipout@Test}{%
\ifvoid\@cclv\relax
\aftergroup\@EveryShipout@Output
\else
\@EveryShipout@Output
\fi%
}
\newcommand{\@EveryShipout@Output}{%
\@EveryShipout@Hook%
\@EveryShipout@AtNextHook%
\gdef\@EveryShipout@AtNextHook{}%
\@EveryShipout@Org@Shipout\box\@cclv%
}
\newcommand{\@EveryShipout@Org@Shipout}{}
\newcommand*{\@EveryShipout@Init}{%
\message{ABD: EveryShipout initializing macros}%
\let\@EveryShipout@Org@Shipout\shipout
\let\shipout\@EveryShipout@Shipout
}
\AtBeginDocument{\@EveryShipout@Init}
% ---------------------------------------------------------------
% ---------------------------------------------------------------
% Inlined simplified version of the "eso-pic" package.
\newcommand\LenToUnit[1]{#1\@gobble}
\newcommand\AtPageUpperLeft[1]{%
\begingroup
\@tempdima=0pt\relax\@tempdimb=\ESO@yoffsetI\relax
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
\endgroup
}
\newcommand\AtPageLowerLeft[1]{\AtPageUpperLeft{%
\put(0,\LenToUnit{-\paperheight}){#1}}}
\newcommand\AtPageCenter[1]{\AtPageUpperLeft{%
\put(\LenToUnit{.5\paperwidth},\LenToUnit{-.5\paperheight}){#1}}%
}
\newcommand\AtTextUpperLeft[1]{%
\begingroup
\setlength\@tempdima{1in}%
\ifodd\c@page%
\advance\@tempdima\oddsidemargin%
\else%
\advance\@tempdima\evensidemargin%
\fi%
\@tempdimb=\ESO@yoffsetI\relax\advance\@tempdimb-1in\relax%
\advance\@tempdimb-\topmargin%
\advance\@tempdimb-\headheight\advance\@tempdimb-\headsep%
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
\endgroup
}
\newcommand\AtTextLowerLeft[1]{\AtTextUpperLeft{%
\put(0,\LenToUnit{-\textheight}){#1}}}
\newcommand\AtTextCenter[1]{\AtTextUpperLeft{%
\put(\LenToUnit{.5\textwidth},\LenToUnit{-.5\textheight}){#1}}}
\newcommand{\ESO@HookI}{} \newcommand{\ESO@HookII}{}
\newcommand{\ESO@HookIII}{}
\newcommand{\AddToShipoutPicture}{%
\@ifstar{\g@addto@macro\ESO@HookII}{\g@addto@macro\ESO@HookI}}
\newcommand{\ClearShipoutPicture}{\global\let\ESO@HookI\@empty}
\newcommand\ESO@isMEMOIR[1]{}
\@ifclassloaded{memoir}{\renewcommand\ESO@isMEMOIR[1]{#1}}{}
\newcommand{\@ShipoutPicture}{%
\bgroup
\@tempswafalse%
\ifx\ESO@HookI\@empty\else\@tempswatrue\fi%
\ifx\ESO@HookII\@empty\else\@tempswatrue\fi%
\ifx\ESO@HookIII\@empty\else\@tempswatrue\fi%
\if@tempswa%
\@tempdima=1in\@tempdimb=-\@tempdima%
\advance\@tempdimb\ESO@yoffsetI%
\ESO@isMEMOIR{%
\advance\@tempdima\trimedge%
\advance\@tempdima\paperwidth%
\advance\@tempdima-\stockwidth%
\if@twoside\ifodd\c@page\else%
\advance\@tempdima-2\trimedge%
\advance\@tempdima-\paperwidth%
\advance\@tempdima\stockwidth%
\fi\fi%
\advance\@tempdimb\trimtop}%
\unitlength=1pt%
\global\setbox\@cclv\vbox{%
\vbox{\let\protect\relax
\pictur@(0,0)(\strip@pt\@tempdima,\strip@pt\@tempdimb)%
\ESO@HookIII\ESO@HookI\ESO@HookII%
\global\let\ESO@HookII\@empty%
\endpicture}%
\nointerlineskip%
\box\@cclv}%
\fi
\egroup
}
\EveryShipout{\@ShipoutPicture}
\RequirePackage{keyval}
\newif\ifESO@dvips\ESO@dvipsfalse
\newif\ifESO@texcoord\ESO@texcoordfalse
\AtBeginDocument{%
\IfFileExists{color.sty}
{%
\RequirePackage{color}
\let\ESO@color=\color\let\ESO@colorbox=\colorbox
\let\ESO@fcolorbox=\fcolorbox
}{}
\@ifundefined{Gin@driver}{}%
{%
\ifx\Gin@driver\@empty\else%
\filename@parse{\Gin@driver}\def\reserved@a{dvips}%
\ifx\filename@base\reserved@a\ESO@dvipstrue\fi%
\fi
}%
\ifx\pdfoutput\undefined\else
\ifx\pdfoutput\relax\else
\ifcase\pdfoutput\else
\ESO@dvipsfalse%
\fi
\fi
\fi
}
\ifESO@texcoord
\def\ESO@yoffsetI{0pt}\def\ESO@yoffsetII{-\paperheight}
\else
\def\ESO@yoffsetI{\paperheight}\def\ESO@yoffsetII{0pt}
\fi
% ---------------------------------------------------------------
\typeout{ICCV 8.5 x 11-Inch Proceedings Style `iccv.sty'.}
% ten point helvetica bold required for captions
% eleven point times bold required for second-order headings
% in some sites the name of the fonts may differ,
% change the name here:
\font\iccvtenhv = phvb at 8pt % *** IF THIS FAILS, SEE iccv.sty ***
\font\elvbf = ptmb scaled 1100
\font\tenbf = ptmb scaled 1000
% If the above lines give an error message, try to comment them and
% uncomment these:
%\font\iccvtenhv = phvb7t at 8pt
%\font\elvbf = ptmb7t scaled 1100
%\font\tenbf = ptmb7t scaled 1000
% set dimensions of columns, gap between columns, and paragraph indent
\setlength{\textheight}{8.875in}
\setlength{\textwidth}{6.875in}
\setlength{\columnsep}{0.3125in}
\setlength{\topmargin}{0in}
\setlength{\headheight}{0in}
\setlength{\headsep}{0in}
\setlength{\parindent}{1pc}
\setlength{\oddsidemargin}{-0.1875in}
\setlength{\evensidemargin}{-0.1875in}
% Suppress page numbers when the appropriate option is given
\iftoggle{iccvpagenumbers}{}{%
\pagestyle{empty}
}
\AtBeginDocument{%
% Print an error if document class other than article is used
\@ifclassloaded{article}{}{%
\PackageError{iccv}{Package only meant to be used with document class `article'}{Change document class to `article'.}
}
% Print a warning if incorrect options for article are specified
\@ifclasswith{article}{10pt}{}{%
\PackageWarningNoLine{iccv}{Incorrect font size specified - ICCV requires 10-point fonts. Please load document class `article' with `10pt' option}
}
\@ifclasswith{article}{twocolumn}{}{%
\PackageWarningNoLine{iccv}{Single column document - ICCV requires papers to have two-column layout. Please load document class `article' with `twocolumn' option}
}
\@ifclasswith{article}{letterpaper}{}{%
\PackageWarningNoLine{iccv}{Incorrect paper size - ICCV uses paper size `letter'. Please load document class `article' with `letterpaper' option}
}
% Print a warning if hyperref is not loaded and/or if the pagebackref option is missing
\iftoggle{iccvfinal}{%
\@ifpackageloaded{hyperref}{}{%
\PackageWarningNoLine{iccv}{Package `hyperref' is not loaded, but highly recommended for camera-ready version}
}
}{%
\@ifpackageloaded{hyperref}{
\@ifpackagewith{hyperref}{pagebackref}{}{
\PackageWarningNoLine{iccv}{Package `hyperref' is not loaded with option `pagebackref', which is strongly recommended for review version}
}
}{%
\PackageWarningNoLine{iccv}{Package `hyperref' is not loaded, but strongly recommended for review version}
}
}
}
% Build the title block.  Layout depends on the active mode:
% - rebuttal: smaller (\large) title, negative vskips to tighten space,
%   and no author/paper-ID line at all;
% - final: \Large title followed by the real \@author;
% - review: \Large title followed by "Anonymous <conf> submission" and
%   the paper ID (\confName and \paperID must be defined by the user).
\def\@maketitle{
\newpage
\null
\iftoggle{iccvrebuttal}{\vspace*{-.3in}}{\vskip .375in}
\begin{center}
% smaller title font only for rebuttal
\iftoggle{iccvrebuttal}{{\large \bf \@title \par}}{{\Large \bf \@title \par}}
% additional two empty lines at the end of the title
% NOTE: the trailing "{" below opens the author group; it is closed by the
% matching "}" after \par further down.
\iftoggle{iccvrebuttal}{\vspace*{-22pt}}{\vspace*{24pt}}{
\large
\lineskip .5em
\begin{tabular}[t]{c}
\iftoggle{iccvfinal}{
\@author
}{
% review mode: replace the author block by an anonymized submission line
\iftoggle{iccvrebuttal}{}{
Anonymous \confName~submission\\
\vspace*{1pt}\\
Paper ID \paperID
}
}
\end{tabular}
\par
}
% additional small space at the end of the author name
\vskip .5em
% additional empty line at the end of the title block
\vspace*{12pt}
\end{center}
}
% Abstract environment: centered bold "Abstract" heading, then the body
% set in italics.  Overrides the article-class definition.
\def\abstract{%
% Suppress page numbers when the appropriate option is given
\iftoggle{iccvpagenumbers}{}{%
\thispagestyle{empty}
}
\centerline{\large\bf Abstract}%
\vspace*{12pt}\noindent%
\it\ignorespaces%
}
\def\endabstract{%
% additional empty line at the end of the abstract
\vspace*{12pt}
}
% Store an optional affiliation; empty by default.  NOTE(review): nothing
% visible in this file reads \@affiliation — presumably kept for
% compatibility with author front-matter macros; confirm before removing.
\def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{}
% correct heading spacing and type
% Sectioning commands with ICCV spacing and fonts.  Each level defines:
% - \iccv<level>: the underlying \@startsection with the required
%   before-skip (negative => suppress indentation of following text),
%   after-skip, and heading font;
% - an unnumbered (starred) and a numbered variant; the numbered variant
%   injects "\hskip -1em.~" so the number is followed by a period
%   (e.g. "1. Introduction"), hidden from PDF bookmarks via
%   \texorpdfstring;
% - a redefinition of the standard command dispatching on *.
\def\iccvsection{\@startsection {section}{1}{\z@}
{-10pt plus -2pt minus -2pt}{7pt} {\large\bf}}
\def\iccvssect#1{\iccvsection*{#1}}
\def\iccvsect#1{\iccvsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\section{\@ifstar\iccvssect\iccvsect}
% Second-order headings use the 11pt bold Times font \elvbf defined above.
\def\iccvsubsection{\@startsection {subsection}{2}{\z@}
{-8pt plus -2pt minus -2pt}{5pt} {\elvbf}}
\def\iccvssubsect#1{\iccvsubsection*{#1}}
\def\iccvsubsect#1{\iccvsubsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\subsection{\@ifstar\iccvssubsect\iccvsubsect}
% Third-order headings use the 10pt bold Times font \tenbf defined above.
\def\iccvsubsubsection{\@startsection {subsubsection}{3}{\z@}
{-6pt plus -2pt minus -2pt}{3pt} {\tenbf}}
\def\iccvssubsubsect#1{\iccvsubsubsection*{#1}}
\def\iccvsubsubsect#1{\iccvsubsubsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\subsubsection{\@ifstar\iccvssubsubsect\iccvsubsubsect}
%% --------- Page background marks: Ruler and confidentiality (only for review and rebuttal)
\iftoggle{iccvfinal}{
% In review and rebuttal mode, we use the "lineno" package for numbering lines.
% When switching to a different mode, the "\@LN" macro may remain in cached .aux files,
% leading to build errors (https://github.com/cvpr-org/author-kit/issues/49).
% Defining the macro as empty fixes that (https://tex.stackexchange.com/a/125779).
\makeatletter
\providecommand{\@LN}[2]{}
\makeatother
}{
% ----- define vruler
\makeatletter
\newbox\iccvrulerbox
\newcount\iccvrulercount
\newdimen\iccvruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
\makeatother
% ----- end of vruler
%% Define linenumber setup
\RequirePackage[switch,mathlines]{lineno}
% Line numbers in ICCV blue using font from \iccvtenhv
\renewcommand\linenumberfont{\iccvtenhv\color[rgb]{.5,.5,1}}
\renewcommand\thelinenumber{\fillzeros[3]{\arabic{linenumber}}}
\setlength{\linenumbersep}{.75cm}
% Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line.
% Patch amsmath commands so that the previous line and the equation itself
% are numbered. Bug: multiline has an extra line number.
% https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align
\RequirePackage{etoolbox} %% <- for \pretocmd, \apptocmd and \patchcmd
% \linenomathpatch{env}: wrap the display-math environment named #1 (and
% its starred form #1*) in \linenomath ... \endlinenomath so that lineno
% numbers the equation and the preceding line.  Uses etoolbox's
% \pretocmd/\apptocmd; the empty {}{} pairs silently ignore failures for
% environments that do not exist.  Trailing % signs prevent spurious
% spaces in the macro body.
\newcommand*\linenomathpatch[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomath}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomath}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
% Same as \linenomathpatch but opens with \linenomathAMS, the variant
% selected below depending on lineno's mathlines option (used for
% amsmath environments such as align/gather/multline).
\newcommand*\linenomathpatchAMS[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomathAMS}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomathAMS}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
%% Definition of \linenomathAMS depends on whether the mathlines option is provided
\expandafter\ifx\linenomath\linenomathWithnumbers
\let\linenomathAMS\linenomathWithnumbers
%% The following line gets rid of an extra line numbers at the bottom:
\patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{}
\else
\let\linenomathAMS\linenomathNonumbers
\fi
% Add the numbers
\linenumbers
\AtBeginDocument{%
\linenomathpatch{equation}%
\linenomathpatchAMS{gather}%
\linenomathpatchAMS{multline}%
\linenomathpatchAMS{align}%
\linenomathpatchAMS{alignat}%
\linenomathpatchAMS{flalign}%
}
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\iccvruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iccvrulerbox}}
\AddToShipoutPicture{%
\color[rgb]{.5,.5,1}
\def\pid{\parbox{1in}{\begin{center}\bf\sf{\small \confName}\\\small \#\paperID\end{center}}}
\AtTextUpperLeft{%paperID in corners
\put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid}
\put(\LenToUnit{\textwidth-12pt},\LenToUnit{45pt}){\pid}
}
\AtTextUpperLeft{%confidential
\put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\iccvtenhv
\confName~\confYear~Submission \#\paperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}}
}
}
} % end of not iccvfinal
%%% Make figure placement a little more predictable.
% We trust the user to move figures if this results
% in ugliness.
% Minimize bad page breaks at figures
\renewcommand{\textfraction}{0.01}
\renewcommand{\floatpagefraction}{0.99}
\renewcommand{\topfraction}{0.99}
\renewcommand{\bottomfraction}{0.99}
\renewcommand{\dblfloatpagefraction}{0.99}
\renewcommand{\dbltopfraction}{0.99}
\setcounter{totalnumber}{99}
\setcounter{topnumber}{99}
\setcounter{bottomnumber}{99}
% Add a period to the end of an abbreviation unless there's one
% already, then \xspace.
\makeatletter
% \onedot appends a period to an abbreviation unless the next token is
% already a period, then inserts \xspace.  Robust so it survives moving
% arguments (captions, section titles).
\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot}
\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace}
% Common Latin abbreviations, one definition per line.
\def\eg{\emph{e.g}\onedot}
\def\Eg{\emph{E.g}\onedot}
\def\ie{\emph{i.e}\onedot}
\def\Ie{\emph{I.e}\onedot}
\def\cf{\emph{cf}\onedot}
\def\Cf{\emph{Cf}\onedot}
\def\etc{\emph{etc}\onedot}
\def\vs{\emph{vs}\onedot}
\def\etal{\emph{et al}\onedot}
% Upright (non-emphasized) shorthands.
\def\wrt{w.r.t\onedot}
\def\dof{d.o.f\onedot}
\def\iid{i.i.d\onedot}
\def\wolog{w.l.o.g\onedot}
\makeatother
% ---------------------------------------------------------------
%% Redefine \title so the argument is also saved in \thetitle (used by
%% \maketitlesupplementary below), and provide \maketitlesupplementary.
\let\titleold\title
% Save the title globally with \gdef instead of \newcommand: with
% \newcommand a second \title call would abort with an "already defined"
% error, and a \title issued inside a group would not survive it.
\renewcommand{\title}[1]{\titleold{#1}\gdef\thetitle{#1}}
% Start the supplementary material: new page, then a full-width
% (two-column-spanning) centered block reading
% "<title> / Supplementary Material".
\def\maketitlesupplementary
{
\newpage
\twocolumn[
\centering
\Large
\textbf{\thetitle}\\
\vspace{0.5em}Supplementary Material \\
\vspace{1.0em}
] %< twocolumn
}
% ---------------------------------------------------------------
%% Support for easy cross-referencing (e.g. \cref{sec:intro}
% configured with \AtEndPreamble as it needs to be called after hyperref
% Configure cleveref abbreviations (Sec./Tab. for \cref, full words for
% \Cref).  Deferred with etoolbox's \AtEndPreamble because cleveref must
% be loaded after hyperref, which users load in their own preamble.
\AtEndPreamble{
\usepackage[capitalize]{cleveref}
\crefname{section}{Sec.}{Secs.}
\Crefname{section}{Section}{Sections}
\Crefname{table}{Table}{Tables}
\crefname{table}{Tab.}{Tabs.}
}
% ---------------------------------------------------------------
%% More compact itemize/enumerate lists (e.g. for listing contributions):
%% no inter-item spacing, flush left margin, no space above the list.
\RequirePackage[shortlabels,inline]{enumitem}
\setlist[itemize]{noitemsep,leftmargin=*,topsep=0em}
\setlist[enumerate]{noitemsep,leftmargin=*,topsep=0em}

View File

@@ -1,12 +1,5 @@
%% File: `icml2024.bst'
%% A modification of `plainnl.bst' for use with natbib package
%%
%% Copyright 2010 Hal Daum\'e III
%% Modified by J. Fürnkranz
%% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
%% - Changed References to last name first and abbreviated first names.
%% Modified by Iain Murray 2018 (who suggests adopting a standard .bst in future...)
%% - Made it actually use abbreviated first names
%% File: `abbrvnat.bst'
%% A modification of `abbrv.bst' for use with natbib package
%%
%% Copyright 1993-2007 Patrick W Daly
%% Max-Planck-Institut f\"ur Sonnensystemforschung
@@ -21,7 +14,7 @@
%% version 1 of the License, or any later version.
%%
% Version and source file information:
% \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
% \ProvidesFile{natbst.mbs}[2007/11/26 1.93 (PWD)]
%
% BibTeX `plainnat' family
% version 0.99b for BibTeX versions 0.99a or later,
@@ -226,7 +219,8 @@ FUNCTION {format.names}
s num.names$ 'numnames :=
numnames 'namesleft :=
{ namesleft #0 > }
{ s nameptr "{vv~}{ll}{, jj}{, f.}" format.name$ 't :=
% Formerly { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
{ s nameptr "{ff }{vv }{ll}{, jj}" format.name$ 't :=
nameptr #1 >
{ namesleft #1 >
{ ", " * t * }
@@ -268,8 +262,8 @@ FUNCTION {format.editors}
{ "" }
{ editor format.names
editor num.names$ #1 >
{ " (eds.)" * }
{ " (ed.)" * }
{ ", editors" * }
{ ", editor" * }
if$
}
if$
@@ -278,28 +272,32 @@ FUNCTION {format.editors}
FUNCTION {format.isbn}
{ isbn empty$
{ "" }
{ new.block "ISBN " isbn * }
% { new.block "ISBN " isbn * }
{ "" }
if$
}
FUNCTION {format.issn}
{ issn empty$
{ "" }
{ new.block "ISSN " issn * }
% { new.block "ISSN " issn * }
{ "" }
if$
}
FUNCTION {format.url}
{ url empty$
{ "" }
{ new.block "URL \url{" url * "}" * }
% { new.block "URL \url{" url * "}" * }
{ "" }
if$
}
FUNCTION {format.doi}
{ doi empty$
{ "" }
{ new.block "\doi{" doi * "}" * }
% { new.block "\doi{" doi * "}" * }
{ "" }
if$
}
@@ -427,12 +425,13 @@ FUNCTION {format.date}
pop$ "" }
'skip$
if$
month empty$
'skip$
{ month
" " * swap$ *
}
if$
%% CR: Leave out months.
% month empty$
% 'skip$
% { month
% " " * swap$ *
% }
% if$
extra.label *
}
@@ -458,20 +457,24 @@ FUNCTION {either.or.check}
FUNCTION {format.bvolume}
{ volume empty$
{ "" }
{ "volume" volume tie.or.space.connect
series empty$
'skip$
{ " of " * series emphasize * }
if$
"volume and number" number either.or.check
}
%% CR: Don't show "volume 1234 of LNCS" etc.
% { "volume" volume tie.or.space.connect
% series empty$
% 'skip$
% { " of " * series emphasize * }
% if$
% "volume and number" number either.or.check
% }
{ "" }
if$
}
FUNCTION {format.number.series}
{ volume empty$
{ number empty$
{ series field.or.null }
%% CR: Leave out series information.
% { series field.or.null }
{ "" }
{ output.state mid.sentence =
{ "number" }
{ "Number" }
@@ -525,8 +528,8 @@ FUNCTION {format.pages}
{ pages empty$
{ "" }
{ pages multi.page.check
{ "pp.\ " pages n.dashify tie.or.space.connect }
{ "pp.\ " pages tie.or.space.connect }
{ "pages" pages n.dashify tie.or.space.connect }
{ "page" pages tie.or.space.connect }
if$
}
if$
@@ -600,11 +603,13 @@ FUNCTION {format.chapter.pages}
FUNCTION {format.in.ed.booktitle}
{ booktitle empty$
{ "" }
{ editor empty$
{ "In " booktitle emphasize * }
{ "In " format.editors * ", " * booktitle emphasize * }
if$
}
%% CR: Leave out editors even if the information is available.
% { editor empty$
% { "In " booktitle emphasize * }
% { "In " format.editors * ", " * booktitle emphasize * }
% if$
% }
{ "In " booktitle emphasize * }
if$
}
@@ -1019,13 +1024,13 @@ FUNCTION {unpublished}
FUNCTION {default.type} { misc }
MACRO {jan} {"January"}
MACRO {jan} {"Jan."}
MACRO {feb} {"February"}
MACRO {feb} {"Feb."}
MACRO {mar} {"March"}
MACRO {mar} {"Mar."}
MACRO {apr} {"April"}
MACRO {apr} {"Apr."}
MACRO {may} {"May"}
@@ -1033,58 +1038,58 @@ MACRO {jun} {"June"}
MACRO {jul} {"July"}
MACRO {aug} {"August"}
MACRO {aug} {"Aug."}
MACRO {sep} {"September"}
MACRO {sep} {"Sept."}
MACRO {oct} {"October"}
MACRO {oct} {"Oct."}
MACRO {nov} {"November"}
MACRO {nov} {"Nov."}
MACRO {dec} {"December"}
MACRO {dec} {"Dec."}
MACRO {acmcs} {"ACM Computing Surveys"}
MACRO {acmcs} {"ACM Comput. Surv."}
MACRO {acta} {"Acta Informatica"}
MACRO {acta} {"Acta Inf."}
MACRO {cacm} {"Communications of the ACM"}
MACRO {cacm} {"Commun. ACM"}
MACRO {ibmjrd} {"IBM Journal of Research and Development"}
MACRO {ibmjrd} {"IBM J. Res. Dev."}
MACRO {ibmsj} {"IBM Systems Journal"}
MACRO {ibmsj} {"IBM Syst.~J."}
MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
MACRO {ieeetc} {"IEEE Transactions on Computers"}
MACRO {ieeetc} {"IEEE Trans. Comput."}
MACRO {ieeetcad}
{"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
{"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
MACRO {ipl} {"Information Processing Letters"}
MACRO {ipl} {"Inf. Process. Lett."}
MACRO {jacm} {"Journal of the ACM"}
MACRO {jacm} {"J.~ACM"}
MACRO {jcss} {"Journal of Computer and System Sciences"}
MACRO {jcss} {"J.~Comput. Syst. Sci."}
MACRO {scp} {"Science of Computer Programming"}
MACRO {scp} {"Sci. Comput. Programming"}
MACRO {sicomp} {"SIAM Journal on Computing"}
MACRO {sicomp} {"SIAM J. Comput."}
MACRO {tocs} {"ACM Transactions on Computer Systems"}
MACRO {tocs} {"ACM Trans. Comput. Syst."}
MACRO {tods} {"ACM Transactions on Database Systems"}
MACRO {tods} {"ACM Trans. Database Syst."}
MACRO {tog} {"ACM Transactions on Graphics"}
MACRO {tog} {"ACM Trans. Gr."}
MACRO {toms} {"ACM Transactions on Mathematical Software"}
MACRO {toms} {"ACM Trans. Math. Softw."}
MACRO {toois} {"ACM Transactions on Office Information Systems"}
MACRO {toois} {"ACM Trans. Office Inf. Syst."}
MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
MACRO {tcs} {"Theoretical Computer Science"}
MACRO {tcs} {"Theoretical Comput. Sci."}
READ
@@ -1115,7 +1120,7 @@ FUNCTION {format.lab.names}
'skip$
{ s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
{ " et~al." * }
{ " \& " * s #2 "{vv~}{ll}" format.name$ * }
{ " and " * s #2 "{vv~}{ll}" format.name$ * }
if$
}
if$

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”KnÛ@ †÷<O <4F>CÛ¶€×m=@`»
¢m<>äúÅ<;]t!p¿æûIŽôðéøòãñøåð?~…‡Ëìñ0>ã Ÿ<>ðÈxX Ÿ{lÏ3ÐŒß lÛü}ÄoøiIQ
sEZ„5<"#-…™¼˜¹3ðßoLWÝ”jtK3´E8-Q)ƒ$KinÄKÕ¤6®ºí4þ=!세BZG¤
àÆŸv:9ÝŠd7ïÕ\Ò¹j¶¬ŠTSâ<53>¾£„<C2A3>r@ßw¢S7ÓÔ «j”H.`EÅk˜ iDq<44>ôÛJØ)Gm×ëTÓz­gè«<C3A8><C2AB>´(y¨°¸ô¾U +®µÿ¹=<3D>_¯sêço¸
ß“tMâ4òh
L)ÎÁQÁ“‹(ë,ñ%ì”»^jÛ¼ÙŒ<C399>Ûå…1ÕžˆfaeCZ8(…”yVùŽvÊQÎõ:Ý÷Û,÷豄IdO8]ÙÄCz­Rå$Ù.ØM!섺^§: o.6[oL¬™Ú<E284A2>Z]%U­}¶Å­uòo+a§œàõRôËÏæÍËfþg÷¥
endstream
endobj
12 0 obj
437
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094736+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000987 00000 n
0000000882 00000 n
0000000903 00000 n
0000000924 00000 n
0000000945 00000 n
0000000966 00000 n
0000000065 00000 n
0000000330 00000 n
0000000862 00000 n
0000000208 00000 n
0000000842 00000 n
0000001047 00000 n

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MnÜ0 …÷<<4F>"ÅŸm[`Öm=@03mh
´×/ôãÌĘ)º0dIÏúÞ#m?|:þþñxürø€¿ÂÃeöø Ÿ€ñŒ„OHøÈxX ŸûØ®g 9~8ÁOl²móõˆßðiÉt®šJÚ&ÌAµÒbÄäV)ñõŒÜ‘øß¦'<27>nI5º¡9´E8-Áiä¡Âí´"^Ø´V¤¥õ ®j<C2AE>W ;å@®<>Ö!i¹9˜†:;³ºSToI„j1/
-…=S(ú¶<76>|߇ÎÜ<LK<4C>-nVT¼6ž—ô´"¥M$Š$Kxø%씣¶ëuд^ë9ôÕÆFZtR\<5C>ÂÕŠkíÀnÏã×ëLýü
·á{H­IU e6ëæÄÅ¢QR-U4d„¼£„<C2A3>rb×KmÛMoF6c³Ã¥*K âN¤Ü<E28093>jÎ%H¶ßTÂN9ʹ^Ç}ÿ©Írº„Id´·9]ÙÄCZÍ*UNžðBØ 't½Ž: o.6[3|±¬Ì½NY¬šyO%3BͶð7•°SNðz)úåWóæe3 `™÷‹
endstream
endobj
12 0 obj
441
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094747+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000991 00000 n
0000000886 00000 n
0000000907 00000 n
0000000928 00000 n
0000000949 00000 n
0000000970 00000 n
0000000065 00000 n
0000000330 00000 n
0000000866 00000 n
0000000208 00000 n
0000000846 00000 n
0000001051 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref

View File

@@ -0,0 +1,72 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”KnÛ@D÷}Š>Ý=Óßm@ë$À<>¦<>8@rý`>”dB² †Ÿâ¼ªj<C2AA>ŸŽ¿<¿>àǯðp½züŒOÀxFÂ'$üƒŒd<
,‚„Ï}mÇ3Ð\¿œà'6Ùöðõˆßðiq
õjEi!MR)”‰´°RLª‰¯gäÎ<À¿1]ItS(Ñ-݄ͥИ§ Ò’ž’æi±`uekÞUÂN9<4E>+¤uHÚ@n¦¡ÎŽÈ4­éÍ} óªá-J«¨jö;JØ)ôí,:us1M<31>ä™Y*“µ"3ÝØ*q ç¢T˜E}&¿¯„<C2AF>r´»ÞFMëmÏ¥ßml¤EÈC*×^uaµ°â¢øÏÇsûõ6SßÃmøQ1³8i+. Õµö<C2B5>Êé¥rT™ßWÂN9±ëµÛvÒÁÍX7`V¤º†$·ªZZ<5A>,Yñ}!ì„£Ìõ6ìÛ<C3AC>m½MØ-+ç˜[ ‰$i<>zu*R
ëeÂ÷”°SNìzu`/>6c£}•\¢O-<¸'d«NVÌb¶_ ;å¯×Ò¯¿›‹—Íœà3ü”øM
endstream
endobj
12 0 obj
443
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094737+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000993 00000 n
0000000888 00000 n
0000000909 00000 n
0000000930 00000 n
0000000951 00000 n
0000000972 00000 n
0000000065 00000 n
0000000330 00000 n
0000000868 00000 n
0000000208 00000 n
0000000848 00000 n
0000001053 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1210
%%EOF

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MnÛ@ …÷<<4F>C¶m¯Û.z€ÀvDšÉõ‹ù‘ívÑ…0éy¾÷!?|9¼ýz<|ÛÂÏßáá²{üŒOÀxBÂ'$|GÆ=2ž€<C5BE>pVEÂç¾¶ëh®?Žðl}ùzÀø´Ë4 #ªŠ´ð4c³DÚUcJŽpÇ×rgîá¿1]itS¨Ñ-Í¥=„#Ð.H²pµ°æ†-k%viDZšFáRš<52>»JØ(r<>´IÈÕÁ4ÔÙ™Õ<E284A2>¢z<C2A2>bBµ˜—†Âž)}[á@~œDg®¦¥Î7+*^ÏKzZÒ6E%<üŽ6ÊÑír4­w=—þ´±vJ*,½èQjq­øÏ×óøå:S?Å­øR-"­ztëÎÂáEÚüäÒhyG åÄ.—nÛM¯FVcs¢¦TK âêÎE9ÚF¥Tf·uÄ7•°QŽ:—ë¸?¶Y÷ K˜DFm绲‰‡´€f•*' OøM!l„º\Gг‹ÕÖ _,+<2B>¯É)U3ç>ß’¡vS å/—Ò/6g/«98ÂWø ø?
endstream
endobj
12 0 obj
437
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094746+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000987 00000 n
0000000882 00000 n
0000000903 00000 n
0000000924 00000 n
0000000945 00000 n
0000000966 00000 n
0000000065 00000 n
0000000330 00000 n
0000000862 00000 n
0000000208 00000 n
0000000842 00000 n
0000001047 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1204
%%EOF

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MnÜ0 …÷<<4F>ÅŸmS`Öm=@03ih´×/ô7ž CüÌï=ÊòÝ×ãß_Çï‡/xÿî¶ÙÃ+0>ã Ÿ<>ð2<1E>ñ „„+°>·±^Ï@c|¬²ùðåˆ?ñ7Ò¡¹XÒâa…(”ÒRÌÈeÍørFnÌ<>áJ¼™Gñfi uN@‹¹§\RW7Lìž=ê¤<C3AA>HxÒˆjà]%씹Bhƒ„vät0 5v„fW¢Ò¢¸…*«Fe+S°»Yg¿£„<C2A3>²CßîE£NÃT£×R&̦b¤HKÞ6èÇÞËõ:XhëíÚj%ÕRd.™³e¤%qQ×dRçÃÇ£üz<C3BC> ÕŸ¸‰ïR”dL¥:/B!Åj;s°¨xâ4ÝÒÁN7<4E>ëÖÅzÓ ÓÄ45v3±&­!L33»W\+™òÜÍJØ){+×ë¨oÖhu§sIš³R= ¡’ ×<>mÑœI%Ñ…~[ ;åÀ®×a;öâcñ“‹I Å¥·
ý%òÿ¦^·¶o¿—i
þ{ô/
endstream
endobj
12 0 obj
430
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094746+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000980 00000 n
0000000875 00000 n
0000000896 00000 n
0000000917 00000 n
0000000938 00000 n
0000000959 00000 n
0000000065 00000 n
0000000330 00000 n
0000000855 00000 n
0000000208 00000 n
0000000835 00000 n
0000001040 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”KnÛ@D÷}Š>Ý=ÝÓŸm@ë$À<>œ¦<>8@rý`>´$B
² †Ÿ¾ªš!>ÿx<~9|À<>_áá|õø Ÿ<>ñ Ÿ‘ð2<1E>ñ  W`U$|éc;^€æøà?±É¶‡oGü†¯HKr)¦•Å<E280A2>1¥$¤¥pZ¨¾=!wæþ{Æt¥ÑMi F·4‡vN@K$G¸K­ÍMUõ$µvaÄäV)ÿ®®…¸BZG¤
àÆŸv:9=‰‚\i©á©VG*«VÂHú޽^‰NÝ\LSƒž\DYÉ M(™9§¢¥p<C2A5>ôÛJØ)G·ëeÔ´ÞõúÝÆFZ”<TXæºU øÏÇóõëe¦þþ
·á{Hló[®$ ªämý4S o a'œÐõÜl;éØÍÆfkt¬¢!ÅkkN2…<k4~ÑŒbi³ãÛJØ)G™ëeØëOm=vvgfÕÊö-T)ŠÍ¾£„<C2A3>rb×˰ûîc36âGÍ­S“m³´€uî¢2ÓßÂN8±ë¹ôó¯æÝÉf
þ½)÷±
endstream
endobj
12 0 obj
438
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094738+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000988 00000 n
0000000883 00000 n
0000000904 00000 n
0000000925 00000 n
0000000946 00000 n
0000000967 00000 n
0000000065 00000 n
0000000330 00000 n
0000000863 00000 n
0000000208 00000 n
0000000843 00000 n
0000001048 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
xœµ”MnÛ@ …÷<O <4F>C‡Ü6¼NºèÛmh4׿Ïv”ºÈ¦ aôó¤ï½'J7_÷~>ìïw_ðöÜœ<C39C>^€ñ<18>ˆ„¯È¸CÆ#®ÀªHøÔÖº=<01>õÀ~c•ÍÏ{üŽ¿<C5BD>ÏILë<4C>Fh<14>´˜$ŒÏGäÆÜÁ§ï®Ô›)uToÆROÂh)ÙÜRÑÌH‡YÎ,É<>uåΪ®*a£ìÈÂ$¬#§ƒa¨±#B¨Dö\£Ïj…“W ‰¨zéì+JØ(;ôý»hÔéb˜êæ^ÚGX(µH1)…M:ýŠ6ÊÞîz5¬µ=v¶²k»T\…¥Hðì¶ÿyy<~½ÌÔž?qßCrŠÈDÜ$f“šC”Õ•Tm„ü»<36>]ÏÝÖ<C39D>žF¦±ñŽ?7àW•ðq°ÃzË3îûÏmÔýÿ¦»á/ÂvìÉÇ46G\<5C>æ6¸.ÂÊ•P<>§ÿ¨ƒ<C2A8>n@×såçŸÍÉÇ4¸ƒ7/,øE
endstream
endobj
12 0 obj
401
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094745+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000951 00000 n
0000000846 00000 n
0000000867 00000 n
0000000888 00000 n
0000000909 00000 n
0000000930 00000 n
0000000065 00000 n
0000000330 00000 n
0000000826 00000 n
0000000208 00000 n
0000000806 00000 n
0000001011 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1168
%%EOF

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,71 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MnÜ0 …÷<<4F>"ÅŸm[`Öm=@0“6ˆ4Ú륙‰1StaÈžõ=>Ñ~øtüýãñøåð?~…‡ËÓã/`|Æ'$|FÂ?Èx@Æ' $\<5C>Uð¥<C3B0>ízãw€üÄ&oGü†¯HK˜Y‰R<E280B0>"B!*H »YQñZðí ¹3ðßo WÝ”jtKch“pjÛI-©ÌnHK
iDqq¤¥Š¦¥e3pW ;å†\!­CÒ6ät0 uvf©Ê"ÔÜ;qu¶@Z$­JzÝØw”°SnÐ÷gÑ©ÓÅ05èN¤–né¤m­T˜µú„ßÂN¸e»^šÖ³CŸmd¤EÉC…Å[=…«…×Úyÿ\Û¯×õý'nâ{‰nêT#J++‰HBµ¬ÅEª<45>€ï(a§Øõl»éàidÛº+#¢jkV¡ªéµ1
Í>Ýu[ ;åçz]îû<C3AE>mĽÑIrK¯Å–µ÷L+«i.“~[ ;åÀ®×Ånس<C398>il6¸<65>é”Ū™s?á’¡fç¿¥„<C2A5>r€×Kì—ßÍÙË4 øø
endstream
endobj
12 0 obj
440
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250301004049+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000990 00000 n
0000000885 00000 n
0000000906 00000 n
0000000927 00000 n
0000000948 00000 n
0000000969 00000 n
0000000065 00000 n
0000000330 00000 n
0000000865 00000 n
0000000208 00000 n
0000000845 00000 n
0000001050 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1207
%%EOF

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”InA E÷<OÐf±8Õ6  uE`HJ µ<>8€}ý &K-´ƒ,ªº~ó}Ò×ãë¯Çã·Ã'üü®§Ç?<3F>ð ž‘ð ß0áž<><C5BE>p…$‚„—ëç4âO€üÆ*›—/Gü<47>ÏHK…hp(Ò’Ä-«²D=(SÑ,^ð匩1ðßo /5H4K#Ô‡pªéJŠpÏZÓñBbõ`”ÈM©ñ?ÂVØ<56>+kˆb8ùÃN#—P
GZT]ˆJ©ï-™Ù™)wð:Øè:p;…Fœ†¡N.Aîf,5½<35><C2BD>kÖJ6)¤A*6ØûJ¸Sö¾®·ek}¡=­l¤EÈCrÊžNjaì¢
øÏë‘~½­©åŸ¸‰ïƒmIX¥Të9‡'
¯"ÎdÆd÷•p§ØõÚÛú¥<C3BA>§È… «—¶›!*"d}OÄcÌw_[]oåz[êöG6ZÝÉš8%ò65±læÁµt+z_[ီ·evè»ik,v!
ò\ Òð­WΦÆa$sµw•p§àõÚðëŸÌ»—iþçaöY
endstream
endobj
12 0 obj
437
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250301004048+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000987 00000 n
0000000882 00000 n
0000000903 00000 n
0000000924 00000 n
0000000945 00000 n
0000000966 00000 n
0000000065 00000 n
0000000330 00000 n
0000000862 00000 n
0000000208 00000 n
0000000842 00000 n
0000001047 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MŽ1…÷uŠ:AO•]¿[@ÊXp€QM<>Ä Áõz´2ˆ¸ýÚß«Wv?|:þþñxürø€¿ÂÃeöø Ÿ€ñŒ„OHøÈxX ŸûØ~Ï@süpŸØdÛâë¿á ÒâNia¦Îhe âÌÄ×3rgà¿ß˜®$º) ”è–æÐ h‰ä÷ªm»TOk#&7¥ÎW·Â\!­pãO;<3B>œl^¤VkÞ«T*œ,†´”R3ɽÄ@¿£„<C2A3>r@o;Ñ©ijÐÓɃIJ2ÜÒIZGD©0ú„ßÂN8 MëIÏ¡?md¤EÈC*W¯­XV +.Úyÿ\žÛ¯×õý7܆­¥šãõ÷M•Å$
—2{{_ ;åÄ®—dÛŸÞŒlƺWqJ/Þ'Õ*æ2N*{±¢ólÝÂN8Â\¯½½h3ìÑ_ªêªâ½mšBT*5€±“·U
¾¯„<EFBFBD>rb×ëRöÍÇfl¤ª¤êíÈ/¥] «ýÒf
m—v¤_ ;寗Ð/Ÿš7/98Ágø Oö÷[
endstream
endobj
12 0 obj
440
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094737+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000990 00000 n
0000000885 00000 n
0000000906 00000 n
0000000927 00000 n
0000000948 00000 n
0000000969 00000 n
0000000065 00000 n
0000000330 00000 n
0000000865 00000 n
0000000208 00000 n
0000000845 00000 n
0000001050 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”Mn1 …÷<O0!EŠ"·i¯Û.r€Àvd4’ëú<C3BA>‰]daH½Ñ÷Þ“ì›ïûׇûýÏÝ-~û7§o÷<C3B7>ñ<18>ˆ„oȸCÆ#®ÀªHøÔÆúyão€üÁ*/{¼Ãg¤Åɳd.îH çL\„¸ÍSÒ`ÑÀ—#rCîà«/ OêÍ:ª7Cc¨áPw³dæ‰4#-¡‰‹‘[Ý.S°{)•U eG®Ö a9 C<>œif1¤EDL)¨Ò8̵ˆYg_QÂFÙ¡O¢Q§aªÓ#„JdoyŠgµÂ©ZXHDÕGò+JØ({»ëyÔ°ÖöÚÓÊFZ”Š«°Œ¸ÙÜRÑÜ€ÿ]Û¯ç™Úþ7ñýxë[NZ­§Â¤k KNáE8åy¼—•°Qìzê¶Nx™Æº<C386>¯]ïkBø|­ÃzÇ3ìÇŸÚ(»³[‰”j$))kP<6B>g<EFBFBD>+ƒ}QီçA;ôÝÅ´Õ/˜h6±µQqÊšC%ê<>*'Öà"ã‚]VÂF9Àë©òÓ_Í»—iðþ¡¼÷
endstream
endobj
12 0 obj
424
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094736+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000974 00000 n
0000000869 00000 n
0000000890 00000 n
0000000911 00000 n
0000000932 00000 n
0000000953 00000 n
0000000065 00000 n
0000000330 00000 n
0000000849 00000 n
0000000208 00000 n
0000000829 00000 n
0000001034 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1191
%%EOF

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”KŽAD÷yŠ<<3C>'³ò[[@òXp€mM#1Hp}TŸö§E#VÙÝá|Ñi?}8ýúö|út|‡ï?ÃÓíÓóO`|Æ ¾ áod<"ã `U$|íg{½Íó+À~`“­7ßNø¿#jçH©t3%OG:¨r˜ppÅ· rgá¿¿1]ivSš¨Ù-Í£]„3Ð!½¸g!µæF ‡S:#Ì™*gF4»JØ(r<>êR} WÓPgת©Éb-J²Wv j㵊s2ì%l”úø,:uu1MuºJ¯2ŠL%r¶\j1eAßQÂF9Ú]î£VïmÏ£_mì6“"UX¢Í,lž^BGÜÞžã—ûL}þŠ[ñ=¤d
Îh bš.ÆÚB¦ (<28>Œ;BxNèrk¶½éØÕÆjk>ᵡ65(ÔkëXM=¸äÚñŽ6ÊQæröñÇ6Ë»*æÄ^ûÆÕ(A
`⓾£„<C2A3>rb—û°{õ±[Ü«Šf_[ª¥D)Ý€¦Ww3¾Æÿ«6Ê ^nµßþn®^Vsp†<70>ð•ø
endstream
endobj
12 0 obj
437
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094745+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000987 00000 n
0000000882 00000 n
0000000903 00000 n
0000000924 00000 n
0000000945 00000 n
0000000966 00000 n
0000000065 00000 n
0000000330 00000 n
0000000862 00000 n
0000000208 00000 n
0000000842 00000 n
0000001047 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1204

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,69 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
xœµ”MnA…÷uŠ:<3A>S]]¿[@òXp€ÈD™H ®<>úÏv,ذõ¸ç¹¿÷^Ùs÷áðóÛýáÓþ¾ÿ wçO÷? à#|@ÂG$ü…÷Xð 7("HøÔ×v=Íõ+À¾c“­‡/üÏH»,æ,µZ íªTâE iÇ\3É<33>_°tæþùÓ•D7%<25>ÝÒ\Ú&ÛqjÅÉ©¶2ŠŠª¦ í,„Í5<35>7•p¥È
Ò:$m —ƒih°¥JTvõ%³’§†¶(Á6Ù·•p¥Ð׳èÔåbštóÐÐ2☘¹ôlNJÕ”Ûœ:ü¦®„£Ûí2hZïz.}·vBRKõÚµ0vÑÎûããyüv™¨Ÿ¿p ß#FPÉL®¥w˜ªÆm¸JÁf4˜o)áJ9±Û¹ÙvÓÁËÈ26:&KjC­,¬™#RõpZô¿Ê ÷6ú]A_ÿÍfÑÿïWÝñ1öäcXˆ“¹M³Y¦÷nYCjÿ–®„»<>ë>¿fNN58ÂGø
Àöù
endstream
endobj
12 0 obj
415
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094738+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000965 00000 n
0000000860 00000 n
0000000881 00000 n
0000000902 00000 n
0000000923 00000 n
0000000944 00000 n
0000000065 00000 n
0000000330 00000 n
0000000840 00000 n
0000000208 00000 n
0000000820 00000 n
0000001025 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref

Binary file not shown.

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”AnÜ0 E÷<<4F>")‘Û¶À¬Û.z€`&mh
´×/DK3c¦È ,}èýOÉ~øtüóãñøåð?~…‡ËÛão`|Æ'$|F¿Èx@Æ' $\<5C>Uð%k^€Fýp_Øesñõˆßð'ÒQM¢EQ¤Å£4«}l5ÈݹàërðNýp¤ž†ÔQ=íŒÒ'á´8s«ÚÈ¢;)$â"œ»q´"ì¢W ;å†\!jB¢nÈé`JöûßÓÁ.pßžA§ƒa(ÉbRÔ!-<2D>]Ù"Ä‘´©sÛØw”°Sn<53>]¯cFÍN<C38D>³<E28099><C2B3>´(5Wai´¶êµ4µþwyl¿^gÊý'nâ·<C3A2>ÊV)Ä9­—VÌ )µ˜¯#äm%ì”»^zÛ žF¦±y¾[”<·ªôFš…“Äå„o)a§ÜÚ¹^Ç}û™<C3BB>vo7[¬„öKÛBê^šôø&ê5ÆÍ¾­<C2AD>r`×ë°öìc›ñ‹”ž¡\8¼ÕH&ÿ–^/m¿ühÎ^¦98Ágø’Ñ÷}
endstream
endobj
12 0 obj
421
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094744+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000971 00000 n
0000000866 00000 n
0000000887 00000 n
0000000908 00000 n
0000000929 00000 n
0000000950 00000 n
0000000065 00000 n
0000000330 00000 n
0000000846 00000 n
0000000208 00000 n
0000000826 00000 n
0000001031 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1188
%%EOF

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,70 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”MnÛ0…÷sŠ9<C5A0>2CÎï¶-àuÛEØnƒ(@S ½~ÁÅŽ`]”È'~ï
Gzøtüýãñøåð?~…‡ËÓã/`|Æ3>!ád< ã W`$|îc»ž<C2BB>æøà?±É¶Å×#~äŲrJEZH¤$™º -,™RjÉÄ×3rGà_˜ž$º% ”è†æÐ&á´„³($m»”ÂnÆHSr„{ãßUÂN9<4E>+¤uHÚ@n¦¡ÎΚTªh<C2AA>R#³X*¤¥ˆj¡Ôªƒ}G ;倾?‰NÝ\LSƒžaY©çÑw.ÒèL¢®l“~[ ;å¨îz5­W{}¶±‘!©\½¶¸¬V\FÜ.Ïí×ëL}ÿ
·áÇñrÚØªY/Õ hË¡E<ƒUfÈ;JØ)'v½Ô¶Ýtðfd3Ö
8Q„ׄ”Í‹Ôê­]•(<28>Ùr¸£„<C2A3>r”s½Žûþcåñ]ªµ7[Ï:¥§Pw
õjuÆ¿­<EFBFBD>rb×ë°ûæc36[œ˜j<CB9C>µ¾ªÎlÕ¬jµ¤ˆà²µøM%씼^Ê~ùݼyÙÌÁ >Ã_<¸÷û
endstream
endobj
12 0 obj
443
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094737+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000993 00000 n
0000000888 00000 n
0000000909 00000 n
0000000930 00000 n
0000000951 00000 n
0000000972 00000 n
0000000065 00000 n
0000000330 00000 n
0000000868 00000 n
0000000208 00000 n
0000000848 00000 n
0000001053 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,71 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
<EFBFBD>”ËnA E÷þ
ÁÄ.»üØÒ¬<C392>Í¢t$¿<>ê•™´&ˆE«úqÛçúººï><3E>~ÿ¸?}9~À<>_áîruÿ <1F>ñ ‘ð2‘ñ 7`U$|êk;ž€æúà ?±ÉÖ×~Ãg¤C:9IõZµ¸r©"†tŠæÌŠ/È<>y„ÿ~cºÒè¦4P£[šK» çVNU£DrénÌŠŠ×va.¤ŵxW ;å@n<>Ö!i¹LCƒíV$“Ü[+æÎU³µR\¤šOöm%ì”úv<16>º\LS<4C>nd颭fI)ÙRU¦PÎ<50>ôw”°SŽt·ëVÓzÚséw»1ÈC…ÅéP¸ZXqí³þ÷ãY~»î©×_¸…ïM¶¬
cmÖYÅ“¢÷HV¥¦º<C2A6>o a'œÐíl;éØecÙ6«•¥X«ªéZ”¬±d%%±\¾­<C2AD>r„¹]7ûöcaz%)æéÚ·PRhZOÔI«W¶I¿­<C2AD>rb·ëföÕÇ267x¡p«Ñ>Ï*®}³hjPúÚß·„°Nìv ýò³yu²¬Á>Ã_·7ø
endstream
endobj
12 0 obj
441
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.4, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.4)
/CreationDate (D:20250227094746+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000991 00000 n
0000000886 00000 n
0000000907 00000 n
0000000928 00000 n
0000000949 00000 n
0000000970 00000 n
0000000065 00000 n
0000000330 00000 n
0000000866 00000 n
0000000208 00000 n
0000000846 00000 n
0000001051 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1208
%%EOF

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 64 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.0 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 65 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.5 KiB

Binary file not shown.

View File

@@ -0,0 +1,48 @@
% Main document for the ForAug/ForNet ICCV 2025 submission (paper ID 6426).
% Two-column article layout using the ICCV class options.
\documentclass[10pt,twocolumn,letterpaper]{article}
\usepackage[pagenumbers]{iccv} %
% All generic packages and math macros live in packages.tex.
\input{packages}
\definecolor{iccvblue}{rgb}{0.21,0.49,0.74}
% hyperref must be loaded late, and cleveref after hyperref (load-order requirement).
\usepackage[pagebackref,breaklinks,colorlinks,allcolors=iccvblue]{hyperref}
\usepackage[capitalize,noabbrev]{cleveref}
% Conference metadata consumed by the iccv class.
\def\paperID{6426} %
\def\confName{ICCV}
\def\confYear{2025}
% \name  = the dataset (ForNet); \schemename = the augmentation scheme (ForAug).
\newcommand{\name}{\textit{ForNet}\xspace}
\newcommand{\schemename}{\textit{ForAug}\xspace}
\title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation}
\author{Tobias Christian Nauen${}^{1,2}$ Brian Moser${}^2$ Federico Raue${}^2$ Stanislav Frolov${}^2$ Andreas Dengel${}^{1,2}$\\
${}^1$RPTU Kaiserslautern-Landau, Kaiserslautern, Germany \\
${}^2$German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany \\
{\tt\small first\_second.last@dfki.de / first.last@dfki.de}
}
\begin{document}
\maketitle
% Paper sections, one file each under sec/.
\input{sec/abstract}
\input{sec/intro}
\input{sec/related_work}
\input{sec/method}
\input{sec/experiments}
\input{sec/conclusion}
\input{sec/acks}
{
\small
\bibliographystyle{ieeenat_fullname}
% NOTE(review): bibliography path points outside the paper directory
% (../../JabRef/main_bib) — confirm this resolves in the build/submission environment.
\bibliography{../../JabRef/main_bib}
}
\newpage
% Appendix is typeset single-column after the main paper.
\onecolumn
\appendix
\input{sec/appendix}
\end{document}

View File

@@ -0,0 +1,116 @@
% packages.tex — package loading section.
% Order is load-order sensitive for some pairs (e.g. amsmath before mathtools
% extensions take effect); do not reorder without checking.
% --- Math symbol and font packages ---
\usepackage{amssymb}
\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amsxtra}
\usepackage{cancel}
\usepackage{dsfont}
% --- Graphics and TikZ ---
\usepackage{graphicx}
\usepackage{tikz}
\usepackage{tikz-qtree}
\usetikzlibrary{shapes}
\usetikzlibrary{positioning}
\usetikzlibrary{trees}
% --- Additional math tooling ---
\usepackage{mathcomp}
\usepackage{mathtools}
% --- Tables, text, and layout helpers ---
\usepackage{multirow}
\usepackage{verbatim}
\usepackage{polynom}
\usepackage{textcomp}
\usepackage{float}
\usepackage{pdflscape}
\usepackage{csquotes}
\usepackage{afterpage}
\usepackage{makecell}
\usepackage{listings}
\usepackage{url}
\usepackage{enumitem}
\usepackage{minibox}
% --- Algorithm environments (algorithm float + algorithmic body) ---
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{shuffle}
\usepackage{svg}
\usepackage{pifont}
% subcaption for subfigures (replaces the deprecated subfigure package).
\usepackage{subcaption}
\usepackage{xspace}
\usepackage{siunitx}
\usepackage{booktabs}
% microtype improves justification and reduces overfull boxes.
\usepackage{microtype}
% --- Custom macro definitions ---
% Math-mode left/right quote symbols.
\DeclareMathSymbol{\mlq}{\mathord}{operators}{``}
\DeclareMathSymbol{\mrq}{\mathord}{operators}{`'}
% Number sets and common calligraphic/blackboard letters.
% NOTE: several kernel commands (\P, \H, \L, \S, \O, \Re, \Im, \phi) are
% deliberately overridden with \renewcommand for math notation.
\newcommand{\R}{\mathbb R}
\newcommand{\N}{\mathbb N}
\newcommand{\calN}{\mathcal{N}}
\newcommand{\D}{\mathbb D}
\newcommand{\calD}{\mathcal D}
\newcommand{\C}{\mathbb C}
\renewcommand{\P}{\mathbb{P}}
\newcommand{\A}{\mathcal A}
\newcommand{\B}{\mathcal B}
\newcommand{\I}{\mathcal I}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\Q}{\mathbb{Q}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\F}{\mathcal{F}}
\newcommand{\G}{\mathcal{G}}
\newcommand{\M}{\mathcal{M}}
\renewcommand{\H}{\mathcal{H}}
\newcommand{\X}{\mathbb{X}}
\newcommand{\sigB}{\mathbb B}
\newcommand{\sigE}{\mathcal{E}}
% Open ball of radius #1 around #2.
\newcommand{\Ball}[2]{B_#1(#2)} %
\renewcommand{\L}{\mathcal{L}}
\newcommand{\eps}{\varepsilon}
% Indicator function (requires dsfont).
\newcommand{\1}{\mathds{1}}
% Arrow shorthands: plain long arrow, labeled equality, labeled arrow.
\newcommand{\To}{\longrightarrow}
\newcommand{\eqover}[1]{\stackrel{#1}{=}}
\newcommand{\darover}[1]{\xrightarrow{#1}}
\newcommand{\id}{\mathrm{id}}
\newcommand{\del}{\partial}
% Probabilistic independence symbol.
\newcommand{\indep}{\perp\!\!\!\perp}
\renewcommand{\Re}{\operatorname{Re}}
\renewcommand{\phi}{\varphi}
\renewcommand{\Im}{\operatorname{Im}}
% Upright operator names.
\newcommand{\cov}{\operatorname{cov}}
\newcommand{\corr}{\operatorname{corr}}
\newcommand{\att}{\operatorname{attention}}
% Auto-sized norm and absolute-value delimiters.
\newcommand{\norm}[1]{\left\lVert#1\right\rVert}
\newcommand{\abs}[1]{\left| #1 \right|}
% 2x2 matrix shorthand.
\newcommand{\mat}[4]{\begin{pmatrix} #1 & #2 \\ #3 & #4 \end{pmatrix}}
\newcommand{\softmax}{\operatorname{softmax}}
\newcommand{\argmax}{\operatorname{argmax}}
\newcommand{\suff}{\operatorname{suff}}
\newcommand{\comp}{\operatorname{comp}}
\newcommand{\In}{\operatorname{In}}
\newcommand{\Var}{\operatorname{Var}}
\newcommand{\tensor}{\otimes}
\newcommand{\bigtensor}{\bigotimes}
% Bold vector/matrix shorthands.
\newcommand{\bx}{\mathbf{x}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bz}{\mathbf{z}}
\newcommand{\bB}{\mathbf{B}}
\newcommand{\grad}{\nabla}
\newcommand{\spanop}{\operatorname{span}}
\renewcommand{\S}{\mathcal{S}}
\newcommand{\Y}{\mathbb Y}
% Hölder-continuity label.
\newcommand{\Hoel}{\text{Höl}}
\newcommand{\Tau}{\mathcal{T}}
\newcommand{\W}{\mathcal{W}}
\renewcommand{\O}{\mathcal{O}}
\newcommand{\emptyword}{\varnothing}
% Draft-only marker: renders a red TODO box; should not appear in camera-ready.
\newcommand{\todo}[1]{\colorbox{red}{TODO: #1}}
\newcommand{\taylorsm}{\operatorname{T-SM}}
\newcommand{\ops}{\operatorname{ops}}
\newcommand{\entr}{\operatorname{entries}}
% Gray / green colored text helpers for tables.
\newcommand{\gtxt}[1]{\text{\textcolor{gray}{#1}}}
\newcommand{\grntxt}[1]{\text{\textcolor{ForestGreen}{#1}}}
% Inline code formatting.
\newcommand{\code}[1]{\texttt{#1}}
% Rotate cell content 90 degrees (for narrow table headers).
\newcommand*\rot{\rotatebox{90}}
\newcommand{\tldr}{\textbf{TL;DR:}\xspace}

View File

@@ -0,0 +1,13 @@
\begin{abstract}
Transformers, particularly Vision Transformers (ViTs), have achieved state-of-the-art performance in large-scale image classification.
However, they often require large amounts of data and can exhibit biases that limit their robustness and generalizability.
This paper introduces \schemename, a novel data augmentation scheme that addresses these challenges and explicitly includes inductive biases, which are commonly part of the neural network architecture, into the training data.
\schemename is constructed by using pretrained foundation models to separate and recombine foreground objects with different backgrounds, enabling fine-grained control over image composition during training.
It thus increases the data diversity and effective number of training samples.
We demonstrate that training on \name, the application of \schemename to ImageNet, significantly improves the accuracy of ViTs and other architectures by up to 4.5 percentage points (p.p.) on ImageNet and 7.3 p.p. on downstream tasks.
Importantly, \schemename enables novel ways of analyzing model behavior and quantifying biases.
Namely, we introduce metrics for background robustness, foreground focus, center bias, and size bias and show that training on \name substantially reduces these biases compared to training on ImageNet.
In summary, \schemename provides a valuable tool for analyzing and mitigating biases, enabling the development of more robust and reliable computer vision models.
Our code and dataset are publicly available at \url{https://github.com/tobna/ForAug}.
\end{abstract}

View File

@@ -0,0 +1,5 @@
\subsection*{Acknowledgements}
\label{sec:acknowledgements}
This work was funded by the Carl-Zeiss Foundation under the Sustainable Embedded AI project (P2021-02-009) and by the EU project SustainML (Horizon Europe grant agreement No 101070408).
All compute was done thanks to the Pegasus cluster at DFKI.

View File

@@ -0,0 +1,88 @@
\section{Training Setup}
\label{sec:training_setup}
\begin{table}[h]
\centering
\begin{tabular}{lc}
\toprule
Parameter & Value \\
\midrule
Image Resolution & $224 \times 224$ \\
Epochs & 300 \\
Learning Rate & 3e-3 \\
Learning Rate Schedule & cosine decay \\
Batch Size & 2048 \\
Warmup Schedule & linear \\
Warmup Epochs & 3 \\
Weight Decay & 0.02 \\
Label Smoothing & 0.1 \\
Optimizer & Lamb \cite{You2020} \\
Data Augmentation Policy & 3-Augment \cite{Touvron2022} \\
\bottomrule
\end{tabular}
\caption{Training setup for our ImageNet and \name training.}
\label{tab:in-setup}
\end{table}
\begin{table}[h]
\centering
\begin{tabular}{lccc}
\toprule
Dataset & Batch Size & Epochs & Learning Rate \\
\midrule
Aircraft & 512 & 500 & 3e-4 \\
Cars & 1024 & 500 & 3e-4 \\
Flowers & 256 & 500 & 3e-4 \\
Food & 2048 & 100 & 3e-4 \\
Pets & 512 & 500 & 3e-4 \\
\bottomrule
\end{tabular}
\caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}.}
\label{tab:downstream-setup}
\end{table}
On ImageNet we use the same training setup as \cite{Nauen2023} and \cite{Touvron2022} without pretraining.
As our focus is on evaluating the changes in accuracy due to \schemename/\name, like \cite{Nauen2023}, we stick to one set of hyperparameters for all models.
We list the settings used for training on ImageNet and \name in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}.
\newpage
\section{Infill Model Comparison}
\label{sec:infill-model-comparison}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{\begin{tabular}{cc@{\hskip 0.3in}cc}
\toprule
LaMa & Att. Eraser & LaMa & Att. Eraser \\
\midrule
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} &
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} &
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\
\bottomrule
\end{tabular}}
\caption{Example infills of LaMa and Attentive Eraser.}
\end{table}
\section{Images with High Infill Ratio}
\label{sec:high-infill-ratio}
\begin{table}[h!]
\centering
\begin{tabular}{ccc}
\toprule
Infill Ratio & LaMa & Att. Eraser \\
\midrule
93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\ \\
95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} \\ \\
83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\ \\
88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}}
\end{tabular}
\caption{Example infills with a large relative foreground area size that is infilled (infill ratio).}
\label{tbl:high-rat}
\end{table}

View File

@@ -0,0 +1,10 @@
\section{Discussion \& Conclusion}
\label{sec:conclusion}
We introduce \schemename, a novel data augmentation scheme that facilitates improved Transformer training for image classification.
By explicitly separating and recombining foreground objects and backgrounds, \schemename enables controlled data augmentation, leading to significant performance gains on ImageNet and downstream fine-grained classification tasks.
Furthermore, \schemename provides a powerful framework for analyzing model behavior and quantifying biases, including background robustness, foreground focus, center bias, and size bias.
Our experiments demonstrate that training on \name, the instantiation of \schemename on ImageNet, not only boosts accuracy but also significantly reduces these biases, resulting in more robust and generalizable models.
In the future, we see \schemename also being applied to other datasets and tasks, such as video recognition or segmentation.
\schemename's ability to both improve performance and provide insights into model behavior makes it a valuable tool for advancing CV research and developing more reliable AI systems.

View File

@@ -0,0 +1,376 @@
\section{Experiments}
\label{sec:experiments}
We conduct a comprehensive suite of experiments to validate the effectiveness of our approach.
We compare training on \name, the ImageNet instantiation of \schemename, to training on ImageNet for 7 different models.
Furthermore, we assess the impact of using \name for pretraining on multiple fine-grained downstream datasets.
Additionally, we use \schemename's control over the image distribution to quantify some model behaviors and biases.
\subsection{Design Choices of \schemename}
\label{sec:ablation}
We start by ablating the design choices of \schemename.
For this, we revert to TinyImageNet \cite{Le2015}, a subset of ImageNet containing 200 categories with 500 images each, and Tiny\name, a version of \schemename derived from TinyImageNet.
\Cref{tab:ablation} presents the results of these ablations.
\begin{table*}[t]
\centering
\resizebox{\textwidth}{!}{
\begin{tabular}{lccccccccccccc}
\toprule
\multirow{2}{*}{Dataset} & Detect. & Infill & FG. & Augmentation & BG. & BG. & edge & original & \multicolumn{2}{c}{TinyImageNet Accuracy} \\
& prompt & Model & size & Order & strategy & pruning & smoothing & image mixing & ViT-Ti [\%] & ViT-S [\%] \\
\cmidrule(r){1-1} \cmidrule(lr){2-9} \cmidrule(l){10-11}
TinyImageNet & & & & & & & & & $66.1\pm0.5$ & $68.3\pm0.7$ \\
Tiny\name & specific & LaMa \cite{Suvorov2021} & mean & crop$\to$paste$\to$color & same & - & - & \gtxt{-} & $64.6\pm0.5$ & $70.0\pm0.6$ \\
\gtxt{Tiny\name} & \gtxt{specific} & \gtxt{LaMa \cite{Suvorov2021}} & range & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $65.5\pm0.4$ & $71.2\pm0.5$ \\
\gtxt{Tiny\name} & general & \gtxt{LaMa \cite{Suvorov2021}} & \gtxt{range} & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $66.4\pm0.6$ & $72.9\pm0.6$ \\
\gtxt{Tiny\name} & \gtxt{general} & Att. Eraser \cite{Sun2024} & \gtxt{range} & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $67.5\pm1.2$ & $72.4\pm0.5$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & paste$\to$crop$\to$color & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $67.1\pm1.2$ & $72.9\pm0.5$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 1.0 & \gtxt{-} & \gtxt{-} & $67.0\pm1.2$ & $73.0\pm0.3$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 0.8 & \gtxt{-} & \gtxt{-} & $67.2\pm1.2$ & $72.9\pm0.8$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 0.6 & \gtxt{-} & \gtxt{-} & $67.5\pm1.0$ & $72.8\pm0.7$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 2.0$ & \gtxt{-} & $67.2\pm0.4$ & $72.9\pm0.5$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{-} & $65.9\pm0.5$ & $72.4\pm0.6$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.2$ & $69.8\pm0.5$ & $75.0\pm0.3$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.33$ & $69.5\pm0.4$ & $75.2\pm1.0$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.5$ & $70.3\pm1.0$ & $74.2\pm0.2$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & linear & $70.1\pm0.7$ & $74.9\pm0.8$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & reverse lin. & $67.6\pm0.2$ & $73.2\pm0.3$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & cos & $71.3\pm1.0$ & $75.7\pm0.8$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{cos} & $70.0\pm0.8$ & $75.5\pm0.7$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & orig. & \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & $67.2\pm0.9$ & $69.9\pm1.0$ \\
\gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & all & \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & $70.1\pm0.7$ & $77.5\pm0.6$ \\
\midrule
\name & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & \gtxt{cos} & - & $80.5\pm0.1$ \\
\gtxt{\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{cos} & - & $80.7\pm0.1$ \\
\gtxt{\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & all & \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & - & $81.3\pm0.1$ \\
\bottomrule
\end{tabular}}
\caption{Ablation of design decisions of Tiny\name on TinyImageNet and \name on ImageNet.}
\label{tab:ablation}
\end{table*}
\textbf{Prompt.}
First, we evaluate the type of prompt used to detect the foreground object.
Here, the \emph{general} prompt, which contains the class and the more general object category, outperforms only having the class name (\emph{specific}).
\textbf{Inpainting.} Attentive Eraser \cite{Sun2024} produces superior results compared to LaMa \cite{Suvorov2021} (see \Cref{sec:infill-model-comparison} for examples).
\textbf{Foreground size}
significantly impacts performance.
Employing a \emph{range} of sizes during recombination, rather than a fixed \emph{mean} size, boosts accuracy by approximately 1 p.p.
This suggests that the added variability is beneficial.
\textbf{Order of data augmentation.}
Applying all augmentations after foreground-background recombination (\emph{paste$\to$crop$\to$color}) slightly improves ViT-S's performance compared to applying crop-related augmentations before pasting (\emph{crop$\to$paste$\to$color}).
For ViT-Ti, the results are ambiguous.
\textbf{Background pruning.}
When it comes to the choice of backgrounds to use, we test two pruning thresholds ($t_\text{prune}$) to exclude backgrounds with excessive inpainting.
A threshold of $t_\text{prune}=1.0$ means that we use all backgrounds that are not fully infilled.
Varying $t_\text{prune}$ has minimal impact.
Therefore, we choose $t_\text{prune} = 0.8$ to exclude predominantly artificial backgrounds.
Similarly, applying edge smoothing to foreground masks with Gaussian blurring actually hurts performance on Tiny\name, but slightly improves it on \name.
\textbf{Mixing} \name with the original ImageNet data proves crucial.
While constant and linear mixing schedules improve performance over no mixing by $2-3$ p.p. compared to only using Tiny\name, the cosine annealing schedule yields the best results, boosting accuracy by another $0.5-1$ p.p.
\textbf{Background strategy.}
Another point is the allowed choice of background image for each foreground object.
We compare using the original background, a background from the same class, and any background.
These strategies go from low diversity and high shared information content between the foreground and background to high diversity and low shared information content.
For \emph{ViT-Ti}, the latter two strategies perform comparably, while \emph{ViT-S} benefits from the added diversity of using any background.
The same is true when training on the full (ImageNet) version of \name.
\begin{figure}
\centering
\includegraphics[width=.7\columnwidth]{img/bates.pdf}
\caption{Plot of the probability distribution function (PDF) of the extended Bates distribution for different parameters $\eta$. Higher values of $\eta$ concentrate the distribution around the center.}
\label{fig:bates-pdf}
\end{figure}
\begin{table}
\centering
\resizebox{\columnwidth}{!}{
\begin{tabular}{ccccccc}
\toprule
\multirow{2.5}{*}{\makecell{Training Set/ \\ Bates Parameter}} & \multirow{2.5}{*}{TIN} & \multicolumn{5}{c}{Tiny\name} \\
\cmidrule(l){3-7}
& & $\eta=-3$ & $-2$ & $1/-1$ & $2$ & $3$ \\
\midrule
TinyImageNet & 68.9 & 60.5 & 60.2 & 60.8 & 62.6 & 63.1 \\
$\eta=-3$ & 71.3 & 79.3 & 79.5 & 79.1 & 79.3 & 79.1 \\
$\eta=-2$ & 71.5 & 80.0 & 78.7 & 79.3 & 79.1 & 78.8 \\
$\eta=1/-1$ & 72.3 & 79.5 & 78.9 & 80.2 & 79.7 & 80.4 \\
$\eta=2$ & 71.3 & 78.2 & 77.8 & 79.1 & 79.6 & 79.9 \\
$\eta=3$ & 71.4 & 77.2 & 76.9 & 78.6 & 79.6 & 79.7 \\
\bottomrule
\end{tabular}}
\caption{Accuracy of ViT-S trained on TinyImageNet (TIN) and Tiny\name with different foreground position distributions by varying the parameter of a Bates distribution $\eta$.
The best performance is achieved using the uniform distribution ($\eta=1$).}
\end{table}
\textbf{Foreground position.}
Finally, we analyze the foreground object's positioning in the image.
We utilize an extended Bates distribution to sample the position of the foreground object.
The Bates distribution~\cite{Bates1955} with parameter $\eta \geq 1$ is the mean of $\eta$ independent uniformly distributed random variables \cite{Jonhson1995}.
Therefore, the larger $\eta$, the more concentrated the distribution is around the center.
We extend this concept to $\eta \leq -1$ by defining ${X \sim \text{Bates}(\eta) :\Leftrightarrow s(X) \sim \text{Bates}(-\eta)}$ for $\eta \leq -1$ with $s$ being the sawtooth function on $[0, 1]$:
\begin{align}
s(x) = \begin{cases}
x + 0.5 & \text{if } 0 < x < 0.5 \\
x - 0.5 & \text{if } 0.5 \leq x \leq 1
\end{cases}
\end{align}
Note that $s \circ s = \id$ on $[0, 1]$.
This way, distributions with $\eta \leq -1$ are more concentrated around the borders.
$\eta = 1$ and $\eta = -1$ both correspond to the uniform distribution.
The PDF of this extended Bates distribution is visualized in \Cref{fig:bates-pdf}.
When sampling more towards the center of the image, the difficulty of the task is reduced, which then reduces the performance on TinyImageNet.
This is reflected in the performance when evaluating on Tiny\name with $\eta=2$ and $\eta=3$ compared to $\eta=-1/1$.
We observe a similar reduction for $\eta < -1$.
This experiment is conducted using the LaMa infill model.
\begin{table}
\centering
\small
\begin{tabular}{lccc}
\toprule
Dataset & Classes & \makecell{Training \\ Images} & \makecell{Validation \\ Images} \\
\midrule
TinyImageNet & 200 & 100,000 & 10,000 \\
Tiny\name & 200 & 99,404 & 9,915 \\
ImageNet & 1,000 & 1,281,167 & 50,000 \\
\name & 1,000 & 1,274,557 & 49,751 \\
\bottomrule
\end{tabular}
\caption{Dataset statistics for TinyImageNet, Tiny\name, ImageNet, and \name. For \name and Tiny\name we report the number of foreground/background pairs.}
\label{tab:dataset-stats}
\end{table}
After fixing the optimal design parameters in \Cref{tab:ablation} (last row), we construct the full \name dataset using the entire ImageNet dataset.
\Cref{tab:dataset-stats} compares the dataset statistics of ImageNet and \name.
The slightly reduced image count in \name is due to instances where Grounded SAM failed to produce valid object detections.
\subsection{Image Classification Results}
\begin{table}
\centering
\begin{tabular}{lccc}
\toprule
\multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{ImageNet Accuracy \\ when trained on}} & \multirow{2.5}{*}{Delta} \\
\cmidrule(lr){2-3}
& ImageNet & \name & \\
\midrule
ViT-S & $79.1\pm0.1$ & $81.4\pm0.1$ & \grntxt{+2.3} \\
ViT-B & $77.6\pm0.2$ & $81.1\pm0.4$ & \grntxt{+3.5} \\
ViT-L & $75.3\pm0.4$ & $79.8\pm0.1$ & \grntxt{+4.5} \\
\midrule
Swin-Ti & $77.9\pm0.2$ & $79.7\pm0.1$ & \grntxt{+1.8} \\
Swin-S & $79.4\pm0.1$ & $80.6\pm0.1$ & \grntxt{+1.2} \\
\midrule
ResNet-50 & $78.3\pm0.1$ & $78.8\pm0.1$ & \grntxt{+0.5} \\
ResNet-101 & $79.4\pm0.1$ & $80.4\pm0.1$ & \grntxt{+1.0} \\
\bottomrule
\end{tabular}
\caption{ImageNet results of models trained on \name and on ImageNet directly. \name improves the performance of all models in our test.}
\label{tab:imagenet-results}
\end{table}
\Cref{tab:imagenet-results} compares the ImageNet performance of models trained on \name and ones trained directly on ImageNet.
We adopt the training setup of \cite{Nauen2023} and \cite{Touvron2022} (details in \Cref{sec:training_setup}) for training ViT \cite{Dosovitskiy2021}, Swin \cite{Liu2021} and ResNet \cite{He2016} models.
Notably, \name improves performance across all tested architectures, including the ResNet models (up to $1$ p.p.), demonstrating benefits beyond Transformers.
For Transformer models, we observe improvements from $1.2$ p.p. to $4.5$ p.p.
This improvement is more substantial for the larger models, with ViT-L gaining $4.5$ p.p. in accuracy.
\name's improvements mostly counteract the drop in performance due to overfitting for large models.
When training on ImageNet, this drop is $3.8$ p.p. from ViT-S to ViT-L, while for \name it is reduced to $1.6$ p.p.
\begin{table}
\centering
\resizebox{\columnwidth}{!}{\begin{tabular}{lccccc}
\toprule
Model & Aircraft & Cars & Flowers & Food & Pets \\
\midrule
ViT-S @ ImageNet & $72.4\pm1.0$ & $89.8\pm0.3$ & $94.5\pm0.2$ & $89.1\pm0.1$ & $93.8\pm0.2$ \\
ViT-S @ \name & $78.6\pm0.5$ & $92.2\pm0.2$ & $95.5\pm0.2$ & $89.6\pm0.1$ & $94.5\pm0.2$ \\
& \grntxt{+6.2} & \grntxt{+2.4} & \grntxt{+1.0} & \grntxt{+0.5} & \grntxt{+0.7} \\
\cmidrule(r){1-1}
ViT-B @ ImageNet & $71.7\pm0.5$ & $90.0\pm0.2$ & $94.8\pm0.4$ & $89.8\pm0.2$ & $94.1\pm0.4$ \\
ViT-B @ \name & $79.0\pm2.2$ & $93.3\pm0.1$ & $ 96.5\pm0.1$ & $90.9\pm0.1$ & $95.1\pm0.4$ \\
& \grntxt{+7.3} & \grntxt{+3.3} & \grntxt{+1.7} & \grntxt{+1.1} & \grntxt{+1.0} \\
\cmidrule(r){1-1}
ViT-L @ ImageNet & $72.1\pm1.0$ & $88.8\pm0.3$ & $94.4\pm0.3$ & $90.1\pm0.2$ & $94.2\pm0.4$ \\
ViT-L @ \name & $77.6\pm1.2$ & $89.1\pm0.2$ & $96.6\pm0.1$ & $91.3\pm0.1$ & $95.1\pm0.1$ \\
& \grntxt{+5.5} & \grntxt{+0.3} & \grntxt{+2.2} & \grntxt{+1.2} & \grntxt{+0.9} \\
\midrule
Swin-Ti @ ImageNet & $77.0\pm0.1$ & $91.3\pm0.6$ & $95.9\pm0.1$ & $90.0\pm0.2$ & $94.2\pm0.1$ \\
Swin-Ti @ \name & $81.1\pm0.8$ & $92.8\pm0.4$ & $96.2\pm0.1$ & $90.4\pm0.3$ & $94.8\pm0.5$ \\
& \grntxt{+4.1} & \grntxt{+2.5} & \grntxt{+0.3} & \grntxt{+0.4} & \grntxt{+0.6} \\
\cmidrule(r){1-1}
Swin-S @ ImageNet & $75.7\pm1.4$ & $91.0\pm0.3$ & $95.9\pm0.5$ & $91.1\pm0.2$ & $94.4\pm0.1$ \\
Swin-S @ \name & $81.4\pm0.2$ & $93.1\pm0.2$ & $96.3\pm0.3$ & $91.2\pm0.2$ & $94.9\pm0.3$ \\
& \grntxt{+5.7} & \grntxt{+2.1} & \grntxt{+1.4} & \grntxt{+0.1} & \grntxt{+0.5} \\
\midrule
ResNet-50 @ ImageNet & $78.2\pm0.5$ & $89.8\pm0.2$ & $91.7\pm0.4$ & $84.4\pm0.2$ & $93.7\pm0.3$ \\
ResNet-50 @ \name & $80.3\pm0.4$ & $90.4\pm0.2$ & $91.7\pm0.2$ & $84.5\pm0.2$ & $93.7\pm0.3$ \\
& \grntxt{+2.1} & \grntxt{+0.6} & \gtxt{$\pm$0} & \grntxt{+0.1} & \gtxt{$\pm$0} \\
\cmidrule(r){1-1}
ResNet-101 @ ImageNet & $78.4\pm0.6$ & $90.3\pm0.1$ & $91.2\pm0.5$ & $86.0\pm0.2$ & $94.3\pm0.2$ \\
ResNet-101 @ \name & $81.4\pm0.5$ & $91.3\pm0.1$ & $92.9\pm0.2$ & $86.3\pm0.1$ & $94.0\pm0.3$ \\
& \grntxt{+3.0} & \grntxt{+1.3} & \grntxt{+1.7} & \grntxt{+0.3} & \textcolor{red}{-0.3} \\
\bottomrule
\end{tabular}}
\caption{Downstream accuracy in percent when finetuning on other datasets. Models were pretrained on \name and ImageNet. Pretraining on \name increases Transformer downstream accuracy on all datasets.}
\end{table}
To assess the transferability of \name-trained models, we finetune models pretrained on ImageNet and \name on five fine-grained datasets:
FGVC-Aircraft \cite{Maji2013}, Stanford Cars~\cite{Dehghan2017}, Oxford Flowers \cite{Nilsback2008}, Food-101 \cite{Kaur2017}, and Oxford-IIIT Pets \cite{Parkhi2012}.
While for ResNets, the performance of both training datasets is about the same, for every Transformer, we see the accuracy improve on all downstream datasets by up to 7.3 p.p. and a reduction of error rate of up to $39.3\%$.
In summary, these results demonstrate that the improved representation learning achieved by training on \name translates to superior performance not only on ImageNet, but also on a variety of fine-grained image classification tasks.
\subsection{Further Model Evaluation}
Beyond its use for training, \name's unique properties and controlled data generation capabilities make it a powerful tool for analyzing model behavior and biases.
\paragraph*{Background Robustness}
\begin{table}
\centering
\begin{tabular}{lccc}
\toprule
\multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Background Robustness \\ when trained on}} & \multirow{2.5}{*}{Delta} \\
\cmidrule(lr){2-3}
& ImageNet & \name & \\
\midrule
ViT-S & $0.73\pm0.01$ & $0.99\pm0.01$ & \grntxt{+0.26} \\
ViT-B & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\
ViT-L & $0.70\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.30} \\
\midrule
Swin-Ti & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\
Swin-S & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\
\midrule
ResNet-50 & $0.79\pm0.01$ & $0.99\pm0.01$ & \grntxt{+0.20} \\
ResNet-101 & $0.79\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.21} \\
\bottomrule
\end{tabular}
\caption{Evaluation of the background robustness of models trained on \name and on ImageNet directly. Training on \name improves the background robustness of all models to $\approx1.00$, meaning the model is indifferent to the choice of background.}
\label{tab:background-robustness}
\end{table}
We assess the robustness of models to shifts in the background distribution from a class-related background to any background.
Background robustness is defined to be the ratio of accuracy on \name with same-class backgrounds to accuracy with any background:
\begin{align}
\text{Background Robustness} = \frac{\text{Acc}(\name_\text{all})}{\text{Acc}(\name_\text{same})}
\end{align}
It represents the relative drop in performance under a background distribution shift.
\Cref{tab:background-robustness} presents the background robustness of various models.
When trained on ImageNet, smaller models generally exhibit greater robustness to changes in the background distribution than larger models and ResNet is more robust than the tested Transformer models.
Crucially, training on \name instead of ImageNet improves the background robustness of all models to $\approx1.00$, meaning that these models are agnostic to the choice of background and only classify based on the foreground.
These findings highlight the generalization benefits of \name.
\paragraph*{Foreground Focus}
\begin{table}
\centering
\resizebox{\columnwidth}{!}{
\begin{tabular}{lcccccc}
\toprule
\multirow{4}{*}{Model} & \multicolumn{6}{c}{Foreground Focus when trained on} \\
\cmidrule(l){2-7}
& IN & FN & IN & FN & IN & FN \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(l){6-7}
& \multicolumn{2}{c}{GradCam} & \multicolumn{2}{c}{GradCam++} & \multicolumn{2}{c}{IG} \\
\midrule
ViT-S & $1.2\pm0.1$ & $2.3\pm0.3$ & $1.2\pm0.1$ & $2.1\pm0.4$ & $1.9\pm0.1$ & $2.7\pm0.1$ \\
ViT-B & $1.2\pm0.1$ & $2.4\pm0.7$ & $1.1\pm0.1$ & $2.1\pm0.1$ & $1.7\pm0.1$ & $2.7\pm0.1$ \\
ViT-L & $1.3\pm0.1$ & $1.6\pm0.1$ & $1.1\pm0.1$ & $1.3\pm0.1$ & $1.3\pm0.1$ & $2.6\pm0.1$ \\
\midrule
Swin-Ti & $0.9\pm0.1$ & $0.7\pm0.1$ & $1.0\pm0.3$ & $0.7\pm0.3$ & $2.5\pm0.1$ & $4.8\pm0.3$ \\
Swin-S & $0.8\pm0.1$ & $0.7\pm0.1$ & $0.7\pm0.1$ & $0.7\pm0.4$ & $2.4\pm0.1$ & $4.6\pm0.3$ \\
\midrule
ResNet-50 & $2.2\pm0.1$ & $2.7\pm0.1$ & $2.0\pm0.1$ & $2.9\pm0.1$ & $3.2\pm0.1$ & $4.9\pm0.2$ \\
ResNet-101 & $2.3\pm0.1$ & $2.8\pm0.1$ & $2.2\pm0.1$ & $3.0\pm0.1$ & $3.2\pm0.1$ & $4.8\pm0.1$ \\
\bottomrule
\end{tabular}}
\caption{Evaluation of the foreground focus using GradCam, GradCam++ and IntegratedGradients of models trained on \name (FN) and on ImageNet (IN) directly. Training on \name improves the foreground focus of almost all models.}
\label{tab:foreground-focus}
\end{table}
Leveraging our inherent knowledge of the foreground masks when using \name, as well as common XAI techniques~\cite{Selvaraju2016,Chattopadhay2018,Sundararajan2017}, we can evaluate a model's focus on the foreground object.
We can directly evaluate ImageNet trained models, but this technique can also be extended to other datasets without relying on manually annotated foreground-masks.
To evaluate the foreground focus, we employ Grad-CAM \cite{Selvaraju2016}, Grad-CAM++ \cite{Chattopadhay2018} or IntegratedGradients (IG) \cite{Sundararajan2017} to compute the per-pixel importance of an image for the model's prediction.
The foreground focus is defined to be the ratio of the foreground's relative importance to its relative size in the image:
\begin{align}
\text{FG Focus}(\text{img}) = \frac{\text{Area}(\text{img}) \hspace{3pt} \text{Importance}(\text{fg})}{\text{Area}(\text{fg}) \hspace{3pt} \text{Importance}(\text{img})}
\end{align}
The foreground focus of a model is its average foreground focus over all test images.
\Cref{tab:foreground-focus} presents our findings.
Training on \name significantly increases the foreground focus of ViT and ResNet across all metrics used.
For Swin, the foreground focus stagnates when measured using GradCam and GradCam++, but almost doubles when using IG.
\paragraph*{Center Bias}
\begin{table}
\centering
\resizebox{\columnwidth}{!}{
\begin{tabular}{lccc}
\toprule
\multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Center Bias when trained on}} & \multirow{2.5}{*}{Delta} \\
\cmidrule(lr){2-3}
& ImageNet & \name & \\
\midrule
ViT-S & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNetAll_v3.pdf}} \\
& $0.255\pm0.008$ & $0.220\pm0.003$ & \grntxt{-0.035} \\
ViT-B & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNetAll_v3.pdf}} \\
& $0.254\pm0.004$ & $0.190\pm0.002$ & \grntxt{-0.064} \\
ViT-L & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNetAll_v3.pdf}} \\
& $0.243\pm0.011$ & $0.117\pm0.007$ & \grntxt{-0.126} \\
\midrule
Swin-Ti & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNetAll_v3.pdf}} \\
& $0.250\pm0.007$ & $0.165\pm0.002$ & \grntxt{-0.085} \\
Swin-S & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNetAll_v3.pdf}} \\
 & $0.232\pm0.001$ & $0.156\pm0.002$ & \grntxt{-0.076} \\
\midrule
ResNet50 & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNetAll_v3.pdf}} \\
& $0.263\pm0.003$ & $0.197\pm0.003$ & \grntxt{-0.066} \\
ResNet101 & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNetAll_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNetAll_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNetAll_v3.pdf}} \\
 & $0.230\pm0.003$ & $0.199\pm0.002$ & \grntxt{-0.031} \\
\bottomrule
\end{tabular} }
\includegraphics[width=.75\columnwidth]{img/colorbar_horizontal.pdf}
\caption{Evaluation of the position bias. We plot the accuracy relative to the center accuracy of multiple instantiations of the models when the foreground object is in different cells of a $3 \times 3$ grid.
Training on \name significantly reduces a model's center bias.}
\label{tab:center-bias}
\end{table}
With \name we have unique control over the position of the foreground object in the image.
This lets us quantify the center bias of ImageNet- and \name-trained models.
We divide the image into a $3 \times 3$ grid and evaluate model accuracy when the foreground object is in each of the $9$ grid cells.
Each cell's accuracy is divided by the accuracy in the center cell for normalization, which gives us the relative performance drop when the foreground is in each part of the image.
The center bias is calculated as one minus the average of the minimum performance of a corner cell and the minimum performance of a side cell:
\begin{align}
\begin{split}
& \text{Center Bias} = \\
& \hspace{7pt} 1 - \frac{\min\limits_{a, b \in \{0, 2\}} \text{Acc}(\text{cell}_{(a, b)}) + \min\limits_{\substack{a=1 \text{ or } b=1 \\ a \neq b}} \text{Acc}(\text{cell}_{(a, b)})}{2 \text{Acc}(\text{cell}_{(1, 1)})}
\end{split}
\end{align}
\Cref{tab:center-bias} visualizes the center bias of three instantiations of each model.
Performance is generally highest in the center cell, followed by the center-top, center-bottom, center-left, and center-right cells, and lowest in the four corners.
Interestingly, ImageNet-trained models perform slightly better when the foreground object is on the right side of the image, compared to the left side, despite our use of random flipping with a probability of $0.5$ during training.
Training on \name significantly reduces center bias across all models.
This demonstrates that \name promotes a more uniform spatial attention distribution.
The accuracy of \name-trained models is higher in the center-left and center-right cells than in the center-top and center-bottom ones, which is not the case for ImageNet-trained models.
\paragraph*{Size Bias}
\begin{figure}
\centering
\includegraphics[width=.9\columnwidth]{img/size_bias.pdf}
\caption{Evaluation of the size bias of models trained on \name. We plot the accuracy relative to the accuracy when using the mean foreground size.}
\label{fig:size-bias}
\end{figure}
Finally, we evaluate the impact of different-sized foreground objects on the accuracy.
For this evaluation, we use the \emph{mean} foreground size strategy.
We introduce a size factor $f_\text{size}$ by which we additionally scale the foreground object before pasting it onto the background.
Results are again normalized by the accuracy when using the mean foreground size ($f_\text{size} = 1.0$).
\Cref{fig:size-bias} shows the size bias curves of ViT-S and ViT-B when trained on ImageNet and \name.
Models trained on \name maintain better performance even with smaller foreground objects, while ImageNet-trained models exhibit a more rapid performance decline.
Therefore, \name-training improves robustness to variations in object scale.

View File

@@ -0,0 +1,46 @@
\section{Introduction}
\label{sec:intro}
\begin{figure}
\centering
\includegraphics[width=\columnwidth]{img/fig-1.pdf}
\caption{Comparison of \name and ImageNet. \name recombines foreground objects with different backgrounds each epoch, thus creating a more diverse training set. We still apply traditional data augmentation afterwards.}
\label{fig:fig-1}
\end{figure}
Image classification, a fundamental task in computer vision (CV), involves assigning a label to an image from a predefined set of categories.
This seemingly simple task underpins a wide range of applications, including medical diagnosis~\cite{Sanderson2022,Vezakis2024}, autonomous driving~\cite{Wang2022b}, and object recognition~\cite{Carion2020,He2017,Girshick2013}.
Furthermore, image classification is used for large-scale pretraining of vision models~\cite{Dosovitskiy2021,Liu2021,Touvron2021b} and to judge the progress of the field of CV \cite{Khan2022, Rangel2024}.
The advent of large-scale datasets, particularly ImageNet \cite{Deng2009}, containing millions of labeled images across thousands of categories, has been instrumental in driving significant progress in this field.
ImageNet served as a catalyst for the rise of large-scale CV models~\cite{Krizhevsky2012, He2016} and remains the most important CV benchmark for more than a decade \cite{Krizhevsky2012,Touvron2022, Wortsman2022, He2016}.
While traditionally, convolutional neural networks (CNNs) have been the go-to architecture for image classification, Transformers \cite{Vaswani2017}, particularly the Vision Transformer (ViT) \cite{Dosovitskiy2021}, have emerged as a powerful alternative.
These attention-based models have demonstrated superior performance in various vision tasks, including image classification \cite{Wortsman2022,Yu2022,Carion2020,Zong2022,Wang2022a}.
Data augmentation is a key technique for training image classification models.
Traditional data augmentation methods, such as random cropping, flipping, and color jittering, are commonly employed to increase the diversity of the training data and improve the model's performance~\cite{Xu2023d, Shorten2019}.
These basic transformations, originally designed for CNNs, change the input images in a way that preserves their semantic meaning~\cite{Alomar2023}.
However, the architectural differences of CNNs and Transformers suggest that the latter might benefit from different data augmentation strategies.
In particular, the Transformer's self-attention mechanism is not translation equivariant~\cite{RojasGomez2023,Ding2023a}, meaning that the model does not inherently understand the spatial relationships between pixels.
Inspired by this inductive bias of CNNs, that is not inherent to ViTs, we propose \schemename, a novel data augmentation scheme for image classification which makes the translation equivariance of CNNs explicit in the training data by recombining foreground objects at varying positions with different backgrounds.
Applying \schemename to ImageNet gives rise to \name, a novel dataset that enables this data augmentation with fine-grained control over the image composition.
Recognizing that Transformers need to learn the spatial relationships from data, since they are not inherently translation invariant, and in general are usually trained on larger datasets~\cite{Kolesnikov2020}, we separate the foreground objects in ImageNet from their backgrounds, using an open-world object detector~\cite{Ren2024}, and fill in the background in a plausible way using an object removal model~\cite{Sun2024,Suvorov2021}.
This allows us to recombine any foreground object with any background on the fly, creating a highly diverse training set.
During recombination, we can control important parameters, like the size and position of the foreground object, to help the model learn the spatial invariances necessary for image classification.
We show that training on \name instead of ImageNet increases the model accuracy of Transformers by up to 4.5 p.p. on ImageNet and yields an up to $39.3\%$ reduction in error rate on downstream tasks.
Additionally, \schemename is a useful tool for analyzing model behavior and biases, when used during the evaluation phase.
We utilize our control over the image distribution to quantify a model's background robustness (by varying the choice of background), foreground focus (by leveraging our knowledge about the placement of the foreground object), center bias (by controlling the object's position), and size bias (by controlling object size).
These analyses provide insights into model behavior and biases, which is crucial for model deployment and future robustness optimizations.
We show that training on \name, instead of ImageNet, significantly reduces all of these biases, completely removing the models' dependence on the background distribution.
We make our code for \schemename and the \name-dataset publicly available\footnote{\url{https://github.com/tobna/ForAug}} to facilitate further research.
\subsection*{Contributions}
\begin{itemize}
\item We propose \schemename, a novel data augmentation scheme, that recombines objects and backgrounds to train Transformers for image classification.
\item We show that training on \name, the ImageNet instantiation of \schemename, leads to 4.5 p.p. improved accuracy on ImageNet and 7.3 p.p. on downstream tasks.
\item We propose novel \schemename-based metrics to analyze and quantify fine-grained biases of trained models: Background Robustness, Foreground Focus, Center Bias, and Size Bias. Training on \name, instead of ImageNet, significantly reduces these biases.
\end{itemize}

View File

@@ -0,0 +1,82 @@
\section{RecombiNet (Method)}
\label{sec:method}
\begin{figure*}
\centering
\includegraphics[width=\textwidth]{img/fig-2.pdf}
\caption{Overview of \name. The data creation consists of two stages: (1, offline) Segmentation, where we segment the foreground objects from the background and fill in the background. (2, online) Recombination, where we combine the foreground objects with different backgrounds to create new samples.}
\label{fig:method}
\end{figure*}
We introduce \schemename, a data augmentation scheme designed to enhance Transformer training by explicitly separating and recombining foreground objects and backgrounds.
\schemename involves two stages: Segmentation and Recombination, both visualized in \Cref{fig:method}.
\subsubsection*{Segmentation}
The segmentation stage isolates the foreground objects and their corresponding backgrounds.
We then fill in the background in a visually plausible way~\cite{Sun2024} using a pretrained object-removal model.
This stage is computed once offline and the results are stored for the recombination stage.
First, foreground objects are detected and segmented from their backgrounds using a prompt-based segmentation model to exploit the classification datasets labels.
We use the state-of-the-art Grounded SAM~\cite{Ren2024}, which is based on Grounding DINO~\cite{Liu2023e} and SAM~\cite{Kirillov2023}.
The prompt we use is ``\code{a <class name>, a type of <object category>}'', where \code{<class name>} is the specific name of the object's class as defined by the dataset and \code{<object category>} is the broader category of the object.
The \code{<object category>} guides the segmentation model towards the correct object in case the \code{<class name>} alone is too specific.
This can be the case with prompts like ``sorrel'' or ``guenon'', where the more general name ``horse'' or ``monkey'' is more helpful.
We derive the \code{<object category>} from the WordNet hierarchy, using the immediate hypernym.
We iteratively extract up to $n$ foreground masks for each dataset-image, using increasingly general prompts based on the more general synsets of WordNet (e.g. ``a sorrel, a type of horse'', ``a horse, a type of equine'', ...).
Masks that are very similar, with a pairwise IoU of at least $0.9$, are merged.
The output is a set of masks delineating the foreground objects and the backgrounds.
We select the best mask per image (according to \Cref{eq:filtering-score}) in a later filtering step, described below.
An inpainting model that is specifically optimized to remove objects from images, such as LaMa~\cite{Suvorov2021} or Attentive Eraser~\cite{Sun2024}, is used to inpaint the foreground regions in the backgrounds.
To ensure the quality of the foreground and background images (for each dataset-image), we select a foreground/background pair from the $\leq n$ variants we have extracted and infilled in the previous steps.
Using an ensemble of six ViT, ResNet, and Swin Transformer models pretrained on the original dataset, we select the foreground/background pair that maximizes foreground performance while minimizing the performance on the background and size of the foreground according to:
\begin{align} \begin{split} \label{eq:filtering-score}
\text{score}(\mathrm{fg}, \mathrm{bg}, c) &= \log \left( \frac{1}{\abs{E}} \sum_{m \in E} \P[m(\mathrm{fg}) = c] \right) \\
& + \log \left( 1 - \frac{1}{\abs E} \sum_{m \in E} \P[m(\mathrm{bg}) = c] \right) \\
& + \lambda \log \left( 1 - \abs{\frac{\operatorname{size}(\mathrm{fg})}{\operatorname{size}(\mathrm{bg})} - \eps} \right).
\end{split} \end{align}
Here, $E$ is the ensemble of models and $m$ is a pretrained model, $c$ is the correct foreground class, $\mathrm{fg}$, and $\mathrm{bg}$ are the foreground and background and $\operatorname{size}(\cdot)$ is the size in number of pixels.
We ran a hyperparameter search using a manually annotated subset of foreground/background variants to find the factors in \Cref{eq:filtering-score}: $\lambda = 2$ and $\eps = 0.1$.
The \textit{optimal foreground size} of $10\%$ of the full image balances the smallest possible foreground size that encompasses all the respective class information in the image with still conveying the foreground information after pasting it onto another background.
This filtering step ensures we segment all the relevant foreground objects.
Finally, we filter out backgrounds that are more than $80\%$ infilled, as these tend to be overly synthetic, plain and don't carry much information (see \Cref{sec:high-infill-ratio}).
We ablate this choice in \Cref{sec:ablation}.
In summary, we factorize the dataset into a set of foreground objects with a transparent background and a set of diverse backgrounds per class.
The next step is to recombine them as data augmentation before applying common data augmentation operations during training.
\subsubsection*{Recombination}
The recombination stage, which is performed online, combines the foreground objects with different backgrounds to create new training samples.
For each object, we follow the pipeline of: pick an appropriate background, resize the foreground to a fitting size, place it in the background image, smooth the transition edge, and apply other data augmentations.
For each foreground object, we sample a background using one of the following strategies:
(1) the original image background, (2) the set of backgrounds from the same class, or (3) the set of all possible backgrounds.
These sets are trading off the amount of information the model can learn from the background against the diversity of new images created.
In each epoch, each foreground object is seen exactly once, but a background may appear multiple times.
The selected foreground is resized based on its relative size within its original image and the relative size of the original foreground in the selected background image.
The final size is randomly selected from a 30\% range around upper and lower limits ($s_u$ and $s_l$), based on the original sizes:
\begin{align}
s \sim \mathcal U \left[ (1 - 0.3) s_l, (1 + 0.3) s_u \right].
\end{align}
To balance the size of the foreground and that of the background's original foreground, the upper and lower limits $s_u$ and $s_l$ are set to the mean or range of both sizes, depending on the foreground size strategy: \emph{mean} or \emph{range}.
The resized foreground is then placed at a random position within the background image.
This position is sampled from a generalization of the Bates distribution~\cite{Bates1955} with parameter $\eta \in \N$, visualized in \Cref{fig:bates-pdf}.
We choose the Bates distribution, as it presents an easy way to sample from a bounded domain with just one hyperparameter that controls the concentration of the distribution.
$\eta = 1$ corresponds to the uniform distribution; $\eta > 1$ concentrates the distribution around the center; and for $\eta < -1$, the distribution is concentrated at the borders.
To more seamlessly integrate the foreground, we apply a Gaussian blur with ${\sigma \in [\frac{\sigma_{\text{max}}}{10}, \sigma_{\text{max}}]}$, inspired by the standard range for the Gaussian blur operation in \cite{Touvron2022}, to the foreground's alpha-mask.
We can apply standard data augmentation techniques in two modes:
Either we apply all augmentations to the recombined image, or we apply the cropping and resizing to the background only and then apply the other augmentations after recombination.
The second mode ensures the foreground object remains fully visible, while the first mode mirrors standard data augmentation practices.
We experiment with a constant mixing ratio, or a linear or cosine annealing schedule that increases the amount of images from the original dataset over time.
The mixing ratio acts as a probability of selecting an image from the original dataset;
otherwise, an image with the same foreground is recombined using \schemename.
Thus, we still ensure each foreground is seen once per epoch.

View File

@@ -0,0 +1,35 @@
\section{Related Work}
\label{sec:related_work}
\paragraph{Data Augmentation for Image Classification}
Data augmentation is a crucial technique for improving the performance and generalization of image classification models.
Traditional augmentation strategies rely on simple geometric or color-space transformations like cropping, flipping, rotation, blurring, color jittering, or random erasing \cite{Zhong2017} to increase the diversity of the training data without changing their semantic meaning.
With the advent of Transformers, new data augmentation operations like PatchDropout \cite{Liu2022d} have been proposed.
Other transformations like Mixup \cite{Zhang2018a}, CutMix \cite{Yun2019}, or random cropping and patching \cite{Takahashi2018} combine multiple input images.
These simple transformations are usually bundled to form more complex augmentation policies like AutoAugment \cite{Cubuk2018} and RandAugment \cite{Cubuk2019}, which automatically search for optimal augmentation policies or 3-augment \cite{Touvron2022} which is optimized to train a ViT.
For a general overview of data augmentation techniques for image classification, we refer to \cite{Shorten2019, Xu2023d}.
We build upon these general augmentation techniques by introducing a novel approach to explicitly separate and recombine foregrounds and backgrounds for image classification.
Our approach is used in tandem with traditional data augmentation techniques to improve model performance and reduce biases.
\paragraph{Copy-Paste Augmentation}
The copy-paste augmentation \cite{Ghiasi2020}, which is used for object detection \cite{Shermaine2025,Ghiasi2020} and instance segmentation \cite{Werman2021,Ling2022}, involves copying segmented objects from one image and pasting them onto another.
While typically human-annotated segmentation masks are used to extract the foreground objects, other foreground sources have been explored, like 3D models \cite{Hinterstoisser2019} and pretrained object-detection models for use on objects on white background \cite{Dwibedi2017} or synthetic images \cite{Ge2023}.
DeePaste \cite{Werman2021} focuses on using inpainting for a more seamless integration of the pasted object.
Unlike these methods, \name focuses on image classification.
While for detection and segmentation, objects are pasted onto another image (with a different foreground) or on available or rendered background images of the target scene, we extract foreground objects and fill in the resulting holes in the background in a semantically neutral way.
This way, we can recombine any foreground object with a large variety of neutral backgrounds from natural images, enabling a controlled and diverse manipulation of image composition.
\paragraph{Model robustness evaluation}
Evaluating model robustness to various image variations is critical for understanding and improving model generalization.
Datasets like ImageNet-C \cite{Hendrycks2019} and ImageNet-P \cite{Hendrycks2019} introduce common corruptions and perturbations.
ImageNet-E \cite{Li2023e} evaluates model robustness against a collection of distribution shifts.
Other datasets, such as ImageNet-D \cite{Zhang2024f}, focus on varying background, texture, and material, but rely on synthetic data.
Stylized ImageNet \cite{Geirhos2018} investigates the impact of texture changes.
ImageNet-9 \cite{Xiao2020} explores background variations using segmented images, but the backgrounds are often artificial.
In contrast to these existing datasets, which are used only for evaluation, \name provides fine-grained control over foreground object placement, size, and background selection, enabling a precise and comprehensive analysis of specific model biases within the context of a large-scale, real-world image distribution.
As \name also provides controllable training set generation, it goes beyond simply measuring robustness to actively improving it through training.

BIN
arxiv_v2_arXiv/arxiv.zip Normal file

Binary file not shown.

508
arxiv_v2_arXiv/cvpr.sty Normal file
View File

@@ -0,0 +1,508 @@
% ---------------------------------------------------------------
%
% No guarantee is given that the format corresponds perfectly to
% IEEE 8.5" x 11" Proceedings, but most features should be ok.
%
% ---------------------------------------------------------------
% with LaTeX2e:
% =============
%
% use as
% \documentclass[times,10pt,twocolumn]{article}
% \usepackage[options]{cvpr}
% \usepackage{times}
%
% "options" should be replaced by
% * "review" for submitting a paper for review,
% * "final" for the camera ready, and
% * "rebuttal" for the author rebuttal.
%
% specify references as
% {\small
% \bibliographystyle{ieee}
% \bibliography{...your files...}
% }
% ---------------------------------------------------------------
\NeedsTeXFormat{LaTeX2e}[1999/12/01]
\ProvidesPackage{cvpr}[2026 LaTeX class for IEEE CVPR]
% --- Packages required by the style ----------------------------
\RequirePackage{times} % Integrate Times for here
\RequirePackage{xspace}
\RequirePackage[dvipsnames]{xcolor}
\RequirePackage{graphicx}
\RequirePackage{amsmath}
\RequirePackage{amssymb}
\RequirePackage{booktabs}
\RequirePackage[numbers,sort&compress]{natbib}
\setlength{\bibsep}{1pt plus 1pt minus 1pt}
\RequirePackage{silence} % Suppress unwanted warnings
% Raise badness/fuzz thresholds so routine box warnings are muted.
\hbadness=10000 \vbadness=10000 \vfuzz=30pt \hfuzz=30pt
\WarningFilter{latexfont}{Font shape declaration}
\WarningFilter{latex}{Font shape}
\WarningFilter[rebuttal]{latex}{No \author given}
\RequirePackage{etoolbox}
% Use modern caption package to allow for sub-figures etc.
% Reproduces the original CVPR/ICCV style as closely as possible.
\RequirePackage[format=plain,labelformat=simple,labelsep=period,font=small,compatibility=false]{caption}
\RequirePackage[font=footnotesize,skip=3pt,subrefformat=parens]{subcaption}
% --- Mode toggles (etoolbox); defaults: camera-ready, no rebuttal,
% --- no page numbers.
\newtoggle{cvprfinal} % Camera-ready version
\newtoggle{cvprrebuttal} % Rebuttal
\newtoggle{cvprpagenumbers} % Force page numbers (in camera ready)
\toggletrue{cvprfinal}
\togglefalse{cvprrebuttal}
\togglefalse{cvprpagenumbers}
% Package options: `review' disables final mode and enables page
% numbers; `rebuttal' disables final mode; `pagenumbers' forces them.
\DeclareOption{review}{\togglefalse{cvprfinal}\toggletrue{cvprpagenumbers}}
\DeclareOption{rebuttal}{\togglefalse{cvprfinal}\toggletrue{cvprrebuttal}}
\DeclareOption{pagenumbers}{\toggletrue{cvprpagenumbers}}
% Fix: warning text previously misspelled "Unkown".
\DeclareOption*{\PackageWarning{cvpr}{Unknown option `\CurrentOption'}}
\ProcessOptions\relax
% Don't warn about missing author for rebuttal
\iftoggle{cvprrebuttal}{%
\ActivateWarningFilters[rebuttal]
}{}
% Breaking lines for URLs in the bib
\RequirePackage[hyphens]{url}
\Urlmuskip=0mu plus 1mu\relax
% ---------------------------------------------------------------
% Inlined version of the obsolete "everyshi-2001-05-15" package.
% Provides \EveryShipout (permanent hook) and \AtNextShipout
% (one-shot hook) by wrapping TeX's \shipout primitive.
% Accumulated code run at every page shipout.
\newcommand{\@EveryShipout@Hook}{}
% One-shot code, cleared after the next shipout.
\newcommand{\@EveryShipout@AtNextHook}{}
% Public: append #1 to the permanent shipout hook.
\newcommand*{\EveryShipout}[1]
{\g@addto@macro\@EveryShipout@Hook{#1}}
% Public: append #1 to the one-shot hook for the next shipout only.
\newcommand*{\AtNextShipout}[1]
{\g@addto@macro\@EveryShipout@AtNextHook{#1}}
% Replacement for \shipout: capture the page into \box255 (\@cclv)
% and schedule \@EveryShipout@Test via \afterassignment.
\newcommand{\@EveryShipout@Shipout}{%
\afterassignment\@EveryShipout@Test
\global\setbox\@cclv= %
}
% If \box255 is void the assignment was a box-register transfer still
% in progress, so defer output to the end of the group; otherwise
% emit the page immediately.
\newcommand{\@EveryShipout@Test}{%
\ifvoid\@cclv\relax
\aftergroup\@EveryShipout@Output
\else
\@EveryShipout@Output
\fi%
}
% Run both hooks, clear the one-shot hook, then do the real shipout.
\newcommand{\@EveryShipout@Output}{%
\@EveryShipout@Hook%
\@EveryShipout@AtNextHook%
\gdef\@EveryShipout@AtNextHook{}%
\@EveryShipout@Org@Shipout\box\@cclv%
}
% Holds the saved original \shipout (installed by \@EveryShipout@Init).
\newcommand{\@EveryShipout@Org@Shipout}{}
% Install the wrapper: save the primitive and redirect \shipout.
\newcommand*{\@EveryShipout@Init}{%
\message{ABD: EveryShipout initializing macros}%
\let\@EveryShipout@Org@Shipout\shipout
\let\shipout\@EveryShipout@Shipout
}
\AtBeginDocument{\@EveryShipout@Init}
% ---------------------------------------------------------------
% ---------------------------------------------------------------
% Inlined simplified version of the "eso-pic" package.
% Adds picture-mode material (e.g. the review ruler) to every page
% via the \EveryShipout hook defined above.
% Strip the unit from a length so it can be used as a picture coordinate.
\newcommand\LenToUnit[1]{#1\@gobble}
% Place #1 with the upper-left corner of the paper as origin.
\newcommand\AtPageUpperLeft[1]{%
\begingroup
\@tempdima=0pt\relax\@tempdimb=\ESO@yoffsetI\relax
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
\endgroup
}
% Place #1 at the lower-left corner / center of the paper.
\newcommand\AtPageLowerLeft[1]{\AtPageUpperLeft{%
\put(0,\LenToUnit{-\paperheight}){#1}}}
\newcommand\AtPageCenter[1]{\AtPageUpperLeft{%
\put(\LenToUnit{.5\paperwidth},\LenToUnit{-.5\paperheight}){#1}}%
}
% Place #1 relative to the text area; accounts for odd/even margins
% and the header geometry.
\newcommand\AtTextUpperLeft[1]{%
\begingroup
\setlength\@tempdima{1in}%
\ifodd\c@page%
\advance\@tempdima\oddsidemargin%
\else%
\advance\@tempdima\evensidemargin%
\fi%
\@tempdimb=\ESO@yoffsetI\relax\advance\@tempdimb-1in\relax%
\advance\@tempdimb-\topmargin%
\advance\@tempdimb-\headheight\advance\@tempdimb-\headsep%
\put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}%
\endgroup
}
\newcommand\AtTextLowerLeft[1]{\AtTextUpperLeft{%
\put(0,\LenToUnit{-\textheight}){#1}}}
\newcommand\AtTextCenter[1]{\AtTextUpperLeft{%
\put(\LenToUnit{.5\textwidth},\LenToUnit{-.5\textheight}){#1}}}
% HookI: persistent picture material; HookII: one-shot (starred form);
% HookIII: reserved (unused here).
\newcommand{\ESO@HookI}{} \newcommand{\ESO@HookII}{}
\newcommand{\ESO@HookIII}{}
% \AddToShipoutPicture adds persistently; starred form only for the
% next page.
\newcommand{\AddToShipoutPicture}{%
\@ifstar{\g@addto@macro\ESO@HookII}{\g@addto@macro\ESO@HookI}}
\newcommand{\ClearShipoutPicture}{\global\let\ESO@HookI\@empty}
% No-op unless the memoir class is loaded (then expands its argument).
\newcommand\ESO@isMEMOIR[1]{}
\@ifclassloaded{memoir}{\renewcommand\ESO@isMEMOIR[1]{#1}}{}
% Shipout callback: if any hook is non-empty, wrap the page box in a
% picture environment carrying the hook material, then re-box it.
\newcommand{\@ShipoutPicture}{%
\bgroup
\@tempswafalse%
\ifx\ESO@HookI\@empty\else\@tempswatrue\fi%
\ifx\ESO@HookII\@empty\else\@tempswatrue\fi%
\ifx\ESO@HookIII\@empty\else\@tempswatrue\fi%
\if@tempswa%
\@tempdima=1in\@tempdimb=-\@tempdima%
\advance\@tempdimb\ESO@yoffsetI%
\ESO@isMEMOIR{%
\advance\@tempdima\trimedge%
\advance\@tempdima\paperwidth%
\advance\@tempdima-\stockwidth%
\if@twoside\ifodd\c@page\else%
\advance\@tempdima-2\trimedge%
\advance\@tempdima-\paperwidth%
\advance\@tempdima\stockwidth%
\fi\fi%
\advance\@tempdimb\trimtop}%
\unitlength=1pt%
\global\setbox\@cclv\vbox{%
\vbox{\let\protect\relax
\pictur@(0,0)(\strip@pt\@tempdima,\strip@pt\@tempdimb)%
\ESO@HookIII\ESO@HookI\ESO@HookII%
\global\let\ESO@HookII\@empty%
\endpicture}%
\nointerlineskip%
\box\@cclv}%
\fi
\egroup
}
\EveryShipout{\@ShipoutPicture}
\RequirePackage{keyval}
% Driver/coordinate flags: dvips output? TeX-style (top-down) coords?
\newif\ifESO@dvips\ESO@dvipsfalse
\newif\ifESO@texcoord\ESO@texcoordfalse
% Detect the color package and the graphics driver at \begin{document};
% a "dvips" driver name or non-PDF output sets \ifESO@dvips.
\AtBeginDocument{%
\IfFileExists{color.sty}
{%
\RequirePackage{color}
\let\ESO@color=\color\let\ESO@colorbox=\colorbox
\let\ESO@fcolorbox=\fcolorbox
}{}
\@ifundefined{Gin@driver}{}%
{%
\ifx\Gin@driver\@empty\else%
\filename@parse{\Gin@driver}\def\reserved@a{dvips}%
\ifx\filename@base\reserved@a\ESO@dvipstrue\fi%
\fi
}%
\ifx\pdfoutput\undefined\else
\ifx\pdfoutput\relax\else
\ifcase\pdfoutput\else
\ESO@dvipsfalse%
\fi
\fi
\fi
}
% Vertical origin offsets, depending on the coordinate convention.
\ifESO@texcoord
\def\ESO@yoffsetI{0pt}\def\ESO@yoffsetII{-\paperheight}
\else
\def\ESO@yoffsetI{\paperheight}\def\ESO@yoffsetII{0pt}
\fi
% ---------------------------------------------------------------
\typeout{CVPR 8.5 x 11-Inch Proceedings Style `cvpr.sty'.}
% ten point helvetica bold required for captions
% eleven point times bold required for second-order headings
% in some sites the name of the fonts may differ,
% change the name here:
\font\cvprtenhv = phvb at 8pt % *** IF THIS FAILS, SEE cvpr.sty ***
\font\elvbf = ptmb scaled 1100
\font\tenbf = ptmb scaled 1000
% If the above lines give an error message, try to comment them and
% uncomment these:
%\font\cvprtenhv = phvb7t at 8pt
%\font\elvbf = ptmb7t scaled 1100
%\font\tenbf = ptmb7t scaled 1000
% set dimensions of columns, gap between columns, and paragraph indent
% (values are the fixed IEEE two-column letter-paper geometry).
\setlength{\textheight}{8.875in}
\setlength{\textwidth}{6.875in}
\setlength{\columnsep}{0.3125in}
\setlength{\topmargin}{0in}
\setlength{\headheight}{0in}
\setlength{\headsep}{0in}
\setlength{\parindent}{1pc}
\setlength{\oddsidemargin}{-0.1875in}
\setlength{\evensidemargin}{-0.1875in}
% Suppress page numbers when the appropriate option is given
% (pagestyle empty unless the cvprpagenumbers toggle is set).
\iftoggle{cvprpagenumbers}{}{%
\pagestyle{empty}
}
% Sanity checks, deferred to \begin{document} so the class and all
% packages are known: require the `article' class, warn about missing
% 10pt/twocolumn/letterpaper options, and recommend hyperref
% (with `pagebackref' in review mode).
\AtBeginDocument{%
% Print an error if document class other than article is used
\@ifclassloaded{article}{}{%
\PackageError{cvpr}{Package only meant to be used with document class `article'}{Change document class to `article'.}
}
% Print a warning if incorrect options for article are specified
\@ifclasswith{article}{10pt}{}{%
\PackageWarningNoLine{cvpr}{Incorrect font size specified - CVPR requires 10-point fonts. Please load document class `article' with `10pt' option}
}
\@ifclasswith{article}{twocolumn}{}{%
\PackageWarningNoLine{cvpr}{Single column document - CVPR requires papers to have two-column layout. Please load document class `article' with `twocolumn' option}
}
\@ifclasswith{article}{letterpaper}{}{%
\PackageWarningNoLine{cvpr}{Incorrect paper size - CVPR uses paper size `letter'. Please load document class `article' with `letterpaper' option}
}
% Print a warning if hyperref is not loaded and/or if the pagebackref option is missing
\iftoggle{cvprfinal}{%
\@ifpackageloaded{hyperref}{}{%
\PackageWarningNoLine{cvpr}{Package `hyperref' is not loaded, but highly recommended for camera-ready version}
}
}{%
\@ifpackageloaded{hyperref}{
\@ifpackagewith{hyperref}{pagebackref}{}{
\PackageWarningNoLine{cvpr}{Package `hyperref' is not loaded with option `pagebackref', which is strongly recommended for review version}
}
}{%
\PackageWarningNoLine{cvpr}{Package `hyperref' is not loaded, but strongly recommended for review version}
}
}
}
% Title block. Regular papers get a \Large bold title followed by either the
% real author list (camera-ready) or an anonymized "Paper ID" line (review);
% rebuttals get a compressed \large title and no author block at all.
\def\@maketitle{
\newpage
\null
% rebuttal: pull the title upward; paper: standard .375in drop
\iftoggle{cvprrebuttal}{\vspace*{-.3in}}{\vskip .375in}
\begin{center}
% smaller title font only for rebuttal
\iftoggle{cvprrebuttal}{{\large \bf \@title \par}}{{\Large \bf \@title \par}}
% additional two empty lines at the end of the title
\iftoggle{cvprrebuttal}{\vspace*{-22pt}}{\vspace*{24pt}}{
\large
\lineskip .5em
\begin{tabular}[t]{c}
% camera-ready: real author list; review: anonymized submission line;
% rebuttal: nothing
\iftoggle{cvprfinal}{
\@author
}{
\iftoggle{cvprrebuttal}{}{
Anonymous \confName~submission\\
\vspace*{1pt}\\
Paper ID \paperID
}
}
\end{tabular}
\par
}
% additional small space at the end of the author name
\vskip .5em
% additional empty line at the end of the title block
\vspace*{12pt}
\end{center}
}
% Abstract environment: centered bold "Abstract" heading, body set in
% italics (the \it stays in effect for the environment's group; \endabstract
% only adds trailing space).
\def\abstract{%
% Suppress page numbers when the appropriate option is given
\iftoggle{cvprpagenumbers}{}{%
\thispagestyle{empty}
}
\centerline{\large\bf Abstract}%
\vspace*{12pt}\noindent%
\it\ignorespaces%
}
\def\endabstract{%
% additional empty line at the end of the abstract
\vspace*{12pt}
}
% \affiliation{...} globally stores the author affiliation in \@affiliation
% (initialized empty); this style does not typeset it itself.
\def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{}
% correct heading spacing and type
% CVPR headings are numbered "1. Title": the numbered variants inject ".~"
% right after the section number (the \hskip -1em pulls it against the
% number), and \texorpdfstring keeps that markup out of hyperref's PDF
% bookmarks. \section / \subsection / \subsubsection dispatch on * between
% the unnumbered (ss) and numbered variants.
\def\cvprsection{\@startsection {section}{1}{\z@}
{-10pt plus -2pt minus -2pt}{7pt} {\large\bf}}
\def\cvprssect#1{\cvprsection*{#1}}
\def\cvprsect#1{\cvprsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\section{\@ifstar\cvprssect\cvprsect}
\def\cvprsubsection{\@startsection {subsection}{2}{\z@}
{-8pt plus -2pt minus -2pt}{5pt} {\elvbf}}
\def\cvprssubsect#1{\cvprsubsection*{#1}}
\def\cvprsubsect#1{\cvprsubsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\subsection{\@ifstar\cvprssubsect\cvprsubsect}
\def\cvprsubsubsection{\@startsection {subsubsection}{3}{\z@}
{-6pt plus -2pt minus -2pt}{3pt} {\tenbf}}
\def\cvprssubsubsect#1{\cvprsubsubsection*{#1}}
\def\cvprsubsubsect#1{\cvprsubsubsection{\texorpdfstring{\hskip -1em.~}{}#1}}
\def\subsubsection{\@ifstar\cvprssubsubsect\cvprsubsubsect}
%% --------- Page background marks: Ruler and confidentiality (only for review and rebuttal)
\iftoggle{cvprfinal}{
% In review and rebuttal mode, we use the "lineno" package for numbering lines.
% When switching to a different mode, the "\@LN" macro may remain in cached .aux files,
% leading to build errors (https://github.com/cvpr-org/author-kit/issues/49).
% Defining the macro as empty fixes that (https://tex.stackexchange.com/a/125779).
\makeatletter
\providecommand{\@LN}[2]{}
\makeatother
}{
% ----- define vruler
% Registers used by the margin line-number ruler.
\makeatletter
\newbox\cvprrulerbox
\newcount\cvprrulercount
\newdimen\cvprruleroffset
\newdimen\cv@lineheight
\newdimen\cv@boxheight
\newbox\cv@tmpbox
\newcount\cv@refno
\newcount\cv@tot
% NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
% (the first loop counts the digits of <NUMBER>, the second emits padding
% zeros up to <WIDTH> columns; a minus sign is reproduced and counted as
% one column)
\newcount\cv@tmpc@ \newcount\cv@tmpc
\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
\cv@tmpc=1 %
\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
\ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
\ifnum#2<0\advance\cv@tmpc1\relax-\fi
\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
\makeatother
% ----- end of vruler
% NOTE(review): despite the "define vruler" banner, \makevruler (used by
% \cvprruler below) is not defined in this part of the file -- presumably
% it is provided elsewhere; confirm before relying on \cvprruler.
%% Define linenumber setup
\RequirePackage[switch,mathlines]{lineno}
% Line numbers in CVPR blue using font from \cvprtenhv
\renewcommand\linenumberfont{\cvprtenhv\color[rgb]{.5,.5,1}}
% three-digit zero-padded line numbers, set .75cm into the margin
\renewcommand\thelinenumber{\fillzeros[3]{\arabic{linenumber}}}
\setlength{\linenumbersep}{.75cm}
% Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line.
% Patch amsmath commands so that the previous line and the equation itself
% are numbered. Bug: multline has an extra line number.
% https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align
\RequirePackage{etoolbox} %% <- for \pretocmd, \apptocmd and \patchcmd
% Wrap a display-math environment (and its starred variant) in
% \linenomath ... \endlinenomath so lineno numbers it.
\newcommand*\linenomathpatch[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomath}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomath}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
% Same, but opens with \linenomathAMS (defined below) for the amsmath
% multi-line environments.
\newcommand*\linenomathpatchAMS[1]{%
\expandafter\pretocmd\csname #1\endcsname {\linenomathAMS}{}{}%
\expandafter\pretocmd\csname #1*\endcsname {\linenomathAMS}{}{}%
\expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}%
\expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}%
}
%% Definition of \linenomathAMS depends on whether the mathlines option is provided
\expandafter\ifx\linenomath\linenomathWithnumbers
\let\linenomathAMS\linenomathWithnumbers
%% The following line gets rid of an extra line numbers at the bottom:
\patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{}
\else
\let\linenomathAMS\linenomathNonumbers
\fi
% Add the numbers
\linenumbers
% Patch the amsmath display environments once they are fully defined.
\AtBeginDocument{%
\linenomathpatch{equation}%
\linenomathpatchAMS{gather}%
\linenomathpatchAMS{multline}%
\linenomathpatchAMS{align}%
\linenomathpatchAMS{alignat}%
\linenomathpatchAMS{flalign}%
}
% \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
\def\cvprruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\cvprrulerbox}}
% Stamp every shipped-out page with the paper ID in both top corners and a
% centered confidentiality banner, all in the same CVPR blue.
\AddToShipoutPicture{%
\color[rgb]{.5,.5,1}
\def\pid{\parbox{1in}{\begin{center}\bf\sf{\small \confName}\\\small \#\paperID\end{center}}}
\AtTextUpperLeft{%paperID in corners
\put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid}
\put(\LenToUnit{\textwidth-12pt},\LenToUnit{45pt}){\pid}
}
\AtTextUpperLeft{%confidential
\put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\cvprtenhv
\confName~\confYear~Submission \#\paperID. CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}}
}
}
} % end of not cvprfinal
%%% Float-placement tuning: make figure placement more predictable by
% relaxing LaTeX's float thresholds almost completely. We trust the user to
% move figures if this results in ugliness, and it minimizes bad page
% breaks at figures.
% Pages may be as little as 1% text, and float pages up to 99% floats.
\renewcommand{\floatpagefraction}{0.99}
\renewcommand{\dblfloatpagefraction}{0.99}
\renewcommand{\textfraction}{0.01}
% A column (or, for double-column floats, a page) may be almost entirely
% occupied by top/bottom floats.
\renewcommand{\topfraction}{0.99}
\renewcommand{\dbltopfraction}{0.99}
\renewcommand{\bottomfraction}{0.99}
% Effectively remove the caps on the number of floats per page.
\setcounter{totalnumber}{99}
\setcounter{topnumber}{99}
\setcounter{bottomnumber}{99}
% Add a period to the end of an abbreviation unless there's one
% already, then \xspace.
\makeatletter
% \onedot peeks at the next token with \futurelet: if it is already a
% period, nothing is added; otherwise a period is appended (followed by
% \null so the added period is not treated as end-of-sentence for spacing),
% and \xspace then supplies a space unless punctuation follows.
\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot}
\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace}
% Common Latin abbreviations, set in \emph per CVPR convention.
\def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot}
\def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot}
\def\cf{\emph{cf}\onedot} \def\Cf{\emph{Cf}\onedot}
\def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot}
\def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot}
\def\iid{i.i.d\onedot} \def\wolog{w.l.o.g\onedot}
\def\etal{\emph{et al}\onedot}
\makeatother
% ---------------------------------------------------------------
%% redefine the \title command so that a variable name is saved in \thetitle, and provides the \maketitlesupplementary command
\let\titleold\title
% NOTE(review): \newcommand\thetitle will error if \title is issued twice
% in one document -- presumably intentional (a paper has exactly one title);
% confirm if multi-\title use is ever needed.
\renewcommand{\title}[1]{\titleold{#1}\newcommand{\thetitle}{#1}}
% Typeset the supplementary-material title block: the saved \thetitle plus
% a "Supplementary Material" subtitle, spanning both columns on a new page.
\def\maketitlesupplementary
{
\newpage
\twocolumn[
\centering
\Large
\textbf{\thetitle}\\
\vspace{0.5em}Supplementary Material \\
\vspace{1.0em}
] %< twocolumn
}
% ---------------------------------------------------------------
%% Support for easy cross-referencing (e.g. \cref{sec:intro})
% configured with \AtEndPreamble as it needs to be called after hyperref
\AtEndPreamble{
\usepackage[capitalize]{cleveref}
% abbreviated names for \cref, spelled-out names for \Cref
\crefname{section}{Sec.}{Secs.}
\Crefname{section}{Section}{Sections}
\Crefname{table}{Table}{Tables}
\crefname{table}{Tab.}{Tabs.}
}
% ---------------------------------------------------------------
%% More compact itemize/enumeration (e.g. list contributions)
% enumitem lists with no inter-item space, labels flush with the left
% margin, and no extra space above the list.
\RequirePackage[shortlabels,inline]{enumitem}
\setlist[itemize]{noitemsep,leftmargin=*,topsep=0em}
\setlist[enumerate]{noitemsep,leftmargin=*,topsep=0em}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -0,0 +1,71 @@
%PDF-1.4
%† Ǽ
1 0 obj
<< /Type /Catalog /Pages 2 0 R >>
endobj
8 0 obj
<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R
/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >>
endobj
11 0 obj
<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ]
/Contents 9 0 R /Annots 10 0 R >>
endobj
9 0 obj
<< /Length 12 0 R /Filter /FlateDecode >>
stream
xœµ”MŽA …÷>…OÐc—«ü³¤¬<C2A4>%<25>Ñ4ƒ×GåªN2­ bÃ"rRýâï=»»>}{<~:¼Ã÷Ÿááúëñ'0>ã Ÿ<>ð72<1E>ñ „„+p­Høœµž<7F>
pØeÛÅ—#~ÁïHKD)"Ö´ -êÊaÊ*H¸haõŠ/gäDà_ÿ0=UOKÕ±zš¥ hñbµQ8Õî¥S<>Š´´nÂ¥YçßUÂN9<4E>øÜK"7ÓP²#šy³ÞÑU¨µžªIaò<>~[;á@¾ÞC27ÓR²-”Ì«p¦aiÆFpŠfR*ó€ßQÂN9f»Þ
ÍYϧ<EFBFBD><EFBFBD>´ÔÑQ¬¯­pS×þõòl¿ÞfÊþnÃgH5jîEZïã-ª¸Z±£RGÈ;JØ)'v½Î¶Iðfd367\Zeêm<C3AA>¸
«÷{5´IXlî(a§ã\oã¾~Ôæ¸ÇŽ3Q‰Ê¹9bwñàÜq­áE#æŽßVÂN9±ëmØ<6D>½øØŒý¯<ñ—¡__5'58ÁGøÀ<>÷“
endstream
endobj
12 0 obj
420
endobj
10 0 obj
[ ]
endobj
3 0 obj
<< >>
endobj
4 0 obj
<< >>
endobj
5 0 obj
<< >>
endobj
6 0 obj
<< >>
endobj
7 0 obj
<< >>
endobj
2 0 obj
<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >>
endobj
13 0 obj
<< /Creator (Matplotlib v3.9.2, https://matplotlib.org)
/Producer (Matplotlib pdf backend v3.9.2)
/CreationDate (D:20250802073347+02'00') >>
endobj
xref
0 14
0000000000 65535 f
0000000016 00000 n
0000000970 00000 n
0000000865 00000 n
0000000886 00000 n
0000000907 00000 n
0000000928 00000 n
0000000949 00000 n
0000000065 00000 n
0000000330 00000 n
0000000845 00000 n
0000000208 00000 n
0000000825 00000 n
0000001030 00000 n
trailer
<< /Size 14 /Root 1 0 R /Info 13 0 R >>
startxref
1187

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More