diff --git a/ForAug-supplementary.pdf b/ForAug-supplementary.pdf new file mode 100644 index 0000000..9a35151 Binary files /dev/null and b/ForAug-supplementary.pdf differ diff --git a/ForAug.pdf b/ForAug.pdf new file mode 100644 index 0000000..62cfdbc Binary files /dev/null and b/ForAug.pdf differ diff --git a/eccv.sty b/eccv.sty deleted file mode 100644 index 55785ca..0000000 --- a/eccv.sty +++ /dev/null @@ -1,255 +0,0 @@ -% --------------------------------------------------------------- -% -% Formatting Package for ECCV Submissions -% -% initially created for ECCV 2024 -% by Stefan Roth -% -% based on previous ECCV templates: -% updated April 2002 by Antje Endemann -% Based on CVPR 07 and LNCS, with modifications by DAF, AZ and elle, 2008 and AA, 2010, and CC, 2011; TT, 2014; AAS, 2016; AAS, 2020; TH, 2022 -% -% and the CVPR templates: -% https://github.com/cvpr-org/author-kit -% -% No guarantee is given that the format corresponds perfectly to -% LNCS Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% use as -% \documentclass[runningheads]{llncs} -% \usepackage[options]{eccv} -% -% "options" include -% * "review" for submitting a paper for review and -% * "final" for the camera ready (default). -% * "mobile" for camera ready on small-screen devices -% * "year=20??" allows to specify the conference year (default current year). -% * "ID=12345" allows to specify the paper ID (default `none'). 
-% -% specify references as -% \bibliographystyle{splncs04} -% \bibliography{...your files...} -% --------------------------------------------------------------- - -\NeedsTeXFormat{LaTeX2e}[1999/12/01] -\ProvidesPackage{eccv}[LaTeX style for ECCV] - - -% --------------------------------------------------------------- -% Suppress unwanted warnings - -\RequirePackage{silence} -\WarningFilter{amsmath}{Unable to redefine math accent \vec} -\WarningFilter{caption}{Unknown document class (or package)} -\RequirePackage{etoolbox} - - -% --------------------------------------------------------------- -% Basic packages - -\RequirePackage[T1]{fontenc} % Required to avoid font issues -\RequirePackage[left,mathlines]{lineno} % Support for line numbers -\RequirePackage[dvipsnames]{xcolor} % Color for line numbers -\RequirePackage{amsmath} % Need AMS packages to bug fix -\RequirePackage{amssymb} % line numbers in equations -\RequirePackage{cite} % Sort citations -\RequirePackage{xspace} - -% Breaking lines for URLs in the bib -\RequirePackage[hyphens]{url} -\Urlmuskip=0mu plus 1mu\relax - -% Color for links and line numbers -\definecolor{eccvblue}{rgb}{0.12,0.49,0.85} - -% --------------------------------------------------------------- -% Use modern caption package to allow for sub-figures etc. -% Reproduces the original LNCS style as closely as possible. 
- -\RequirePackage[labelfont=bf,font=small,tableposition=bottom]{caption} -\RequirePackage[skip=3pt]{subcaption} - - -% --------------------------------------------------------------- -% Process ECCV package options - -% Key value options -\RequirePackage{kvoptions} -\SetupKeyvalOptions{ - family=eccv, - prefix=eccv@ -} - -\DeclareBoolOption{review} -\DeclareComplementaryOption{final}{review} -\DeclareBoolOption{mobile} -\DeclareStringOption[\the\year]{year} -\DeclareStringOption[none]{ID} -\DeclareDefaultOption{\PackageWarning{eccv}{Unkown option `\CurrentOption'}} -\ProcessKeyvalOptions* - -% Enable processing options also in main paper with \eccvsetup{ key=value, ... } -\newcommand*{\eccvsetup} - {\setkeys{eccv}% -} - -% Warn if ECCV package for review version is not loaded with paper ID option -\ifeccv@review - \ifdefstring{\eccv@ID}{none}{% - \PackageWarningNoLine{eccv}{Review version requires a paper ID. Please load `eccv' package with `ID=*****' option and replace `*****' with your paper ID} - }{} -\fi - - -% --------------------------------------------------------------- -% Basic error handling - -\AtBeginDocument{% - % Print an error if document class other than llncs is used - \@ifclassloaded{llncs}{}{% - \PackageError{eccv}{Package only meant to be used with document class `llncs'}{Change document class to `llncs'.} - } - % Print a warning if incorrect options for llncs are specified - \@ifclasswith{llncs}{runningheads}{}{% - \PackageWarningNoLine{eccv}{Running heads incorrectly suppressed - ECCV requires running heads. 
Please load document class `llncs' with `runningheads' option} - } - % Print a warning if hyperref is not loaded and/or if the pagebackref option is missing - \ifeccv@review - \@ifpackageloaded{hyperref}{% - \@ifpackagewith{hyperref}{pagebackref}{}{% - \PackageWarningNoLine{eccv}{Package `hyperref' is not loaded with option `pagebackref', which is strongly recommended for review version} - } - }{% - \PackageWarningNoLine{eccv}{Package `hyperref' is not loaded, but strongly recommended for review version} - } - \else - \@ifpackageloaded{hyperref}{% - \@ifpackagewith{hyperref}{pagebackref}{% - \PackageWarningNoLine{eccv}{Package `hyperref' is loaded with option `pagebackref', which is *not* recommended for camera-ready version}{} - }{} - }{% - \PackageWarningNoLine{eccv}{Package `hyperref' is not loaded, but highly recommended for camera-ready version} - } - \fi -} - - -% --------------------------------------------------------------- -% Line number support for the review version - -% NUMBER with left flushed zeros \fillzeros[] -% from CVPR template -\newcount\cv@tmpc@ \newcount\cv@tmpc -\def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi -\cv@tmpc=1 % -\loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi - \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat -\ifnum#2<0\advance\cv@tmpc1\relax-\fi -\loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat -\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% - - -% colored, bold, sans serif line numbers -\renewcommand\thelinenumber{\color{eccvblue}\normalfont\sffamily\scriptsize\fillzeros[3]{\arabic{linenumber}}\color[rgb]{0,0,0}} -% on both sides -\renewcommand\makeLineNumber{\hss\thelinenumber\ \hspace{4.5mm} \rlap{\hskip\textwidth\ \hspace{5mm}\thelinenumber}} - - -% Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line. 
-% Patch amsmath commands so that the previous line and the equation itself -% are numbered. Bug: multiline has an extra line number. -% https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align - -%% Patch 'normal' math environments: -\newcommand*\linenomathpatch[1]{% - \cspreto{#1}{\linenomath}% - \cspreto{#1*}{\linenomath}% - \csappto{end#1}{\endlinenomath}% - \csappto{end#1*}{\endlinenomath}% -} -%% Patch AMS math environments: -\newcommand*\linenomathpatchAMS[1]{% - \cspreto{#1}{\linenomathAMS}% - \cspreto{#1*}{\linenomathAMS}% - \csappto{end#1}{\endlinenomath}% - \csappto{end#1*}{\endlinenomath}% -} - -%% Definition of \linenomathAMS depends on whether the mathlines option is provided -\expandafter\ifx\linenomath\linenomathWithnumbers - \let\linenomathAMS\linenomathWithnumbers - %% The following line gets rid of an extra line numbers at the bottom: - \patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{} -\else - \let\linenomathAMS\linenomathNonumbers -\fi - -\linenomathpatch{equation} -\linenomathpatchAMS{gather} -\linenomathpatchAMS{multline} -\linenomathpatchAMS{align} -\linenomathpatchAMS{alignat} -\linenomathpatchAMS{flalign} - -% Disable line numbering during measurement step of multline -\makeatletter -\patchcmd{\mmeasure@}{\measuring@true}{ - \measuring@true - \ifnum-\linenopenaltypar>\interdisplaylinepenalty - \advance\interdisplaylinepenalty-\linenopenalty - \fi - }{}{} -\makeatother - - -% --------------------------------------------------------------- -% Modifications to LNCS template for review version - -\makeatletter -\ifeccv@review - % Display line numbers - \AtBeginDocument{% - \linenumbers - \linenomathpatch{equation}% - \linenomathpatchAMS{gather}% - \linenomathpatchAMS{multline}% - \linenomathpatchAMS{align}% - \linenomathpatchAMS{alignat}% - \linenomathpatchAMS{flalign}% - } - - % Crop the page for review version - 
\RequirePackage[width=122mm,left=12mm,paperwidth=146mm,height=193mm,top=12mm,paperheight=217mm]{geometry} - - % Replace authors, institute, and running title with review placeholders - \let\maketitleold\maketitle - \renewcommand{\maketitle}{\author{Anonymous ECCV \eccv@year{} Submission}% - \titlerunning{ECCV \eccv@year{} Submission \#\eccv@ID}% - \authorrunning{ECCV \eccv@year{} Submission \#\eccv@ID}% - \institute{Paper ID \#\eccv@ID}% - \maketitleold} -\fi - -\ifeccv@mobile - % Crop the page for mobile version - \RequirePackage[width=122mm,left=12mm,paperwidth=146mm,height=193mm,top=12mm,paperheight=217mm]{geometry} -\fi - -% Macro for ECCV year in main text -\newcommand{\ECCVyear}{\eccv@year\xspace} -\makeatother - - -% --------------------------------------------------------------- -% Support for easy cross-referencing (e.g., \cref{eq:loss}, \cref{sec:intro}) -% configured with \AtEndPreamble as it needs to be called after hyperref - -\AtEndPreamble{ - \usepackage[capitalize]{cleveref} - \crefname{section}{Sec.}{Secs.} - \Crefname{section}{Section}{Sections} - \crefname{table}{Tab.}{Tabs.} - \Crefname{table}{Table}{Tables} -} diff --git a/eccvabbrv.sty b/eccvabbrv.sty deleted file mode 100644 index ac0f75c..0000000 --- a/eccvabbrv.sty +++ /dev/null @@ -1,43 +0,0 @@ -% --------------------------------------------------------------- -% -% Formatting Package for ECCV Submissions -% -% initially created for ECCV 2024 -% by Stefan Roth -% -% based on previous ECCV templates: -% updated April 2002 by Antje Endemann -% Based on CVPR 07 and LNCS, with modifications by DAF, AZ and elle, 2008 and AA, 2010, and CC, 2011; TT, 2014; AAS, 2016; AAS, 2020; TH, 2022 -% -% and the CVPR templates: -% https://github.com/cvpr-org/author-kit -% -% No guarantee is given that the format corresponds perfectly to -% LNCS Proceedings, but most features should be ok. 
-% -% --------------------------------------------------------------- - -\NeedsTeXFormat{LaTeX2e}[1999/12/01] -\ProvidesPackage{eccvabbrv}[Common abbreviations for ECCV] - -% Add a period to the end of an abbreviation unless there's one -% already, then \xspace. -\RequirePackage{xspace} -\makeatletter -\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot} -\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace} - -\def\eg{\emph{e.g}\onedot} -\def\Eg{\emph{E.g}\onedot} -\def\ie{\emph{i.e}\onedot} -\def\Ie{\emph{I.e}\onedot} -\def\cf{\emph{cf}\onedot} -\def\Cf{\emph{Cf}\onedot} -\def\etc{\emph{etc}\onedot} -\def\vs{\emph{vs}\onedot} -\def\wrt{w.r.t\onedot} -\def\dof{d.o.f\onedot} -\def\iid{i.i.d\onedot} -\def\wolog{w.l.o.g\onedot} -\def\etal{\emph{et al}\onedot} -\makeatother \ No newline at end of file diff --git a/eijkel2.eps b/eijkel2.eps deleted file mode 100644 index b62bd78..0000000 --- a/eijkel2.eps +++ /dev/null @@ -1,493 +0,0 @@ -%!PS-Adobe-2.0 EPSF-1.2 -%%Creator: MATLAB, The Mathworks, Inc. 
-%%Title: parz_sym.eps -%%CreationDate: 03/13/96 12:46:22 -%%DocumentNeededFonts: Helvetica -%%DocumentProcessColors: Cyan Magenta Yellow Black -%%Pages: 1 -%%BoundingBox: 59 192 549 590 -%%EndComments - -%%BeginProlog - -% MathWorks dictionary -/MathWorks 150 dict begin - -% definition operators -/bdef {bind def} bind def -/ldef {load def} bind def -/xdef {exch def} bdef -/xstore {exch store} bdef - -% operator abbreviations -/c /clip ldef -/cc /concat ldef -/cp /closepath ldef -/gr /grestore ldef -/gs /gsave ldef -/mt /moveto ldef -/np /newpath ldef -/cm /currentmatrix ldef -/sm /setmatrix ldef -/rc {rectclip} bdef -/rf {rectfill} bdef -/rm /rmoveto ldef -/rl /rlineto ldef -/s /show ldef -/sc {setcmykcolor} bdef -/sr /setrgbcolor ldef -/w /setlinewidth ldef -/j /setlinejoin ldef -/cap /setlinecap ldef - -% page state control -/pgsv () def -/bpage {/pgsv save def} bdef -/epage {pgsv restore} bdef -/bplot /gsave ldef -/eplot {stroke grestore} bdef - -% orientation switch -/portraitMode 0 def -/landscapeMode 1 def - -% coordinate system mappings -/dpi2point 0 def - -% font control -/FontSize 0 def -/FMS { - /FontSize xstore %save size off stack - findfont - [FontSize 0 0 FontSize neg 0 0] - makefont - setfont - }bdef - -/reencode { -exch dup where -{pop load} {pop StandardEncoding} ifelse -exch -dup 3 1 roll -findfont dup length dict begin - { 1 index /FID ne {def}{pop pop} ifelse } forall - /Encoding exch def - currentdict -end -definefont pop -} bdef - -/isroman { -findfont /CharStrings get -/Agrave known -} bdef - -/FMSR { -3 1 roll 1 index -dup isroman -{reencode} {pop pop} ifelse -exch FMS -} bdef - -/csm { - 1 dpi2point div -1 dpi2point div scale - neg translate - landscapeMode eq {90 rotate} if - } bdef - -% line types: solid, dotted, dashed, dotdash -/SO { [] 0 setdash } bdef -/DO { [.5 dpi2point mul 4 dpi2point mul] 0 setdash } bdef -/DA { [6 dpi2point mul] 0 setdash } bdef -/DD { [.5 dpi2point mul 4 dpi2point mul 6 dpi2point mul 4 dpi2point mul] 0 setdash 
} bdef - -% macros for lines and objects -/L { - lineto - stroke - } bdef -/MP { - 3 1 roll moveto - 1 sub {rlineto} repeat - } bdef -/AP { - {rlineto} repeat - } bdef -/PP { - closepath fill - } bdef -/DP { - closepath stroke - } bdef -/MR { - 4 -2 roll moveto - dup 0 exch rlineto - exch 0 rlineto - neg 0 exch rlineto - closepath - } bdef -/FR { - MR stroke - } bdef -/PR { - MR fill - } bdef -/L1i { - { currentfile picstr readhexstring pop } image - } bdef - -/tMatrix matrix def -/MakeOval { - newpath - tMatrix currentmatrix pop - translate scale - 0 0 1 0 360 arc - tMatrix setmatrix - } bdef -/FO { - MakeOval - stroke - } bdef -/PO { - MakeOval - fill - } bdef - -/PD { - 2 copy moveto lineto stroke - } bdef - - -currentdict end def -%%EndProlog - -%%BeginSetup -MathWorks begin - -0 cap - -end -%%EndSetup - -%%Page: 1 1 -%%BeginPageSetup -%%PageBoundingBox: 59 192 549 590 -MathWorks begin -bpage -%%EndPageSetup - -%%BeginObject: graph1 1 -bplot - -/dpi2point 12 def -portraitMode 0216 7344 csm - - 501 259 5882 4776 MR c np -76 dict begin %Colortable dictionary -/c0 { 0 0 0 sr} bdef -/c1 { 1 1 1 sr} bdef -/c2 { 1 0 0 sr} bdef -/c3 { 0 1 0 sr} bdef -/c4 { 0 0 1 sr} bdef -/c5 { 1 1 0 sr} bdef -/c6 { 1 0 1 sr} bdef -/c7 { 0 1 1 sr} bdef -%%IncludeResource: font Helvetica -/Helvetica /ISOLatin1Encoding 144 FMSR - -1 j -c1 - 0 0 6912 5184 PR -6 w -DO -4 w -SO -6 w -c0 - 898 4612 mt 6254 4612 L - 898 388 mt 6254 388 L -6254 4612 mt 6254 388 L - 898 4612 mt 898 388 L -6254 4612 mt 6254 4612 L - 898 4612 mt 898 4612 L - 898 4612 mt 6254 4612 L - 898 4612 mt 898 388 L - 898 4612 mt 898 4612 L - 898 4612 mt 898 4558 L - 898 388 mt 898 442 L - 734 4781 mt -(-25) s -1663 4612 mt 1663 4558 L -1663 388 mt 1663 442 L -1499 4781 mt -(-20) s -2428 4612 mt 2428 4558 L -2428 388 mt 2428 442 L -2264 4781 mt -(-15) s -3193 4612 mt 3193 4558 L -3193 388 mt 3193 442 L -3029 4781 mt -(-10) s -3959 4612 mt 3959 4558 L -3959 388 mt 3959 442 L -3835 4781 mt -(-5) s -4724 4612 mt 4724 4558 L 
-4724 388 mt 4724 442 L -4684 4781 mt -(0) s -5489 4612 mt 5489 4558 L -5489 388 mt 5489 442 L -5449 4781 mt -(5) s -6254 4612 mt 6254 4558 L -6254 388 mt 6254 442 L -6174 4781 mt -(10) s - 898 4612 mt 952 4612 L -6254 4612 mt 6200 4612 L - 783 4665 mt -(0) s - 898 3767 mt 952 3767 L -6254 3767 mt 6200 3767 L - 503 3820 mt -(0.005) s - 898 2922 mt 952 2922 L -6254 2922 mt 6200 2922 L - 583 2975 mt -(0.01) s - 898 2078 mt 952 2078 L -6254 2078 mt 6200 2078 L - 503 2131 mt -(0.015) s - 898 1233 mt 952 1233 L -6254 1233 mt 6200 1233 L - 583 1286 mt -(0.02) s - 898 388 mt 952 388 L -6254 388 mt 6200 388 L - 503 441 mt -(0.025) s - 898 388 mt 6254 388 L - 898 4612 mt 6254 4612 L - 898 4612 mt 898 388 L -6254 4612 mt 6254 388 L - 898 388 mt 898 388 L -6254 388 mt 6254 388 L -gs 898 388 5357 4225 MR c np -DA -16 0 15 0 15 0 16 0 15 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 1 16 0 15 0 15 0 15 0 -16 0 15 0 15 1 16 0 15 0 15 1 16 0 15 1 -15 0 15 1 16 1 15 0 15 1 16 1 15 2 15 1 -16 1 15 2 15 2 15 2 16 3 15 3 15 3 16 3 -15 4 15 4 16 5 15 5 15 6 16 6 15 7 15 8 -15 8 16 9 15 10 15 11 16 12 15 13 15 14 16 16 -15 16 15 18 15 19 16 21 15 22 15 24 16 25 15 27 -15 29 16 31 15 32 15 35 15 36 16 39 15 40 15 43 -16 45 15 47 15 49 4724 3846 100 MP stroke -16 51 15 53 15 55 15 58 16 59 15 61 15 63 16 65 -15 67 15 68 16 70 15 71 15 72 16 74 15 74 15 75 -15 77 16 76 15 77 15 77 16 77 15 77 15 77 16 76 -15 76 15 75 15 73 16 73 15 71 15 70 16 68 15 66 -15 65 16 63 15 60 15 59 15 56 16 54 15 52 15 49 -16 47 15 44 15 42 16 39 15 37 15 34 16 32 15 29 -15 27 15 24 16 22 15 20 15 17 16 15 15 12 15 11 -16 8 15 5 15 4 15 1 16 -1 15 -4 15 -5 16 -8 -15 -11 15 -12 16 -15 15 -17 15 -20 15 -22 16 -24 15 -27 -15 -29 16 -32 15 -34 15 -37 16 -39 15 -42 15 -44 16 -47 -15 -49 15 -52 15 -54 16 -56 15 -59 15 -60 16 -63 15 -65 -15 -66 16 -68 15 -70 15 -71 15 -73 16 -73 15 -75 15 -76 -16 -76 15 
-77 15 -77 3209 2426 100 MP stroke -16 -77 15 -77 15 -77 15 -76 16 -77 15 -75 15 -74 16 -74 -15 -72 15 -71 16 -70 15 -68 15 -67 16 -65 15 -63 15 -61 -15 -59 16 -58 15 -55 15 -53 16 -51 15 -49 15 -47 16 -45 -15 -43 15 -40 15 -39 16 -36 15 -35 15 -32 16 -31 15 -29 -15 -27 16 -25 15 -24 15 -22 15 -21 16 -19 15 -18 15 -16 -16 -16 15 -14 15 -13 16 -12 15 -11 15 -10 16 -9 15 -8 -15 -8 15 -7 16 -6 15 -6 15 -5 16 -5 15 -4 15 -4 -16 -3 15 -3 15 -3 15 -3 16 -2 15 -2 15 -2 16 -1 -15 -1 15 -2 16 -1 15 -1 15 0 15 -1 16 -1 15 0 -15 -1 16 0 15 -1 15 0 16 0 15 -1 15 0 15 0 -16 0 15 0 15 0 16 0 15 -1 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 1694 4612 100 MP stroke -16 0 15 0 15 0 1648 4612 4 MP stroke -SO -16 0 15 0 15 0 16 0 15 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 15 0 -16 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -15 0 16 0 15 0 15 1 16 0 15 0 15 0 16 0 -15 0 15 0 15 1 16 0 15 0 15 1 16 0 15 1 -15 0 16 1 15 0 15 1 15 1 16 1 15 2 15 1 -16 1 15 2 15 2 4724 4596 100 MP stroke -16 2 15 3 15 2 15 4 16 3 15 4 15 4 16 5 -15 5 15 5 16 7 15 7 15 7 16 9 15 9 15 10 -15 11 16 12 15 12 15 14 16 15 15 17 15 17 16 19 -15 21 15 22 15 23 16 25 15 27 15 28 16 30 15 32 -15 34 16 35 15 38 15 39 15 41 16 43 15 46 15 47 -16 49 15 50 15 53 16 54 15 56 15 57 16 59 15 60 -15 62 15 62 16 64 15 64 15 65 16 65 15 65 15 66 -16 65 15 65 15 64 15 63 16 62 15 61 15 59 16 57 -15 55 15 53 16 50 15 48 15 44 15 42 16 38 15 35 -15 31 16 27 15 23 15 19 16 15 15 11 15 6 16 2 -15 -2 15 -6 15 -11 16 -15 15 -19 15 -23 16 -27 15 -31 -15 -35 16 -38 15 -42 15 -44 15 -48 16 -50 15 -53 15 -55 -16 -57 15 -59 15 -61 3209 2592 100 MP stroke -16 -62 15 -63 15 -64 15 -65 16 -65 15 -66 15 -65 16 -65 -15 -65 15 -64 16 -64 15 -62 15 -62 16 -60 15 -59 
15 -57 -15 -56 16 -54 15 -53 15 -50 16 -49 15 -47 15 -46 16 -43 -15 -41 15 -39 15 -38 16 -35 15 -34 15 -32 16 -30 15 -28 -15 -27 16 -25 15 -23 15 -22 15 -21 16 -19 15 -17 15 -17 -16 -15 15 -14 15 -12 16 -12 15 -11 15 -10 16 -9 15 -9 -15 -7 15 -7 16 -7 15 -5 15 -5 16 -5 15 -4 15 -4 -16 -3 15 -4 15 -2 15 -3 16 -2 15 -2 15 -2 16 -1 -15 -1 15 -2 16 -1 15 -1 15 -1 15 0 16 -1 15 0 -15 -1 16 0 15 -1 15 0 16 0 15 -1 15 0 15 0 -16 0 15 0 15 0 16 0 15 -1 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 1694 4612 100 MP stroke -16 0 15 0 15 0 1648 4612 4 MP stroke -16 0 15 0 15 0 16 0 15 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 1 16 0 15 0 15 0 15 0 -16 0 15 0 15 1 16 0 15 0 15 1 16 0 15 1 -15 0 15 1 16 0 15 1 15 1 16 1 15 2 15 1 -16 1 15 2 15 2 15 2 16 3 15 2 15 4 16 3 -15 4 15 4 16 5 15 5 15 5 16 7 15 7 15 7 -15 9 16 9 15 10 15 11 16 12 15 12 15 14 16 15 -15 17 15 17 15 19 16 21 15 22 15 23 16 25 15 27 -15 28 16 30 15 32 15 34 15 35 16 38 15 39 15 41 -16 43 15 46 15 47 4724 3862 100 MP stroke -16 49 15 50 15 53 15 54 16 56 15 57 15 59 16 60 -15 62 15 62 16 64 15 64 15 65 16 65 15 65 15 66 -15 65 16 65 15 64 15 63 16 62 15 61 15 59 16 57 -15 55 15 53 15 50 16 48 15 44 15 42 16 38 15 35 -15 31 16 27 15 23 15 19 15 15 16 11 15 6 15 2 -16 -2 15 -6 15 -11 16 -15 15 -19 15 -23 16 -27 15 -31 -15 -35 15 -38 16 -42 15 -44 15 -48 16 -50 15 -53 15 -55 -16 -57 15 -59 15 -61 15 -62 16 -63 15 -64 15 -65 16 -65 -15 -66 15 -65 16 -65 15 -65 15 -64 15 -64 16 -62 15 -62 -15 -60 16 -59 15 -57 15 -56 16 -54 15 -53 15 -50 16 -49 -15 -47 15 -46 15 -43 16 -41 15 -39 15 -38 16 -35 15 -34 -15 -32 16 -30 15 -28 15 -27 15 -25 16 -23 15 -22 15 -21 -16 -19 15 -17 15 -17 3209 4446 100 MP stroke -16 -15 15 -14 15 -12 15 -12 16 -11 15 -10 15 -9 16 -9 -15 -7 15 -7 16 -7 15 -5 15 -5 16 -5 15 -4 15 -4 -15 -3 16 -4 15 -2 15 -3 16 -2 15 -2 15 -2 16 -1 -15 -1 15 -2 15 -1 
16 -1 15 -1 15 0 16 -1 15 0 -15 -1 16 0 15 -1 15 0 15 0 16 -1 15 0 15 0 -16 0 15 0 15 0 16 0 15 -1 15 0 16 0 15 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 15 0 -16 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 1694 4612 100 MP stroke -16 0 15 0 15 0 1648 4612 4 MP stroke -DO -16 0 15 0 15 0 16 0 15 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 16 0 15 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 15 0 -16 0 15 0 15 0 16 0 15 0 15 1 16 0 15 0 -15 0 15 0 16 0 15 0 15 1 16 0 15 0 15 1 -16 0 15 1 15 0 15 1 16 0 15 1 15 1 16 1 -15 2 15 1 16 1 15 2 15 2 16 2 15 3 15 2 -15 4 16 3 15 4 15 4 16 5 15 5 15 5 16 7 -15 7 15 7 15 9 16 9 15 10 15 11 16 12 15 12 -15 14 16 15 15 17 15 17 15 19 16 21 15 22 15 23 -16 25 15 27 15 28 4724 4247 100 MP stroke -16 30 15 32 15 34 15 35 16 38 15 39 15 41 16 43 -15 46 15 47 16 49 15 50 15 53 16 54 15 56 15 57 -15 59 16 60 15 62 15 62 16 64 15 64 15 65 16 65 -15 65 15 66 15 65 16 65 15 64 15 63 16 62 15 61 -15 59 16 57 15 55 15 53 15 50 16 48 15 44 15 42 -16 38 15 35 15 31 16 27 15 23 15 19 16 15 15 11 -15 6 15 2 16 -2 15 -6 15 -11 16 -15 15 -19 15 -23 -16 -27 15 -31 15 -35 15 -38 16 -42 15 -44 15 -48 16 -50 -15 -53 15 -55 16 -57 15 -59 15 -61 15 -62 16 -63 15 -64 -15 -65 16 -65 15 -66 15 -65 16 -65 15 -65 15 -64 16 -64 -15 -62 15 -62 15 -60 16 -59 15 -57 15 -56 16 -54 15 -53 -15 -50 16 -49 15 -47 15 -46 15 -43 16 -41 15 -39 15 -38 -16 -35 15 -34 15 -32 3209 4217 100 MP stroke -16 -30 15 -28 15 -27 15 -25 16 -23 15 -22 15 -21 16 -19 -15 -17 15 -17 16 -15 15 -14 15 -12 16 -12 15 -11 15 -10 -15 -9 16 -9 15 -7 15 -7 16 -7 15 -5 15 -5 16 -5 -15 -4 15 -4 15 -3 16 -4 15 -2 15 -3 16 -2 15 -2 -15 -2 16 -1 15 -1 15 -2 15 -1 16 -1 15 -1 15 0 -16 -1 15 0 15 -1 16 0 15 -1 15 0 16 0 15 -1 -15 0 15 0 16 0 15 0 15 0 16 
0 15 -1 15 0 -16 0 15 0 15 0 15 0 16 0 15 0 15 0 16 0 -15 0 15 0 16 0 15 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 16 0 15 0 15 0 15 0 -16 0 15 0 15 0 16 0 15 0 15 0 16 0 15 0 -15 0 16 0 15 0 15 0 15 0 16 0 15 0 15 0 -16 0 15 0 15 0 1694 4612 100 MP stroke -16 0 15 0 15 0 1648 4612 4 MP stroke -0 -2703 4112 4612 2 MP stroke -0 -2703 3499 4612 2 MP stroke -0 -3823 3959 4612 2 MP stroke -SO - -gr -3463 3236 mt 3535 3236 L -3499 3200 mt 3499 3272 L -gs 898 388 5357 4225 MR c np - -gr -3923 3236 mt 3995 3236 L -3959 3200 mt 3959 3272 L -gs 898 388 5357 4225 MR c np - -gr -3923 789 mt 3995 789 L -3959 753 mt 3959 825 L -3923 753 mt 3995 825 L -3995 753 mt 3923 825 L -gs 898 388 5357 4225 MR c np - -gr -4076 2129 mt 4148 2201 L -4148 2129 mt 4076 2201 L -gs 898 388 5357 4225 MR c np - -gr -3923 2129 mt 3995 2201 L -3995 2129 mt 3923 2201 L -gs 898 388 5357 4225 MR c np - -gr -3423 5003 mt -(Xi) s -3867 5003 mt -(Xs) s -4050 5003 mt -(Xj) s - -end - -eplot -%%EndObject graph 1 - -epage -end - -showpage - -%%Trailer -%%EOF diff --git a/eijkel2.pdf b/eijkel2.pdf deleted file mode 100644 index e01e62b..0000000 Binary files a/eijkel2.pdf and /dev/null differ diff --git a/iccv.sty b/iccv.sty new file mode 100644 index 0000000..f8f39cd --- /dev/null +++ b/iccv.sty @@ -0,0 +1,508 @@ +% --------------------------------------------------------------- +% +% No guarantee is given that the format corresponds perfectly to +% IEEE 8.5" x 11" Proceedings, but most features should be ok. +% +% --------------------------------------------------------------- +% with LaTeX2e: +% ============= +% +% use as +% \documentclass[times,10pt,twocolumn]{article} +% \usepackage[options]{ICCV} +% \usepackage{times} +% +% "options" should be replaced by +% * "review" for submitting a paper for review, +% * "final" for the camera ready, and +% * "rebuttal" for the author rebuttal. 
+% +% specify references as +% {\small +% \bibliographystyle{ieee} +% \bibliography{...your files...} +% } +% --------------------------------------------------------------- + +\NeedsTeXFormat{LaTeX2e}[1999/12/01] +\ProvidesPackage{iccv}[2025 LaTeX class for IEEE ICCV] + +\RequirePackage{times} % Integrate Times for here +\RequirePackage{xspace} +\RequirePackage[dvipsnames]{xcolor} +\RequirePackage{graphicx} +\RequirePackage{amsmath} +\RequirePackage{amssymb} +\RequirePackage{booktabs} +\RequirePackage[numbers,sort&compress]{natbib} +\setlength{\bibsep}{1pt plus 1pt minus 1pt} + +\RequirePackage{silence} % Suppress unwanted warnings +\hbadness=10000 \vbadness=10000 \vfuzz=30pt \hfuzz=30pt +\WarningFilter{latexfont}{Font shape declaration} +\WarningFilter{latex}{Font shape} +\WarningFilter[rebuttal]{latex}{No \author given} +\RequirePackage{etoolbox} + +% Use modern caption package to allow for sub-figures etc. +% Reproduces the original CVPR/ICCV style as closely as possible. +\RequirePackage[format=plain,labelformat=simple,labelsep=period,font=small,compatibility=false]{caption} +\RequirePackage[font=footnotesize,skip=3pt,subrefformat=parens]{subcaption} + + +\newtoggle{iccvfinal} % Camera-ready version +\newtoggle{iccvrebuttal} % Rebuttal +\newtoggle{iccvpagenumbers} % Force page numbers (in camera ready) +\toggletrue{iccvfinal} +\togglefalse{iccvrebuttal} +\togglefalse{iccvpagenumbers} +\DeclareOption{review}{\togglefalse{iccvfinal}\toggletrue{iccvpagenumbers}} +\DeclareOption{rebuttal}{\togglefalse{iccvfinal}\toggletrue{iccvrebuttal}} +\DeclareOption{pagenumbers}{\toggletrue{iccvpagenumbers}} +\DeclareOption*{\PackageWarning{iccv}{Unkown option `\CurrentOption'}} +\ProcessOptions\relax + +% Don't warn about missing author for rebuttal +\iftoggle{iccvrebuttal}{% + \ActivateWarningFilters[rebuttal] +}{} + +% Breaking lines for URLs in the bib +\RequirePackage[hyphens]{url} +\Urlmuskip=0mu plus 1mu\relax + + +% 
--------------------------------------------------------------- +% Inlined version of the obsolete "everyshi-2001-05-15" package. +\newcommand{\@EveryShipout@Hook}{} +\newcommand{\@EveryShipout@AtNextHook}{} +\newcommand*{\EveryShipout}[1] + {\g@addto@macro\@EveryShipout@Hook{#1}} +\newcommand*{\AtNextShipout}[1] + {\g@addto@macro\@EveryShipout@AtNextHook{#1}} +\newcommand{\@EveryShipout@Shipout}{% + \afterassignment\@EveryShipout@Test + \global\setbox\@cclv= % + } +\newcommand{\@EveryShipout@Test}{% + \ifvoid\@cclv\relax + \aftergroup\@EveryShipout@Output + \else + \@EveryShipout@Output + \fi% + } +\newcommand{\@EveryShipout@Output}{% + \@EveryShipout@Hook% + \@EveryShipout@AtNextHook% + \gdef\@EveryShipout@AtNextHook{}% + \@EveryShipout@Org@Shipout\box\@cclv% + } +\newcommand{\@EveryShipout@Org@Shipout}{} +\newcommand*{\@EveryShipout@Init}{% + \message{ABD: EveryShipout initializing macros}% + \let\@EveryShipout@Org@Shipout\shipout + \let\shipout\@EveryShipout@Shipout + } +\AtBeginDocument{\@EveryShipout@Init} +% --------------------------------------------------------------- + + +% --------------------------------------------------------------- +% Inlined simplified version of the "eso-pic" package. 
+\newcommand\LenToUnit[1]{#1\@gobble} +\newcommand\AtPageUpperLeft[1]{% + \begingroup + \@tempdima=0pt\relax\@tempdimb=\ESO@yoffsetI\relax + \put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}% + \endgroup +} +\newcommand\AtPageLowerLeft[1]{\AtPageUpperLeft{% + \put(0,\LenToUnit{-\paperheight}){#1}}} +\newcommand\AtPageCenter[1]{\AtPageUpperLeft{% + \put(\LenToUnit{.5\paperwidth},\LenToUnit{-.5\paperheight}){#1}}% +} +\newcommand\AtTextUpperLeft[1]{% + \begingroup + \setlength\@tempdima{1in}% + \ifodd\c@page% + \advance\@tempdima\oddsidemargin% + \else% + \advance\@tempdima\evensidemargin% + \fi% + \@tempdimb=\ESO@yoffsetI\relax\advance\@tempdimb-1in\relax% + \advance\@tempdimb-\topmargin% + \advance\@tempdimb-\headheight\advance\@tempdimb-\headsep% + \put(\LenToUnit{\@tempdima},\LenToUnit{\@tempdimb}){#1}% + \endgroup +} +\newcommand\AtTextLowerLeft[1]{\AtTextUpperLeft{% + \put(0,\LenToUnit{-\textheight}){#1}}} +\newcommand\AtTextCenter[1]{\AtTextUpperLeft{% + \put(\LenToUnit{.5\textwidth},\LenToUnit{-.5\textheight}){#1}}} +\newcommand{\ESO@HookI}{} \newcommand{\ESO@HookII}{} +\newcommand{\ESO@HookIII}{} +\newcommand{\AddToShipoutPicture}{% + \@ifstar{\g@addto@macro\ESO@HookII}{\g@addto@macro\ESO@HookI}} +\newcommand{\ClearShipoutPicture}{\global\let\ESO@HookI\@empty} +\newcommand\ESO@isMEMOIR[1]{} +\@ifclassloaded{memoir}{\renewcommand\ESO@isMEMOIR[1]{#1}}{} +\newcommand{\@ShipoutPicture}{% + \bgroup + \@tempswafalse% + \ifx\ESO@HookI\@empty\else\@tempswatrue\fi% + \ifx\ESO@HookII\@empty\else\@tempswatrue\fi% + \ifx\ESO@HookIII\@empty\else\@tempswatrue\fi% + \if@tempswa% + \@tempdima=1in\@tempdimb=-\@tempdima% + \advance\@tempdimb\ESO@yoffsetI% + \ESO@isMEMOIR{% + \advance\@tempdima\trimedge% + \advance\@tempdima\paperwidth% + \advance\@tempdima-\stockwidth% + \if@twoside\ifodd\c@page\else% + \advance\@tempdima-2\trimedge% + \advance\@tempdima-\paperwidth% + \advance\@tempdima\stockwidth% + \fi\fi% + \advance\@tempdimb\trimtop}% + \unitlength=1pt% + 
\global\setbox\@cclv\vbox{% + \vbox{\let\protect\relax + \pictur@(0,0)(\strip@pt\@tempdima,\strip@pt\@tempdimb)% + \ESO@HookIII\ESO@HookI\ESO@HookII% + \global\let\ESO@HookII\@empty% + \endpicture}% + \nointerlineskip% + \box\@cclv}% + \fi + \egroup +} +\EveryShipout{\@ShipoutPicture} +\RequirePackage{keyval} +\newif\ifESO@dvips\ESO@dvipsfalse +\newif\ifESO@texcoord\ESO@texcoordfalse + +\AtBeginDocument{% + \IfFileExists{color.sty} + {% + \RequirePackage{color} + \let\ESO@color=\color\let\ESO@colorbox=\colorbox + \let\ESO@fcolorbox=\fcolorbox + }{} + \@ifundefined{Gin@driver}{}% + {% + \ifx\Gin@driver\@empty\else% + \filename@parse{\Gin@driver}\def\reserved@a{dvips}% + \ifx\filename@base\reserved@a\ESO@dvipstrue\fi% + \fi + }% + \ifx\pdfoutput\undefined\else + \ifx\pdfoutput\relax\else + \ifcase\pdfoutput\else + \ESO@dvipsfalse% + \fi + \fi + \fi +} +\ifESO@texcoord + \def\ESO@yoffsetI{0pt}\def\ESO@yoffsetII{-\paperheight} +\else + \def\ESO@yoffsetI{\paperheight}\def\ESO@yoffsetII{0pt} +\fi +% --------------------------------------------------------------- + + +\typeout{ICCV 8.5 x 11-Inch Proceedings Style `iccv.sty'.} + +% ten point helvetica bold required for captions +% eleven point times bold required for second-order headings +% in some sites the name of the fonts may differ, +% change the name here: +\font\iccvtenhv = phvb at 8pt % *** IF THIS FAILS, SEE iccv.sty *** +\font\elvbf = ptmb scaled 1100 +\font\tenbf = ptmb scaled 1000 + +% If the above lines give an error message, try to comment them and +% uncomment these: +%\font\iccvtenhv = phvb7t at 8pt +%\font\elvbf = ptmb7t scaled 1100 +%\font\tenbf = ptmb7t scaled 1000 + +% set dimensions of columns, gap between columns, and paragraph indent +\setlength{\textheight}{8.875in} +\setlength{\textwidth}{6.875in} +\setlength{\columnsep}{0.3125in} +\setlength{\topmargin}{0in} +\setlength{\headheight}{0in} +\setlength{\headsep}{0in} +\setlength{\parindent}{1pc} +\setlength{\oddsidemargin}{-0.1875in} 
+\setlength{\evensidemargin}{-0.1875in} + + +% Suppress page numbers when the appropriate option is given +\iftoggle{iccvpagenumbers}{}{% + \pagestyle{empty} +} + +\AtBeginDocument{% + % Print an error if document class other than article is used + \@ifclassloaded{article}{}{% + \PackageError{iccv}{Package only meant to be used with document class `article'}{Change document class to `article'.} + } + % Print a warning if incorrect options for article are specified + \@ifclasswith{article}{10pt}{}{% + \PackageWarningNoLine{iccv}{Incorrect font size specified - ICCV requires 10-point fonts. Please load document class `article' with `10pt' option} + } + \@ifclasswith{article}{twocolumn}{}{% + \PackageWarningNoLine{iccv}{Single column document - ICCV requires papers to have two-column layout. Please load document class `article' with `twocolumn' option} + } + \@ifclasswith{article}{letterpaper}{}{% + \PackageWarningNoLine{iccv}{Incorrect paper size - ICCV uses paper size `letter'. Please load document class `article' with `letterpaper' option} + } + % Print a warning if hyperref is not loaded and/or if the pagebackref option is missing + \iftoggle{iccvfinal}{% + \@ifpackageloaded{hyperref}{}{% + \PackageWarningNoLine{iccv}{Package `hyperref' is not loaded, but highly recommended for camera-ready version} + } + }{% + \@ifpackageloaded{hyperref}{ + \@ifpackagewith{hyperref}{pagebackref}{}{ + \PackageWarningNoLine{iccv}{Package `hyperref' is not loaded with option `pagebackref', which is strongly recommended for review version} + } + }{% + \PackageWarningNoLine{iccv}{Package `hyperref' is not loaded, but strongly recommended for review version} + } + } +} + +\def\@maketitle{ + \newpage + \null + \iftoggle{iccvrebuttal}{\vspace*{-.3in}}{\vskip .375in} + \begin{center} + % smaller title font only for rebuttal + \iftoggle{iccvrebuttal}{{\large \bf \@title \par}}{{\Large \bf \@title \par}} + % additional two empty lines at the end of the title + 
\iftoggle{iccvrebuttal}{\vspace*{-22pt}}{\vspace*{24pt}}{ + \large + \lineskip .5em + \begin{tabular}[t]{c} + \iftoggle{iccvfinal}{ + \@author + }{ + \iftoggle{iccvrebuttal}{}{ + Anonymous \confName~submission\\ + \vspace*{1pt}\\ + Paper ID \paperID + } + } + \end{tabular} + \par + } + % additional small space at the end of the author name + \vskip .5em + % additional empty line at the end of the title block + \vspace*{12pt} + \end{center} +} + +\def\abstract{% + % Suppress page numbers when the appropriate option is given + \iftoggle{iccvpagenumbers}{}{% + \thispagestyle{empty} + } + \centerline{\large\bf Abstract}% + \vspace*{12pt}\noindent% + \it\ignorespaces% +} + +\def\endabstract{% + % additional empty line at the end of the abstract + \vspace*{12pt} + } + +\def\affiliation#1{\gdef\@affiliation{#1}} \gdef\@affiliation{} + +% correct heading spacing and type +\def\iccvsection{\@startsection {section}{1}{\z@} + {-10pt plus -2pt minus -2pt}{7pt} {\large\bf}} +\def\iccvssect#1{\iccvsection*{#1}} +\def\iccvsect#1{\iccvsection{\texorpdfstring{\hskip -1em.~}{}#1}} +\def\section{\@ifstar\iccvssect\iccvsect} + +\def\iccvsubsection{\@startsection {subsection}{2}{\z@} + {-8pt plus -2pt minus -2pt}{5pt} {\elvbf}} +\def\iccvssubsect#1{\iccvsubsection*{#1}} +\def\iccvsubsect#1{\iccvsubsection{\texorpdfstring{\hskip -1em.~}{}#1}} +\def\subsection{\@ifstar\iccvssubsect\iccvsubsect} + +\def\iccvsubsubsection{\@startsection {subsubsection}{3}{\z@} + {-6pt plus -2pt minus -2pt}{3pt} {\tenbf}} +\def\iccvssubsubsect#1{\iccvsubsubsection*{#1}} +\def\iccvsubsubsect#1{\iccvsubsubsection{\texorpdfstring{\hskip -1em.~}{}#1}} +\def\subsubsection{\@ifstar\iccvssubsubsect\iccvsubsubsect} + +%% --------- Page background marks: Ruler and confidentiality (only for review and rebuttal) +\iftoggle{iccvfinal}{ + % In review and rebuttal mode, we use the "lineno" package for numbering lines. 
+ % When switching to a different mode, the "\@LN" macro may remain in cached .aux files, + % leading to build errors (https://github.com/cvpr-org/author-kit/issues/49). + % Defining the macro as empty fixes that (https://tex.stackexchange.com/a/125779). + \makeatletter + \providecommand{\@LN}[2]{} + \makeatother +}{ + % ----- define vruler + \makeatletter + \newbox\iccvrulerbox + \newcount\iccvrulercount + \newdimen\iccvruleroffset + \newdimen\cv@lineheight + \newdimen\cv@boxheight + \newbox\cv@tmpbox + \newcount\cv@refno + \newcount\cv@tot + % NUMBER with left flushed zeros \fillzeros[] + \newcount\cv@tmpc@ \newcount\cv@tmpc + \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi + \cv@tmpc=1 % + \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi + \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat + \ifnum#2<0\advance\cv@tmpc1\relax-\fi + \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat + \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% + \makeatother + % ----- end of vruler + + %% Define linenumber setup + \RequirePackage[switch,mathlines]{lineno} + + % Line numbers in ICCV blue using font from \iccvtenhv + \renewcommand\linenumberfont{\iccvtenhv\color[rgb]{.5,.5,1}} + + \renewcommand\thelinenumber{\fillzeros[3]{\arabic{linenumber}}} + + \setlength{\linenumbersep}{.75cm} + + % Bug: An equation with $$ ... $$ isn't numbered, nor is the previous line. + + % Patch amsmath commands so that the previous line and the equation itself + % are numbered. Bug: multiline has an extra line number. 
+ % https://tex.stackexchange.com/questions/461186/how-to-use-lineno-with-amsmath-align + \RequirePackage{etoolbox} %% <- for \pretocmd, \apptocmd and \patchcmd + + \newcommand*\linenomathpatch[1]{% + \expandafter\pretocmd\csname #1\endcsname {\linenomath}{}{}% + \expandafter\pretocmd\csname #1*\endcsname {\linenomath}{}{}% + \expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}% + \expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}% + } + \newcommand*\linenomathpatchAMS[1]{% + \expandafter\pretocmd\csname #1\endcsname {\linenomathAMS}{}{}% + \expandafter\pretocmd\csname #1*\endcsname {\linenomathAMS}{}{}% + \expandafter\apptocmd\csname end#1\endcsname {\endlinenomath}{}{}% + \expandafter\apptocmd\csname end#1*\endcsname {\endlinenomath}{}{}% + } + + %% Definition of \linenomathAMS depends on whether the mathlines option is provided + \expandafter\ifx\linenomath\linenomathWithnumbers + \let\linenomathAMS\linenomathWithnumbers + %% The following line gets rid of an extra line numbers at the bottom: + \patchcmd\linenomathAMS{\advance\postdisplaypenalty\linenopenalty}{}{}{} + \else + \let\linenomathAMS\linenomathNonumbers + \fi + + % Add the numbers + \linenumbers + \AtBeginDocument{% + \linenomathpatch{equation}% + \linenomathpatchAMS{gather}% + \linenomathpatchAMS{multline}% + \linenomathpatchAMS{align}% + \linenomathpatchAMS{alignat}% + \linenomathpatchAMS{flalign}% + } + + % \makevruler[][][][][] + \def\iccvruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iccvrulerbox}} + \AddToShipoutPicture{% + \color[rgb]{.5,.5,1} + + \def\pid{\parbox{1in}{\begin{center}\bf\sf{\small \confName}\\\small \#\paperID\end{center}}} + \AtTextUpperLeft{%paperID in corners + \put(\LenToUnit{-65pt},\LenToUnit{45pt}){\pid} + \put(\LenToUnit{\textwidth-12pt},\LenToUnit{45pt}){\pid} + } + \AtTextUpperLeft{%confidential + \put(0,\LenToUnit{1cm}){\parbox{\textwidth}{\centering\iccvtenhv + \confName~\confYear~Submission \#\paperID. 
CONFIDENTIAL REVIEW COPY. DO NOT DISTRIBUTE.}} + } + } +} % end of not iccvfinal + +%%% Make figure placement a little more predictable. +% We trust the user to move figures if this results +% in ugliness. +% Minimize bad page breaks at figures +\renewcommand{\textfraction}{0.01} +\renewcommand{\floatpagefraction}{0.99} +\renewcommand{\topfraction}{0.99} +\renewcommand{\bottomfraction}{0.99} +\renewcommand{\dblfloatpagefraction}{0.99} +\renewcommand{\dbltopfraction}{0.99} +\setcounter{totalnumber}{99} +\setcounter{topnumber}{99} +\setcounter{bottomnumber}{99} + +% Add a period to the end of an abbreviation unless there's one +% already, then \xspace. +\makeatletter +\DeclareRobustCommand\onedot{\futurelet\@let@token\@onedot} +\def\@onedot{\ifx\@let@token.\else.\null\fi\xspace} + +\def\eg{\emph{e.g}\onedot} \def\Eg{\emph{E.g}\onedot} +\def\ie{\emph{i.e}\onedot} \def\Ie{\emph{I.e}\onedot} +\def\cf{\emph{cf}\onedot} \def\Cf{\emph{Cf}\onedot} +\def\etc{\emph{etc}\onedot} \def\vs{\emph{vs}\onedot} +\def\wrt{w.r.t\onedot} \def\dof{d.o.f\onedot} +\def\iid{i.i.d\onedot} \def\wolog{w.l.o.g\onedot} +\def\etal{\emph{et al}\onedot} +\makeatother + +% --------------------------------------------------------------- + +%% redefine the \title command so that a variable name is saved in \thetitle, and provides the \maketitlesupplementary command +\let\titleold\title +\renewcommand{\title}[1]{\titleold{#1}\newcommand{\thetitle}{#1}} +\def\maketitlesupplementary + { + \newpage + \twocolumn[ + \centering + \Large + \textbf{\thetitle}\\ + \vspace{0.5em}Supplementary Material \\ + \vspace{1.0em} + ] %< twocolumn + } + +% --------------------------------------------------------------- + +%% Support for easy cross-referencing (e.g. 
\cref{sec:intro} +% configured with \AtEndPreamble as it needs to be called after hyperref +\AtEndPreamble{ + \usepackage[capitalize]{cleveref} + \crefname{section}{Sec.}{Secs.} + \Crefname{section}{Section}{Sections} + \Crefname{table}{Table}{Tables} + \crefname{table}{Tab.}{Tabs.} +} + +% --------------------------------------------------------------- + +%% More compact compact itemize/enumeration (e.g. list contributions) +\RequirePackage[shortlabels,inline]{enumitem} +\setlist[itemize]{noitemsep,leftmargin=*,topsep=0em} +\setlist[enumerate]{noitemsep,leftmargin=*,topsep=0em} diff --git a/ieeenat_fullname.bst b/ieeenat_fullname.bst new file mode 100644 index 0000000..261b8c3 --- /dev/null +++ b/ieeenat_fullname.bst @@ -0,0 +1,1448 @@ +%% File: `abbrvnat.bst' +%% A modification of `abbrv.bst' for use with natbib package +%% +%% Copyright 1993-2007 Patrick W Daly +%% Max-Planck-Institut f\"ur Sonnensystemforschung +%% Max-Planck-Str. 2 +%% D-37191 Katlenburg-Lindau +%% Germany +%% E-mail: daly@mps.mpg.de +%% +%% This program can be redistributed and/or modified under the terms +%% of the LaTeX Project Public License Distributed from CTAN +%% archives in directory macros/latex/base/lppl.txt; either +%% version 1 of the License, or any later version. +%% + % Version and source file information: + % \ProvidesFile{natbst.mbs}[2007/11/26 1.93 (PWD)] + % + % BibTeX `plainnat' family + % version 0.99b for BibTeX versions 0.99a or later, + % for LaTeX versions 2.09 and 2e. + % + % For use with the `natbib.sty' package; emulates the corresponding + % member of the `plain' family, but with author-year citations. + % + % With version 6.0 of `natbib.sty', it may also be used for numerical + % citations, while retaining the commands \citeauthor, \citefullauthor, + % and \citeyear to print the corresponding information. + % + % For version 7.0 of `natbib.sty', the KEY field replaces missing + % authors/editors, and the date is left blank in \bibitem. 
+ % + % Includes field EID for the sequence/citation number of electronic journals + % which is used instead of page numbers. + % + % Includes fields ISBN and ISSN. + % + % Includes field URL for Internet addresses. + % + % Includes field DOI for Digital Object Idenfifiers. + % + % Works best with the url.sty package of Donald Arseneau. + % + % Works with identical authors and year are further sorted by + % citation key, to preserve any natural sequence. + % +ENTRY + { address + author + booktitle + chapter + doi + eid + edition + editor + howpublished + institution + isbn + issn + journal + key + month + note + number + organization + pages + publisher + school + series + title + type + url + volume + year + } + {} + { label extra.label sort.label short.list } + +INTEGERS { output.state before.all mid.sentence after.sentence after.block } + +FUNCTION {init.state.consts} +{ #0 'before.all := + #1 'mid.sentence := + #2 'after.sentence := + #3 'after.block := +} + +STRINGS { s t } + +FUNCTION {output.nonnull} +{ 's := + output.state mid.sentence = + { ", " * write$ } + { output.state after.block = + { add.period$ write$ + newline$ + "\newblock " write$ + } + { output.state before.all = + 'write$ + { add.period$ " " * write$ } + if$ + } + if$ + mid.sentence 'output.state := + } + if$ + s +} + +FUNCTION {output} +{ duplicate$ empty$ + 'pop$ + 'output.nonnull + if$ +} + +FUNCTION {output.check} +{ 't := + duplicate$ empty$ + { pop$ "empty " t * " in " * cite$ * warning$ } + 'output.nonnull + if$ +} + +FUNCTION {fin.entry} +{ add.period$ + write$ + newline$ +} + +FUNCTION {new.block} +{ output.state before.all = + 'skip$ + { after.block 'output.state := } + if$ +} + +FUNCTION {new.sentence} +{ output.state after.block = + 'skip$ + { output.state before.all = + 'skip$ + { after.sentence 'output.state := } + if$ + } + if$ +} + +FUNCTION {not} +{ { #0 } + { #1 } + if$ +} + +FUNCTION {and} +{ 'skip$ + { pop$ #0 } + if$ +} + +FUNCTION {or} +{ { pop$ #1 } + 'skip$ + if$ +} + 
+FUNCTION {new.block.checka} +{ empty$ + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.block.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.block + if$ +} + +FUNCTION {new.sentence.checka} +{ empty$ + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {new.sentence.checkb} +{ empty$ + swap$ empty$ + and + 'skip$ + 'new.sentence + if$ +} + +FUNCTION {field.or.null} +{ duplicate$ empty$ + { pop$ "" } + 'skip$ + if$ +} + +FUNCTION {emphasize} +{ duplicate$ empty$ + { pop$ "" } + { "\emph{" swap$ * "}" * } + if$ +} + +INTEGERS { nameptr namesleft numnames } + +FUNCTION {format.names} +{ 's := + #1 'nameptr := + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + % Formerly { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := + { s nameptr "{ff }{vv }{ll}{, jj}" format.name$ 't := + nameptr #1 > + { namesleft #1 > + { ", " * t * } + { numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." * } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {format.key} +{ empty$ + { key field.or.null } + { "" } + if$ +} + +FUNCTION {format.authors} +{ author empty$ + { "" } + { author format.names } + if$ +} + +FUNCTION {format.editors} +{ editor empty$ + { "" } + { editor format.names + editor num.names$ #1 > + { ", editors" * } + { ", editor" * } + if$ + } + if$ +} + +FUNCTION {format.isbn} +{ isbn empty$ + { "" } +% { new.block "ISBN " isbn * } + { "" } + if$ +} + +FUNCTION {format.issn} +{ issn empty$ + { "" } +% { new.block "ISSN " issn * } + { "" } + if$ +} + +FUNCTION {format.url} +{ url empty$ + { "" } +% { new.block "URL \url{" url * "}" * } + { "" } + if$ +} + +FUNCTION {format.doi} +{ doi empty$ + { "" } +% { new.block "\doi{" doi * "}" * } + { "" } + if$ +} + +FUNCTION {format.title} +{ title empty$ + { "" } + { title "t" change.case$ } + if$ +} + +FUNCTION {format.full.names} +{'s := + #1 'nameptr := + s num.names$ 'numnames := + numnames 
'namesleft := + { namesleft #0 > } + { s nameptr + "{vv~}{ll}" format.name$ 't := + nameptr #1 > + { + namesleft #1 > + { ", " * t * } + { + numnames #2 > + { "," * } + 'skip$ + if$ + t "others" = + { " et~al." * } + { " and " * t * } + if$ + } + if$ + } + 't + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {author.editor.full} +{ author empty$ + { editor empty$ + { "" } + { editor format.full.names } + if$ + } + { author format.full.names } + if$ +} + +FUNCTION {author.full} +{ author empty$ + { "" } + { author format.full.names } + if$ +} + +FUNCTION {editor.full} +{ editor empty$ + { "" } + { editor format.full.names } + if$ +} + +FUNCTION {make.full.names} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.full + { type$ "proceedings" = + 'editor.full + 'author.full + if$ + } + if$ +} + +FUNCTION {output.bibitem} +{ newline$ + "\bibitem[" write$ + label write$ + ")" make.full.names duplicate$ short.list = + { pop$ } + { * } + if$ + "]{" * write$ + cite$ write$ + "}" write$ + newline$ + "" + before.all 'output.state := +} + +FUNCTION {n.dashify} +{ 't := + "" + { t empty$ not } + { t #1 #1 substring$ "-" = + { t #1 #2 substring$ "--" = not + { "--" * + t #2 global.max$ substring$ 't := + } + { { t #1 #1 substring$ "-" = } + { "-" * + t #2 global.max$ substring$ 't := + } + while$ + } + if$ + } + { t #1 #1 substring$ * + t #2 global.max$ substring$ 't := + } + if$ + } + while$ +} + +FUNCTION {format.date} +{ year duplicate$ empty$ + { "empty year in " cite$ * warning$ + pop$ "" } + 'skip$ + if$ +%% CR: Leave out months. 
+% month empty$ +% 'skip$ +% { month +% " " * swap$ * +% } +% if$ + extra.label * +} + +FUNCTION {format.btitle} +{ title emphasize +} + +FUNCTION {tie.or.space.connect} +{ duplicate$ text.length$ #3 < + { "~" } + { " " } + if$ + swap$ * * +} + +FUNCTION {either.or.check} +{ empty$ + 'pop$ + { "can't use both " swap$ * " fields in " * cite$ * warning$ } + if$ +} + +FUNCTION {format.bvolume} +{ volume empty$ + { "" } +%% CR: Don't show "volume 1234 of LNCS" etc. +% { "volume" volume tie.or.space.connect +% series empty$ +% 'skip$ +% { " of " * series emphasize * } +% if$ +% "volume and number" number either.or.check +% } + { "" } + if$ +} + +FUNCTION {format.number.series} +{ volume empty$ + { number empty$ +%% CR: Leave out series information. +% { series field.or.null } + { "" } + { output.state mid.sentence = + { "number" } + { "Number" } + if$ + number tie.or.space.connect + series empty$ + { "there's a number but no series in " cite$ * warning$ } + { " in " * series * } + if$ + } + if$ + } + { "" } + if$ +} + +FUNCTION {format.edition} +{ edition empty$ + { "" } + { output.state mid.sentence = + { edition "l" change.case$ " edition" * } + { edition "t" change.case$ " edition" * } + if$ + } + if$ +} + +INTEGERS { multiresult } + +FUNCTION {multi.page.check} +{ 't := + #0 'multiresult := + { multiresult not + t empty$ not + and + } + { t #1 #1 substring$ + duplicate$ "-" = + swap$ duplicate$ "," = + swap$ "+" = + or or + { #1 'multiresult := } + { t #2 global.max$ substring$ 't := } + if$ + } + while$ + multiresult +} + +FUNCTION {format.pages} +{ pages empty$ + { "" } + { pages multi.page.check + { "pages" pages n.dashify tie.or.space.connect } + { "page" pages tie.or.space.connect } + if$ + } + if$ +} + +FUNCTION {format.eid} +{ eid empty$ + { "" } + { "art." 
eid tie.or.space.connect } + if$ +} + +FUNCTION {format.vol.num.pages} +{ volume field.or.null + number empty$ + 'skip$ + { "\penalty0 (" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + pages empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.pages } + { ":\penalty0 " * pages n.dashify * } + if$ + } + if$ +} + +FUNCTION {format.vol.num.eid} +{ volume field.or.null + number empty$ + 'skip$ + { "\penalty0 (" number * ")" * * + volume empty$ + { "there's a number but no volume in " cite$ * warning$ } + 'skip$ + if$ + } + if$ + eid empty$ + 'skip$ + { duplicate$ empty$ + { pop$ format.eid } + { ":\penalty0 " * eid * } + if$ + } + if$ +} + +FUNCTION {format.chapter.pages} +{ chapter empty$ + 'format.pages + { type empty$ + { "chapter" } + { type "l" change.case$ } + if$ + chapter tie.or.space.connect + pages empty$ + 'skip$ + { ", " * format.pages * } + if$ + } + if$ +} + +FUNCTION {format.in.ed.booktitle} +{ booktitle empty$ + { "" } +%% CR: Leave out editors even if the information is available. 
+% { editor empty$ +% { "In " booktitle emphasize * } +% { "In " format.editors * ", " * booktitle emphasize * } +% if$ +% } + { "In " booktitle emphasize * } + if$ +} + +FUNCTION {empty.misc.check} +{ author empty$ title empty$ howpublished empty$ + month empty$ year empty$ note empty$ + and and and and and + key empty$ not and + { "all relevant fields are empty in " cite$ * warning$ } + 'skip$ + if$ +} + +FUNCTION {format.thesis.type} +{ type empty$ + 'skip$ + { pop$ + type "t" change.case$ + } + if$ +} + +FUNCTION {format.tr.number} +{ type empty$ + { "Technical Report" } + 'type + if$ + number empty$ + { "t" change.case$ } + { number tie.or.space.connect } + if$ +} + +FUNCTION {format.article.crossref} +{ key empty$ + { journal empty$ + { "need key or journal for " cite$ * " to crossref " * crossref * + warning$ + "" + } + { "In \emph{" journal * "}" * } + if$ + } + { "In " } + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {format.book.crossref} +{ volume empty$ + { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ + "In " + } + { "Volume" volume tie.or.space.connect + " of " * + } + if$ + editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { series empty$ + { "need editor, key, or series for " cite$ * " to crossref " * + crossref * warning$ + "" * + } + { "\emph{" * series * "}" * } + if$ + } + 'skip$ + if$ + } + 'skip$ + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {format.incoll.inproc.crossref} +{ editor empty$ + editor field.or.null author field.or.null = + or + { key empty$ + { booktitle empty$ + { "need editor, key, or booktitle for " cite$ * " to crossref " * + crossref * warning$ + "" + } + { "In \emph{" booktitle * "}" * } + if$ + } + { "In " } + if$ + } + { "In " } + if$ + " \citet{" * crossref * "}" * +} + +FUNCTION {article} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { 
journal emphasize "journal" output.check + eid empty$ + { format.vol.num.pages output } + { format.vol.num.eid output } + if$ + format.date "year" output.check + } + { format.article.crossref output.nonnull + eid empty$ + { format.pages output } + { format.eid output } + if$ + } + if$ + format.issn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {book} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {booklet} +{ output.bibitem + format.authors output + author format.key output + new.block + format.title "title" output.check + howpublished address new.block.checkb + howpublished output + address output + format.date output + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {inbook} +{ output.bibitem + author empty$ + { format.editors "author and editor" output.check + editor format.key output + } + { format.authors output.nonnull + crossref missing$ + { "author and editor" editor either.or.check } + 'skip$ + if$ + } + if$ + new.block + format.btitle "title" output.check + crossref missing$ + { format.bvolume output + format.chapter.pages "chapter and pages" output.check + new.block + format.number.series output + new.sentence + publisher "publisher" output.check + address output + } + { format.chapter.pages "chapter and 
pages" output.check + new.block + format.book.crossref output.nonnull + } + if$ + format.edition output + format.date "year" output.check + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {incollection} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.chapter.pages output + new.sentence + publisher "publisher" output.check + address output + format.edition output + format.date "year" output.check + } + { format.incoll.inproc.crossref output.nonnull + format.chapter.pages output + } + if$ + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {inproceedings} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + crossref missing$ + { format.in.ed.booktitle "booktitle" output.check + format.bvolume output + format.number.series output + format.pages output + address empty$ + { organization publisher new.sentence.checkb + organization output + publisher output + format.date "year" output.check + } + { address output.nonnull + format.date "year" output.check + new.sentence + organization output + publisher output + } + if$ + } + { format.incoll.inproc.crossref output.nonnull + format.pages output + } + if$ + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {conference} { inproceedings } + +FUNCTION {manual} +{ output.bibitem + format.authors output + author format.key output + new.block + format.btitle "title" output.check + organization address new.block.checkb + organization output + address output + format.edition output + format.date output + format.url output + 
new.block + note output + fin.entry +} + +FUNCTION {mastersthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + "Master's thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {misc} +{ output.bibitem + format.authors output + author format.key output + title howpublished new.block.checkb + format.title output + howpublished new.block.checka + howpublished output + format.date output + format.issn output + format.url output + new.block + note output + fin.entry + empty.misc.check +} + +FUNCTION {phdthesis} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.btitle "title" output.check + new.block + "PhD thesis" format.thesis.type output.nonnull + school "school" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {proceedings} +{ output.bibitem + format.editors output + editor format.key output + new.block + format.btitle "title" output.check + format.bvolume output + format.number.series output + address output + format.date "year" output.check + new.sentence + organization output + publisher output + format.isbn output + format.doi output + format.url output + new.block + note output + fin.entry +} + +FUNCTION {techreport} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title "title" output.check + new.block + format.tr.number output.nonnull + institution "institution" output.check + address output + format.date "year" output.check + format.url output + new.block + note output + fin.entry +} + +FUNCTION {unpublished} +{ output.bibitem + format.authors "author" output.check + author format.key output + new.block + format.title 
"title" output.check + new.block + note "note" output.check + format.date output + format.url output + fin.entry +} + +FUNCTION {default.type} { misc } + + +MACRO {jan} {"Jan."} + +MACRO {feb} {"Feb."} + +MACRO {mar} {"Mar."} + +MACRO {apr} {"Apr."} + +MACRO {may} {"May"} + +MACRO {jun} {"June"} + +MACRO {jul} {"July"} + +MACRO {aug} {"Aug."} + +MACRO {sep} {"Sept."} + +MACRO {oct} {"Oct."} + +MACRO {nov} {"Nov."} + +MACRO {dec} {"Dec."} + + + +MACRO {acmcs} {"ACM Comput. Surv."} + +MACRO {acta} {"Acta Inf."} + +MACRO {cacm} {"Commun. ACM"} + +MACRO {ibmjrd} {"IBM J. Res. Dev."} + +MACRO {ibmsj} {"IBM Syst.~J."} + +MACRO {ieeese} {"IEEE Trans. Softw. Eng."} + +MACRO {ieeetc} {"IEEE Trans. Comput."} + +MACRO {ieeetcad} + {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} + +MACRO {ipl} {"Inf. Process. Lett."} + +MACRO {jacm} {"J.~ACM"} + +MACRO {jcss} {"J.~Comput. Syst. Sci."} + +MACRO {scp} {"Sci. Comput. Programming"} + +MACRO {sicomp} {"SIAM J. Comput."} + +MACRO {tocs} {"ACM Trans. Comput. Syst."} + +MACRO {tods} {"ACM Trans. Database Syst."} + +MACRO {tog} {"ACM Trans. Gr."} + +MACRO {toms} {"ACM Trans. Math. Softw."} + +MACRO {toois} {"ACM Trans. Office Inf. Syst."} + +MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} + +MACRO {tcs} {"Theoretical Comput. Sci."} + + +READ + +FUNCTION {sortify} +{ purify$ + "l" change.case$ +} + +INTEGERS { len } + +FUNCTION {chop.word} +{ 's := + 'len := + s #1 len substring$ = + { s len #1 + global.max$ substring$ } + 's + if$ +} + +FUNCTION {format.lab.names} +{ 's := + s #1 "{vv~}{ll}" format.name$ + s num.names$ duplicate$ + #2 > + { pop$ " et~al." * } + { #2 < + 'skip$ + { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = + { " et~al." 
* } + { " and " * s #2 "{vv~}{ll}" format.name$ * } + if$ + } + if$ + } + if$ +} + +FUNCTION {author.key.label} +{ author empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.editor.key.label} +{ author empty$ + { editor empty$ + { key empty$ + { cite$ #1 #3 substring$ } + 'key + if$ + } + { editor format.lab.names } + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {author.key.organization.label} +{ author empty$ + { key empty$ + { organization empty$ + { cite$ #1 #3 substring$ } + { "The " #4 organization chop.word #3 text.prefix$ } + if$ + } + 'key + if$ + } + { author format.lab.names } + if$ +} + +FUNCTION {editor.key.organization.label} +{ editor empty$ + { key empty$ + { organization empty$ + { cite$ #1 #3 substring$ } + { "The " #4 organization chop.word #3 text.prefix$ } + if$ + } + 'key + if$ + } + { editor format.lab.names } + if$ +} + +FUNCTION {calc.short.authors} +{ type$ "book" = + type$ "inbook" = + or + 'author.editor.key.label + { type$ "proceedings" = + 'editor.key.organization.label + { type$ "manual" = + 'author.key.organization.label + 'author.key.label + if$ + } + if$ + } + if$ + 'short.list := +} + +FUNCTION {calc.label} +{ calc.short.authors + short.list + "(" + * + year duplicate$ empty$ + short.list key field.or.null = or + { pop$ "" } + 'skip$ + if$ + * + 'label := +} + +FUNCTION {sort.format.names} +{ 's := + #1 'nameptr := + "" + s num.names$ 'numnames := + numnames 'namesleft := + { namesleft #0 > } + { + s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := + nameptr #1 > + { + " " * + namesleft #1 = t "others" = and + { "zzzzz" * } + { numnames #2 > nameptr #2 = and + { "zz" * year field.or.null * " " * } + 'skip$ + if$ + t sortify * + } + if$ + } + { t sortify * } + if$ + nameptr #1 + 'nameptr := + namesleft #1 - 'namesleft := + } + while$ +} + +FUNCTION {sort.format.title} +{ 't := + "A " #2 + "An " #3 + "The " #4 t chop.word + chop.word + 
chop.word + sortify + #1 global.max$ substring$ +} + +FUNCTION {author.sort} +{ author empty$ + { key empty$ + { "to sort, need author or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.editor.sort} +{ author empty$ + { editor empty$ + { key empty$ + { "to sort, need author, editor, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { editor sort.format.names } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {author.organization.sort} +{ author empty$ + { organization empty$ + { key empty$ + { "to sort, need author, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { author sort.format.names } + if$ +} + +FUNCTION {editor.organization.sort} +{ editor empty$ + { organization empty$ + { key empty$ + { "to sort, need editor, organization, or key in " cite$ * warning$ + "" + } + { key sortify } + if$ + } + { "The " #4 organization chop.word sortify } + if$ + } + { editor sort.format.names } + if$ +} + + +FUNCTION {presort} +{ calc.label + label sortify + " " + * + type$ "book" = + type$ "inbook" = + or + 'author.editor.sort + { type$ "proceedings" = + 'editor.organization.sort + { type$ "manual" = + 'author.organization.sort + 'author.sort + if$ + } + if$ + } + if$ + " " + * + year field.or.null sortify + * + " " + * + cite$ + * + #1 entry.max$ substring$ + 'sort.label := + sort.label * + #1 entry.max$ substring$ + 'sort.key$ := +} + +ITERATE {presort} + +SORT + +STRINGS { longest.label last.label next.extra } + +INTEGERS { longest.label.width last.extra.num number.label } + +FUNCTION {initialize.longest.label} +{ "" 'longest.label := + #0 int.to.chr$ 'last.label := + "" 'next.extra := + #0 'longest.label.width := + #0 'last.extra.num := + #0 'number.label := +} + +FUNCTION {forward.pass} +{ last.label label = + { last.extra.num #1 + 'last.extra.num := + last.extra.num 
int.to.chr$ 'extra.label := + } + { "a" chr.to.int$ 'last.extra.num := + "" 'extra.label := + label 'last.label := + } + if$ + number.label #1 + 'number.label := +} + +FUNCTION {reverse.pass} +{ next.extra "b" = + { "a" 'extra.label := } + 'skip$ + if$ + extra.label 'next.extra := + extra.label + duplicate$ empty$ + 'skip$ + { "{\natexlab{" swap$ * "}}" * } + if$ + 'extra.label := + label extra.label * 'label := +} + +EXECUTE {initialize.longest.label} + +ITERATE {forward.pass} + +REVERSE {reverse.pass} + +FUNCTION {bib.sort.order} +{ sort.label 'sort.key$ := +} + +ITERATE {bib.sort.order} + +SORT + +FUNCTION {begin.bib} +{ preamble$ empty$ + 'skip$ + { preamble$ write$ newline$ } + if$ + "\begin{thebibliography}{" number.label int.to.str$ * "}" * + write$ newline$ + "\providecommand{\natexlab}[1]{#1}" + write$ newline$ + "\providecommand{\url}[1]{\texttt{#1}}" + write$ newline$ + "\expandafter\ifx\csname urlstyle\endcsname\relax" + write$ newline$ + " \providecommand{\doi}[1]{doi: #1}\else" + write$ newline$ + " \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi" + write$ newline$ +} + +EXECUTE {begin.bib} + +EXECUTE {init.state.consts} + +ITERATE {call.type$} + +FUNCTION {end.bib} +{ newline$ + "\end{thebibliography}" write$ newline$ +} + +EXECUTE {end.bib} diff --git a/img/DeiT-B_ImageNet_v2.pdf b/img/DeiT-B_ImageNet_v2.pdf deleted file mode 100644 index ac76a2f..0000000 Binary files a/img/DeiT-B_ImageNet_v2.pdf and /dev/null differ diff --git a/img/DeiT-B_ImageNet_v3.pdf b/img/DeiT-B_ImageNet_v3.pdf deleted file mode 100644 index da18456..0000000 --- a/img/DeiT-B_ImageNet_v3.pdf +++ /dev/null @@ -1,71 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 
obj -<< /Length 12 0 R /Filter /FlateDecode >> -stream -xœµ”MŽA …÷>…OÐc—«ü³¤¬%Ñ4ƒ×GåªN2­ bÃ"rRýâï=»»>}{<~:¼Ã÷Ÿááúëñ'0>ã Ÿð72ñ „„+p­Høœµžfý -p‚ØeÛÅ—#~ÁïHKD)"Ö´ -êÊaÊ*H‹¸haõŠ/gäDà_ÿ0=UOKÕ±zš¥ hñbµQ8Õî¥SŠ´´nÂ¥YçßUÂN9+ôøÜK"7ÓP²#šy³ÞÑU¨µžªIa‹ò~[;á@¾ÞC27ÓR²-”Ì«p¦aiÆF’pŠfR*ó€ßQÂN9f»Þ ÍYÏ’§´ÔÑQ¬¯­pS×>Ýþõòl¿ÞfÊþnÃgH5jîEZïã-ª¸Z±£RGÈ;JØ)'v½Î¶Iðfd367\Zeêm¸™ -«÷{5´IXlî(a§ã\oã¾~Ôæ¸ÇŽ3Q‰Ê¹9bwñàÜq­áE#æŽßVÂN9±ëmؽøØŒý¯<ñ—¡__5'›58ÁGøÀ÷“ -endstream -endobj -12 0 obj -420 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073347+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000970 00000 n -0000000865 00000 n -0000000886 00000 n -0000000907 00000 n -0000000928 00000 n -0000000949 00000 n -0000000065 00000 n -0000000330 00000 n -0000000845 00000 n -0000000208 00000 n -0000000825 00000 n -0000001030 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1187 -%%EOF diff --git a/img/DeiT-B_ImageNet_vNone.pdf b/img/DeiT-B_ImageNet_vNone.pdf deleted file mode 100644 index 28816a9..0000000 Binary files a/img/DeiT-B_ImageNet_vNone.pdf and /dev/null differ diff --git a/img/DeiT-B_fornet_all_cos_0.8_2.0_v3.pdf b/img/DeiT-B_fornet_all_cos_0.8_2.0_v3.pdf deleted file mode 100644 index 5154597..0000000 --- a/img/DeiT-B_fornet_all_cos_0.8_2.0_v3.pdf +++ /dev/null @@ -1,71 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter 
/FlateDecode >> -stream -xœ”MnÛ@ …÷<OàC9Ü&¼n»èÛMH -$×/æ/–»èBý<ñ{|CéîÛáãéñðc?áî|õøŸñ„„ÏHø‰Œ{d<!ᬊ„/m­Ç ÐXá «l>|?à/|EÚ…YÎ,Éi§áš”Ìi—"“’X0¾ŸsÿýÆp¥¥™Ò‚Zš¥±Ô›p¬å¤˜”(¹šqe/’vf™2IãßÂFØ „5DXNþ°ÓÉÅ…SöÄ•¦.f^Ë‹iR͉e ¯+a£ìÐËhÔéb˜jtKlE¢&Wz ñ¨™²’sæÑ÷ !\ -{²ËºÑ°–ôXÚÝJ®ò¢ÂâRw³K®¹ñþùx”_Öµú7ñ­E¢I¬o§¯Î%‚ˆ¤ ä !l„ºœs­' ;mL[}‰5BkUÍ®ªmj“[.9RüJØ({˜ËºÙËm„Ýé™$™‡k›× ¢a-Q+Lš=³ úu%l”»¬›íØ/ÓØov·$QgÊÈ¥_×ø5{ò9ÞW•°QðrŽýü«ùò2ÍÁ¾Ã_¢÷m -endstream -endobj -12 0 obj -432 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150053+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000982 00000 n -0000000877 00000 n -0000000898 00000 n -0000000919 00000 n -0000000940 00000 n -0000000961 00000 n -0000000065 00000 n -0000000330 00000 n -0000000857 00000 n -0000000208 00000 n -0000000837 00000 n -0000001042 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1201 -%%EOF diff --git a/img/DeiT-B_fornet_all_cos_0.8_2.0_vNone.pdf b/img/DeiT-B_fornet_all_cos_0.8_2.0_vNone.pdf deleted file mode 100644 index fc2eb70..0000000 Binary files a/img/DeiT-B_fornet_all_cos_0.8_2.0_vNone.pdf and /dev/null differ diff --git a/img/DeiT-B_fornet_all_cos_v1.pdf b/img/DeiT-B_fornet_all_cos_v1.pdf deleted file mode 100644 index 0abb5b6..0000000 Binary files a/img/DeiT-B_fornet_all_cos_v1.pdf and /dev/null differ diff --git a/img/DeiT-B_fornet_all_cos_v2.pdf b/img/DeiT-B_fornet_all_cos_v2.pdf deleted file mode 100644 index 06cec15..0000000 --- a/img/DeiT-B_fornet_all_cos_v2.pdf +++ /dev/null @@ -1,71 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB 
/ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter /FlateDecode >> -stream -xœ”ËnÜ0 E÷ü -~CJ|nÛ³n»è3Óq€¦@ûû…di^ÍYôãZç^Šöçýïû/»øñ+<œ¯ã0‘ð ÿ ã@H¸‹ ás¯íxõ;À~b“͇¯{ü†/HK¦”Z2¼"-ÆšliH‹˜kdp|="wæÞýÆp%ÑMI D·4J»  %¬˜E!ÑæF -»Q#-jLÉÞ ÜUÂrC®Ð,ás+9 C©•RÒZ”¨™!V¸](— âecßQÂrƒ^ïE§NÃT§‹‡×H±Ö;wµ"®éHKM ñj&ýŽn”[w×˨i½Û£ô»ÝÖ$©\û6V +.Úÿ}<–_/3õõ'nâ{ÈQ¹px·^Uª²´¡•kÞ2Þµp@×sgÛIÇNÓÖØá÷÷]%ü;Öi[gØëm4{›í ÎÌR¹OlªzXiá5”¢˜‘ŽÙ~[ 7Ê]/ÃnØ“il8«:å6¶¥¤ïßY•ê,§ñ~C׺]Ï-?ÿjN>¦18Àgø ò#ø9 -endstream -endobj -12 0 obj -429 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073409+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000979 00000 n -0000000874 00000 n -0000000895 00000 n -0000000916 00000 n -0000000937 00000 n -0000000958 00000 n -0000000065 00000 n -0000000330 00000 n -0000000854 00000 n -0000000208 00000 n -0000000834 00000 n -0000001039 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1196 -%%EOF diff --git a/img/DeiT-B_fornet_all_cos_v3.pdf b/img/DeiT-B_fornet_all_cos_v3.pdf deleted file mode 100644 index ddf3280..0000000 Binary files a/img/DeiT-B_fornet_all_cos_v3.pdf and /dev/null differ diff --git a/img/DeiT-B_fornet_all_linear_v1.pdf b/img/DeiT-B_fornet_all_linear_v1.pdf deleted file mode 100644 index c524b6f..0000000 Binary files a/img/DeiT-B_fornet_all_linear_v1.pdf and /dev/null differ diff --git a/img/DeiT-L_ImageNet_v1.pdf b/img/DeiT-L_ImageNet_v1.pdf deleted file mode 100644 index 8785a89..0000000 Binary files a/img/DeiT-L_ImageNet_v1.pdf and /dev/null differ diff --git a/img/DeiT-L_ImageNet_v2.pdf 
b/img/DeiT-L_ImageNet_v2.pdf deleted file mode 100644 index d20839e..0000000 Binary files a/img/DeiT-L_ImageNet_v2.pdf and /dev/null differ diff --git a/img/DeiT-L_ImageNet_v3.pdf b/img/DeiT-L_ImageNet_v3.pdf deleted file mode 100644 index 8a83e3f..0000000 --- a/img/DeiT-L_ImageNet_v3.pdf +++ /dev/null @@ -1,73 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter /FlateDecode >> -stream -xœ”ËnÛ@ E÷ü -~Br8|lÛ^·]ô;mh -´¿_ÌC±-ØEÂèq5çò’ÒçãïÇ/‡øñ+<œ¯ã30>!á3þAÆ2>!á -¬Š„/}mÇ Ð\¿œà'6ÙöðíˆßðiIó,U)+ÒRÉUH¤ -’iµ¤§àÛrGà_˜ž4º% Ôè†æÒn h‰ ÎL)ܼhÖêa¢më¨bFµñï*a§ÈÒ:$m 7ÓPggw6±æÞ­P2sÒ¢Â"b<Øw”°Sèu':us1M zÏJÞéìT8¹gÕ©jzä¤ßVÂN9Ò]/KMëiÏ¥ßmìV!yháâ½s\-L\GÔÿ|<·_/kêûo¸ ß‹t©šÄiÜ;XX8ܤ¬DF%²Qä%ì”»ž³m'¼ÙŒ”]£Öõ6RÕ•(³µa)".Bef|[WºåzYêõ‡6£“]-D%£ô¦sÕìƒ] ¥¦•m°o -a'œÐõ²Ì}w±ÙšÃ­ä)D}d=Ô28KOž¹P)³Û÷”°SNðzüü£y÷²™ƒ|†¿q÷- -endstream -endobj -12 0 obj -441 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073347+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000991 00000 n -0000000886 00000 n -0000000907 00000 n -0000000928 00000 n -0000000949 00000 n -0000000970 00000 n -0000000065 00000 n -0000000330 00000 n -0000000866 00000 n -0000000208 00000 n -0000000846 00000 n -0000001051 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1208 -%%EOF diff --git a/img/DeiT-L_ImageNet_vNone.pdf b/img/DeiT-L_ImageNet_vNone.pdf deleted file mode 100644 
index 6cdb23c..0000000 Binary files a/img/DeiT-L_ImageNet_vNone.pdf and /dev/null differ diff --git a/img/DeiT-L_fornet_all_cos_v1.pdf b/img/DeiT-L_fornet_all_cos_v1.pdf deleted file mode 100644 index 26efb4b..0000000 Binary files a/img/DeiT-L_fornet_all_cos_v1.pdf and /dev/null differ diff --git a/img/DeiT-L_fornet_all_cos_v2.pdf b/img/DeiT-L_fornet_all_cos_v2.pdf deleted file mode 100644 index 52779b7..0000000 Binary files a/img/DeiT-L_fornet_all_cos_v2.pdf and /dev/null differ diff --git a/img/DeiT-L_fornet_all_cos_v3.pdf b/img/DeiT-L_fornet_all_cos_v3.pdf deleted file mode 100644 index 3bbace3..0000000 Binary files a/img/DeiT-L_fornet_all_cos_v3.pdf and /dev/null differ diff --git a/img/DeiT-S_ImageNet_v2.pdf b/img/DeiT-S_ImageNet_v2.pdf deleted file mode 100644 index d1e7762..0000000 Binary files a/img/DeiT-S_ImageNet_v2.pdf and /dev/null differ diff --git a/img/DeiT-S_ImageNet_v3.pdf b/img/DeiT-S_ImageNet_v3.pdf deleted file mode 100644 index aee0d98..0000000 Binary files a/img/DeiT-S_ImageNet_v3.pdf and /dev/null differ diff --git a/img/DeiT-S_ImageNet_vNone.pdf b/img/DeiT-S_ImageNet_vNone.pdf deleted file mode 100644 index ea24781..0000000 Binary files a/img/DeiT-S_ImageNet_vNone.pdf and /dev/null differ diff --git a/img/DeiT-S_fornet_all_cos_v1.pdf b/img/DeiT-S_fornet_all_cos_v1.pdf deleted file mode 100644 index 7927f34..0000000 Binary files a/img/DeiT-S_fornet_all_cos_v1.pdf and /dev/null differ diff --git a/img/DeiT-S_fornet_all_cos_v2.pdf b/img/DeiT-S_fornet_all_cos_v2.pdf deleted file mode 100644 index 40a0e98..0000000 Binary files a/img/DeiT-S_fornet_all_cos_v2.pdf and /dev/null differ diff --git a/img/DeiT-S_fornet_all_cos_v3.pdf b/img/DeiT-S_fornet_all_cos_v3.pdf deleted file mode 100644 index f5ce2d3..0000000 Binary files a/img/DeiT-S_fornet_all_cos_v3.pdf and /dev/null differ diff --git a/img/DeiT-S_fornet_all_cos_vNone.pdf b/img/DeiT-S_fornet_all_cos_vNone.pdf deleted file mode 100644 index 42927e9..0000000 Binary files 
a/img/DeiT-S_fornet_all_cos_vNone.pdf and /dev/null differ diff --git a/img/DeiT-S_fornet_all_linear_v1.pdf b/img/DeiT-S_fornet_all_linear_v1.pdf deleted file mode 100644 index 17d723d..0000000 --- a/img/DeiT-S_fornet_all_linear_v1.pdf +++ /dev/null @@ -1,70 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter /FlateDecode >> -stream -xœµ”An1 E÷<O0!%J$·M¯Û.r€Àvx$Úë¢$Û8i7] 8žùÖûŸ¤}÷yÿëÇãþëîÞƒ»Ë§ÇŸÀøŒG$|BÂßȸCÆ#®À"HxŠÚ®Шßð‚M6_¾îñŸ‘W±R,‰"-¥¨¹{EZrJše|="qÿ¨ŽÄŠ…QÚC8´ÃrR/¤fá„•2;3ÒR‹RWó†W eG®Ð á©•@NÃPg{ʦErn8ÏfÕk Nd¤ÙÒ`ßVÂFÙ¡oçÔéb˜ -z•Jŵ8&fI³J .\XŒú¯2ˆf‹õnÏ^£Ï£ÄÓFmß#5ÉœµeI\ªÕ¤R‚öáëqüz&Ο¸‰xeN+!-êÊE\H‚Rrg®=á;JØ(v½tµÝx™Æútµ¦ìNkZ5÷“cM5çR…xL÷¶6ÊÞÎõ:îÛÙh÷ÿÛëÀ_…íØ³il.·‰9I›£rõRˆc¨B.EÛÞž—û–6Ê^/m¿üÍœ½Lsp€/ðKöç -endstream -endobj -12 0 obj -416 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073412+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000966 00000 n -0000000861 00000 n -0000000882 00000 n -0000000903 00000 n -0000000924 00000 n -0000000945 00000 n -0000000065 00000 n -0000000330 00000 n -0000000841 00000 n -0000000208 00000 n -0000000821 00000 n -0000001026 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1183 -%%EOF diff --git a/img/DeiT-S_fornet_all_linear_v2.pdf b/img/DeiT-S_fornet_all_linear_v2.pdf deleted file mode 100644 index f0b227f..0000000 --- a/img/DeiT-S_fornet_all_linear_v2.pdf +++ 
/dev/null @@ -1,71 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter /FlateDecode >> -stream -xœ”MŽÛ0 …÷<Oà!%Š"·m¬Û.z€A’vÐ)0sýB”'ARtaÐ?ÏúŸh?}Ù¿ÿzÞÛ}ÂÏßái»zþŒ/ÀxDÂ$ü@Æ2pAÂSÔvœ€Fý p€ßØdóáÛà+ÒâU¬KR‘–Rª¹»"-9¥še|;"qÿ©ŽÄŠ…QÚM8´Å’za®©½ïêIRqGZTœŠQmôGB¸và -ÍžZ àä;Tržï{B¸캅¾ýfÎN¦58ÀWø ­ ö™ -endstream -endobj -12 0 obj -433 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073412+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000983 00000 n -0000000878 00000 n -0000000899 00000 n -0000000920 00000 n -0000000941 00000 n -0000000962 00000 n -0000000065 00000 n -0000000330 00000 n -0000000858 00000 n -0000000208 00000 n -0000000838 00000 n -0000001043 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1200 -%%EOF diff --git a/img/DeiT-S_fornet_all_linear_v3.pdf b/img/DeiT-S_fornet_all_linear_v3.pdf deleted file mode 100644 index be979ae..0000000 --- a/img/DeiT-S_fornet_all_linear_v3.pdf +++ /dev/null @@ -1,70 +0,0 @@ -%PDF-1.4 -%¬Ü «º -1 0 obj -<< /Type /Catalog /Pages 2 0 R >> -endobj -8 0 obj -<< /Font 3 0 R /XObject 7 0 R /ExtGState 4 0 R /Pattern 5 0 R -/Shading 6 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> -endobj -11 0 obj -<< /Type /Page /Parent 2 0 R /Resources 8 0 R /MediaBox [ 0 0 144 144 ] -/Contents 9 0 R /Annots 10 0 R >> -endobj -9 0 obj -<< /Length 12 0 R /Filter /FlateDecode >> 
-stream -xœ”ËnA E÷þ -ÁÄ.»üØÒ¬MQ:A‚ßGõš™´&ˆE«úqÛç^»ºï>~ÿ¸?}9~À_áîruÿ Ÿ€ñ Ÿð2‘ñ 7`U$|îk;žæúà~b“­‡¯'ü†/H‡t+’IîH‡j.é\5‘R\¤šãë#rgá¿ß˜®4º) Ôè–æÒnÂC+§ªQ"¹ 7VT¼¶ s!(®ÍÀ»JØ)rƒ´IÈå`ì,UY„éàÄÕMØ¢EI«’že±o+a§з³èÔåbšêtsªEj«5UÂÜé %ÄÅiÑßQÂN9º»]GMëÝžK¿ÛØA*,Þj®V\kþóñ,¿]gêõná{ÈZ]Ý#…›u&£ê„ª=Šï(a§œØíÒÛvÒÁËÈ26fì¢Î®l¡®¤¡ÚR S(gÌßVÂN9Ú¹]Ç}û¹Ívz%)æÙÇšžšÖ{jÁ¤Õ+Û¤ßVÂN9±ÛuØ=ûXÆÖ—ðªÒ™)–fÖ·8QK”󿥄r‚·KÛ/?œ³—eà3ü™øù -endstream -endobj -12 0 obj -437 -endobj -10 0 obj -[ ] -endobj -3 0 obj -<< >> -endobj -4 0 obj -<< >> -endobj -5 0 obj -<< >> -endobj -6 0 obj -<< >> -endobj -7 0 obj -<< >> -endobj -2 0 obj -<< /Type /Pages /Kids [ 11 0 R ] /Count 1 >> -endobj -13 0 obj -<< /Creator (Matplotlib v3.9.2, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.9.2) -/CreationDate (D:20250802073413+02'00') >> -endobj -xref -0 14 -0000000000 65535 f -0000000016 00000 n -0000000987 00000 n -0000000882 00000 n -0000000903 00000 n -0000000924 00000 n -0000000945 00000 n -0000000966 00000 n -0000000065 00000 n -0000000330 00000 n -0000000862 00000 n -0000000208 00000 n -0000000842 00000 n -0000001047 00000 n -trailer -<< /Size 14 /Root 1 0 R /Info 13 0 R >> -startxref -1204 -%%EOF diff --git a/img/DeiT-S_fornet_all_linear_vNone.pdf b/img/DeiT-S_fornet_all_linear_vNone.pdf deleted file mode 100644 index dad51e4..0000000 Binary files a/img/DeiT-S_fornet_all_linear_vNone.pdf and /dev/null differ diff --git a/img/ResNet101_ImageNet_v1.pdf b/img/ResNet101_ImageNet_v1.pdf index 0000c01..9a89e44 100644 Binary files a/img/ResNet101_ImageNet_v1.pdf and b/img/ResNet101_ImageNet_v1.pdf differ diff --git a/img/ResNet101_ImageNet_v2.pdf b/img/ResNet101_ImageNet_v2.pdf index 06c7992..10e02a8 100644 Binary files a/img/ResNet101_ImageNet_v2.pdf and b/img/ResNet101_ImageNet_v2.pdf differ diff --git a/img/ResNet101_ImageNet_v3.pdf b/img/ResNet101_ImageNet_v3.pdf index 82752dd..f23929d 100644 --- a/img/ResNet101_ImageNet_v3.pdf +++ b/img/ResNet101_ImageNet_v3.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] 
/Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150045+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094736+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1206 +1204 %%EOF diff --git a/img/ResNet101_RecombNet_all_v1.pdf b/img/ResNet101_RecombNet all_v1.pdf similarity index 90% rename from img/ResNet101_RecombNet_all_v1.pdf rename to img/ResNet101_RecombNet all_v1.pdf index d2c4d1b..53ebc28 100644 Binary files a/img/ResNet101_RecombNet_all_v1.pdf and b/img/ResNet101_RecombNet all_v1.pdf differ diff --git a/img/ResNet101_RecombNet_all_v2.pdf b/img/ResNet101_RecombNet all_v2.pdf similarity index 90% rename from img/ResNet101_RecombNet_all_v2.pdf rename to img/ResNet101_RecombNet all_v2.pdf index 74734f4..08e4c7d 100644 Binary files a/img/ResNet101_RecombNet_all_v2.pdf and b/img/ResNet101_RecombNet all_v2.pdf differ diff --git a/img/ResNet101_RecombNet_all_v3.pdf b/img/ResNet101_RecombNet all_v3.pdf similarity index 90% rename from img/ResNet101_RecombNet_all_v3.pdf rename to img/ResNet101_RecombNet all_v3.pdf index d97cbcc..8555faa 100644 --- a/img/ResNet101_RecombNet_all_v3.pdf +++ b/img/ResNet101_RecombNet all_v3.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150051+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094747+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1210 +1208 %%EOF diff --git a/img/ResNet50_ImageNet_v1.pdf b/img/ResNet50_ImageNet_v1.pdf index 32bc24f..953058c 100644 --- 
a/img/ResNet50_ImageNet_v1.pdf +++ b/img/ResNet50_ImageNet_v1.pdf @@ -45,9 +45,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150046+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094737+02'00') >> endobj xref 0 14 @@ -68,5 +68,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1212 +1210 %%EOF diff --git a/img/ResNet50_ImageNet_v2.pdf b/img/ResNet50_ImageNet_v2.pdf index 06ee1b4..ea19853 100644 Binary files a/img/ResNet50_ImageNet_v2.pdf and b/img/ResNet50_ImageNet_v2.pdf differ diff --git a/img/ResNet50_ImageNet_v3.pdf b/img/ResNet50_ImageNet_v3.pdf index e020acf..3c2fd5c 100644 Binary files a/img/ResNet50_ImageNet_v3.pdf and b/img/ResNet50_ImageNet_v3.pdf differ diff --git a/img/ResNet50_RecombNet_all_v1.pdf b/img/ResNet50_RecombNet all_v1.pdf similarity index 90% rename from img/ResNet50_RecombNet_all_v1.pdf rename to img/ResNet50_RecombNet all_v1.pdf index 88ba2e9..b2e75c2 100644 --- a/img/ResNet50_RecombNet_all_v1.pdf +++ b/img/ResNet50_RecombNet all_v1.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150050+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094746+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1206 +1204 %%EOF diff --git a/img/ResNet50_RecombNet_all_v2.pdf b/img/ResNet50_RecombNet all_v2.pdf similarity index 90% rename from img/ResNet50_RecombNet_all_v2.pdf rename to img/ResNet50_RecombNet all_v2.pdf index d9c8523..0f8cc87 100644 Binary files 
a/img/ResNet50_RecombNet_all_v2.pdf and b/img/ResNet50_RecombNet all_v2.pdf differ diff --git a/img/ResNet50_RecombNet_all_v3.pdf b/img/ResNet50_RecombNet all_v3.pdf similarity index 90% rename from img/ResNet50_RecombNet_all_v3.pdf rename to img/ResNet50_RecombNet all_v3.pdf index 2259597..efa84ce 100644 --- a/img/ResNet50_RecombNet_all_v3.pdf +++ b/img/ResNet50_RecombNet all_v3.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150050+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094746+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1199 +1197 %%EOF diff --git a/img/Swin-S_ImageNet_v1.pdf b/img/Swin-S_ImageNet_v1.pdf index ef09926..3dcd902 100644 --- a/img/Swin-S_ImageNet_v1.pdf +++ b/img/Swin-S_ImageNet_v1.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150047+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094738+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1207 +1205 %%EOF diff --git a/img/Swin-S_ImageNet_v2.pdf b/img/Swin-S_ImageNet_v2.pdf index cde8932..fda1945 100644 Binary files a/img/Swin-S_ImageNet_v2.pdf and b/img/Swin-S_ImageNet_v2.pdf differ diff --git a/img/Swin-S_ImageNet_v3.pdf b/img/Swin-S_ImageNet_v3.pdf index 0d223e5..66df876 100644 Binary files a/img/Swin-S_ImageNet_v3.pdf and b/img/Swin-S_ImageNet_v3.pdf differ diff --git a/img/Swin-S_RecombNet_all_v1.pdf b/img/Swin-S_RecombNet all_v1.pdf similarity index 90% 
rename from img/Swin-S_RecombNet_all_v1.pdf rename to img/Swin-S_RecombNet all_v1.pdf index c3cfae6..99f0442 100644 Binary files a/img/Swin-S_RecombNet_all_v1.pdf and b/img/Swin-S_RecombNet all_v1.pdf differ diff --git a/img/Swin-S_RecombNet_all_v2.pdf b/img/Swin-S_RecombNet all_v2.pdf similarity index 90% rename from img/Swin-S_RecombNet_all_v2.pdf rename to img/Swin-S_RecombNet all_v2.pdf index 2be743b..5267a01 100644 --- a/img/Swin-S_RecombNet_all_v2.pdf +++ b/img/Swin-S_RecombNet all_v2.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150049+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094745+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1170 +1168 %%EOF diff --git a/img/Swin-S_RecombNet_all_v3.pdf b/img/Swin-S_RecombNet all_v3.pdf similarity index 90% rename from img/Swin-S_RecombNet_all_v3.pdf rename to img/Swin-S_RecombNet all_v3.pdf index 49c3b06..2db583f 100644 Binary files a/img/Swin-S_RecombNet_all_v3.pdf and b/img/Swin-S_RecombNet all_v3.pdf differ diff --git a/img/Swin-Ti_ImageNet_v1.pdf b/img/Swin-Ti_ImageNet_v1.pdf index 171804c..07b7fcd 100644 Binary files a/img/Swin-Ti_ImageNet_v1.pdf and b/img/Swin-Ti_ImageNet_v1.pdf differ diff --git a/img/Swin-Ti_ImageNet_v2.pdf b/img/Swin-Ti_ImageNet_v2.pdf index 7e56d1c..315a962 100644 --- a/img/Swin-Ti_ImageNet_v2.pdf +++ b/img/Swin-Ti_ImageNet_v2.pdf @@ -44,9 +44,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150045+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) 
+/CreationDate (D:20250301004049+02'00') >> endobj xref 0 14 @@ -67,5 +67,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1209 +1207 %%EOF diff --git a/img/Swin-Ti_ImageNet_v3.pdf b/img/Swin-Ti_ImageNet_v3.pdf index f6c1ac1..d738b01 100644 --- a/img/Swin-Ti_ImageNet_v3.pdf +++ b/img/Swin-Ti_ImageNet_v3.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150045+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250301004048+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1206 +1204 %%EOF diff --git a/img/Swin-Ti_RecombNet_all_v1.pdf b/img/Swin-Ti_RecombNet all_v1.pdf similarity index 90% rename from img/Swin-Ti_RecombNet_all_v1.pdf rename to img/Swin-Ti_RecombNet all_v1.pdf index 3c43d4b..9edf8f9 100644 Binary files a/img/Swin-Ti_RecombNet_all_v1.pdf and b/img/Swin-Ti_RecombNet all_v1.pdf differ diff --git a/img/Swin-Ti_RecombNet_all_v2.pdf b/img/Swin-Ti_RecombNet all_v2.pdf similarity index 90% rename from img/Swin-Ti_RecombNet_all_v2.pdf rename to img/Swin-Ti_RecombNet all_v2.pdf index 757529b..5144150 100644 Binary files a/img/Swin-Ti_RecombNet_all_v2.pdf and b/img/Swin-Ti_RecombNet all_v2.pdf differ diff --git a/img/Swin-Ti_RecombNet_all_v3.pdf b/img/Swin-Ti_RecombNet all_v3.pdf similarity index 90% rename from img/Swin-Ti_RecombNet_all_v3.pdf rename to img/Swin-Ti_RecombNet all_v3.pdf index 6f7cc11..c0ee44c 100644 Binary files a/img/Swin-Ti_RecombNet_all_v3.pdf and b/img/Swin-Ti_RecombNet all_v3.pdf differ diff --git a/img/ViT-B_ImageNet_v1.pdf b/img/ViT-B_ImageNet_v1.pdf index 8319743..7e7462a 100644 --- a/img/ViT-B_ImageNet_v1.pdf +++ b/img/ViT-B_ImageNet_v1.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> 
endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150046+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094737+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1209 +1207 %%EOF diff --git a/img/ViT-B_ImageNet_v2.pdf b/img/ViT-B_ImageNet_v2.pdf index d3cc4d6..7836f68 100644 Binary files a/img/ViT-B_ImageNet_v2.pdf and b/img/ViT-B_ImageNet_v2.pdf differ diff --git a/img/ViT-B_ImageNet_v3.pdf b/img/ViT-B_ImageNet_v3.pdf index 0844562..a159302 100644 --- a/img/ViT-B_ImageNet_v3.pdf +++ b/img/ViT-B_ImageNet_v3.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150045+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094736+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1193 +1191 %%EOF diff --git a/img/ViT-B_RecombNet_all_v1.pdf b/img/ViT-B_RecombNet all_v1.pdf similarity index 90% rename from img/ViT-B_RecombNet_all_v1.pdf rename to img/ViT-B_RecombNet all_v1.pdf index 2a70916..b589def 100644 --- a/img/ViT-B_RecombNet_all_v1.pdf +++ b/img/ViT-B_RecombNet all_v1.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150049+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094745+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R 
>> startxref -1206 +1204 %%EOF diff --git a/img/ViT-B_RecombNet_all_v2.pdf b/img/ViT-B_RecombNet all_v2.pdf similarity index 90% rename from img/ViT-B_RecombNet_all_v2.pdf rename to img/ViT-B_RecombNet all_v2.pdf index 93fc253..43bad24 100644 Binary files a/img/ViT-B_RecombNet_all_v2.pdf and b/img/ViT-B_RecombNet all_v2.pdf differ diff --git a/img/ViT-B_RecombNet_all_v3.pdf b/img/ViT-B_RecombNet all_v3.pdf similarity index 90% rename from img/ViT-B_RecombNet_all_v3.pdf rename to img/ViT-B_RecombNet all_v3.pdf index e2bba06..ed87bf4 100644 Binary files a/img/ViT-B_RecombNet_all_v3.pdf and b/img/ViT-B_RecombNet all_v3.pdf differ diff --git a/img/ViT-L_ImageNet_v1.pdf b/img/ViT-L_ImageNet_v1.pdf index 5c4d8b8..f2769b8 100644 Binary files a/img/ViT-L_ImageNet_v1.pdf and b/img/ViT-L_ImageNet_v1.pdf differ diff --git a/img/ViT-L_ImageNet_v2.pdf b/img/ViT-L_ImageNet_v2.pdf index b4b082e..25fda26 100644 Binary files a/img/ViT-L_ImageNet_v2.pdf and b/img/ViT-L_ImageNet_v2.pdf differ diff --git a/img/ViT-L_ImageNet_v3.pdf b/img/ViT-L_ImageNet_v3.pdf index bf94eea..07e9d9a 100644 --- a/img/ViT-L_ImageNet_v3.pdf +++ b/img/ViT-L_ImageNet_v3.pdf @@ -42,9 +42,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150047+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094738+02'00') >> endobj xref 0 14 @@ -65,5 +65,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1184 +1182 %%EOF diff --git a/img/ViT-L_RecombNet_all_v1.pdf b/img/ViT-L_RecombNet all_v1.pdf similarity index 90% rename from img/ViT-L_RecombNet_all_v1.pdf rename to img/ViT-L_RecombNet all_v1.pdf index 8f46491..824df1c 100644 Binary files a/img/ViT-L_RecombNet_all_v1.pdf and b/img/ViT-L_RecombNet all_v1.pdf differ diff --git a/img/ViT-L_RecombNet_all_v2.pdf 
b/img/ViT-L_RecombNet all_v2.pdf similarity index 90% rename from img/ViT-L_RecombNet_all_v2.pdf rename to img/ViT-L_RecombNet all_v2.pdf index a06ee6d..f9fd7f8 100644 --- a/img/ViT-L_RecombNet_all_v2.pdf +++ b/img/ViT-L_RecombNet all_v2.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150048+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094744+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1190 +1188 %%EOF diff --git a/img/ViT-L_RecombNet_all_v3.pdf b/img/ViT-L_RecombNet all_v3.pdf similarity index 90% rename from img/ViT-L_RecombNet_all_v3.pdf rename to img/ViT-L_RecombNet all_v3.pdf index c7a96d9..898c369 100644 Binary files a/img/ViT-L_RecombNet_all_v3.pdf and b/img/ViT-L_RecombNet all_v3.pdf differ diff --git a/img/ViT-S_ImageNet_v1.pdf b/img/ViT-S_ImageNet_v1.pdf index af1acb7..9daf666 100644 Binary files a/img/ViT-S_ImageNet_v1.pdf and b/img/ViT-S_ImageNet_v1.pdf differ diff --git a/img/ViT-S_ImageNet_v2.pdf b/img/ViT-S_ImageNet_v2.pdf index 25f10a4..364b437 100644 Binary files a/img/ViT-S_ImageNet_v2.pdf and b/img/ViT-S_ImageNet_v2.pdf differ diff --git a/img/ViT-S_ImageNet_v3.pdf b/img/ViT-S_ImageNet_v3.pdf index 6773bd9..30ae7a5 100644 --- a/img/ViT-S_ImageNet_v3.pdf +++ b/img/ViT-S_ImageNet_v3.pdf @@ -43,9 +43,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150046+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094737+02'00') >> endobj xref 0 14 @@ -66,5 +66,5 @@ xref trailer << /Size 14 /Root 1 0 R 
/Info 13 0 R >> startxref -1212 +1210 %%EOF diff --git a/img/ViT-S_RecombNet_all_v1.pdf b/img/ViT-S_RecombNet all_v1.pdf similarity index 90% rename from img/ViT-S_RecombNet_all_v1.pdf rename to img/ViT-S_RecombNet all_v1.pdf index 9d82eb3..b2b176d 100644 Binary files a/img/ViT-S_RecombNet_all_v1.pdf and b/img/ViT-S_RecombNet all_v1.pdf differ diff --git a/img/ViT-S_RecombNet_all_v2.pdf b/img/ViT-S_RecombNet all_v2.pdf similarity index 90% rename from img/ViT-S_RecombNet_all_v2.pdf rename to img/ViT-S_RecombNet all_v2.pdf index 56a7427..4ceb446 100644 Binary files a/img/ViT-S_RecombNet_all_v2.pdf and b/img/ViT-S_RecombNet all_v2.pdf differ diff --git a/img/ViT-S_RecombNet_all_v3.pdf b/img/ViT-S_RecombNet all_v3.pdf similarity index 90% rename from img/ViT-S_RecombNet_all_v3.pdf rename to img/ViT-S_RecombNet all_v3.pdf index 1581fd3..b713bf2 100644 --- a/img/ViT-S_RecombNet_all_v3.pdf +++ b/img/ViT-S_RecombNet all_v3.pdf @@ -44,9 +44,9 @@ endobj << /Type /Pages /Kids [ 11 0 R ] /Count 1 >> endobj 13 0 obj -<< /Creator (Matplotlib v3.10.1, https://matplotlib.org) -/Producer (Matplotlib pdf backend v3.10.1) -/CreationDate (D:20250724150051+02'00') >> +<< /Creator (Matplotlib v3.9.4, https://matplotlib.org) +/Producer (Matplotlib pdf backend v3.9.4) +/CreationDate (D:20250227094746+02'00') >> endobj xref 0 14 @@ -67,5 +67,5 @@ xref trailer << /Size 14 /Root 1 0 R /Info 13 0 R >> startxref -1210 +1208 %%EOF diff --git a/img/appendix_examples/n01531178_4963.JPEG b/img/appendix_examples/n01531178_4963.JPEG deleted file mode 100644 index 01577c8..0000000 Binary files a/img/appendix_examples/n01531178_4963.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v1.JPEG b/img/appendix_examples/n01531178_4963_recombined_v1.JPEG deleted file mode 100644 index 6a39b6e..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v11.JPEG 
b/img/appendix_examples/n01531178_4963_recombined_v11.JPEG deleted file mode 100644 index e0c1fa0..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v11.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v13.JPEG b/img/appendix_examples/n01531178_4963_recombined_v13.JPEG deleted file mode 100644 index f5fd04c..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v13.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v14.JPEG b/img/appendix_examples/n01531178_4963_recombined_v14.JPEG deleted file mode 100644 index 19f4037..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v14.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v15.JPEG b/img/appendix_examples/n01531178_4963_recombined_v15.JPEG deleted file mode 100644 index 80048a8..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v18.JPEG b/img/appendix_examples/n01531178_4963_recombined_v18.JPEG deleted file mode 100644 index ca72e8b..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v2.JPEG b/img/appendix_examples/n01531178_4963_recombined_v2.JPEG deleted file mode 100644 index d50d6f9..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v20.JPEG b/img/appendix_examples/n01531178_4963_recombined_v20.JPEG deleted file mode 100644 index 058e82b..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v21.JPEG b/img/appendix_examples/n01531178_4963_recombined_v21.JPEG deleted file mode 100644 index 6cf9d41..0000000 
Binary files a/img/appendix_examples/n01531178_4963_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v23.JPEG b/img/appendix_examples/n01531178_4963_recombined_v23.JPEG deleted file mode 100644 index bafd160..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v23.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v26.JPEG b/img/appendix_examples/n01531178_4963_recombined_v26.JPEG deleted file mode 100644 index f2e9797..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v26.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_recombined_v7.JPEG b/img/appendix_examples/n01531178_4963_recombined_v7.JPEG deleted file mode 100644 index 27a94d0..0000000 Binary files a/img/appendix_examples/n01531178_4963_recombined_v7.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_v0_bg.JPEG b/img/appendix_examples/n01531178_4963_v0_bg.JPEG deleted file mode 100644 index f755934..0000000 Binary files a/img/appendix_examples/n01531178_4963_v0_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_v0_fg.PNG b/img/appendix_examples/n01531178_4963_v0_fg.PNG deleted file mode 100644 index 221c35e..0000000 Binary files a/img/appendix_examples/n01531178_4963_v0_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n01531178_4963_v0_fg.WEBP b/img/appendix_examples/n01531178_4963_v0_fg.WEBP deleted file mode 100644 index e52d982..0000000 Binary files a/img/appendix_examples/n01531178_4963_v0_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507.JPEG b/img/appendix_examples/n01818515_31507.JPEG deleted file mode 100644 index 7408d47..0000000 Binary files a/img/appendix_examples/n01818515_31507.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v0.JPEG b/img/appendix_examples/n01818515_31507_recombined_v0.JPEG deleted file 
mode 100644 index 6fd8c09..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v0.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v10.JPEG b/img/appendix_examples/n01818515_31507_recombined_v10.JPEG deleted file mode 100644 index 5f58774..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v10.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v12.JPEG b/img/appendix_examples/n01818515_31507_recombined_v12.JPEG deleted file mode 100644 index 19a5bb9..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v12.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v16.JPEG b/img/appendix_examples/n01818515_31507_recombined_v16.JPEG deleted file mode 100644 index ef66cb8..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v16.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v20.JPEG b/img/appendix_examples/n01818515_31507_recombined_v20.JPEG deleted file mode 100644 index 3674a8b..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v21.JPEG b/img/appendix_examples/n01818515_31507_recombined_v21.JPEG deleted file mode 100644 index 537ac4b..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v25.JPEG b/img/appendix_examples/n01818515_31507_recombined_v25.JPEG deleted file mode 100644 index 4a68243..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v25.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v26.JPEG b/img/appendix_examples/n01818515_31507_recombined_v26.JPEG deleted file mode 100644 index 9e4412f..0000000 Binary files 
a/img/appendix_examples/n01818515_31507_recombined_v26.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v28.JPEG b/img/appendix_examples/n01818515_31507_recombined_v28.JPEG deleted file mode 100644 index 4024f32..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v28.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v29.JPEG b/img/appendix_examples/n01818515_31507_recombined_v29.JPEG deleted file mode 100644 index f477388..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v29.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v3.JPEG b/img/appendix_examples/n01818515_31507_recombined_v3.JPEG deleted file mode 100644 index 5fef617..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v3.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_recombined_v7.JPEG b/img/appendix_examples/n01818515_31507_recombined_v7.JPEG deleted file mode 100644 index 2a70429..0000000 Binary files a/img/appendix_examples/n01818515_31507_recombined_v7.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_v1_bg.JPEG b/img/appendix_examples/n01818515_31507_v1_bg.JPEG deleted file mode 100644 index 921cecb..0000000 Binary files a/img/appendix_examples/n01818515_31507_v1_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_v1_fg.PNG b/img/appendix_examples/n01818515_31507_v1_fg.PNG deleted file mode 100644 index afd4958..0000000 Binary files a/img/appendix_examples/n01818515_31507_v1_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n01818515_31507_v1_fg.WEBP b/img/appendix_examples/n01818515_31507_v1_fg.WEBP deleted file mode 100644 index 5d56c52..0000000 Binary files a/img/appendix_examples/n01818515_31507_v1_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070.JPEG 
b/img/appendix_examples/n01943899_20070.JPEG deleted file mode 100644 index bcf50a2..0000000 Binary files a/img/appendix_examples/n01943899_20070.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_bg.JPEG b/img/appendix_examples/n01943899_20070_bg.JPEG deleted file mode 100644 index 1903519..0000000 Binary files a/img/appendix_examples/n01943899_20070_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_fg.PNG b/img/appendix_examples/n01943899_20070_fg.PNG deleted file mode 100644 index e84af56..0000000 Binary files a/img/appendix_examples/n01943899_20070_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_fg.WEBP b/img/appendix_examples/n01943899_20070_fg.WEBP deleted file mode 100644 index c9bf1ef..0000000 Binary files a/img/appendix_examples/n01943899_20070_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v0.JPEG b/img/appendix_examples/n01943899_20070_recombined_v0.JPEG deleted file mode 100644 index 80db5b3..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v0.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v1.JPEG b/img/appendix_examples/n01943899_20070_recombined_v1.JPEG deleted file mode 100644 index df9fa9a..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v10.JPEG b/img/appendix_examples/n01943899_20070_recombined_v10.JPEG deleted file mode 100644 index eb3c788..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v10.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v14.JPEG b/img/appendix_examples/n01943899_20070_recombined_v14.JPEG deleted file mode 100644 index 1034ca9..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v14.JPEG and /dev/null differ diff --git 
a/img/appendix_examples/n01943899_20070_recombined_v15.JPEG b/img/appendix_examples/n01943899_20070_recombined_v15.JPEG deleted file mode 100644 index abd571f..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v18.JPEG b/img/appendix_examples/n01943899_20070_recombined_v18.JPEG deleted file mode 100644 index 68c814e..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v2.JPEG b/img/appendix_examples/n01943899_20070_recombined_v2.JPEG deleted file mode 100644 index f8f4273..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v27.JPEG b/img/appendix_examples/n01943899_20070_recombined_v27.JPEG deleted file mode 100644 index bfd748f..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v27.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01943899_20070_recombined_v3.JPEG b/img/appendix_examples/n01943899_20070_recombined_v3.JPEG deleted file mode 100644 index 345c519..0000000 Binary files a/img/appendix_examples/n01943899_20070_recombined_v3.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117.JPEG b/img/appendix_examples/n01986214_4117.JPEG deleted file mode 100644 index 6d36550..0000000 Binary files a/img/appendix_examples/n01986214_4117.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_bg.JPEG b/img/appendix_examples/n01986214_4117_bg.JPEG deleted file mode 100644 index dff6643..0000000 Binary files a/img/appendix_examples/n01986214_4117_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_fg.PNG b/img/appendix_examples/n01986214_4117_fg.PNG deleted file mode 100644 index 00776cb..0000000 Binary files 
a/img/appendix_examples/n01986214_4117_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_fg.WEBP b/img/appendix_examples/n01986214_4117_fg.WEBP deleted file mode 100644 index 89cf6ef..0000000 Binary files a/img/appendix_examples/n01986214_4117_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v12.JPEG b/img/appendix_examples/n01986214_4117_recombined_v12.JPEG deleted file mode 100644 index d1b11f9..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v12.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v16.JPEG b/img/appendix_examples/n01986214_4117_recombined_v16.JPEG deleted file mode 100644 index 1319331..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v16.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v18.JPEG b/img/appendix_examples/n01986214_4117_recombined_v18.JPEG deleted file mode 100644 index 2e427b9..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v20.JPEG b/img/appendix_examples/n01986214_4117_recombined_v20.JPEG deleted file mode 100644 index 5459dca..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v21.JPEG b/img/appendix_examples/n01986214_4117_recombined_v21.JPEG deleted file mode 100644 index 1daad29..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v24.JPEG b/img/appendix_examples/n01986214_4117_recombined_v24.JPEG deleted file mode 100644 index c712145..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v24.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v8.JPEG 
b/img/appendix_examples/n01986214_4117_recombined_v8.JPEG deleted file mode 100644 index 853509f..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v8.JPEG and /dev/null differ diff --git a/img/appendix_examples/n01986214_4117_recombined_v9.JPEG b/img/appendix_examples/n01986214_4117_recombined_v9.JPEG deleted file mode 100644 index 2df4f5f..0000000 Binary files a/img/appendix_examples/n01986214_4117_recombined_v9.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208.JPEG b/img/appendix_examples/n02190166_1208.JPEG deleted file mode 100644 index 5ea0d5b..0000000 Binary files a/img/appendix_examples/n02190166_1208.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_bg.JPEG b/img/appendix_examples/n02190166_1208_bg.JPEG deleted file mode 100644 index 679c0e8..0000000 Binary files a/img/appendix_examples/n02190166_1208_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_fg.PNG b/img/appendix_examples/n02190166_1208_fg.PNG deleted file mode 100644 index 88773b6..0000000 Binary files a/img/appendix_examples/n02190166_1208_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_fg.WEBP b/img/appendix_examples/n02190166_1208_fg.WEBP deleted file mode 100644 index db2d6ae..0000000 Binary files a/img/appendix_examples/n02190166_1208_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v1.JPEG b/img/appendix_examples/n02190166_1208_recombined_v1.JPEG deleted file mode 100644 index 31da003..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v15.JPEG b/img/appendix_examples/n02190166_1208_recombined_v15.JPEG deleted file mode 100644 index 8593cda..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v18.JPEG 
b/img/appendix_examples/n02190166_1208_recombined_v18.JPEG deleted file mode 100644 index 435ee69..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v20.JPEG b/img/appendix_examples/n02190166_1208_recombined_v20.JPEG deleted file mode 100644 index 498c227..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v21.JPEG b/img/appendix_examples/n02190166_1208_recombined_v21.JPEG deleted file mode 100644 index 8d4b4da..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v23.JPEG b/img/appendix_examples/n02190166_1208_recombined_v23.JPEG deleted file mode 100644 index 37d6c00..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v23.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v7.JPEG b/img/appendix_examples/n02190166_1208_recombined_v7.JPEG deleted file mode 100644 index c9e5346..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v7.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02190166_1208_recombined_v9.JPEG b/img/appendix_examples/n02190166_1208_recombined_v9.JPEG deleted file mode 100644 index 6e7a780..0000000 Binary files a/img/appendix_examples/n02190166_1208_recombined_v9.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170.JPEG b/img/appendix_examples/n02229544_6170.JPEG deleted file mode 100644 index 94e8f06..0000000 Binary files a/img/appendix_examples/n02229544_6170.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170.WEBP b/img/appendix_examples/n02229544_6170.WEBP deleted file mode 100644 index bd09490..0000000 Binary files a/img/appendix_examples/n02229544_6170.WEBP and /dev/null differ diff 
--git a/img/appendix_examples/n02229544_6170_bg.JPEG b/img/appendix_examples/n02229544_6170_bg.JPEG deleted file mode 100644 index 9b65353..0000000 Binary files a/img/appendix_examples/n02229544_6170_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_fg.PNG b/img/appendix_examples/n02229544_6170_fg.PNG deleted file mode 100644 index b81d40e..0000000 Binary files a/img/appendix_examples/n02229544_6170_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v1.JPEG b/img/appendix_examples/n02229544_6170_recombined_v1.JPEG deleted file mode 100644 index 3234983..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v17.JPEG b/img/appendix_examples/n02229544_6170_recombined_v17.JPEG deleted file mode 100644 index b28c317..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v17.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v18.JPEG b/img/appendix_examples/n02229544_6170_recombined_v18.JPEG deleted file mode 100644 index ff41338..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v19.JPEG b/img/appendix_examples/n02229544_6170_recombined_v19.JPEG deleted file mode 100644 index dc9a4f3..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v19.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v2.JPEG b/img/appendix_examples/n02229544_6170_recombined_v2.JPEG deleted file mode 100644 index 9bf60f2..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v25.JPEG b/img/appendix_examples/n02229544_6170_recombined_v25.JPEG deleted file mode 100644 index 80401f0..0000000 Binary files 
a/img/appendix_examples/n02229544_6170_recombined_v25.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v28.JPEG b/img/appendix_examples/n02229544_6170_recombined_v28.JPEG deleted file mode 100644 index 911223f..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v28.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v5.JPEG b/img/appendix_examples/n02229544_6170_recombined_v5.JPEG deleted file mode 100644 index 5c2ae47..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v5.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02229544_6170_recombined_v6.JPEG b/img/appendix_examples/n02229544_6170_recombined_v6.JPEG deleted file mode 100644 index 11bc6d1..0000000 Binary files a/img/appendix_examples/n02229544_6170_recombined_v6.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430.JPEG b/img/appendix_examples/n02443484_5430.JPEG deleted file mode 100644 index 35e7697..0000000 Binary files a/img/appendix_examples/n02443484_5430.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_bg.JPEG b/img/appendix_examples/n02443484_5430_bg.JPEG deleted file mode 100644 index e22bd2e..0000000 Binary files a/img/appendix_examples/n02443484_5430_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_fg.PNG b/img/appendix_examples/n02443484_5430_fg.PNG deleted file mode 100644 index 5b1961a..0000000 Binary files a/img/appendix_examples/n02443484_5430_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_fg.WEBP b/img/appendix_examples/n02443484_5430_fg.WEBP deleted file mode 100644 index 0c9b7a7..0000000 Binary files a/img/appendix_examples/n02443484_5430_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v10.JPEG b/img/appendix_examples/n02443484_5430_recombined_v10.JPEG deleted file mode 100644 index 0f269da..0000000 Binary files 
a/img/appendix_examples/n02443484_5430_recombined_v10.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v16.JPEG b/img/appendix_examples/n02443484_5430_recombined_v16.JPEG deleted file mode 100644 index 2745662..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v16.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v20.JPEG b/img/appendix_examples/n02443484_5430_recombined_v20.JPEG deleted file mode 100644 index cfcf176..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v21.JPEG b/img/appendix_examples/n02443484_5430_recombined_v21.JPEG deleted file mode 100644 index abe6fe7..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v24.JPEG b/img/appendix_examples/n02443484_5430_recombined_v24.JPEG deleted file mode 100644 index 4852fb5..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v24.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v27.JPEG b/img/appendix_examples/n02443484_5430_recombined_v27.JPEG deleted file mode 100644 index 005f5ee..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v27.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v3.JPEG b/img/appendix_examples/n02443484_5430_recombined_v3.JPEG deleted file mode 100644 index 846b9e2..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v3.JPEG and /dev/null differ diff --git a/img/appendix_examples/n02443484_5430_recombined_v4.JPEG b/img/appendix_examples/n02443484_5430_recombined_v4.JPEG deleted file mode 100644 index 60df11f..0000000 Binary files a/img/appendix_examples/n02443484_5430_recombined_v4.JPEG and /dev/null differ diff --git 
a/img/appendix_examples/n03201208_21000.JPEG b/img/appendix_examples/n03201208_21000.JPEG deleted file mode 100644 index 873b167..0000000 Binary files a/img/appendix_examples/n03201208_21000.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_bg.JPEG b/img/appendix_examples/n03201208_21000_bg.JPEG deleted file mode 100644 index 8474e29..0000000 Binary files a/img/appendix_examples/n03201208_21000_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_fg.PNG b/img/appendix_examples/n03201208_21000_fg.PNG deleted file mode 100644 index 07f3d17..0000000 Binary files a/img/appendix_examples/n03201208_21000_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_fg.WEBP b/img/appendix_examples/n03201208_21000_fg.WEBP deleted file mode 100644 index 1b279d3..0000000 Binary files a/img/appendix_examples/n03201208_21000_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v0.JPEG b/img/appendix_examples/n03201208_21000_recombined_v0.JPEG deleted file mode 100644 index c3b56a7..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v0.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v11.JPEG b/img/appendix_examples/n03201208_21000_recombined_v11.JPEG deleted file mode 100644 index 73c1fd3..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v11.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v15.JPEG b/img/appendix_examples/n03201208_21000_recombined_v15.JPEG deleted file mode 100644 index 969f170..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v19.JPEG b/img/appendix_examples/n03201208_21000_recombined_v19.JPEG deleted file mode 100644 index 97f36c7..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v19.JPEG and 
/dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v20.JPEG b/img/appendix_examples/n03201208_21000_recombined_v20.JPEG deleted file mode 100644 index f26a955..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v21.JPEG b/img/appendix_examples/n03201208_21000_recombined_v21.JPEG deleted file mode 100644 index 2b3cdb6..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v21.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v24.JPEG b/img/appendix_examples/n03201208_21000_recombined_v24.JPEG deleted file mode 100644 index 505fe8f..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v24.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v26.JPEG b/img/appendix_examples/n03201208_21000_recombined_v26.JPEG deleted file mode 100644 index 87a1dd0..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v26.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v3.JPEG b/img/appendix_examples/n03201208_21000_recombined_v3.JPEG deleted file mode 100644 index 550be47..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v3.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03201208_21000_recombined_v5.JPEG b/img/appendix_examples/n03201208_21000_recombined_v5.JPEG deleted file mode 100644 index d80574f..0000000 Binary files a/img/appendix_examples/n03201208_21000_recombined_v5.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435.JPEG b/img/appendix_examples/n03424325_21435.JPEG deleted file mode 100644 index faaa7a1..0000000 Binary files a/img/appendix_examples/n03424325_21435.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_bg.JPEG b/img/appendix_examples/n03424325_21435_bg.JPEG deleted file 
mode 100644 index 139ce98..0000000 Binary files a/img/appendix_examples/n03424325_21435_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_fg.PNG b/img/appendix_examples/n03424325_21435_fg.PNG deleted file mode 100644 index e7071a9..0000000 Binary files a/img/appendix_examples/n03424325_21435_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_fg.WEBP b/img/appendix_examples/n03424325_21435_fg.WEBP deleted file mode 100644 index 1ecf29c..0000000 Binary files a/img/appendix_examples/n03424325_21435_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v10.JPEG b/img/appendix_examples/n03424325_21435_recombined_v10.JPEG deleted file mode 100644 index 0ac3cf7..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v10.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v11.JPEG b/img/appendix_examples/n03424325_21435_recombined_v11.JPEG deleted file mode 100644 index 99c4858..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v11.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v12.JPEG b/img/appendix_examples/n03424325_21435_recombined_v12.JPEG deleted file mode 100644 index 440e764..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v12.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v13.JPEG b/img/appendix_examples/n03424325_21435_recombined_v13.JPEG deleted file mode 100644 index a761dc5..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v13.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v15.JPEG b/img/appendix_examples/n03424325_21435_recombined_v15.JPEG deleted file mode 100644 index 240bb70..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v15.JPEG and /dev/null differ diff --git 
a/img/appendix_examples/n03424325_21435_recombined_v2.JPEG b/img/appendix_examples/n03424325_21435_recombined_v2.JPEG deleted file mode 100644 index 594a164..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v26.JPEG b/img/appendix_examples/n03424325_21435_recombined_v26.JPEG deleted file mode 100644 index 3475af1..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v26.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v4.JPEG b/img/appendix_examples/n03424325_21435_recombined_v4.JPEG deleted file mode 100644 index 8cf0b5b..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v4.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03424325_21435_recombined_v8.JPEG b/img/appendix_examples/n03424325_21435_recombined_v8.JPEG deleted file mode 100644 index 7858a71..0000000 Binary files a/img/appendix_examples/n03424325_21435_recombined_v8.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615.JPEG b/img/appendix_examples/n03642806_3615.JPEG deleted file mode 100644 index d65f877..0000000 Binary files a/img/appendix_examples/n03642806_3615.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615.WEBP b/img/appendix_examples/n03642806_3615.WEBP deleted file mode 100644 index a687bc4..0000000 Binary files a/img/appendix_examples/n03642806_3615.WEBP and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_bg.JPEG b/img/appendix_examples/n03642806_3615_bg.JPEG deleted file mode 100644 index 8ff063f..0000000 Binary files a/img/appendix_examples/n03642806_3615_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_fg.PNG b/img/appendix_examples/n03642806_3615_fg.PNG deleted file mode 100644 index 39fb6f7..0000000 Binary files a/img/appendix_examples/n03642806_3615_fg.PNG and /dev/null differ diff --git 
a/img/appendix_examples/n03642806_3615_recombined_v11.JPEG b/img/appendix_examples/n03642806_3615_recombined_v11.JPEG deleted file mode 100644 index b79c11e..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v11.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v12.JPEG b/img/appendix_examples/n03642806_3615_recombined_v12.JPEG deleted file mode 100644 index dae5021..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v12.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v15.JPEG b/img/appendix_examples/n03642806_3615_recombined_v15.JPEG deleted file mode 100644 index 2436987..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v17.JPEG b/img/appendix_examples/n03642806_3615_recombined_v17.JPEG deleted file mode 100644 index caae886..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v17.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v2.JPEG b/img/appendix_examples/n03642806_3615_recombined_v2.JPEG deleted file mode 100644 index e1e2095..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v25.JPEG b/img/appendix_examples/n03642806_3615_recombined_v25.JPEG deleted file mode 100644 index 10b6c33..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v25.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v29.JPEG b/img/appendix_examples/n03642806_3615_recombined_v29.JPEG deleted file mode 100644 index 0abe753..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v29.JPEG and /dev/null differ diff --git a/img/appendix_examples/n03642806_3615_recombined_v7.JPEG 
b/img/appendix_examples/n03642806_3615_recombined_v7.JPEG deleted file mode 100644 index 0344bea..0000000 Binary files a/img/appendix_examples/n03642806_3615_recombined_v7.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426.JPEG b/img/appendix_examples/n04141975_11426.JPEG deleted file mode 100644 index 4801624..0000000 Binary files a/img/appendix_examples/n04141975_11426.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_bg.JPEG b/img/appendix_examples/n04141975_11426_bg.JPEG deleted file mode 100644 index cfb8e45..0000000 Binary files a/img/appendix_examples/n04141975_11426_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_fg.PNG b/img/appendix_examples/n04141975_11426_fg.PNG deleted file mode 100644 index e1effa8..0000000 Binary files a/img/appendix_examples/n04141975_11426_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_fg.WEBP b/img/appendix_examples/n04141975_11426_fg.WEBP deleted file mode 100644 index 87e31cc..0000000 Binary files a/img/appendix_examples/n04141975_11426_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v10.JPEG b/img/appendix_examples/n04141975_11426_recombined_v10.JPEG deleted file mode 100644 index 8341cfe..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v10.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v13.JPEG b/img/appendix_examples/n04141975_11426_recombined_v13.JPEG deleted file mode 100644 index 8a01dd8..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v13.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v14.JPEG b/img/appendix_examples/n04141975_11426_recombined_v14.JPEG deleted file mode 100644 index 79b9f2f..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v14.JPEG and /dev/null differ diff --git 
a/img/appendix_examples/n04141975_11426_recombined_v20.JPEG b/img/appendix_examples/n04141975_11426_recombined_v20.JPEG deleted file mode 100644 index 3394264..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v23.JPEG b/img/appendix_examples/n04141975_11426_recombined_v23.JPEG deleted file mode 100644 index b36ea25..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v23.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v25.JPEG b/img/appendix_examples/n04141975_11426_recombined_v25.JPEG deleted file mode 100644 index 78124cf..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v25.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v6.JPEG b/img/appendix_examples/n04141975_11426_recombined_v6.JPEG deleted file mode 100644 index 9998518..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v6.JPEG and /dev/null differ diff --git a/img/appendix_examples/n04141975_11426_recombined_v7.JPEG b/img/appendix_examples/n04141975_11426_recombined_v7.JPEG deleted file mode 100644 index e958070..0000000 Binary files a/img/appendix_examples/n04141975_11426_recombined_v7.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596.JPEG b/img/appendix_examples/n07714990_7596.JPEG deleted file mode 100644 index 058ab52..0000000 Binary files a/img/appendix_examples/n07714990_7596.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_bg.JPEG b/img/appendix_examples/n07714990_7596_bg.JPEG deleted file mode 100644 index e8b8c46..0000000 Binary files a/img/appendix_examples/n07714990_7596_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_fg.PNG b/img/appendix_examples/n07714990_7596_fg.PNG deleted file mode 100644 index 03339c6..0000000 Binary files 
a/img/appendix_examples/n07714990_7596_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_fg.WEBP b/img/appendix_examples/n07714990_7596_fg.WEBP deleted file mode 100644 index 280d321..0000000 Binary files a/img/appendix_examples/n07714990_7596_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v1.JPEG b/img/appendix_examples/n07714990_7596_recombined_v1.JPEG deleted file mode 100644 index 486aa08..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v13.JPEG b/img/appendix_examples/n07714990_7596_recombined_v13.JPEG deleted file mode 100644 index 29845e2..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v13.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v15.JPEG b/img/appendix_examples/n07714990_7596_recombined_v15.JPEG deleted file mode 100644 index 3f6431b..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v16.JPEG b/img/appendix_examples/n07714990_7596_recombined_v16.JPEG deleted file mode 100644 index 1670705..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v16.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v17.JPEG b/img/appendix_examples/n07714990_7596_recombined_v17.JPEG deleted file mode 100644 index dd69294..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v17.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v22.JPEG b/img/appendix_examples/n07714990_7596_recombined_v22.JPEG deleted file mode 100644 index ba09df8..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v22.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v27.JPEG 
b/img/appendix_examples/n07714990_7596_recombined_v27.JPEG deleted file mode 100644 index bace8a2..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v27.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v29.JPEG b/img/appendix_examples/n07714990_7596_recombined_v29.JPEG deleted file mode 100644 index 62cbfee..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v29.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v6.JPEG b/img/appendix_examples/n07714990_7596_recombined_v6.JPEG deleted file mode 100644 index b4c1379..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v6.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07714990_7596_recombined_v8.JPEG b/img/appendix_examples/n07714990_7596_recombined_v8.JPEG deleted file mode 100644 index 783ca9a..0000000 Binary files a/img/appendix_examples/n07714990_7596_recombined_v8.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601.JPEG b/img/appendix_examples/n07749582_17601.JPEG deleted file mode 100644 index 0ec67f5..0000000 Binary files a/img/appendix_examples/n07749582_17601.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_bg.JPEG b/img/appendix_examples/n07749582_17601_bg.JPEG deleted file mode 100644 index bb163b4..0000000 Binary files a/img/appendix_examples/n07749582_17601_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_fg.PNG b/img/appendix_examples/n07749582_17601_fg.PNG deleted file mode 100644 index d83a5d4..0000000 Binary files a/img/appendix_examples/n07749582_17601_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_fg.WEBP b/img/appendix_examples/n07749582_17601_fg.WEBP deleted file mode 100644 index 1e6dedb..0000000 Binary files a/img/appendix_examples/n07749582_17601_fg.WEBP and /dev/null differ diff --git 
a/img/appendix_examples/n07749582_17601_recombined_v1.JPEG b/img/appendix_examples/n07749582_17601_recombined_v1.JPEG deleted file mode 100644 index 902331b..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v1.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v15.JPEG b/img/appendix_examples/n07749582_17601_recombined_v15.JPEG deleted file mode 100644 index 3c57cc3..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v15.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v17.JPEG b/img/appendix_examples/n07749582_17601_recombined_v17.JPEG deleted file mode 100644 index e11c1b2..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v17.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v2.JPEG b/img/appendix_examples/n07749582_17601_recombined_v2.JPEG deleted file mode 100644 index 96c4d25..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v2.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v20.JPEG b/img/appendix_examples/n07749582_17601_recombined_v20.JPEG deleted file mode 100644 index 12eea4d..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v23.JPEG b/img/appendix_examples/n07749582_17601_recombined_v23.JPEG deleted file mode 100644 index 3561344..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v23.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v24.JPEG b/img/appendix_examples/n07749582_17601_recombined_v24.JPEG deleted file mode 100644 index 7051e56..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v24.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v25.JPEG 
b/img/appendix_examples/n07749582_17601_recombined_v25.JPEG deleted file mode 100644 index 2ada806..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v25.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v26.JPEG b/img/appendix_examples/n07749582_17601_recombined_v26.JPEG deleted file mode 100644 index bfa66e5..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v26.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v27.JPEG b/img/appendix_examples/n07749582_17601_recombined_v27.JPEG deleted file mode 100644 index 39e85f3..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v27.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v28.JPEG b/img/appendix_examples/n07749582_17601_recombined_v28.JPEG deleted file mode 100644 index a6d794b..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v28.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v8.JPEG b/img/appendix_examples/n07749582_17601_recombined_v8.JPEG deleted file mode 100644 index 586e2e7..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v8.JPEG and /dev/null differ diff --git a/img/appendix_examples/n07749582_17601_recombined_v9.JPEG b/img/appendix_examples/n07749582_17601_recombined_v9.JPEG deleted file mode 100644 index b86364b..0000000 Binary files a/img/appendix_examples/n07749582_17601_recombined_v9.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898.JPEG b/img/appendix_examples/n09332890_27898.JPEG deleted file mode 100644 index df273ef..0000000 Binary files a/img/appendix_examples/n09332890_27898.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_bg.JPEG b/img/appendix_examples/n09332890_27898_bg.JPEG deleted file mode 100644 index 6e989c5..0000000 Binary files 
a/img/appendix_examples/n09332890_27898_bg.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_fg.PNG b/img/appendix_examples/n09332890_27898_fg.PNG deleted file mode 100644 index 1cfdffd..0000000 Binary files a/img/appendix_examples/n09332890_27898_fg.PNG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_fg.WEBP b/img/appendix_examples/n09332890_27898_fg.WEBP deleted file mode 100644 index 94028e3..0000000 Binary files a/img/appendix_examples/n09332890_27898_fg.WEBP and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v0.JPEG b/img/appendix_examples/n09332890_27898_recombined_v0.JPEG deleted file mode 100644 index 00e1343..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v0.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v12.JPEG b/img/appendix_examples/n09332890_27898_recombined_v12.JPEG deleted file mode 100644 index 4cc58bb..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v12.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v13.JPEG b/img/appendix_examples/n09332890_27898_recombined_v13.JPEG deleted file mode 100644 index 61f632b..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v13.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v14.JPEG b/img/appendix_examples/n09332890_27898_recombined_v14.JPEG deleted file mode 100644 index 756c1fe..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v14.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v18.JPEG b/img/appendix_examples/n09332890_27898_recombined_v18.JPEG deleted file mode 100644 index e366194..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v18.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v20.JPEG 
b/img/appendix_examples/n09332890_27898_recombined_v20.JPEG deleted file mode 100644 index 4a3396c..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v20.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v4.JPEG b/img/appendix_examples/n09332890_27898_recombined_v4.JPEG deleted file mode 100644 index a6b7d08..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v4.JPEG and /dev/null differ diff --git a/img/appendix_examples/n09332890_27898_recombined_v7.JPEG b/img/appendix_examples/n09332890_27898_recombined_v7.JPEG deleted file mode 100644 index 0153369..0000000 Binary files a/img/appendix_examples/n09332890_27898_recombined_v7.JPEG and /dev/null differ diff --git a/img/background no icon.png b/img/background no icon.png deleted file mode 100644 index ab24e9b..0000000 Binary files a/img/background no icon.png and /dev/null differ diff --git a/img/bg_robustness.pdf b/img/bg_robustness.pdf deleted file mode 100644 index dcfd724..0000000 Binary files a/img/bg_robustness.pdf and /dev/null differ diff --git a/img/bg_robustness_4.pdf b/img/bg_robustness_4.pdf deleted file mode 100644 index 7b7d3be..0000000 Binary files a/img/bg_robustness_4.pdf and /dev/null differ diff --git a/img/color change icon.png b/img/color change icon.png deleted file mode 100644 index 98e7b73..0000000 Binary files a/img/color change icon.png and /dev/null differ diff --git a/img/colorbar_horizontal.pdf b/img/colorbar_horizontal.pdf index 8779c08..141aa5f 100644 Binary files a/img/colorbar_horizontal.pdf and b/img/colorbar_horizontal.pdf differ diff --git a/img/colorbar_vertical.pdf b/img/colorbar_vertical.pdf index 754158e..4c42a00 100644 Binary files a/img/colorbar_vertical.pdf and b/img/colorbar_vertical.pdf differ diff --git a/img/edge blur icon.png b/img/edge blur icon.png deleted file mode 100644 index e0b7c7f..0000000 Binary files a/img/edge blur icon.png and /dev/null differ diff --git a/img/extraction 
icon.png b/img/extraction icon.png deleted file mode 100644 index 3084923..0000000 Binary files a/img/extraction icon.png and /dev/null differ diff --git a/img/fg_focus.pdf b/img/fg_focus.pdf deleted file mode 100644 index 77b9b97..0000000 Binary files a/img/fg_focus.pdf and /dev/null differ diff --git a/img/fig-1.drawio b/img/fig-1.drawio index 0b7f8c2..00f822f 100644 --- a/img/fig-1.drawio +++ b/img/fig-1.drawio @@ -1,6 +1,6 @@ - + - + @@ -20,9 +20,9 @@ - + - + @@ -121,7 +121,7 @@ - + diff --git a/img/fig-1.pdf b/img/fig-1.pdf index 08adcb3..33f9c75 100644 Binary files a/img/fig-1.pdf and b/img/fig-1.pdf differ diff --git a/img/fig-1.png b/img/fig-1.png deleted file mode 100644 index 7863c38..0000000 Binary files a/img/fig-1.png and /dev/null differ diff --git a/img/fig-2-horizontal.drawio b/img/fig-2-horizontal.drawio deleted file mode 100644 index e21f1ff..0000000 --- a/img/fig-2-horizontal.drawio +++ /dev/null @@ -1,406 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/img/fig-2-old.pdf b/img/fig-2-old.pdf deleted file mode 100644 index c8abc84..0000000 Binary files a/img/fig-2-old.pdf and /dev/null differ diff --git a/img/fig-2.drawio b/img/fig-2.drawio index 34ea040..522fbbf 100644 --- 
a/img/fig-2.drawio +++ b/img/fig-2.drawio @@ -1,284 +1,240 @@ - + - + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - + + + + + + + + + + - - - - - + + - - + + - - + + - - + + - - + + - - + + + + + + + + - - + + - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - + + - - + + - - + + - - + + - - + + - - + + - - + + + + + - - + + - - + + - - + + - - + + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/img/fig-2-old.jpg b/img/fig-2.jpg similarity index 100% rename from img/fig-2-old.jpg rename to img/fig-2.jpg diff --git a/img/fig-2.pdf b/img/fig-2.pdf index 14d154e..c8abc84 100644 Binary files a/img/fig-2.pdf and b/img/fig-2.pdf differ diff --git a/img/fig-2.png b/img/fig-2.png deleted file mode 100644 index 7dcbd15..0000000 Binary files a/img/fig-2.png and /dev/null differ diff --git a/img/fig-2_old.pdf b/img/fig-2_old.pdf deleted file mode 100644 index 6b03135..0000000 Binary files a/img/fig-2_old.pdf and /dev/null differ diff --git a/img/filter icon.png b/img/filter icon.png deleted file mode 100644 index d7a71f8..0000000 Binary files a/img/filter icon.png and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v12.JPEG b/img/foraug_examples/n01818515_31507_recombined_v12.JPEG deleted file mode 100644 index dcf278e..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v12.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v15.JPEG 
b/img/foraug_examples/n01818515_31507_recombined_v15.JPEG deleted file mode 100644 index ec4b6a5..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v15.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v18.JPEG b/img/foraug_examples/n01818515_31507_recombined_v18.JPEG deleted file mode 100644 index a24a6ea..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v18.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v3.JPEG b/img/foraug_examples/n01818515_31507_recombined_v3.JPEG deleted file mode 100644 index 6367d0e..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v3.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v4.JPEG b/img/foraug_examples/n01818515_31507_recombined_v4.JPEG deleted file mode 100644 index 86f6ddd..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v4.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01818515_31507_recombined_v6.JPEG b/img/foraug_examples/n01818515_31507_recombined_v6.JPEG deleted file mode 100644 index 9f6bf7a..0000000 Binary files a/img/foraug_examples/n01818515_31507_recombined_v6.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v10.JPEG b/img/foraug_examples/n01943899_20070_recombined_v10.JPEG deleted file mode 100644 index 42ca57e..0000000 Binary files a/img/foraug_examples/n01943899_20070_recombined_v10.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v11.JPEG b/img/foraug_examples/n01943899_20070_recombined_v11.JPEG deleted file mode 100644 index 223fcb9..0000000 Binary files a/img/foraug_examples/n01943899_20070_recombined_v11.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v12.JPEG b/img/foraug_examples/n01943899_20070_recombined_v12.JPEG deleted file mode 100644 index 8c9c4e1..0000000 Binary files 
a/img/foraug_examples/n01943899_20070_recombined_v12.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v17.JPEG b/img/foraug_examples/n01943899_20070_recombined_v17.JPEG deleted file mode 100644 index 2f6468e..0000000 Binary files a/img/foraug_examples/n01943899_20070_recombined_v17.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v8.JPEG b/img/foraug_examples/n01943899_20070_recombined_v8.JPEG deleted file mode 100644 index 4924c0b..0000000 Binary files a/img/foraug_examples/n01943899_20070_recombined_v8.JPEG and /dev/null differ diff --git a/img/foraug_examples/n01943899_20070_recombined_v9.JPEG b/img/foraug_examples/n01943899_20070_recombined_v9.JPEG deleted file mode 100644 index d9a2c12..0000000 Binary files a/img/foraug_examples/n01943899_20070_recombined_v9.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v0.JPEG b/img/foraug_examples/n02229544_6170_recombined_v0.JPEG deleted file mode 100644 index a6f6e5e..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v0.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v10.JPEG b/img/foraug_examples/n02229544_6170_recombined_v10.JPEG deleted file mode 100644 index cb095b1..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v10.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v15.JPEG b/img/foraug_examples/n02229544_6170_recombined_v15.JPEG deleted file mode 100644 index 2a576a0..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v15.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v16.JPEG b/img/foraug_examples/n02229544_6170_recombined_v16.JPEG deleted file mode 100644 index 5a51e36..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v16.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v2.JPEG 
b/img/foraug_examples/n02229544_6170_recombined_v2.JPEG deleted file mode 100644 index 755a5e0..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v2.JPEG and /dev/null differ diff --git a/img/foraug_examples/n02229544_6170_recombined_v6.JPEG b/img/foraug_examples/n02229544_6170_recombined_v6.JPEG deleted file mode 100644 index fb32e14..0000000 Binary files a/img/foraug_examples/n02229544_6170_recombined_v6.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v0.JPEG b/img/foraug_examples/n03642806_3615_recombined_v0.JPEG deleted file mode 100644 index c69b4c8..0000000 Binary files a/img/foraug_examples/n03642806_3615_recombined_v0.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v1.JPEG b/img/foraug_examples/n03642806_3615_recombined_v1.JPEG deleted file mode 100644 index d3e20c2..0000000 Binary files a/img/foraug_examples/n03642806_3615_recombined_v1.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v11.JPEG b/img/foraug_examples/n03642806_3615_recombined_v11.JPEG deleted file mode 100644 index b3fadc8..0000000 Binary files a/img/foraug_examples/n03642806_3615_recombined_v11.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v14.JPEG b/img/foraug_examples/n03642806_3615_recombined_v14.JPEG deleted file mode 100644 index 6405408..0000000 Binary files a/img/foraug_examples/n03642806_3615_recombined_v14.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v15.JPEG b/img/foraug_examples/n03642806_3615_recombined_v15.JPEG deleted file mode 100644 index f0c1f2e..0000000 Binary files a/img/foraug_examples/n03642806_3615_recombined_v15.JPEG and /dev/null differ diff --git a/img/foraug_examples/n03642806_3615_recombined_v2.JPEG b/img/foraug_examples/n03642806_3615_recombined_v2.JPEG deleted file mode 100644 index 1b53597..0000000 Binary files 
a/img/foraug_examples/n03642806_3615_recombined_v2.JPEG and /dev/null differ diff --git a/img/foreground yes icon.png b/img/foreground yes icon.png deleted file mode 100644 index a38f215..0000000 Binary files a/img/foreground yes icon.png and /dev/null differ diff --git a/img/horizontal flip icon new.png b/img/horizontal flip icon new.png deleted file mode 100644 index ba2751b..0000000 Binary files a/img/horizontal flip icon new.png and /dev/null differ diff --git a/img/horse_mask_1.WEBP b/img/horse_mask_1.WEBP deleted file mode 100644 index ad7c8f2..0000000 Binary files a/img/horse_mask_1.WEBP and /dev/null differ diff --git a/img/horse_mask_2.WEBP b/img/horse_mask_2.WEBP deleted file mode 100644 index f7d5c81..0000000 Binary files a/img/horse_mask_2.WEBP and /dev/null differ diff --git a/img/infill icon.png b/img/infill icon.png deleted file mode 100644 index 6529ae1..0000000 Binary files a/img/infill icon.png and /dev/null differ diff --git a/img/infill_distr.pdf b/img/infill_distr.pdf deleted file mode 100644 index c946d8c..0000000 Binary files a/img/infill_distr.pdf and /dev/null differ diff --git a/img/mask_expansion.pdf b/img/mask_expansion.pdf deleted file mode 100644 index 34093e8..0000000 Binary files a/img/mask_expansion.pdf and /dev/null differ diff --git a/img/masked_image_examples.pdf b/img/masked_image_examples.pdf deleted file mode 100644 index 171d3b8..0000000 Binary files a/img/masked_image_examples.pdf and /dev/null differ diff --git a/img/masked_image_examples_train.pdf b/img/masked_image_examples_train.pdf deleted file mode 100644 index 8c21e81..0000000 Binary files a/img/masked_image_examples_train.pdf and /dev/null differ diff --git a/img/object size icon.png b/img/object size icon.png deleted file mode 100644 index b21f09c..0000000 Binary files a/img/object size icon.png and /dev/null differ diff --git a/img/random crop icon.png b/img/random crop icon.png deleted file mode 100644 index b779465..0000000 Binary files a/img/random crop icon.png 
and /dev/null differ diff --git a/img/random draw icon.png b/img/random draw icon.png deleted file mode 100644 index 9dca91e..0000000 Binary files a/img/random draw icon.png and /dev/null differ diff --git a/img/random flipping icon.png b/img/random flipping icon.png deleted file mode 100644 index cf44d5b..0000000 Binary files a/img/random flipping icon.png and /dev/null differ diff --git a/img/random position icon.png b/img/random position icon.png deleted file mode 100644 index 793e2c0..0000000 Binary files a/img/random position icon.png and /dev/null differ diff --git a/img/random resize icon.png b/img/random resize icon.png deleted file mode 100644 index d7f535f..0000000 Binary files a/img/random resize icon.png and /dev/null differ diff --git a/img/rotation icon.png b/img/rotation icon.png deleted file mode 100644 index 96fe089..0000000 Binary files a/img/rotation icon.png and /dev/null differ diff --git a/img/segmentation icon.png b/img/segmentation icon.png deleted file mode 100644 index 8e8232d..0000000 Binary files a/img/segmentation icon.png and /dev/null differ diff --git a/img/size_bias.pdf b/img/size_bias.pdf index 9706f2e..0515183 100644 Binary files a/img/size_bias.pdf and b/img/size_bias.pdf differ diff --git a/img/size_bias_grid.pdf b/img/size_bias_grid.pdf deleted file mode 100644 index c6f3296..0000000 Binary files a/img/size_bias_grid.pdf and /dev/null differ diff --git a/img/size_bias_wide.pdf b/img/size_bias_wide.pdf deleted file mode 100644 index 0bec28c..0000000 Binary files a/img/size_bias_wide.pdf and /dev/null differ diff --git a/img/strategy.pdf b/img/strategy.pdf index 1fc3089..45b0062 100644 Binary files a/img/strategy.pdf and b/img/strategy.pdf differ diff --git a/llncs.cls b/llncs.cls deleted file mode 100644 index de80a15..0000000 --- a/llncs.cls +++ /dev/null @@ -1,1218 +0,0 @@ -% LLNCS DOCUMENT CLASS -- version 2.22 (05-Sep-2022) -% Springer Verlag LaTeX2e support for Lecture Notes in Computer Science -% -%% -%% \CharacterTable 
-%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z -%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z -%% Digits \0\1\2\3\4\5\6\7\8\9 -%% Exclamation \! Double quote \" Hash (number) \# -%% Dollar \$ Percent \% Ampersand \& -%% Acute accent \' Left paren \( Right paren \) -%% Asterisk \* Plus \+ Comma \, -%% Minus \- Point \. Solidus \/ -%% Colon \: Semicolon \; Less than \< -%% Equals \= Greater than \> Question mark \? -%% Commercial at \@ Left bracket \[ Backslash \\ -%% Right bracket \] Circumflex \^ Underscore \_ -%% Grave accent \` Left brace \{ Vertical bar \| -%% Right brace \} Tilde \~} -%% -\NeedsTeXFormat{LaTeX2e}[1995/12/01] -\ProvidesClass{llncs}[2022/09/05 v2.22 -^^J LaTeX document class for Lecture Notes in Computer Science] -% Options -\let\if@envcntreset\iffalse -\DeclareOption{envcountreset}{\let\if@envcntreset\iftrue} -\DeclareOption{citeauthoryear}{\let\citeauthoryear=Y} -\DeclareOption{oribibl}{\let\oribibl=Y} -\let\if@custvec\iftrue -\DeclareOption{orivec}{\let\if@custvec\iffalse} -\let\if@envcntsame\iffalse -\DeclareOption{envcountsame}{\let\if@envcntsame\iftrue} -\let\if@envcntsect\iffalse -\DeclareOption{envcountsect}{\let\if@envcntsect\iftrue} -\let\if@runhead\iffalse -\DeclareOption{runningheads}{\let\if@runhead\iftrue} - -\let\if@openright\iftrue -\let\if@openbib\iffalse -\DeclareOption{openbib}{\let\if@openbib\iftrue} - -% languages -\let\switcht@@therlang\relax -\def\ds@deutsch{\def\switcht@@therlang{\switcht@deutsch}} -\def\ds@francais{\def\switcht@@therlang{\switcht@francais}} - -\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} - -\ProcessOptions - -\LoadClass[twoside]{article} -\RequirePackage{multicol} % needed for the list of participants, index -\RequirePackage{aliascnt} - -\setlength{\textwidth}{12.2cm} -\setlength{\textheight}{19.3cm} -\renewcommand\@pnumwidth{2em} -\renewcommand\@tocrmarg{3.5em} -% -\def\@dottedtocline#1#2#3#4#5{% - \ifnum #1>\c@tocdepth \else - \vskip \z@ 
\@plus.2\p@ - {\leftskip #2\relax \rightskip \@tocrmarg \advance\rightskip by 0pt plus 2cm - \parfillskip -\rightskip \pretolerance=10000 - \parindent #2\relax\@afterindenttrue - \interlinepenalty\@M - \leavevmode - \@tempdima #3\relax - \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip - {#4}\nobreak - \leaders\hbox{$\m@th - \mkern \@dotsep mu\hbox{.}\mkern \@dotsep - mu$}\hfill - \nobreak - \hb@xt@\@pnumwidth{\hfil\normalfont \normalcolor #5}% - \par}% - \fi} -% -\def\switcht@albion{% -\def\abstractname{Abstract.} -\def\ackname{Acknowledgement.} -\def\andname{and} -\def\lastandname{\unskip, and} -\def\appendixname{Appendix} -\def\chaptername{Chapter} -\def\claimname{Claim} -\def\conjecturename{Conjecture} -\def\contentsname{Table of Contents} -\def\corollaryname{Corollary} -\def\definitionname{Definition} -\def\examplename{Example} -\def\exercisename{Exercise} -\def\figurename{Fig.} -\def\keywordname{{\bf Keywords:}} -\def\indexname{Index} -\def\lemmaname{Lemma} -\def\contriblistname{List of Contributors} -\def\listfigurename{List of Figures} -\def\listtablename{List of Tables} -\def\mailname{{\it Correspondence to\/}:} -\def\noteaddname{Note added in proof} -\def\notename{Note} -\def\partname{Part} -\def\problemname{Problem} -\def\proofname{Proof} -\def\propertyname{Property} -\def\propositionname{Proposition} -\def\questionname{Question} -\def\remarkname{Remark} -\def\seename{see} -\def\solutionname{Solution} -\def\subclassname{{\it Subject Classifications\/}:} -\def\tablename{Table} -\def\theoremname{Theorem}} -\switcht@albion -% Names of theorem like environments are already defined -% but must be translated if another language is chosen -% -% French section -\def\switcht@francais{%\typeout{On parle francais.}% - \def\abstractname{R\'esum\'e.}% - \def\ackname{Remerciements.}% - \def\andname{et}% - \def\lastandname{ et}% - \def\appendixname{Appendice}% - \def\chaptername{Chapitre}% - \def\claimname{Pr\'etention}% - \def\conjecturename{Hypoth\`ese}% - 
\def\contentsname{Table des mati\`eres}% - \def\corollaryname{Corollaire}% - \def\definitionname{D\'efinition}% - \def\examplename{Exemple}% - \def\exercisename{Exercice}% - \def\figurename{Fig.}% - \def\keywordname{{\bf Mots-cl\'e:}}% - \def\indexname{Index}% - \def\lemmaname{Lemme}% - \def\contriblistname{Liste des contributeurs}% - \def\listfigurename{Liste des figures}% - \def\listtablename{Liste des tables}% - \def\mailname{{\it Correspondence to\/}:}% - \def\noteaddname{Note ajout\'ee \`a l'\'epreuve}% - \def\notename{Remarque}% - \def\partname{Partie}% - \def\problemname{Probl\`eme}% - \def\proofname{Preuve}% - \def\propertyname{Caract\'eristique}% -%\def\propositionname{Proposition}% - \def\questionname{Question}% - \def\remarkname{Remarque}% - \def\seename{voir}% - \def\solutionname{Solution}% - \def\subclassname{{\it Subject Classifications\/}:}% - \def\tablename{Tableau}% - \def\theoremname{Th\'eor\`eme}% -} -% -% German section -\def\switcht@deutsch{%\typeout{Man spricht deutsch.}% - \def\abstractname{Zusammenfassung.}% - \def\ackname{Danksagung.}% - \def\andname{und}% - \def\lastandname{ und}% - \def\appendixname{Anhang}% - \def\chaptername{Kapitel}% - \def\claimname{Behauptung}% - \def\conjecturename{Hypothese}% - \def\contentsname{Inhaltsverzeichnis}% - \def\corollaryname{Korollar}% -%\def\definitionname{Definition}% - \def\examplename{Beispiel}% - \def\exercisename{\"Ubung}% - \def\figurename{Abb.}% - \def\keywordname{{\bf Schl\"usselw\"orter:}}% - \def\indexname{Index}% -%\def\lemmaname{Lemma}% - \def\contriblistname{Mitarbeiter}% - \def\listfigurename{Abbildungsverzeichnis}% - \def\listtablename{Tabellenverzeichnis}% - \def\mailname{{\it Correspondence to\/}:}% - \def\noteaddname{Nachtrag}% - \def\notename{Anmerkung}% - \def\partname{Teil}% -%\def\problemname{Problem}% - \def\proofname{Beweis}% - \def\propertyname{Eigenschaft}% -%\def\propositionname{Proposition}% - \def\questionname{Frage}% - \def\remarkname{Anmerkung}% - \def\seename{siehe}% - 
\def\solutionname{L\"osung}% - \def\subclassname{{\it Subject Classifications\/}:}% - \def\tablename{Tabelle}% -%\def\theoremname{Theorem}% -} - -% Ragged bottom for the actual page -\def\thisbottomragged{\def\@textbottom{\vskip\z@ plus.0001fil -\global\let\@textbottom\relax}} - -\renewcommand\small{% - \@setfontsize\small\@ixpt{11}% - \abovedisplayskip 8.5\p@ \@plus3\p@ \@minus4\p@ - \abovedisplayshortskip \z@ \@plus2\p@ - \belowdisplayshortskip 4\p@ \@plus2\p@ \@minus2\p@ - \def\@listi{\leftmargin\leftmargini - \parsep 0\p@ \@plus1\p@ \@minus\p@ - \topsep 8\p@ \@plus2\p@ \@minus4\p@ - \itemsep0\p@}% - \belowdisplayskip \abovedisplayskip -} - -\frenchspacing -\widowpenalty=10000 -\clubpenalty=10000 - -\setlength\oddsidemargin {63\p@} -\setlength\evensidemargin {63\p@} -\setlength\marginparwidth {90\p@} - -\setlength\headsep {16\p@} - -\setlength\footnotesep{7.7\p@} -\setlength\textfloatsep{8mm\@plus 2\p@ \@minus 4\p@} -\setlength\intextsep {8mm\@plus 2\p@ \@minus 2\p@} - -\setcounter{secnumdepth}{2} - -\newcounter {chapter} -\renewcommand\thechapter {\@arabic\c@chapter} - -\newif\if@mainmatter \@mainmattertrue -\newcommand\frontmatter{\cleardoublepage - \@mainmatterfalse\pagenumbering{Roman}} -\newcommand\mainmatter{\cleardoublepage - \@mainmattertrue\pagenumbering{arabic}} -\newcommand\backmatter{\if@openright\cleardoublepage\else\clearpage\fi - \@mainmatterfalse} - -\renewcommand\part{\cleardoublepage - \thispagestyle{empty}% - \if@twocolumn - \onecolumn - \@tempswatrue - \else - \@tempswafalse - \fi - \null\vfil - \secdef\@part\@spart} - -\def\@part[#1]#2{% - \ifnum \c@secnumdepth >-2\relax - \refstepcounter{part}% - \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% - \else - \addcontentsline{toc}{part}{#1}% - \fi - \markboth{}{}% - {\centering - \interlinepenalty \@M - \normalfont - \ifnum \c@secnumdepth >-2\relax - \huge\bfseries \partname~\thepart - \par - \vskip 20\p@ - \fi - \Huge \bfseries #2\par}% - \@endpart} -\def\@spart#1{% - {\centering - 
\interlinepenalty \@M - \normalfont - \Huge \bfseries #1\par}% - \@endpart} -\def\@endpart{\vfil\newpage - \if@twoside - \null - \thispagestyle{empty}% - \newpage - \fi - \if@tempswa - \twocolumn - \fi} - -\newcommand\chapter{\clearpage - \thispagestyle{empty}% - \global\@topnum\z@ - \@afterindentfalse - \secdef\@chapter\@schapter} -\def\@chapter[#1]#2{\ifnum \c@secnumdepth >\m@ne - \if@mainmatter - \refstepcounter{chapter}% - \typeout{\@chapapp\space\thechapter.}% - \addcontentsline{toc}{chapter}% - {\protect\numberline{\thechapter}#1}% - \else - \addcontentsline{toc}{chapter}{#1}% - \fi - \else - \addcontentsline{toc}{chapter}{#1}% - \fi - \chaptermark{#1}% - \addtocontents{lof}{\protect\addvspace{10\p@}}% - \addtocontents{lot}{\protect\addvspace{10\p@}}% - \if@twocolumn - \@topnewpage[\@makechapterhead{#2}]% - \else - \@makechapterhead{#2}% - \@afterheading - \fi} -\def\@makechapterhead#1{% -% \vspace*{50\p@}% - {\centering - \ifnum \c@secnumdepth >\m@ne - \if@mainmatter - \large\bfseries \@chapapp{} \thechapter - \par\nobreak - \vskip 20\p@ - \fi - \fi - \interlinepenalty\@M - \Large \bfseries #1\par\nobreak - \vskip 40\p@ - }} -\def\@schapter#1{\if@twocolumn - \@topnewpage[\@makeschapterhead{#1}]% - \else - \@makeschapterhead{#1}% - \@afterheading - \fi} -\def\@makeschapterhead#1{% -% \vspace*{50\p@}% - {\centering - \normalfont - \interlinepenalty\@M - \Large \bfseries #1\par\nobreak - \vskip 40\p@ - }} - -\renewcommand\section{\@startsection{section}{1}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - {12\p@ \@plus 4\p@ \@minus 4\p@}% - {\normalfont\large\bfseries\boldmath - \rightskip=\z@ \@plus 8em\pretolerance=10000 }} -\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - {8\p@ \@plus 4\p@ \@minus 4\p@}% - {\normalfont\normalsize\bfseries\boldmath - \rightskip=\z@ \@plus 8em\pretolerance=10000 }} -\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% - {-18\p@ \@plus -4\p@ \@minus -4\p@}% - 
{-0.5em \@plus -0.22em \@minus -0.1em}% - {\normalfont\normalsize\bfseries\boldmath}} -\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% - {-12\p@ \@plus -4\p@ \@minus -4\p@}% - {-0.5em \@plus -0.22em \@minus -0.1em}% - {\normalfont\normalsize\itshape}} -\renewcommand\subparagraph[1]{\typeout{LLNCS warning: You should not use - \string\subparagraph\space with this class}\vskip0.5cm -You should not use \verb|\subparagraph| with this class.\vskip0.5cm} - -\DeclareMathSymbol{\Gamma}{\mathalpha}{letters}{"00} -\DeclareMathSymbol{\Delta}{\mathalpha}{letters}{"01} -\DeclareMathSymbol{\Theta}{\mathalpha}{letters}{"02} -\DeclareMathSymbol{\Lambda}{\mathalpha}{letters}{"03} -\DeclareMathSymbol{\Xi}{\mathalpha}{letters}{"04} -\DeclareMathSymbol{\Pi}{\mathalpha}{letters}{"05} -\DeclareMathSymbol{\Sigma}{\mathalpha}{letters}{"06} -\DeclareMathSymbol{\Upsilon}{\mathalpha}{letters}{"07} -\DeclareMathSymbol{\Phi}{\mathalpha}{letters}{"08} -\DeclareMathSymbol{\Psi}{\mathalpha}{letters}{"09} -\DeclareMathSymbol{\Omega}{\mathalpha}{letters}{"0A} - -\let\footnotesize\small - -\if@custvec -\DeclareRobustCommand\vec[1]{\mathchoice{\mbox{\boldmath$\displaystyle#1$}} -{\mbox{\boldmath$\textstyle#1$}} -{\mbox{\boldmath$\scriptstyle#1$}} -{\mbox{\boldmath$\scriptscriptstyle#1$}}} -\fi - -\def\squareforqed{\hbox{\rlap{$\sqcap$}$\sqcup$}} -\def\qed{\ifmmode\squareforqed\else{\unskip\nobreak\hfil -\penalty50\hskip1em\null\nobreak\hfil\squareforqed -\parfillskip=0pt\finalhyphendemerits=0\endgraf}\fi} - -\def\getsto{\mathrel{\mathchoice {\vcenter{\offinterlineskip -\halign{\hfil -$\displaystyle##$\hfil\cr\gets\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr\gets -\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr\gets -\cr\to\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr -\gets\cr\to\cr}}}}} -\def\lid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil 
-$\displaystyle##$\hfil\cr<\cr\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr<\cr -\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr<\cr -\noalign{\vskip1pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr -<\cr -\noalign{\vskip0.9pt}=\cr}}}}} -\def\gid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil -$\displaystyle##$\hfil\cr>\cr\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr>\cr -\noalign{\vskip1.2pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr>\cr -\noalign{\vskip1pt}=\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr ->\cr -\noalign{\vskip0.9pt}=\cr}}}}} -\def\grole{\mathrel{\mathchoice {\vcenter{\offinterlineskip -\halign{\hfil -$\displaystyle##$\hfil\cr>\cr\noalign{\vskip-1pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr ->\cr\noalign{\vskip-1pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr ->\cr\noalign{\vskip-0.8pt}<\cr}}} -{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr ->\cr\noalign{\vskip-0.3pt}<\cr}}}}} -\def\bbbr{{\rm I\!R}} %reelle Zahlen -\def\bbbm{{\rm I\!M}} -\def\bbbn{{\rm I\!N}} %natuerliche Zahlen -\def\bbbf{{\rm I\!F}} -\def\bbbh{{\rm I\!H}} -\def\bbbk{{\rm I\!K}} -\def\bbbp{{\rm I\!P}} -\def\bbbone{{\mathchoice {\rm 1\mskip-4mu l} {\rm 1\mskip-4mu l} -{\rm 1\mskip-4.5mu l} {\rm 1\mskip-5mu l}}} -\def\bbbc{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm C$}\hbox{\hbox -to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}}}} 
-\def\bbbq{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm -Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm Q$}\hbox{\raise -0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}}}} -\def\bbbt{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm -T$}\hbox{\hbox to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle\rm T$}\hbox{\hbox -to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle\rm T$}\hbox{\hbox -to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm T$}\hbox{\hbox -to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}}}} -\def\bbbs{{\mathchoice -{\setbox0=\hbox{$\displaystyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox -to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} -{\setbox0=\hbox{$\textstyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox -to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptstyle \rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox -to0pt{\kern0.5\wd0\vrule height0.45\ht0\hss}\box0}} -{\setbox0=\hbox{$\scriptscriptstyle\rm S$}\hbox{\raise0.5\ht0\hbox -to0pt{\kern0.4\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox -to0pt{\kern0.55\wd0\vrule height0.45\ht0\hss}\box0}}}} -\def\bbbz{{\mathchoice {\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} -{\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} -{\hbox{$\mathsf\scriptstyle Z\kern-0.3em Z$}} -{\hbox{$\mathsf\scriptscriptstyle Z\kern-0.2em Z$}}}} - -\let\ts\, - -\setlength\leftmargini {17\p@} -\setlength\leftmargin {\leftmargini} -\setlength\leftmarginii {\leftmargini} 
-\setlength\leftmarginiii {\leftmargini} -\setlength\leftmarginiv {\leftmargini} -\setlength \labelsep {.5em} -\setlength \labelwidth{\leftmargini} -\addtolength\labelwidth{-\labelsep} - -\def\@listI{\leftmargin\leftmargini - \parsep 0\p@ \@plus1\p@ \@minus\p@ - \topsep 8\p@ \@plus2\p@ \@minus4\p@ - \itemsep0\p@} -\let\@listi\@listI -\@listi -\def\@listii {\leftmargin\leftmarginii - \labelwidth\leftmarginii - \advance\labelwidth-\labelsep - \topsep 0\p@ \@plus2\p@ \@minus\p@} -\def\@listiii{\leftmargin\leftmarginiii - \labelwidth\leftmarginiii - \advance\labelwidth-\labelsep - \topsep 0\p@ \@plus\p@\@minus\p@ - \parsep \z@ - \partopsep \p@ \@plus\z@ \@minus\p@} - -\renewcommand\labelitemi{\normalfont\bfseries --} -\renewcommand\labelitemii{$\m@th\bullet$} - -\setlength\arraycolsep{1.4\p@} -\setlength\tabcolsep{1.4\p@} - -\def\tableofcontents{\chapter*{\contentsname\@mkboth{{\contentsname}}% - {{\contentsname}}} - \def\authcount##1{\setcounter{auco}{##1}\setcounter{@auth}{1}} - \def\lastand{\ifnum\value{auco}=2\relax - \unskip{} \andname\ - \else - \unskip \lastandname\ - \fi}% - \def\and{\stepcounter{@auth}\relax - \ifnum\value{@auth}=\value{auco}% - \lastand - \else - \unskip, - \fi}% - \@starttoc{toc}\if@restonecol\twocolumn\fi} - -\def\l@part#1#2{\addpenalty{\@secpenalty}% - \addvspace{2em plus\p@}% % space above part line - \begingroup - \parindent \z@ - \rightskip \z@ plus 5em - \hrule\vskip5pt - \large % same size as for a contribution heading - \bfseries\boldmath % set line in boldface - \leavevmode % TeX command to enter horizontal mode. 
- #1\par - \vskip5pt - \hrule - \vskip1pt - \nobreak % Never break after part entry - \endgroup} - -\def\@dotsep{2} - -\let\phantomsection=\relax - -\def\hyperhrefextend{\ifx\hyper@anchor\@undefined\else -{}\fi} - -\def\addnumcontentsmark#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{\protect\numberline - {\thechapter}#3}{\thepage}\hyperhrefextend}}% -\def\addcontentsmark#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{#3}{\thepage}\hyperhrefextend}}% -\def\addcontentsmarkwop#1#2#3{% -\addtocontents{#1}{\protect\contentsline{#2}{#3}{0}\hyperhrefextend}}% - -\def\@adcmk[#1]{\ifcase #1 \or -\def\@gtempa{\addnumcontentsmark}% - \or \def\@gtempa{\addcontentsmark}% - \or \def\@gtempa{\addcontentsmarkwop}% - \fi\@gtempa{toc}{chapter}% -} -\def\addtocmark{% -\phantomsection -\@ifnextchar[{\@adcmk}{\@adcmk[3]}% -} - -\def\l@chapter#1#2{\addpenalty{-\@highpenalty} - \vskip 1.0em plus 1pt \@tempdima 1.5em \begingroup - \parindent \z@ \rightskip \@tocrmarg - \advance\rightskip by 0pt plus 2cm - \parfillskip -\rightskip \pretolerance=10000 - \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip - {\large\bfseries\boldmath#1}\ifx0#2\hfil\null - \else - \nobreak - \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern - \@dotsep mu$}\hfill - \nobreak\hbox to\@pnumwidth{\hss #2}% - \fi\par - \penalty\@highpenalty \endgroup} - -\def\l@title#1#2{\addpenalty{-\@highpenalty} - \addvspace{8pt plus 1pt} - \@tempdima \z@ - \begingroup - \parindent \z@ \rightskip \@tocrmarg - \advance\rightskip by 0pt plus 2cm - \parfillskip -\rightskip \pretolerance=10000 - \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip - #1\nobreak - \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern - \@dotsep mu$}\hfill - \nobreak\hbox to\@pnumwidth{\hss #2}\par - \penalty\@highpenalty \endgroup} - -\def\l@author#1#2{\addpenalty{\@highpenalty} - \@tempdima=15\p@ %\z@ - \begingroup - \parindent \z@ \rightskip \@tocrmarg - \advance\rightskip by 0pt plus 2cm - \pretolerance=10000 - \leavevmode 
\advance\leftskip\@tempdima %\hskip -\leftskip - \textit{#1}\par - \penalty\@highpenalty \endgroup} - -\setcounter{tocdepth}{0} -\newdimen\tocchpnum -\newdimen\tocsecnum -\newdimen\tocsectotal -\newdimen\tocsubsecnum -\newdimen\tocsubsectotal -\newdimen\tocsubsubsecnum -\newdimen\tocsubsubsectotal -\newdimen\tocparanum -\newdimen\tocparatotal -\newdimen\tocsubparanum -\tocchpnum=\z@ % no chapter numbers -\tocsecnum=15\p@ % section 88. plus 2.222pt -\tocsubsecnum=23\p@ % subsection 88.8 plus 2.222pt -\tocsubsubsecnum=27\p@ % subsubsection 88.8.8 plus 1.444pt -\tocparanum=35\p@ % paragraph 88.8.8.8 plus 1.666pt -\tocsubparanum=43\p@ % subparagraph 88.8.8.8.8 plus 1.888pt -\def\calctocindent{% -\tocsectotal=\tocchpnum -\advance\tocsectotal by\tocsecnum -\tocsubsectotal=\tocsectotal -\advance\tocsubsectotal by\tocsubsecnum -\tocsubsubsectotal=\tocsubsectotal -\advance\tocsubsubsectotal by\tocsubsubsecnum -\tocparatotal=\tocsubsubsectotal -\advance\tocparatotal by\tocparanum} -\calctocindent - -\def\l@section{\@dottedtocline{1}{\tocchpnum}{\tocsecnum}} -\def\l@subsection{\@dottedtocline{2}{\tocsectotal}{\tocsubsecnum}} -\def\l@subsubsection{\@dottedtocline{3}{\tocsubsectotal}{\tocsubsubsecnum}} -\def\l@paragraph{\@dottedtocline{4}{\tocsubsubsectotal}{\tocparanum}} -\def\l@subparagraph{\@dottedtocline{5}{\tocparatotal}{\tocsubparanum}} - -\def\listoffigures{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn - \fi\section*{\listfigurename\@mkboth{{\listfigurename}}{{\listfigurename}}} - \@starttoc{lof}\if@restonecol\twocolumn\fi} -\def\l@figure{\@dottedtocline{1}{0em}{1.5em}} - -\def\listoftables{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn - \fi\section*{\listtablename\@mkboth{{\listtablename}}{{\listtablename}}} - \@starttoc{lot}\if@restonecol\twocolumn\fi} -\let\l@table\l@figure - -\renewcommand\listoffigures{% - \section*{\listfigurename - \@mkboth{\listfigurename}{\listfigurename}}% - \@starttoc{lof}% - } - -\renewcommand\listoftables{% - 
\section*{\listtablename - \@mkboth{\listtablename}{\listtablename}}% - \@starttoc{lot}% - } - -\ifx\oribibl\undefined -\ifx\citeauthoryear\undefined -\renewenvironment{thebibliography}[1] - {\section*{\refname} - \def\@biblabel##1{##1.} - \small - \list{\@biblabel{\@arabic\c@enumiv}}% - {\settowidth\labelwidth{\@biblabel{#1}}% - \leftmargin\labelwidth - \advance\leftmargin\labelsep - \if@openbib - \advance\leftmargin\bibindent - \itemindent -\bibindent - \listparindent \itemindent - \parsep \z@ - \fi - \usecounter{enumiv}% - \let\p@enumiv\@empty - \renewcommand\theenumiv{\@arabic\c@enumiv}}% - \if@openbib - \renewcommand\newblock{\par}% - \else - \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% - \fi - \sloppy\clubpenalty4000\widowpenalty4000% - \sfcode`\.=\@m} - {\def\@noitemerr - {\@latex@warning{Empty `thebibliography' environment}}% - \endlist} -\def\@lbibitem[#1]#2{\item[{[#1]}\hfill]\if@filesw - {\let\protect\noexpand\immediate - \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} -\newcount\@tempcntc -\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi - \@tempcnta\z@\@tempcntb\m@ne\def\@citea{}\@cite{\@for\@citeb:=#2\do - {\@ifundefined - {b@\@citeb}{\@citeo\@tempcntb\m@ne\@citea\def\@citea{,}{\bfseries - ?}\@warning - {Citation `\@citeb' on page \thepage \space undefined}}% - {\setbox\z@\hbox{\global\@tempcntc0\csname b@\@citeb\endcsname\relax}% - \ifnum\@tempcntc=\z@ \@citeo\@tempcntb\m@ne - \@citea\def\@citea{,}\hbox{\csname b@\@citeb\endcsname}% - \else - \advance\@tempcntb\@ne - \ifnum\@tempcntb=\@tempcntc - \else\advance\@tempcntb\m@ne\@citeo - \@tempcnta\@tempcntc\@tempcntb\@tempcntc\fi\fi}}\@citeo}{#1}} -\def\@citeo{\ifnum\@tempcnta>\@tempcntb\else - \@citea\def\@citea{,\,\hskip\z@skip}% - \ifnum\@tempcnta=\@tempcntb\the\@tempcnta\else - {\advance\@tempcnta\@ne\ifnum\@tempcnta=\@tempcntb \else - \def\@citea{--}\fi - \advance\@tempcnta\m@ne\the\@tempcnta\@citea\the\@tempcntb}\fi\fi} -\else 
-\renewenvironment{thebibliography}[1] - {\section*{\refname} - \small - \list{}% - {\settowidth\labelwidth{}% - \leftmargin\parindent - \itemindent=-\parindent - \labelsep=\z@ - \if@openbib - \advance\leftmargin\bibindent - \itemindent -\bibindent - \listparindent \itemindent - \parsep \z@ - \fi - \usecounter{enumiv}% - \let\p@enumiv\@empty - \renewcommand\theenumiv{}}% - \if@openbib - \renewcommand\newblock{\par}% - \else - \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% - \fi - \sloppy\clubpenalty4000\widowpenalty4000% - \sfcode`\.=\@m} - {\def\@noitemerr - {\@latex@warning{Empty `thebibliography' environment}}% - \endlist} - \def\@cite#1{#1}% - \def\@lbibitem[#1]#2{\item[]\if@filesw - {\def\protect##1{\string ##1\space}\immediate - \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} - \fi -\else -\@cons\@openbib@code{\noexpand\small} -\fi - -\def\idxquad{\hskip 10\p@}% space that divides entry from number - -\def\@idxitem{\par\hangindent 10\p@} - -\def\subitem{\par\setbox0=\hbox{--\enspace}% second order - \noindent\hangindent\wd0\box0}% index entry - -\def\subsubitem{\par\setbox0=\hbox{--\,--\enspace}% third - \noindent\hangindent\wd0\box0}% order index entry - -\def\indexspace{\par \vskip 10\p@ plus5\p@ minus3\p@\relax} - -\renewenvironment{theindex} - {\@mkboth{\indexname}{\indexname}% - \thispagestyle{empty}\parindent\z@ - \parskip\z@ \@plus .3\p@\relax - \let\item\par - \def\,{\relax\ifmmode\mskip\thinmuskip - \else\hskip0.2em\ignorespaces\fi}% - \normalfont\small - \begin{multicols}{2}[\@makeschapterhead{\indexname}]% - } - {\end{multicols}} - -\renewcommand\footnoterule{% - \kern-3\p@ - \hrule\@width 2truecm - \kern2.6\p@} - \newdimen\fnindent - \fnindent1em -\long\def\@makefntext#1{% - \parindent \fnindent% - \leftskip \fnindent% - \noindent - \llap{\hb@xt@1em{\hss\@makefnmark\ }}\ignorespaces#1} - -\long\def\@makecaption#1#2{% - \small - \vskip\abovecaptionskip - \sbox\@tempboxa{{\bfseries #1.} #2}% - \ifdim \wd\@tempboxa >\hsize - 
{\bfseries #1.} #2\par - \else - \global \@minipagefalse - \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}% - \fi - \vskip\belowcaptionskip} - -\def\fps@figure{htbp} -\def\fnum@figure{\figurename\thinspace\thefigure} -\def \@floatboxreset {% - \reset@font - \small - \@setnobreak - \@setminipage -} -\def\fps@table{htbp} -\def\fnum@table{\tablename~\thetable} -\renewenvironment{table} - {\setlength\abovecaptionskip{0\p@}% - \setlength\belowcaptionskip{10\p@}% - \@float{table}} - {\end@float} -\renewenvironment{table*} - {\setlength\abovecaptionskip{0\p@}% - \setlength\belowcaptionskip{10\p@}% - \@dblfloat{table}} - {\end@dblfloat} - -\long\def\@caption#1[#2]#3{\par\addcontentsline{\csname - ext@#1\endcsname}{#1}{\protect\numberline{\csname - the#1\endcsname}{\ignorespaces #2}}\begingroup - \@parboxrestore - \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par - \endgroup} - -% LaTeX does not provide a command to enter the authors institute -% addresses. The \institute command is defined here. 
- -\newcounter{@inst} -\newcounter{@auth} -\newcounter{auco} -\newdimen\instindent -\newbox\authrun -\newtoks\authorrunning -\newtoks\tocauthor -\newbox\titrun -\newtoks\titlerunning -\newtoks\toctitle - -\def\clearheadinfo{\gdef\@author{No Author Given}% - \gdef\@title{No Title Given}% - \gdef\@subtitle{}% - \gdef\@institute{No Institute Given}% - \gdef\@thanks{}% - \global\titlerunning={}\global\authorrunning={}% - \global\toctitle={}\global\tocauthor={}} - -\def\institute#1{\gdef\@institute{#1}} - -\def\institutename{\par - \begingroup - \parskip=\z@ - \parindent=\z@ - \setcounter{@inst}{1}% - \def\and{\par\stepcounter{@inst}% - \noindent$^{\the@inst}$\enspace\ignorespaces}% - \setbox0=\vbox{\def\thanks##1{}\@institute}% - \ifnum\c@@inst=1\relax - \gdef\fnnstart{0}% - \else - \xdef\fnnstart{\c@@inst}% - \setcounter{@inst}{1}% - \noindent$^{\the@inst}$\enspace - \fi - \ignorespaces - \@institute\par - \endgroup} - -\def\@fnsymbol#1{\ensuremath{\ifcase#1\or\star\or{\star\star}\or - {\star\star\star}\or \dagger\or \ddagger\or - \mathchar "278\or \mathchar "27B\or \|\or **\or \dagger\dagger - \or \ddagger\ddagger \else\@ctrerr\fi}} - -\def\inst#1{\unskip$^{#1}$} -\def\orcidID#1{\unskip$^{[#1]}$} % added MR 2018-03-10 -\def\fnmsep{\unskip$^,$} -\def\email#1{{\tt#1}} - -\AtBeginDocument{\@ifundefined{url}{\def\url#1{#1}}{}% -\@ifpackageloaded{babel}{% -\@ifundefined{extrasenglish}{}{\addto\extrasenglish{\switcht@albion}}% -\@ifundefined{extrasfrenchb}{}{\addto\extrasfrenchb{\switcht@francais}}% -\@ifundefined{extrasgerman}{}{\addto\extrasgerman{\switcht@deutsch}}% -\@ifundefined{extrasngerman}{}{\addto\extrasngerman{\switcht@deutsch}}% -}{\switcht@@therlang}% -\providecommand{\keywords}[1]{\def\and{{\textperiodcentered} }% -\par\addvspace\baselineskip -\noindent\keywordname\enspace\ignorespaces#1}% -\@ifpackageloaded{hyperref}{% -\def\doi#1{\href{https://doi.org/\detokenize{#1}}{\url{https://doi.org/#1}}}}{ -\def\doi#1{https://doi.org/\detokenize{#1}}} -} 
-\def\homedir{\~{ }} - -\def\subtitle#1{\gdef\@subtitle{#1}} -\clearheadinfo -% -%%% to avoid hyperref warnings -\providecommand*{\toclevel@author}{999} -%%% to make title-entry parent of section-entries -\providecommand*{\toclevel@title}{0} -% -\renewcommand\maketitle{\newpage -\phantomsection - \refstepcounter{chapter}% - \stepcounter{section}% - \setcounter{section}{0}% - \setcounter{subsection}{0}% - \setcounter{figure}{0} - \setcounter{table}{0} - \setcounter{equation}{0} - \setcounter{footnote}{0}% - \begingroup - \parindent=\z@ - \renewcommand\thefootnote{\@fnsymbol\c@footnote}% - \if@twocolumn - \ifnum \col@number=\@ne - \@maketitle - \else - \twocolumn[\@maketitle]% - \fi - \else - \newpage - \global\@topnum\z@ % Prevents figures from going at top of page. - \@maketitle - \fi - \thispagestyle{empty}\@thanks -% - \def\\{\unskip\ \ignorespaces}\def\inst##1{\unskip{}}% - \def\thanks##1{\unskip{}}\def\fnmsep{\unskip}% - \instindent=\hsize - \advance\instindent by-\headlineindent - \if!\the\toctitle!\addcontentsline{toc}{title}{\@title}\else - \addcontentsline{toc}{title}{\the\toctitle}\fi - \if@runhead - \if!\the\titlerunning!\else - \edef\@title{\the\titlerunning}% - \fi - \global\setbox\titrun=\hbox{\small\rm\unboldmath\ignorespaces\@title}% - \ifdim\wd\titrun>\instindent - \typeout{Title too long for running head. 
Please supply}% - \typeout{a shorter form with \string\titlerunning\space prior to - \string\maketitle}% - \global\setbox\titrun=\hbox{\small\rm - Title Suppressed Due to Excessive Length}% - \fi - \xdef\@title{\copy\titrun}% - \fi -% - \if!\the\tocauthor!\relax - {\def\and{\noexpand\protect\noexpand\and}% - \def\inst##1{}% added MR 2017-09-20 to remove inst numbers from the TOC - \def\orcidID##1{}% added MR 2017-09-20 to remove ORCID ids from the TOC - \protected@xdef\toc@uthor{\@author}}% - \else - \def\\{\noexpand\protect\noexpand\newline}% - \protected@xdef\scratch{\the\tocauthor}% - \protected@xdef\toc@uthor{\scratch}% - \fi - \addtocontents{toc}{\noexpand\protect\noexpand\authcount{\the\c@auco}}% - \addcontentsline{toc}{author}{\toc@uthor}% - \if@runhead - \if!\the\authorrunning! - \value{@inst}=\value{@auth}% - \setcounter{@auth}{1}% - \else - \edef\@author{\the\authorrunning}% - \fi - \global\setbox\authrun=\hbox{\def\inst##1{}% added MR 2017-09-20 to remove inst numbers from the runninghead - \def\orcidID##1{}% added MR 2017-09-20 to remove ORCID ids from the runninghead - \small\unboldmath\@author\unskip}% - \ifdim\wd\authrun>\instindent - \typeout{Names of authors too long for running head. 
Please supply}% - \typeout{a shorter form with \string\authorrunning\space prior to - \string\maketitle}% - \global\setbox\authrun=\hbox{\small\rm - Authors Suppressed Due to Excessive Length}% - \fi - \xdef\@author{\copy\authrun}% - \markboth{\@author}{\@title}% - \fi - \endgroup - \setcounter{footnote}{\fnnstart}% - \clearheadinfo} -% -\def\@maketitle{\newpage - \markboth{}{}% - \def\lastand{\ifnum\value{@inst}=2\relax - \unskip{} \andname\ - \else - \unskip \lastandname\ - \fi}% - \def\and{\stepcounter{@auth}\relax - \ifnum\value{@auth}=\value{@inst}% - \lastand - \else - \unskip, - \fi}% - \begin{center}% - \let\newline\\ - {\Large \bfseries\boldmath - \pretolerance=10000 - \@title \par}\vskip .8cm -\if!\@subtitle!\else {\large \bfseries\boldmath - \vskip -.65cm - \pretolerance=10000 - \@subtitle \par}\vskip .8cm\fi - \setbox0=\vbox{\setcounter{@auth}{1}\def\and{\stepcounter{@auth}}% - \def\thanks##1{}\@author}% - \global\value{@inst}=\value{@auth}% - \global\value{auco}=\value{@auth}% - \setcounter{@auth}{1}% -{\lineskip .5em -\noindent\ignorespaces -\@author\vskip.35cm} - {\small\institutename} - \end{center}% - } - -% definition of the "\spnewtheorem" command. -% -% Usage: -% -% \spnewtheorem{env_nam}{caption}[within]{cap_font}{body_font} -% or \spnewtheorem{env_nam}[numbered_like]{caption}{cap_font}{body_font} -% or \spnewtheorem*{env_nam}{caption}{cap_font}{body_font} -% -% New is "cap_font" and "body_font". It stands for -% fontdefinition of the caption and the text itself. -% -% "\spnewtheorem*" gives a theorem without number. -% -% A defined spnewthoerem environment is used as described -% by Lamport. 
-% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -\def\@thmcountersep{} -\def\@thmcounterend{.} - -\def\spnewtheorem{\@ifstar{\@sthm}{\@Sthm}} - -% definition of \spnewtheorem with number - -\def\@spnthm#1#2{% - \@ifnextchar[{\@spxnthm{#1}{#2}}{\@spynthm{#1}{#2}}} -\def\@Sthm#1{\@ifnextchar[{\@spothm{#1}}{\@spnthm{#1}}} - -\def\@spxnthm#1#2[#3]#4#5{\expandafter\@ifdefinable\csname #1\endcsname - {\@definecounter{#1}\@addtoreset{#1}{#3}% - \expandafter\xdef\csname the#1\endcsname{\expandafter\noexpand - \csname the#3\endcsname \noexpand\@thmcountersep \@thmcounter{#1}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@spynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname - {\@definecounter{#1}% - \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#3}{#4}}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@spothm#1[#2]#3#4#5{% - \@ifundefined{c@#2}{\@latexerr{No theorem environment `#2' defined}\@eha}% - {\expandafter\@ifdefinable\csname #1\endcsname - {\newaliascnt{#1}{#2}% - \expandafter\xdef\csname #1name\endcsname{#3}% - \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% - \global\@namedef{end#1}{\@endtheorem}}}} - -\def\@spthm#1#2#3#4{\topsep 7\p@ \@plus2\p@ \@minus4\p@ -\refstepcounter{#1}% -\@ifnextchar[{\@spythm{#1}{#2}{#3}{#4}}{\@spxthm{#1}{#2}{#3}{#4}}} - -\def\@spxthm#1#2#3#4{\@spbegintheorem{#2}{\csname the#1\endcsname}{#3}{#4}% - \ignorespaces} - -\def\@spythm#1#2#3#4[#5]{\@spopargbegintheorem{#2}{\csname - the#1\endcsname}{#5}{#3}{#4}\ignorespaces} - -\def\@spbegintheorem#1#2#3#4{\trivlist - \item[\hskip\labelsep{#3#1\ #2\@thmcounterend}]#4} - -\def\@spopargbegintheorem#1#2#3#4#5{\trivlist - \item[\hskip\labelsep{#4#1\ #2}]{#4(#3)\@thmcounterend\ }#5} - -% definition of 
\spnewtheorem* without number - -\def\@sthm#1#2{\@Ynthm{#1}{#2}} - -\def\@Ynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname - {\global\@namedef{#1}{\@Thm{\csname #1name\endcsname}{#3}{#4}}% - \expandafter\xdef\csname #1name\endcsname{#2}% - \global\@namedef{end#1}{\@endtheorem}}} - -\def\@Thm#1#2#3{\topsep 7\p@ \@plus2\p@ \@minus4\p@ -\@ifnextchar[{\@Ythm{#1}{#2}{#3}}{\@Xthm{#1}{#2}{#3}}} - -\def\@Xthm#1#2#3{\@Begintheorem{#1}{#2}{#3}\ignorespaces} - -\def\@Ythm#1#2#3[#4]{\@Opargbegintheorem{#1} - {#4}{#2}{#3}\ignorespaces} - -\def\@Begintheorem#1#2#3{#3\trivlist - \item[\hskip\labelsep{#2#1\@thmcounterend}]} - -\def\@Opargbegintheorem#1#2#3#4{#4\trivlist - \item[\hskip\labelsep{#3#1}]{#3(#2)\@thmcounterend\ }} - -\if@envcntsect - \def\@thmcountersep{.} - \spnewtheorem{theorem}{Theorem}[section]{\bfseries}{\itshape} -\else - \spnewtheorem{theorem}{Theorem}{\bfseries}{\itshape} - \if@envcntreset - \@addtoreset{theorem}{section} - \else - \@addtoreset{theorem}{chapter} - \fi -\fi - -%definition of divers theorem environments -\spnewtheorem*{claim}{Claim}{\itshape}{\rmfamily} -\spnewtheorem*{proof}{Proof}{\itshape}{\rmfamily} -\if@envcntsame % alle Umgebungen wie Theorem. 
- \def\spn@wtheorem#1#2#3#4{\@spothm{#1}[theorem]{#2}{#3}{#4}} -\else % alle Umgebungen mit eigenem Zaehler - \if@envcntsect % mit section numeriert - \def\spn@wtheorem#1#2#3#4{\@spxnthm{#1}{#2}[section]{#3}{#4}} - \else % nicht mit section numeriert - \if@envcntreset - \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} - \@addtoreset{#1}{section}} - \else - \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} - \@addtoreset{#1}{chapter}}% - \fi - \fi -\fi -\spn@wtheorem{case}{Case}{\itshape}{\rmfamily} -\spn@wtheorem{conjecture}{Conjecture}{\itshape}{\rmfamily} -\spn@wtheorem{corollary}{Corollary}{\bfseries}{\itshape} -\spn@wtheorem{definition}{Definition}{\bfseries}{\itshape} -\spn@wtheorem{example}{Example}{\itshape}{\rmfamily} -\spn@wtheorem{exercise}{Exercise}{\itshape}{\rmfamily} -\spn@wtheorem{lemma}{Lemma}{\bfseries}{\itshape} -\spn@wtheorem{note}{Note}{\itshape}{\rmfamily} -\spn@wtheorem{problem}{Problem}{\itshape}{\rmfamily} -\spn@wtheorem{property}{Property}{\itshape}{\rmfamily} -\spn@wtheorem{proposition}{Proposition}{\bfseries}{\itshape} -\spn@wtheorem{question}{Question}{\itshape}{\rmfamily} -\spn@wtheorem{solution}{Solution}{\itshape}{\rmfamily} -\spn@wtheorem{remark}{Remark}{\itshape}{\rmfamily} - -\def\@takefromreset#1#2{% - \def\@tempa{#1}% - \let\@tempd\@elt - \def\@elt##1{% - \def\@tempb{##1}% - \ifx\@tempa\@tempb\else - \@addtoreset{##1}{#2}% - \fi}% - \expandafter\expandafter\let\expandafter\@tempc\csname cl@#2\endcsname - \expandafter\def\csname cl@#2\endcsname{}% - \@tempc - \let\@elt\@tempd} - -\def\theopargself{\def\@spopargbegintheorem##1##2##3##4##5{\trivlist - \item[\hskip\labelsep{##4##1\ ##2}]{##4##3\@thmcounterend\ }##5} - \def\@Opargbegintheorem##1##2##3##4{##4\trivlist - \item[\hskip\labelsep{##3##1}]{##3##2\@thmcounterend\ }} - } - -\renewenvironment{abstract}{% - \list{}{\advance\topsep by0.35cm\relax\small - \leftmargin=1cm - \labelwidth=\z@ - \listparindent=\z@ - \itemindent\listparindent - 
\rightmargin\leftmargin}\item[\hskip\labelsep - \bfseries\abstractname]} - {\endlist} - -\newdimen\headlineindent % dimension for space between -\headlineindent=1.166cm % number and text of headings. - -\def\ps@headings{\let\@mkboth\@gobbletwo - \let\@oddfoot\@empty\let\@evenfoot\@empty - \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% - \leftmark\hfil} - \def\@oddhead{\normalfont\small\hfil\rightmark\hspace{\headlineindent}% - \llap{\thepage}} - \def\chaptermark##1{}% - \def\sectionmark##1{}% - \def\subsectionmark##1{}} - -\def\ps@titlepage{\let\@mkboth\@gobbletwo - \let\@oddfoot\@empty\let\@evenfoot\@empty - \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% - \hfil} - \def\@oddhead{\normalfont\small\hfil\hspace{\headlineindent}% - \llap{\thepage}} - \def\chaptermark##1{}% - \def\sectionmark##1{}% - \def\subsectionmark##1{}} - -\if@runhead\ps@headings\else -\ps@empty\fi - -\setlength\arraycolsep{1.4\p@} -\setlength\tabcolsep{1.4\p@} - -\endinput -%end of file llncs.cls diff --git a/main.brf b/main.brf new file mode 100644 index 0000000..f862fa9 --- /dev/null +++ b/main.brf @@ -0,0 +1,110 @@ +\backcite {Sanderson2022}{{1}{1}{figure.caption.1}} +\backcite {Vezakis2024}{{1}{1}{figure.caption.1}} +\backcite {Wang2022b}{{1}{1}{figure.caption.1}} +\backcite {Carion2020}{{1}{1}{figure.caption.1}} +\backcite {Girshick2013}{{1}{1}{figure.caption.1}} +\backcite {He2017}{{1}{1}{figure.caption.1}} +\backcite {Dosovitskiy2021}{{1}{1}{figure.caption.1}} +\backcite {Liu2021}{{1}{1}{figure.caption.1}} +\backcite {Touvron2021b}{{1}{1}{figure.caption.1}} +\backcite {Khan2022}{{1}{1}{figure.caption.1}} +\backcite {Rangel2024}{{1}{1}{figure.caption.1}} +\backcite {Deng2009}{{1}{1}{figure.caption.1}} +\backcite {He2016}{{1}{1}{figure.caption.1}} +\backcite {Krizhevsky2012}{{1}{1}{figure.caption.1}} +\backcite {He2016}{{1}{1}{figure.caption.1}} +\backcite {Krizhevsky2012}{{1}{1}{figure.caption.1}} +\backcite 
{Touvron2022}{{1}{1}{figure.caption.1}} +\backcite {Wortsman2022}{{1}{1}{figure.caption.1}} +\backcite {Vaswani2017}{{1}{1}{figure.caption.1}} +\backcite {Dosovitskiy2021}{{1}{1}{figure.caption.1}} +\backcite {Carion2020}{{1}{1}{figure.caption.1}} +\backcite {Wang2022a}{{1}{1}{figure.caption.1}} +\backcite {Wortsman2022}{{1}{1}{figure.caption.1}} +\backcite {Yu2022}{{1}{1}{figure.caption.1}} +\backcite {Zong2022}{{1}{1}{figure.caption.1}} +\backcite {Shorten2019}{{1}{1}{figure.caption.1}} +\backcite {Xu2023d}{{1}{1}{figure.caption.1}} +\backcite {Alomar2023}{{1}{1}{figure.caption.1}} +\backcite {Ding2023a}{{2}{1}{figure.caption.1}} +\backcite {RojasGomez2023}{{2}{1}{figure.caption.1}} +\backcite {Kolesnikov2020}{{2}{1}{figure.caption.1}} +\backcite {Ren2024}{{2}{1}{figure.caption.1}} +\backcite {Sun2024}{{2}{1}{figure.caption.1}} +\backcite {Suvorov2021}{{2}{1}{figure.caption.1}} +\backcite {Zhong2017}{{2}{2}{section*.3}} +\backcite {Liu2022d}{{2}{2}{section*.3}} +\backcite {Zhang2018a}{{2}{2}{section*.3}} +\backcite {Yun2019}{{2}{2}{section*.3}} +\backcite {Takahashi2018}{{2}{2}{section*.3}} +\backcite {Cubuk2018}{{2}{2}{section*.3}} +\backcite {Cubuk2019}{{2}{2}{section*.3}} +\backcite {Touvron2022}{{2}{2}{section*.3}} +\backcite {Shorten2019}{{2}{2}{section*.3}} +\backcite {Xu2023d}{{2}{2}{section*.3}} +\backcite {Ghiasi2020}{{2}{2}{section*.4}} +\backcite {Ghiasi2020}{{2}{2}{section*.4}} +\backcite {Shermaine2025}{{2}{2}{section*.4}} +\backcite {Ling2022}{{2}{2}{section*.4}} +\backcite {Werman2021}{{2}{2}{section*.4}} +\backcite {Hinterstoisser2019}{{2}{2}{section*.4}} +\backcite {Dwibedi2017}{{2}{2}{section*.4}} +\backcite {Ge2023}{{2}{2}{section*.4}} +\backcite {Werman2021}{{2}{2}{section*.4}} +\backcite {Hendrycks2019}{{3}{2}{section*.5}} +\backcite {Hendrycks2019}{{3}{2}{section*.5}} +\backcite {Li2023e}{{3}{2}{section*.5}} +\backcite {Zhang2024f}{{3}{2}{section*.5}} +\backcite {Geirhos2018}{{3}{2}{section*.5}} +\backcite {Xiao2020}{{3}{2}{section*.5}} 
+\backcite {Sun2024}{{3}{3}{section*.7}} +\backcite {Ren2024}{{3}{3}{section*.7}} +\backcite {Liu2023e}{{3}{3}{section*.7}} +\backcite {Kirillov2023}{{3}{3}{section*.7}} +\backcite {Suvorov2021}{{3}{3}{section*.7}} +\backcite {Sun2024}{{3}{3}{section*.7}} +\backcite {Bates1955}{{4}{3}{equation.3.2}} +\backcite {Touvron2022}{{4}{3}{equation.3.2}} +\backcite {Le2015}{{4}{4.1}{subsection.4.1}} +\backcite {Suvorov2021}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Suvorov2021}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Suvorov2021}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite 
{Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{\caption@xref {??}{ on input line 65}}{table.caption.9}} +\backcite {Sun2024}{{5}{4.1}{table.caption.9}} +\backcite {Suvorov2021}{{5}{4.1}{table.caption.9}} +\backcite {Bates1955}{{6}{4.1}{table.caption.11}} +\backcite {Jonhson1995}{{6}{4.1}{table.caption.11}} +\backcite {Nauen2023}{{6}{4.2}{table.caption.13}} +\backcite {Touvron2022}{{6}{4.2}{table.caption.13}} +\backcite {Dosovitskiy2021}{{6}{4.2}{table.caption.13}} +\backcite {Liu2021}{{6}{4.2}{table.caption.13}} +\backcite {He2016}{{6}{4.2}{table.caption.13}} +\backcite {Maji2013}{{6}{4.2}{table.caption.14}} +\backcite {Dehghan2017}{{6}{4.2}{table.caption.14}} +\backcite {Nilsback2008}{{6}{4.2}{table.caption.14}} +\backcite {Kaur2017}{{6}{4.2}{table.caption.14}} +\backcite {Parkhi2012}{{6}{4.2}{table.caption.14}} +\backcite {Chattopadhay2018}{{7}{4.3}{table.caption.18}} +\backcite {Selvaraju2016}{{7}{4.3}{table.caption.18}} +\backcite {Sundararajan2017}{{7}{4.3}{table.caption.18}} +\backcite {Selvaraju2016}{{7}{4.3}{table.caption.18}} +\backcite {Chattopadhay2018}{{7}{4.3}{table.caption.18}} +\backcite {Sundararajan2017}{{7}{4.3}{table.caption.18}} diff --git a/main.pdf b/main.pdf new file mode 100644 index 0000000..43017c7 Binary files /dev/null and b/main.pdf differ diff --git a/main.tex b/main.tex index 857f5ae..fde61ec 100644 --- a/main.tex +++ b/main.tex @@ -1,102 +1,70 @@ -\documentclass[runningheads]{llncs} +% ICCV 2025 Paper Template -% --------------------------------------------------------------- -% Include basic ECCV package - -% TODO REVIEW: Insert your submission number below by replacing '*****' -% TODO FINAL: Comment out the following line for the camera-ready version -\usepackage[review,year=2026,ID=1741]{eccv} -% % TODO FINAL: Un-comment the following line for the camera-ready version -% \usepackage{eccv} 
+\documentclass[10pt,twocolumn,letterpaper]{article} -% OPTIONAL: Un-comment the following line for a version which is easier to read -% on small portrait-orientation screens (e.g., mobile phones, or beside other windows) -%\usepackage[mobile]{eccv} +%%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION +% \usepackage{iccv} % To produce the CAMERA-READY version +\usepackage[review]{iccv} % To produce the REVIEW version +% \usepackage[pagenumbers]{iccv} % To force page numbers, e.g. for an arXiv version - -% --------------------------------------------------------------- -% Other packages - -% Commonly used abbreviations (\eg, \ie, \etc, \cf, \etal, etc.) -\usepackage{eccvabbrv} - -% Include other packages here, before hyperref. -\usepackage{graphicx} -\usepackage{booktabs} - -% The "axessiblity" package can be found at: https://ctan.org/pkg/axessibility?lang=en -\usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities. - -% --------------------------------------------------------------- -% Hyperref package +% Import additional packages in the preamble file, before hyperref +\input{packages} % It is strongly recommended to use hyperref, especially for the review version. -% Please disable hyperref *only* if you encounter grave issues. -% hyperref with option pagebackref eases the reviewers' job, but should be disabled for the final version. +% hyperref with option pagebackref eases the reviewers' job. +% Please disable hyperref *only* if you encounter grave issues, +% e.g. with the file validation for the camera-ready version. % -% If you comment hyperref and then uncomment it, you should delete -% main.aux before re-running LaTeX. -% (Or just hit 'q' on the first LaTeX run, let it finish, and you -% should be clear). +% If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX. +% (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear). 
+\definecolor{iccvblue}{rgb}{0.21,0.49,0.74} +\usepackage[pagebackref,breaklinks,colorlinks,allcolors=iccvblue]{hyperref} +\usepackage[capitalize,noabbrev]{cleveref} -% TODO FINAL: Comment out the following line for the camera-ready version -%\usepackage[pagebackref,breaklinks,colorlinks,citecolor=eccvblue]{hyperref} -% TODO FINAL: Un-comment the following line for the camera-ready version -\usepackage{hyperref} -\input{packages.tex} - -% Support for ORCID icon -\usepackage{orcidlink} +%%%%%%%%% PAPER ID - PLEASE UPDATE +\def\paperID{6426} % *** Enter the Paper ID here +\def\confName{ICCV} +\def\confYear{2025} +\newcommand{\name}{\textit{ForNet}\xspace} \newcommand{\schemename}{\textit{ForAug}\xspace} +% Names: RecombiNet, RecombNet, ReMix, ReMixNet, FoReMix/ForeMix + +%%%%%%%%% TITLE - PLEASE UPDATE +\title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation} + +%%%%%%%%% AUTHORS - PLEASE UPDATE +\author{Tobias Christian Nauen${}^{1,2}$ Brian Moser${}^2$ Federico Raue${}^2$ Stanislav Frolov${}^2$ Andreas Dengel${}^{1,2}$\\ +${}^1$RPTU Kaiserslautern-Landau, Kaiserslautern, Germany \\ +${}^2$German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany \\ +{\tt\small first\_second.last@dfki.de / first.last@dfki.de} +% For a paper whose authors are all at the same institution, +% omit the following lines up until the closing ``}''. +% Additional authors and addresses can be added with ``\and'', +% just like the second author. 
+% To save space, use either the email address or home page, not both +} \begin{document} - -% --------------------------------------------------------------- -% \title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation} -\title{\schemename: Mitigating Biases in Image Classification via Controlled Image Compositions} - -% TODO REVIEW: If the paper title is too long for the running head, you can set -% an abbreviated paper title here. If not, comment out. -\titlerunning{\schemename} - -% TODO FINAL: Replace with your author list. -% Include the authors' OCRID for the camera-ready version, if at all possible. -\author{ - Tobias Christian Nauen\inst{1,2}\orcidlink{0000-1111-2222-3333} \and - Brian Moser\inst{2}\orcidlink{1111-2222-3333-4444} \and - Federico Raue\inst{2}\orcidlink{2222--3333-4444-5555} \and \\ - Stanislav Frolov\inst{2} \and - Andreas Dengel\inst{1,2} -} - -% TODO FINAL: Replace with an abbreviated list of authors. -\authorrunning{T.~C.~Nauen et al.} -% First names are abbreviated in the running head. -% If there are more than two authors, 'et al.' is used. - -% TODO FINAL: Replace with your institution list. 
-\institute{RPTU University Kaiserslautern-Landau, Kaiserslautern, Germany \and - German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany\\ - \email{first\_second.last@dfki.de} / \email{first.last@dfki.de} -} - \maketitle +\input{sec/abstract} +\input{sec/intro} +\input{sec/related_work} +\input{sec/method} +\input{sec/experiments} +% \input{sec/future_work} +\input{sec/conclusion} +\input{sec/acks} -\input{sec/abstract.tex} -\input{sec/intro.tex} -% \input{sec/intro_old.tex} -\input{sec/related_work.tex} -\input{sec/method.tex} -\input{sec/experiments.tex} -\input{sec/conclusion.tex} -\input{sec/acks.tex} - -\bibliographystyle{splncs04} -\bibliography{../JabRef/main_bib} +{ + \small + \bibliographystyle{ieeenat_fullname} + \bibliography{../JabRef/main_bib} +} % \newpage +% \onecolumn % \appendix -% \input{sec/appendix.tex} +% \input{sec/appendix} \end{document} diff --git a/packages.tex b/packages.tex index 08b376e..d8173c3 100644 --- a/packages.tex +++ b/packages.tex @@ -1,11 +1,14 @@ -\usepackage{color} +% \usepackage{color} % \usepackage{hyperref} +% if you use cleveref.. 
+% \usepackage[capitalize,noabbrev]{cleveref} + + % my own set of packages \usepackage{amssymb} \usepackage{amsfonts} \usepackage{amsmath} -\usepackage[capitalize,noabbrev]{cleveref} %\usepackage{tabu} \usepackage{amsxtra} \usepackage{cancel} @@ -26,8 +29,7 @@ \usepackage{textcomp} %\usepackage[defaultlines=3,all]{nowidow} \usepackage{float} -\usepackage{placeins} -\usepackage{xcolor} +%\usepackage{xcolor} \usepackage{pdflscape} \usepackage{csquotes} %\usepackage{setspace} @@ -53,7 +55,6 @@ \usepackage{booktabs} \usepackage{microtype} \usepackage{footmisc} -\usepackage[export]{adjustbox} % Mathshortcuts \DeclareMathSymbol{\mlq}{\mathord}{operators}{``} @@ -133,12 +134,9 @@ \newcommand{\ops}{\operatorname{ops}} \newcommand{\entr}{\operatorname{entries}} \newcommand{\gtxt}[1]{\text{\textcolor{gray}{#1}}} -\definecolor{DarkGreen}{RGB}{34,149,34} \newcommand{\grntxt}[1]{\text{\textcolor{ForestGreen}{#1}}} \newcommand{\rdtxt}[1]{\text{\textcolor{red}{#1}}} \newcommand{\code}[1]{\texttt{#1}} -\newcommand{\cmark}{\ding{51}}% -\newcommand{\xmark}{\ding{55}}% \newcommand*\rot{\rotatebox{90}} \newcommand{\tldr}{\textbf{TL;DR:}\xspace} diff --git a/preamble.tex b/preamble.tex new file mode 100644 index 0000000..53747b0 --- /dev/null +++ b/preamble.tex @@ -0,0 +1,10 @@ +% +% --- inline annotations +% +\newcommand{\red}[1]{{\color{red}#1}} +\newcommand{\todo}[1]{{\color{red}#1}} +\newcommand{\TODO}[1]{\textbf{\color{red}[TODO: #1]}} +% --- disable by uncommenting +% \renewcommand{\TODO}[1]{} +% \renewcommand{\todo}[1]{#1} + diff --git a/rebuttal.pdf b/rebuttal.pdf new file mode 100644 index 0000000..f614c43 Binary files /dev/null and b/rebuttal.pdf differ diff --git a/rebuttal.tex b/rebuttal.tex new file mode 100644 index 0000000..e0bfef8 --- /dev/null +++ b/rebuttal.tex @@ -0,0 +1,214 @@ +\documentclass[10pt,twocolumn,letterpaper]{article} +\usepackage[rebuttal]{iccv} + +% Include other packages here, before hyperref. 
+\usepackage{graphicx} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{booktabs} + +% Import additional packages in the preamble file, before hyperref +\input{packages} + +% If you comment hyperref and then uncomment it, you should delete +% egpaper.aux before re-running latex. (Or just hit 'q' on the first latex +% run, let it finish, and you should be clear). +\definecolor{iccvblue}{rgb}{0.21,0.49,0.74} +\usepackage[pagebackref,breaklinks,colorlinks,allcolors=iccvblue]{hyperref} + +% If you wish to avoid re-using figure, table, and equation numbers from +% the main paper, please uncomment the following and change the numbers +% appropriately. +%\setcounter{figure}{2} +%\setcounter{table}{1} +%\setcounter{equation}{2} + +% If you wish to avoid re-using reference numbers from the main paper, +% please uncomment the following and change the counter value to the +% number of references you have in the main paper (here, 100). +%\makeatletter +%\apptocmd{\thebibliography}{\global\c@NAT@ctr 100\relax}{}{} +%\makeatother + +%%%%%%%%% PAPER ID - PLEASE UPDATE +\def\paperID{6426} % *** Enter the Paper ID here +\def\confName{ICCV} +\def\confYear{2025} + +\newcommand{\rone}{\textbf{\textcolor{blue}{kCub}}} +\newcommand{\rtwo}{\textbf{\textcolor{red}{W3SS}}} +\newcommand{\rthree}{\textbf{\textcolor{ForestGreen}{5E96}}} + +\begin{document} + +\newcommand{\name}{\textit{ForNet}\xspace} +\newcommand{\schemename}{\textit{ForAug}\xspace} +% Names: RecombiNet, RecombNet, ReMix, ReMixNet, FoReMix/ForeMix + +%%%%%%%%% TITLE - PLEASE UPDATE +\title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation} + + +\maketitle +\thispagestyle{empty} +\appendix + +We would like to sincerely thank the reviewers (\rone, \rtwo, \rthree) for their time and valuable feedback. +Below, we will address each of the reviewers points. +% Citations are those of the original manuscript. + + +% \begin{table}[h!] 
+% \centering +% \small +% \begin{tabular}{lcccccc} +% \toprule +% \multirow{2.5}{*}{Model} & \multicolumn{3}{c}{ImageNet-9} & \multicolumn{3}{c}{CounterAnimal} \\ +% \cmidrule(r){2-4} \cmidrule(l){5-7} +% & same & rand & gap & common & counter & gap \\ +% \midrule +% ViT-S/16 @ IN & $85.86 \pm 1.47$ & $69.74 \pm 1.75$ & $16.12$ & $84.86 \pm 0.37$ & $69.27 \pm 0.39$ & $15.59$ \\ +% ViT-S/16 @ FN & $84.34 \pm 2.17$ & $73.74 \pm 1.92$ & $10.61$ (\grntxt{-5.51}) & $88.37 \pm 0.46$ & $74.48 \pm 0.42$ & $13.89$ (\grntxt{-1.70}) \\ +% ViT-B/16 @ IN & $86.24 \pm 0.67$ & $64.60 \pm 1.82$ & $21.64$ & $83.43 \pm 0.43$ & $66.56 \pm 0.66$ & $16.87$ \\ +% ViT-B/16 @ FN & $84.18 \pm 3.85$ & $73.59 \pm 6.34$ & $10.59$ (\grntxt{-11.05}) & $88.21 \pm 0.61$ & $75.50 \pm 1.10$ & $12.71$ (\grntxt{-4.16}) \\ +% ViT-L/16 @ IN & $88.56 \pm 0.50$ & $68.26 \pm 0.98$ & $20.30$ & $79.72 \pm 0.89$ & $60.57 \pm 1.10$ & $19.15$ \\ +% ViT-L/16 @ FN & $89.72 \pm 0.37$ & $77.29 \pm 1.85$ & $12.44$ (\grntxt{-9.86}) & $87.78 \pm 0.07$ & $75.79 \pm 0.42$ & $11.99$ (\grntxt{-7.16}) \\ +% \midrule +% Swin-Ti @ IN & $91.61 \pm 0.30$ & $77.85 \pm 0.52$ & $13.77$ & $84.48 \pm 0.35$ & $69.03 \pm 0.50$ & $15.44$ \\ +% Swin-Ti @ FN & $93.34 \pm 0.55$ & $84.68 \pm 1.00$ & $8.66$ (\grntxt{-5.11}) & $87.40 \pm 0.15$ & $74.04 \pm 0.05$ & $13.37$ (\grntxt{-2.07}) \\ +% Swin-S @ IN & $90.89 \pm 0.40$ & $74.89 \pm 0.94$ & $16.00$ & $85.93 \pm 0.43$ & $71.81 \pm 0.59$ & $14.12$ \\ +% Swin-S @ FN & $93.28 \pm 0.66$ & $84.24 \pm 1.24$ & $9.04$ (\grntxt{6.96}) & $88.52 \pm 0.54$ & $75.78 \pm 0.22$ & $12.75$ (\grntxt{-1.37}) \\ +% \midrule +% resnet50 @ IN & $24.09 \pm 0.50$ & $22.13 \pm 0.30$ & $1.96$ & $85.35 \pm 0.23$ & $68.10 \pm 0.59$ & $17.25$ \\ +% resnet50 @ FN & $26.82 \pm 0.24$ & $24.63 \pm 0.07$ & $2.19$ (\rdtxt{+0.23}) & $86.16 \pm 0.16$ & $69.17 \pm 0.15$ & $17.00$ (\grntxt{-0.25}) \\ +% resnet101 @ IN & $25.01 \pm 0.14$ & $23.21 \pm 0.31$ & $1.80$ & $86.35 \pm 0.28$ & $70.58 \pm 0.41$ & $15.77$ \\ +% 
resnet101 @ FN & $29.61 \pm 0.38$ & $27.37 \pm 0.44$ & $2.24$ (\rdtxt{+0.44}) & $87.68 \pm 0.22$ & $73.74 \pm 0.50$ & $13.94$ (\grntxt{1.83}) \\ +% \bottomrule +% \end{tabular} +% \caption{ImageNet-9 and CounterAnimal results for models trained on ImageNet (IN) and ForegroundNet (FN). The numbers in parentheses indicate the difference to the IN trainded model.} +% \end{table} + +\textbf{Reasoning and purpose of \schemename (\rone):} +% The primary purpose of \schemename is to explicitly encode desired invariances directly into the training data (L62) %, +% unlike traditional approaches which rely on model architectures (L56f), +% with the goal of enhancing model robustness and minimizing spurious correlations. +% Our rationale is that by systematically exposing the model to a wide yet controlled range of variations through data, we can make it inherently more robust and less reliant on spurious correlations. +% The goal is to enhance model robustness and minimize spurious correlations by systematically exposing models to controlled variations. +% \schemename's methodology was deliberately designed to achieve this purpose more effectively than simpler augmentation techniques. +% It allows for explicit control over varying position, size, and background independently and extensively and thus moves beyond minor alterations of existing images to generate novel, challenging, yet realistic training samples. +% \schemename's methodology offers explicit control over object position, size, and background and thus moves beyond minor alterations of existing images to generate novel, challenging, yet realistic training samples while maintaining label integrity. +% \schemename moves beyond minor alterations of existing images by controlling object position, size, and background to generate novel, challenging, yet realistic training samples while maintaining label integrity. 
+Traditional data augmentations are limited by existing image compositions, leading to biases where objects are centered and correlated with specific backgrounds. +\schemename aims to overcome these limitations by introducing object size, position, and background as independent, controllable degrees of freedom. +This approach explicitly exposes the model to a wider range of variations, actively reducing such compositional biases (see Tbls. 6, 8; Figs. 4, \ref{fig:bg-diff-results} (right)). +% Our approach (as detailed above) ensures label integrity despite complex augmentations. +% We show empirically that \schemename reduces model biases towards position, size, and background (Tbls. 6, 8; Figs. 4, \ref{fig:bg-diff-results} (right)). +%We show empirically that \schemename significantly reduces model bias towards position (Table 8), size (Figure 4), and background (Table 6 and \Cref{fig:bg-diff-results}). +Consequently, models trained with \schemename exhibit better performance specifically on these lower-likelihood images. % where differently trained would typically falter. +%In evaluation \schemename also serves as an effective analytical tool for measuring these biases in any pre-trained ImageNet model (Section 4.3). +Moreover, \schemename serves as an analytical tool for measuring biases in any ImageNet-trained model (Sec. 4.3). +%We acknowledge that the manuscript may not have focussed on this reasoning and analysis sufficiently and have expanded the discussion on the purpose and design choices of ForAug in the Introduction and Methodology sections. +%We also have enhanced Section 4.3 to more explicitly connect our experimental findings back to the core purpose of \schemename. +% Acknowledging the need for more clarity, we have expanded the Introduction and Methodology to further highlight the purpose and design, and revised Sec. 4.3 to more clearly connect experimental findings to \schemename's core purpose. 
+Acknowledging the need for more clarity, we have expanded Sec. 1 and 3 to further highlight the purpose and design, and revised Sec. 4.3 to more clearly connect experimental findings to \schemename's goals. + + +\textbf{Novelty of ForAug (\rone, \rtwo):} +% While inspired by Copy-Paste methods, \schemename makes distinct contributions by extending them to address the non-trivial challenges of classification. +%While we draw inspiration from Copy-Paste methods, we respectfully assert that \schemename introduces distinct and novel contributions by specifically tailoring and extending Copy-Paste to address the unique and non-trivial challenges of image classification. +While inspired by Copy-Paste methods, \schemename makes distinct contributions by %extending them to address +addressing the non-trivial challenges of classification, where we are successfully \textit{"automating the copy-paste augmentation with [...] solid empirical gains"} (\rthree). +%We would like to take the opportunity to emphasize some of the points that make \schemename novel: +% We would like to emphasize some of the points that make \schemename novel: +We want to emphasize some elements that make \schemename novel: +\textbf{(1)} Adapting copy-paste to image classification has only been tried by \footnote{\label{note:staug}J. -S. Kang and K. Chung, "STAug: Copy-Paste Based Image Augmentation Technique Using Salient Target," in IEEE Access, vol. 10, 2022} as an alternative to MixUp in a specialized domain. +%\textbf{(1)} We adapt copy-paste to the unique demands of image classification, which (to the best of our knowledge) has only been investigated by one other paper \footnote{\label{note:staug}J. -S. Kang and K. Chung, "STAug: Copy-Paste Based Image Augmentation Technique Using Salient Target," in IEEE Access, vol. 10, 2022} that utilizes a Copy-Paste-based augmentation as an alternative to MixUp in a specialized domain. 
+The scarcity of such methods suggests either \schemename's novelty, or the inherent difficulty in achieving successful application, thereby highlighting the novelty of our specific design choices. % and its demonstrated effectiveness in improving generalization and reducing biases. +% \textbf{(2)} We introduce novel strategies to overcome key challenges in adapting to classification, like label integrity. +% We generate plain background images by removing the main object. %from which the primary object of interest has been removed. +\textbf{(2)} We overcome key challenges in adapting to classification. +For label integrity we generate plain background images, removing the main object. +% When a new foreground object is pasted onto these plain backgrounds, a clear and unambiguous label for the resulting image can be assigned. +Pasting a new object onto these backgrounds allows for a clear, unambiguous label. +This approach, unlike \footref{note:staug} and previous Copy-Paste methods pasting onto existing dataset images, ensures clear training signals and reduces spurious background correlations. +\textbf{(3)} % The focus on bias-mitigation for transformer models. +\schemename incorporates large-scale position and size augmentations for the foregrounds +to encode these equivariances into the training data for bias-mitigation, a feature not utilized to the same extent in \footref{note:staug} or [11, 14, 55]. +% This deliberate design choice encodes these equivariances into the training data, a feature not utilized to the same extent in \footref{note:staug} or [11, 14, 55]. +% This targeted augmentation is key to the bias reduction results we demonstrate. + + +\textbf{Directly comparing to Copy-Paste (\rtwo):} +%A direct application of detection/segmentation Copy-Paste methods to classification is not straightforward, because of the following reasons: +Adapting Copy-Paste for classification brings several challenges, due to +%1. 
Standard Copy-Paste methods in detection/segmentation often depend on human-annotated foreground masks [14,28,41,53], which are generally not available for the large-scale datasets used in image classification. +\textbf{(1)} its dependence on human-annotated foreground masks [14,28,41,53], which are generally not available for large-scale datasets used in image classification. +%2. A fundamental difficulty when adapting Copy-Paste for image classification is determining the label of the augmented image. +\textbf{(2)} the difficulty in determining the augmented image's label. +%If a new foreground object is pasted onto an existing image, the correct classification label becomes ambiguous: should it be the label of the new object, the original background's object, a combined multi-label, or be determined by relative object size/area? +%This ambiguity is not present in detection/segmentation where instance/pixel labels are preserved. +%Thus, any "direct" application of Copy-Paste would rely on many choices making it more akin to a new method. +Pasting new foregrounds on existing dataset images creates label ambiguity (e.g., should the label derive from the new object, original, a multi-label?), unlike in segmentation where instance labels are preserved. +Thus, "directly" applying Copy-Paste requires many design choices, essentially leading to a novel method. + + +\begin{table} + \centering + \small + \resizebox{.60\columnwidth}{!}{ + \begin{tabular}{lccccc} + \toprule + \multirow{2.5}{*}{Model} & DeiT & \multicolumn{2}{c}{Ours (DeiT)} & \multicolumn{2}{c}{Ours (DeiT III)} \\ + \cmidrule(lr){3-4} \cmidrule(lr){5-6} + & original & IN & FN & IN & FN \\ + \midrule + ViT-S & 79.8${}^*$ & 80.5 & 80.3 & 79.1 & \textbf{81.4} \\ + ViT-B & 81.8${}^*$ & 79.6 & \textbf{81.5} & 77.6 & 81.1 \\ + Swin-S & - & 82.2 & \textbf{82.4} & 79.4 & 80.6 \\ + \bottomrule + \end{tabular}} + \caption{Results when training on ImageNet (IN) and \name (FN) using different data augmentation schemes. 
DeiT uses EMA${}^*$.}
+ \label{tbl:deit-pipe}
+\end{table}
+\textbf{Using different training pipelines (\rtwo):}
+% Thank you for pointing out that,
+While \schemename improves accuracy, our data augmentation pipeline indeed does not reach the results from DeiT.
+However, there is no reason to believe that \schemename does not work or does not improve the model performance when using another data augmentation, or that it makes the comparison unfair, since the only difference between the results on ImageNet and \name is the inclusion of \schemename.
+% Thus all the performance gains have to be attributed to \schemename.
+% Thus all the performance gains come from \schemename.
+% Nonetheless, we present our results when training using the DeiT-pipeline in \Cref{tbl:deit-pipe} for a subset of models (time constraints).
+% We find that \schemename still improves the performance, especially of large transformers and will add the full set of results to the manuscript.
+\cref{tbl:deit-pipe} (above) presents results for a subset of models (time constraints) using the DeiT-pipeline, finding that \schemename still improves performance, especially for larger transformers, with the added benefit of bias reduction.
+Full results will be added to the manuscript.
+
+\begin{figure}[t!]
+ \centering
+ \resizebox{.84\columnwidth}{!}{
+ \includegraphics{../Diffusion MixUp/plots/rebuttal_bg_robustness.pdf}}
+ %\caption{Accuracy ranges on ImageNet9 and CounterAnimal.
+ %The top of the bars is the accuracy on the 'normal' background distribution, while the bottom is at the out of background distribution accuracy.
+ % All results are the mean value of 3 training runs.
+ %Training on \name (orange) instead of ImageNet (blue) always reduces the background gap (size of the bar) of transformers by significantly improving the out of distribution performance.}
+ \caption{
+ Results on ImageNet9 and CounterAnimal. Bars span from out-of-distribution (OOD, bottom) to normal backgrounds (top).
\name (orange) significantly improves OOD performance compared to ImageNet (blue), reducing the accuracy gap (bar size). + } + \label{fig:bg-diff-results} +\end{figure} + +\textbf{Background Robustness using other datasets (\rthree):} +While our metric (Eq. 4) was designed to mitigate \name-bias by comparing relative accuracy drops using the same recombination scheme, we agree and added the suggested benchmarks to the final manuscript (see \cref{fig:bg-diff-results} above). +%in both the numerator (original class backgrounds) and the denominator (all backgrounds), we fully agree that assessment on external benchmarks adds to a comprehensive evaluation. +%We provide the in- and out-of-distribution accuracy of our models on ImageNet9 and CounterAnimal in \Cref{fig:bg-diff-results}, which we will also add to the final manuscript. +These new results support our findings, as \name reduces the background gap of transformers by boosting OOD performance. %the performance on the out-of-distribution dataset. + +\textbf{Additional compute and space costs (\rthree):} +%While there are additional computations needed for the online recombination of \schemename, these are outsourced to the CPU and heavily parallelized. +%When training ViT-B/16 on A100 GPUs we measure an average step-time of $528 \pm 2$ ms when training on ImageNet and $534 \pm 1$ ms for \name, an increase of $1\%$. +%Regarding disk space, the \name dataset requires 73GB, compared to 147GB for ImageNet. +We added a discussion to the manuscript. +With ViT-B/16 on ImageNet (A100), \schemename leads to a minor 1\% increase in average step-time ($528 \pm 2$ ms to $534 \pm 1$ ms) since the online recombination is CPU-outsourced and heavily parallelized. +% Regarding disk space, \name requires 73GB, whereas ImageNet requires 147GB. +\name requires 73GB of disk space, while ImageNet needs 147GB. 
+ + +\end{document} diff --git a/sec/abstract.tex b/sec/abstract.tex index 3d18f9a..6f8c17e 100644 --- a/sec/abstract.tex +++ b/sec/abstract.tex @@ -1,27 +1,15 @@ % !TeX root = ../main.tex \begin{abstract} - % Transformers, particularly Vision Transformers (ViTs), have achieved state-of-the-art performance in large-scale image classification. - % However, they often require large amounts of data and can exhibit biases, such as center or size bias, that limit their robustness and generalizability. - % This paper introduces \schemename, a novel data augmentation operation that addresses these challenges by explicitly imposing invariances into the training data, which are otherwise part of the neural network architecture. - % \schemename is constructed by using pretrained foundation models to separate and recombine foreground objects with different backgrounds. - % This recombination step enables us to take fine-grained control over object position and size, as well as background selection. - % We demonstrate that using \schemename significantly improves the accuracy of ViTs and other architectures by up to 4.5 percentage points (p.p.) on ImageNet, which translates to 7.3 p.p. on downstream tasks. - % Importantly, \schemename not only improves accuracy but also opens new ways to analyze model behavior and quantify biases. - % Namely, we introduce metrics for background robustness, foreground focus, center bias, and size bias and show that using \schemename during training substantially reduces these biases. - % In summary, \schemename provides a valuable tool for analyzing and mitigating biases, enabling the development of more robust and reliable computer vision models. - % Our code and dataset are publicly available at \code{}. - - Large-scale image classification datasets exhibit strong compositional biases: objects tend to be centered, appear at characteristic scales, and co-occur with class-specific context. 
- % Models can exploit these biases to achieve high in-distribution accuracy, yet remain brittle under distribution shifts. - By exploiting such biases, models attain high in-distribution accuracy but remain fragile under distribution shifts. - To address this issue, we introduce \schemename, a controlled composition augmentation scheme that factorizes each training image into a \emph{foreground object} and a \emph{background} and recombines them to explicitly manipulate object position, object scale, and background identity. - \schemename uses off-the-shelf segmentation and inpainting models to (i) extract the foreground and synthesize a neutral background, and (ii) paste the foreground onto diverse neutral backgrounds before applying standard strong augmentation policies. - Compared to conventional augmentations and content-mixing methods, our factorization provides direct control knobs that break foreground-background correlations. % while preserving the label. - Across 10 architectures, \schemename improves ImageNet top-1 accuracy by up to 6 percentage points (p.p.) and yields gains of up to 7.3 p.p. on fine-grained downstream datasets. - Moreover, the same control knobs enable targeted diagnostic tests: we quantify background reliance, foreground focus, center bias, and size bias via controlled background swaps and position/scale sweeps, and show that training with \schemename substantially reduces these shortcut behaviors and significantly increases accuracy on standard distribution-shift benchmarks by up to $19$ p.p. - % Moreover, the same control knobs enable targeted diagnostic tests: we quantify background reliance, foreground focus, center bias, and size bias via controlled background swaps and position/scale sweeps, and show that training with \schemename substantially reduces these shortcut behaviors and significantly increases accuracy on standard distribution-shift benchmarks like ImageNet-A/-C/-R by up to $19$ p.p. 
+ Transformers, particularly Vision Transformers (ViTs), have achieved state-of-the-art performance in large-scale image classification.
+ However, they often require large amounts of data and can exhibit biases that limit their robustness and generalizability.
+ This paper introduces \schemename, a novel data augmentation scheme that addresses these challenges and explicitly incorporates inductive biases, which are commonly part of the neural network architecture, into the training data.
+ % This paper introduces \name, a novel dataset derived from ImageNet that addresses these challenges.
+ \schemename is constructed by using pretrained foundation models to separate and recombine foreground objects with different backgrounds, enabling fine-grained control over image composition during training.
+ It thus increases the data diversity and effective number of training samples.
+ We demonstrate that training on \name, the application of \schemename to ImageNet, significantly improves the accuracy of ViTs and other architectures by up to 4.5 percentage points (p.p.) on ImageNet and 7.3 p.p. on downstream tasks.
+ Importantly, \schemename enables novel ways of analyzing model behavior and quantifying biases.
+ Namely, we introduce metrics for background robustness, foreground focus, center bias, and size bias and show that training on \name substantially reduces these biases compared to training on ImageNet.
+ In summary, \schemename provides a valuable tool for analyzing and mitigating biases, enabling the development of more robust and reliable computer vision models. Our code and dataset are publicly available at \code{}.
-
-  \keywords{Data Augmentation \and Vision Transformer \and Robustness}
-\end{abstract}
+\end{abstract}
\ No newline at end of file
diff --git a/sec/acks.tex b/sec/acks.tex
index bc82be4..42bfe0e 100644
--- a/sec/acks.tex
+++ b/sec/acks.tex
@@ -3,6 +3,3 @@
 \subsection*{Acknowledgements}
 \label{sec:acknowledgements}
 Will be in the final paper.
- -% This work was funded by the Carl-Zeiss Foundation under the Sustainable Embedded AI project (P2021-02-009) and by the EU project SustainML (Horizon Europe grant agreement No 101070408). -% All compute was done thanks to the Pegasus cluster at DFKI Kaiserslautern. diff --git a/sec/appendix.tex b/sec/appendix.tex index 24626a8..0dc9b32 100644 --- a/sec/appendix.tex +++ b/sec/appendix.tex @@ -1,530 +1,87 @@ % !TeX root = ../supplementary.tex - - \section{Training Setup} \label{sec:training_setup} -\begin{table*}[h!] +\begin{table} \centering - \caption{Training setup and hyperparameters for our ImageNet training.} + \begin{tabular}{lc} + \toprule + Parameter & Value \\ + \midrule + Image Resolution & $224 \times 224$ \\ + Epochs & 300 \\ + Learning Rate & 3e-3 \\ + Learning Rate Schedule & cosine decay \\ + Batch Size & 2048 \\ + Warmup Schedule & linear \\ + Warmup Epochs & 3 \\ + Weight Decay & 0.02 \\ + Label Smoothing & 0.1 \\ + Optimizer & Lamb \cite{You2020} \\ + Data Augmentation Policy & 3-Augment \cite{Touvron2022} \\ + \bottomrule + \end{tabular} + \caption{Training setup for our ImageNet and \name training.} \label{tab:in-setup} - \resizebox{\textwidth}{!}{ - \begin{tabular}{lccc} - \toprule - Augmentation Pipeline: & Basic & 3-Augment~\cite{Touvron2022} & RandAugment~\cite{Touvron2021b} \\ - \midrule - Image Resolution & \multicolumn{3}{c}{$224 \times 224$} \\ - Epochs & \multicolumn{3}{c}{300} \\ - Learning Rate & S/B: 1e-3, L: 5e-4 & 3e-3 & S/B: 1e-3, L: 5e-4 \\ - Learning Rate Schedule & \multicolumn{3}{c}{cosine decay} \\ - Batch Size & 1024 & 2048 & 1024 \\ - GPUs & \multicolumn{3}{c}{$4\times$ NVIDIA A100/H100/H200} \\ - Warmup Schedule & \multicolumn{3}{c}{linear} \\ - Warmup Epochs & \multicolumn{3}{c}{3} \\ - Weight Decay & 0.05 & 0.02 & 0.05 \\ - Label Smoothing & \multicolumn{3}{c}{0.1} \\ - Optimizer & AdamW & Lamb \cite{You2020} & AdamW \\ - \midrule - Augmentations & \makecell{RandomResizedCrop \\ Horizontal Flip \\ ColorJitter} & 
\makecell{Resize \\ RandomCrop \\ Horizontal Flip \\ Grayscale \\ Solarize \\ Gaussian-Blur \\ Color Jitter} & \makecell{RandomResizedCrop \\ Horizontal Flip \\ RandomErase \cite{Zhong2020} \\ RandAugment \cite{Cubuk2020} \\ Color Jitter} \\ - \bottomrule - \end{tabular} - } -\end{table*} +\end{table} +On ImageNet we use the same training setup as \cite{Nauen2023} and \cite{Touvron2022} without pretraining. +As our focus is on evaluating the changes in accuracy due to \schemename/\name, like \cite{Nauen2023}, we stick to one set of hyperparameters for all models. +We list the settings used for training on ImageNet and \name in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}. + +\begin{table} + \centering + \begin{tabular}{lccc} + \toprule + Dataset & Batch Size & Epochs & Learning Rate \\ + \midrule + Aircraft & 512 & 500 & 3e-4 \\ + Cars & 1024 & 500 & 3e-4 \\ + Flowers & 256 & 500 & 3e-4 \\ + Food & 2048 & 100 & 3e-4 \\ + Pets & 512 & 500 & 3e-4 \\ + \bottomrule + \end{tabular} + \caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}.} + \label{tab:downstream-setup} +\end{table} + +\section{Infill Model Comparison} \begin{table}[h!] \centering - \caption{Training setup for finetuning on different downstream datasets. Other settings are the same as in \Cref{tab:in-setup}. For finetuning, we always utilize 3-Augment and the related parameters from the \emph{ViT, Swin, ResNet} column of \Cref{tab:in-setup}} - \label{tab:downstream-setup} - \begin{tabular}{lcccc} - \toprule - Dataset & Batch Size & Epochs & Learning Rate & Num. 
GPUs \\ - \midrule - Aircraft & 512 & 500 & 3e-4 & 2 \\ - Cars & 1024 & 500 & 3e-4 & 4 \\ - Flowers & 256 & 500 & 3e-4 & 1 \\ - Food & 2048 & 100 & 3e-4 & 4 \\ - Pets & 512 & 500 & 3e-4 & 2 \\ - \bottomrule - \end{tabular} -\end{table} -On ImageNet, we test three different data augmentation pipelines and hyperparameter settings as shown in \Cref{tab:in-setup}: A basic pipeline, a pipeline using RandAugment based on the DeiT~\cite{Touvron2021b} setup and 3-Augment, as used in \cite{Touvron2022,Nauen2025}. -When comparing different architectures, ViT, Swin, and ResNet are trained with the 3-Augment pipeline and DeiT is trained with the RandAugment pipeline. -% On ImageNet we use the same training setup as \cite{Nauen2025} and \cite{Touvron2022} without pretraining for ViT, Swin, and ResNet. -% For DeiT, we train the same ViT architecture but using the data augmentation scheme and hyperparameters from \cite{Touvron2021b}. -As our focus is on evaluating the changes in accuracy due to \schemename, like \cite{Nauen2025}, we stick to one set of hyperparameters for all models. -We list the settings used for training on ImageNet in \Cref{tab:in-setup} and the ones used for finetuning those weights on the downstream datasets in \Cref{tab:downstream-setup}. -Our implementation is using PyTorch \cite{Paszke2019} and the \emph{timm} library \cite{Wightman2019} for model architectures and basic functions. - -\begin{table*}[ht!] 
- \centering - \caption{Hardware and Software specifics used for both training and evaluation.} - \label{tab:hw-sw-versions} - \begin{tabular}{ll} - \toprule - Parameter & Value \\ - \midrule - GPU & $4 \times$ NVIDIA A100/H100/H200 \\ - CPU & 24 CPU cores (Intel Xenon) per GPU \\ - Memory & up to 120 GB per GPU \\ - Operating System & Enroot container for SLURM based on Ubuntu 24.04 LTS \\ - Python & 3.12.3 \\ - PyTorch & 2.7.0 \\ - TorchVision & 0.22.0 \\ - Timm & 1.0.15 \\ - \bottomrule - \end{tabular} -\end{table*} -\Cref{tab:hw-sw-versions} lists the specific hardware we use, as well as versions of the relevant software packages. - -\section{Resource Usage of \schemename} -To utilize the proposed \schemename, specific computational resources are necessary, particularly for computing and storing for the output of the segmentation stage and for on-the-fly processing of the recombination stage. - -\paragraph{Segmentation.} -% While calculating the segmentations and infills takes a lot of compute, this is effort that has to be spent only once per dataset. -\schemename involves a computationally expensive segmentation and infill stage, which is a one-time calculation per dataset. -Once computed, the segmentation and infill results can be perpetually reused, amortizing the initial cost over all subsequent experiments and applications. -On NVIDIA H100 GPUs, the segmentation stage will compute at a rate of $374.3 \frac{\text{img}}{\text{GPU} \times \text{h}}$ when using Attentive Eraser or $5 338.6 \frac{\text{img}}{\text{GPU} \times \text{h}}$ for LaMa. -For ImageNet this comes down to just under 9 days (Attentive Eraser) or 16 hours (LaMa) on two 8 GPU nodes. -To facilitate immediate use and reproduction of results, we publicly provide the precalculated segmentation stage output for the ImageNet dataset for download\footnote{Link will go here.}. 
-The output of \schemename's segmentation step on ImageNet dataset requires 73 GB of additional disk space for the segmentation output, which is separate from the base 147 GB ImageNet size. - -\paragraph{Recombination.} -The recombination step of \schemename is implemented as a based data loader operation. -It's thus offloaded to the CPU, where it can be heavily parallelized and thus only results in a very minor increase in the training step-time. -For example, using a ViT-B model on an NVIDIA A100 GPU, the average update step-time increased by $1\%$, from $528 \pm 2$ ms to $534 \pm 1$ ms. - -\section{Extended Bates Distribution} -\label{apdx:bates-distribution} -\begin{figure}[h!] - \centering - \includegraphics[width=.5\columnwidth]{img/bates.pdf} - \caption{Plot of the probability distribution function (PDF) of the extended Bates distribution for different parameters $\eta$. Higher values of $\eta$ concentrate the distribution around the center.} - \label{fig:bates-pdf} -\end{figure} - -We introduce an extension of the Bates distribution~\cite{Bates1955} to include negative parameters, enabling sampling of foreground object positions away from the image center. -The standard Bates distribution, for $\eta \in \N$, is defined as the mean of $\eta$ independent random variables drawn from a uniform distribution \cite{Jonhson1995}. -A larger $\eta$ value increases the concentration of samples around the distribution's mean, which in this case is the image center. - -To achieve an opposite effect--concentrating samples at the image borders--we extend the distribution to $\eta \leq 1$. -\begin{align*} - X \sim \text{Bates}(\eta) :\Leftrightarrow s(X) \sim \text{Bates}(-\eta) -\end{align*} -This is accomplished by sampling from a standard Bates distribution with parameter $-\eta \geq 1$ and then applying a sawtooth function. 
-The sawtooth function on the interval $[0,1]$ is defined as -\begin{align} - s(x) = \begin{cases} - x + 0.5 & \text{if } 0 < x < 0.5 \\ - x - 0.5 & \text{if } 0.5 \leq x \leq 1 - \end{cases} -\end{align} -This function effectively maps the central portion of the interval to the edges and the edge portions to the center. -For example, a value of 0.3 (central-left) is mapped to 0.8 (edge-right), while 0.8 (edge-right) is mapped to 0.3 (central-left). -This transformation inverts the distribution's concentration, shifting the probability mass from the center to the borders. -We visualize the distribution function of the extended Bates distribution in \Cref{fig:bates-pdf}. -Both $\eta = 1$ and $\eta = -1$ result in a uniform distribution across the image. - -\section{Design Choices of \schemename} -\label{sec:ablation} - -We start by ablating the design choices of \schemename on TinyImageNet~\cite{Le2015}, a subset of ImageNet containing 200 categories with 500 images each. %, and Tiny\name, the application of \schemename to TinyImageNet. -% \Cref{tab:ablation} presents the results of these ablations. -\Cref{tab:ablation-segment} presents ablations for segmentation and \Cref{tab:ablation-recombine} for recombination. - -\begin{table} - \caption{Ablation of the design decisions in the segmentation phase of \schemename on TinyImageNet. - The first line is our baseline, while the other lines are using \schemename. - We use basic settings with the \emph{same} background strategy during recombination for this experiment. - } - \label{tab:ablation-segment} - \centering - \small - % \resizebox{.9\columnwidth}{!}{ - \begin{tabular}{llcc} - \toprule - \multirow{2.5}{*}{\makecell{Detect. 
\\Prompt}} & \multirow{2.5}{*}{\makecell{Infill \\ Model}} & \multicolumn{2}{c}{TinyImageNet Accuracy [\%]} \\ - \cmidrule{3-4} - & & ViT-Ti & ViT-S \\ - \midrule - \multicolumn{2}{l}{\textbf{TinyImageNet}} & $66.1 \pm 0.5$ & $68.3 \pm 0.7$ \\ - specific & LaMa \cite{Suvorov2022} & $65.5 \pm 0.4$ & $71.2 \pm 0.5$ \\ - general & \gtxt{LaMa \cite{Suvorov2022}} & $66.4 \pm 0.6$ & $72.9 \pm 0.6$ \\ - \gtxt{general} & Att. Eraser \cite{Sun2025} & $67.5 \pm 1.2$ & $72.4 \pm 0.5$ \\ - \bottomrule - \end{tabular} - % } -\end{table} - -\begin{table}[t] - \caption{Ablation of the recombination phase of \schemename on TinyImageNet (top) and ImageNet (bottom). The first experiments use the initial segmentation settings with LaMa \cite{Suvorov2022}.} - \label{tab:ablation-recombine} - \centering - % \resizebox{.9\columnwidth}{!}{ - \begin{tabular}{ccccccccccc} - \toprule - % FG. & Augment. & BG. & BG. & Edge & Original & \multicolumn{2}{c}{Accuracy [\%]} \\ - % Size & Order & Strat. & Prune & Smoothing & Mixing & ViT-Ti & ViT-S \\ - \multirow{2.5}{*}{\makecell{FG. 
\\size}} & \multirow{2.5}{*}{\makecell{Augment.\\Order}} & \multirow{2.5}{*}{\makecell{BG\\Strat.}} & \multirow{2.5}{*}{\makecell{BG.\\Prune}} & \multirow{2.5}{*}{\makecell{Original\\Mixing}} & \multirow{2.5}{*}{\makecell{Edge\\Smooth.}} & \multicolumn{2}{c}{Accuracy [\%]} \\ - \cmidrule{7-8} - & & & & & & ViT-Ti & ViT-S \\ - \midrule - % TinyImageNet & & & & & & & $66.1\pm0.5$ & $68.3\pm0.7$ \\ - \multicolumn{6}{l}{\textbf{TinyImageNet}} & \gtxt{$66.1\pm0.5$} & \gtxt{$68.3\pm0.7$} \\ - mean & crop$\to$paste & same & - & - & \gtxt{-} & $64.6\pm0.5$ & $70.0\pm0.6$ \\ - range & \gtxt{crop$\to$paste} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $65.5\pm0.4$ & $71.2\pm0.5$ \\ - \midrule - % \gtxt{range} & \gtxt{crop$\to$paste} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $66.4\pm0.6$ & $72.9\pm0.6$ \\ - {range} & {crop$\to$paste} & {same} & {-} & {-} & {-} & $67.5\pm1.2$ & $72.4\pm0.5$ \\ - \gtxt{range} & paste$\to$crop & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $67.1\pm1.2$ & $72.9\pm0.5$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & 1.0 & \gtxt{-} & \gtxt{-} & $67.0\pm1.2$ & $73.0\pm0.3$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & 0.8 & \gtxt{-} & \gtxt{-} & $67.2\pm1.2$ & $72.9\pm0.8$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & 0.6 & \gtxt{-} & \gtxt{-} & $67.5\pm1.0$ & $72.8\pm0.7$ \\ - % \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 2.0$ & \gtxt{-} & $67.2\pm0.4$ & $72.9\pm0.5$ \\ - % \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{-} & $65.9\pm0.5$ & $72.4\pm0.6$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & $p=0.2$ & \gtxt{-} & $69.8\pm0.5$ & $75.0\pm0.3$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & $p=0.33$ & \gtxt{-} & $69.5\pm0.4$ & $75.2\pm1.0$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & $p=0.5$ & \gtxt{-} & $70.3\pm1.0$ & $74.2\pm0.2$ 
\\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & linear & \gtxt{-} & $70.1\pm0.7$ & $74.9\pm0.8$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & reverse lin. & \gtxt{-} & $67.6\pm0.2$ & $73.2\pm0.3$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & cos & \gtxt{-} & $71.3\pm1.0$ & $75.7\pm0.8$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & \gtxt{cos} & $\sigma_\text{max} = 4.0$ & $70.0\pm0.8$ & $75.5\pm0.7$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & orig. & \gtxt{0.8} & \gtxt{cos} & \gtxt{$\sigma_\text{max} = 4.0$} & $67.2\pm0.9$ & $69.9\pm1.0$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & all & \gtxt{0.8} & \gtxt{cos} & \gtxt{$\sigma_\text{max} = 4.0$} & $70.1\pm0.7$ & $77.5\pm0.6$ \\ - \midrule - \multicolumn{6}{l}{\textbf{ImageNet}} & \gtxt{-} & \gtxt{$79.1\pm0.1$} \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & \gtxt{cos} & \gtxt{-} & - & $80.5\pm0.1$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & \gtxt{same} & \gtxt{0.8} & \gtxt{cos} & $\sigma_\text{max} = 4.0$ & - & $80.7\pm0.1$ \\ - \gtxt{range} & \gtxt{paste$\to$crop} & all & \gtxt{0.8} & \gtxt{cos} & \gtxt{$\sigma_\text{max} = 4.0$} & - & $81.4\pm0.1$ \\ - \bottomrule - \end{tabular} - % } -\end{table} - - -\textbf{Prompt.} -% We present the ablation of our main design decisions in \Cref{tab:ablation}. -First, we evaluate the type of prompt used to detect the foreground object. -Here, the \emph{general} prompt, which contains the class and the more general object category, outperforms only having the class name (\emph{specific}). - -\textbf{Inpainting.} Among inpainting models, Attentive Eraser~\cite{Sun2025} produces slightly better results compared to LaMa~\cite{Suvorov2022} ($+0.5$ p.p. on average). -For inpainting examples, see the supplementary material. -% (see the supplementary material for examples). 
-% When comparing the infill models, the GAN-based LaMa \cite{Suvorov2022} gets outperformed by the Attentive Eraser \cite{Sun2025}. - -\textbf{Foreground size} -% We observe that LaMa's often infills unnatural textures compared to Attentive Eraser. -% The size of foreground objects during training has a significant impact on the performance. -% Here, using the greater variability of the \emph{range} strategy increases the performance by $\approx 1\%$ compared to the \emph{mean} strategy. -significantly impacts performance. -Employing a \emph{range} of sizes during recombination, rather than a fixed \emph{mean} size, boosts accuracy by approximately 1 p.p. -This suggests that the added variability is beneficial. - -\textbf{Order of data augmentation.} -% (1) Applying the image crop related augmentations \emph{before} pasting the foreground object and the color-based ones \emph{after} pasting or (2) applying all data augmentations after pasting the foreground object. -% While results are ambiguous, we choose the second strategy, as it improves the performance of ViT-S, although not the one of ViT-Ti. -Applying all augmentations after foreground-background recombination (\emph{paste$\to$crop$\to$color}) improves ViT-S's performance compared to applying crop-related augmentations before pasting (\emph{crop$\to$paste$\to$color}). -ViT-Ti results are ambiguous. - -\textbf{Background pruning.} -When it comes to the backgrounds to use, we test different pruning thresholds ($t_\text{prune}$) to exclude backgrounds with large inpainting. -% and only use backgrounds with an relative size of the infilled region of at most $t_\text{prune}$ (exclusive). -A threshold of $t_\text{prune}=1.0$ means that we use all backgrounds that are not fully infilled. -% We find that the background pruning does not significantly impact the models' performance. -% We choose $t_\text{prune}=0.8$ for the following experiments to exclude backgrounds that are mostly artificial. 
-Varying $t_\text{prune}$ has minimal impact. -We choose $t_\text{prune} = 0.8$ to exclude predominantly artificial backgrounds. - -% One of the most important design decisions is the mixing of the original dataset with \name. -\textbf{Mixing} \schemename-augmented samples with the original ImageNet data proves crucial. -While constant and linear mixing schedules improve performance over no mixing by $2-3$ p.p. compared to only augmented samples, the cosine annealing schedule proves optimal, boosting accuracy by $3-4$ p.p. - -\textbf{Edge smoothing.} -We evaluate the impact of using Gaussian blurring to smooth the edges of the foreground masks. -% Similarly, applying edge smoothing to foreground masks with Gaussian blurring actually hurts performance on Tiny\name, but slightly improves it on \name. -For larger models, this gives us a slight performance boost on the full ImageNet (second to last line in \Cref{tab:ablation-recombine}). - -\textbf{Background strategy.} -Another point is the allowed choice of background image for each foreground object. -% We evaluate three different strategies. -% (1) Picking the background from which that specific foreground was originally extracted. -% The major difference to ImageNet when using this setup is the variability in size and position of the foreground object. -% (2) Picking a background that originally had a foreground object of the same class in it. -% Here, we have backgrounds where objects of this type can typically appear while also creating a wider variety of samples due to pairing each foreground object with different backgrounds each time. -% (3) Picking any background. -% This choice has the largest variety of backgrounds, but the backgrounds are not semantically related to the foreground object anymore. -% We find in \Cref{fig:bg-strategy} that choosing only a foreground's original background is the worst choice. -We compare using the original background, a background from the same class, and any background. 
-These strategies go from low diversity and high shared information content between the foreground and background to high diversity and low shared information content. -For \emph{ViT-Ti}, the latter two strategies perform comparably, while \emph{ViT-S} benefits from the added diversity of using any background. -The same is true when training on the full ImageNet. - - -\begin{table} - \caption{Accuracy of ViT-S on TinyImageNet (TIN) in percent using \schemename with different foreground position distributions by varying the Bates parameter $\eta$. - The best performance is achieved when using the uniform distribution ($\eta=1$) for training.} - \label{tbl:foreground-eta} - \centering - \small - % \resizebox{.9\columnwidth}{!}{ - \begin{tabular}{ccccccc} - \toprule - \multirow{2.5}{*}{\makecell{Bates Parameter \\during training}} & \multirow{2.5}{*}{\makecell{TIN \\w/o \schemename}} & \multicolumn{5}{c}{TIN w/ \schemename} \\ - \cmidrule(l){3-7} - & & $\eta=-3$ & $-2$ & $1/-1$ & $2$ & $3$ \\ - \midrule - Baseline & 68.9 & 60.5 & 60.2 & 60.8 & 62.6 & 63.1 \\ - $\eta=-3$ & 71.3 & 79.3 & 79.5 & 79.1 & 79.3 & 79.1 \\ - $\eta=-2$ & 71.5 & 80.0 & 78.7 & 79.3 & 79.1 & 78.8 \\ - $\eta=1/-1$ & 72.3 & 79.5 & 78.9 & 80.2 & 79.7 & 80.4 \\ - $\eta=2$ & 71.3 & 78.2 & 77.8 & 79.1 & 79.6 & 79.9 \\ - $\eta=3$ & 71.4 & 77.2 & 76.9 & 78.6 & 79.6 & 79.7 \\ - \bottomrule - \end{tabular} - % } -\end{table} - -\textbf{Foreground position.} -Finally, we analyze the foreground object's positioning in the image, using a -generalization of the Bates distribution~\cite{Bates1955} with parameter $\eta \in \Z$ (see \Cref{apdx:bates-distribution}). -The Bates distribution presents an easy way to sample from a bounded domain with just one hyperparameter that controls its concentration. 
-$\eta = 1/-1$ corresponds to the uniform distribution; $\eta > 1$ concentrates the distribution around the center; and for $\eta < -1$, the distribution is concentrated at the borders (see supplementary material for details). -% We utilize an extended Bates distribution to sample the position of the foreground object. -% The Bates distribution with parameter $\eta \geq 1$ is the mean of $\eta$ independent uniformly distributed random variables \cite{Jonhson1995}. -% The larger $\eta$, the more concentrated the distribution is at the center, $\eta < -1$ concentrates the distribution at the edges. -% We extend this concept to $\eta \leq -1$, shifting the distribution away from the center and towards the edges. -When sampling more towards the center of the image, the difficulty of the task is reduced, which reduces performance on TinyImageNet (\Cref{tbl:foreground-eta}). -This is reflected in the performance when evaluating using \schemename with $\eta=2$ and $\eta=3$ compared to $\eta=-1/1$. -We observe a similar reduction for $\eta < -1$. -% This experiment is conducted using the LaMa infill model. - -\begin{table}[t] - \caption{Dataset statistics for TinyImageNet and ImageNet with and without \schemename. For \schemename we report the number of foreground/background pairs.} - \label{tab:dataset-stats} - \centering - % \resizebox{.5\columnwidth}{!}{ - \begin{tabular}{l S[table-format=4.0] S[table-format=7.0] S[table-format=5.0]} - \toprule - Dataset & {Classes} & {\makecell{Training \\ Images}} & {\makecell{Validation \\ Images}} \\ - \midrule - TinyImageNet & 200 & 100000 & 10000 \\ - TinyImageNet + \schemename & 200 & 99404 & 9915 \\ - ImageNet & 1000 & 1281167 & 50000 \\ - ImageNet + \schemename & 1000 & 1274557 & 49751 \\ - \bottomrule - \end{tabular} - % } -\end{table} -After fixing the optimal design parameters in \Cref{tab:ablation-segment,tab:ablation-recombine} (last rows), we run \schemename's segmentation step on the entire ImageNet dataset. 
-\Cref{tab:dataset-stats} shows the resulting dataset statistics. -% The slightly lower number of images in \name is due to \emph{Grounded SAM} returning no or invalid detections for some images. -The slightly reduced image count for \schemename is due to instances where Grounded SAM fails to produce valid segmentation masks. - -\section{Robustness Evaluation on Corner-Cases} -\begin{table}[t] - \centering - \caption{Evaluation on the Corner-Cases dataset. Objects cut from ImageNet evaluation bounding boxes are pasted onto infilled backgrounds. Objects have three sizes: $56$px, $84$px, and $112$px. Objects are placed in the center (CeX) or corner (CoX) of an image with its original background (XxO) or a random background (XxR).} - \label{tab:corner-cases} - \resizebox{\textwidth}{!}{ - \begin{tabular}{lcccccccccccccc} + \resizebox{\textwidth}{!}{\begin{tabular}{cc@{\hskip 0.3in}cc} \toprule - \multirow{4}{*}{Model} & \multirow{4}{*}{w/ \schemename} & \multicolumn{12}{c}{Corner Cases Accuracy [\%]} \\ - \cmidrule(l){3-14} - & & \multicolumn{4}{c}{56} & \multicolumn{4}{c}{84} & \multicolumn{4}{c}{112} \\ - \cmidrule(lr){3-6} \cmidrule(lr){7-10} \cmidrule(l){11-14} - & & CeO & CoO & CeR & CoR & CeO & CoO & CeR & CoR & CeO & CoO & CeR & CoR \\ + LaMa & Att. 
Eraser \\ \midrule - ViT-S & \xmark & $40.5 \pm 2.0$ & $28.6 \pm 0.8$ & $10.3 \pm 0.9$ & $6.4 \pm 0.2$ & $56.8 \pm 1.2$ & $47.6 \pm 1.0$ & $31.3 \pm 0.7$ & $25.5 \pm 0.5$ & $70.9 \pm 0.1$ & $66.9 \pm 1.6$ & $55.2 \pm 0.2$ & $51.1 \pm 0.8$ \\ - ViT-S & \cmark & $49.4 \pm 0.6$ & $39.9 \pm 0.5$ & $22.7 \pm 0.4$ & $17.6 \pm 0.3$ & $66.3 \pm 0.3$ & $60.0 \pm 0.3$ & $47.7 \pm 0.7$ & $43.2 \pm 0.2$ & $76.5 \pm 0.2$ & $74.9 \pm 0.4$ & $66.8 \pm 0.6$ & $64.9 \pm 0.1$ \\ - & & \grntxt{$+8.9$} & \grntxt{$+11.3$} & \grntxt{$+12.4$} & \grntxt{$+11.2$} & \grntxt{$+9.4$} & \grntxt{$+12.4$} & \grntxt{$+16.4$} & \grntxt{$+17.7$} & \grntxt{$+5.6$} & \grntxt{$+8.0$} & \grntxt{$+11.6$} & \grntxt{$+13.7$} \\ - \cmidrule(r){1-2} - ViT-B & \xmark & $37.9 \pm 1.4$ & $29.3 \pm 0.7$ & $14.0 \pm 1.7$ & $11.9 \pm 1.1$ & $51.5 \pm 0.7$ & $45.0 \pm 0.8$ & $27.3 \pm 0.8$ & $26.3 \pm 0.8$ & $64.7 \pm 0.3$ & $61.8 \pm 0.6$ & $46.3 \pm 0.3$ & $45.5 \pm 0.5$ \\ - ViT-B & \cmark & $50.4 \pm 0.8$ & $42.4 \pm 0.6$ & $26.5 \pm 0.6$ & $22.8 \pm 0.8$ & $65.3 \pm 0.9$ & $60.9 \pm 0.6$ & $47.6 \pm 0.3$ & $45.6 \pm 0.1$ & $75.7 \pm 0.6$ & $74.0 \pm 0.6$ & $65.7 \pm 0.7$ & $64.3 \pm 0.5$ \\ - & & \grntxt{$+12.5$} & \grntxt{$+13.1$} & \grntxt{$+12.4$} & \grntxt{$+10.9$} & \grntxt{$+13.8$} & \grntxt{$+15.9$} & \grntxt{$+20.2$} & \grntxt{$+19.3$} & \grntxt{$+11.0$} & \grntxt{$+12.2$} & \grntxt{$+19.3$} & \grntxt{$+18.8$} \\ - \cmidrule(r){1-2} - ViT-L & \xmark & $32.8 \pm 1.6$ & $24.8 \pm 1.1$ & $14.8 \pm 2.2$ & $9.7 \pm 1.2$ & $42.7 \pm 0.9$ & $33.8 \pm 0.7$ & $21.3 \pm 1.5$ & $16.3 \pm 1.0$ & $55.7 \pm 0.7$ & $49.7 \pm 0.7$ & $36.0 \pm 1.3$ & $32.5 \pm 0.9$ \\ - ViT-L & \cmark & $45.7 \pm 0.6$ & $39.0 \pm 0.5$ & $25.6 \pm 0.6$ & $24.1 \pm 0.8$ & $59.1 \pm 0.3$ & $55.2 \pm 0.4$ & $41.9 \pm 1.0$ & $42.7 \pm 0.6$ & $71.4 \pm 0.3$ & $69.0 \pm 0.4$ & $60.7 \pm 1.0$ & $60.3 \pm 0.8$ \\ - & & \grntxt{$+12.9$} & \grntxt{$+14.2$} & \grntxt{$+10.8$} & \grntxt{$+14.4$} & \grntxt{$+16.3$} & \grntxt{$+21.5$} & 
\grntxt{$+20.5$} & \grntxt{$+26.4$} & \grntxt{$+15.7$} & \grntxt{$+19.3$} & \grntxt{$+24.7$} & \grntxt{$+27.8$} \\ - \cmidrule(r){1-2} - DeiT-S & \xmark & $46.3 \pm 0.7$ & $38.1 \pm 0.3$ & $13.1 \pm 0.5$ & $9.9 \pm 0.1$ & $62.8 \pm 0.4$ & $58.2 \pm 0.2$ & $37.1 \pm 0.7$ & $34.3 \pm 0.5$ & $73.3 \pm 0.2$ & $73.9 \pm 0.4$ & $58.8 \pm 0.4$ & $59.4 \pm 0.6$ \\ - DeiT-S & \cmark & $44.7 \pm 1.4$ & $37.1 \pm 1.4$ & $15.6 \pm 1.3$ & $12.1 \pm 0.9$ & $62.1 \pm 1.2$ & $57.8 \pm 1.1$ & $41.6 \pm 1.1$ & $37.9 \pm 1.2$ & $73.2 \pm 0.7$ & $73.3 \pm 0.4$ & $62.3 \pm 0.7$ & $61.4 \pm 0.9$ \\ - & & \rdtxt{$-1.6$} & \rdtxt{$-1.1$} & \grntxt{$+2.4$} & \grntxt{$+2.2$} & \rdtxt{$-0.7$} & \rdtxt{$-0.4$} & \grntxt{$+4.4$} & \grntxt{$+3.5$} & \gtxt{$-0.1$} & \rdtxt{$-0.6$} & \grntxt{$+3.5$} & \grntxt{$+2.0$} \\ - \cmidrule(r){1-2} - DeiT-B & \xmark & $48.1 \pm 0.9$ & $40.4 \pm 2.0$ & $15.8 \pm 0.2$ & $12.9 \pm 0.6$ & $64.0 \pm 0.9$ & $59.5 \pm 1.3$ & $39.0 \pm 0.9$ & $37.2 \pm 0.8$ & $74.1 \pm 0.7$ & $74.8 \pm 0.7$ & $59.1 \pm 0.8$ & $60.0 \pm 0.6$ \\ - DeiT-B & \cmark & $50.7 \pm 0.1$ & $44.0 \pm 0.4$ & $19.3 \pm 0.2$ & $16.3 \pm 0.2$ & $66.0 \pm 0.2$ & $62.0 \pm 0.3$ & $43.4 \pm 0.3$ & $40.9 \pm 0.4$ & $75.4 \pm 0.1$ & $76.4 \pm 0.3$ & $62.8 \pm 0.2$ & $63.9 \pm 0.2$ \\ - & & \grntxt{$+2.6$} & \grntxt{$+3.6$} & \grntxt{$+3.5$} & \grntxt{$+3.5$} & \grntxt{$+2.0$} & \grntxt{$+2.5$} & \grntxt{$+4.4$} & \grntxt{$+3.8$} & \grntxt{$+1.3$} & \grntxt{$+1.6$} & \grntxt{$+3.8$} & \grntxt{$+3.9$} \\ - \cmidrule(r){1-2} - DeiT-L & \xmark & $39.2 \pm 2.6$ & $32.6 \pm 1.5$ & $10.5 \pm 2.8$ & $9.1 \pm 2.3$ & $55.7 \pm 2.5$ & $51.0 \pm 2.7$ & $30.3 \pm 4.0$ & $29.5 \pm 3.9$ & $68.5 \pm 2.1$ & $68.1 \pm 1.7$ & $51.7 \pm 3.1$ & $52.1 \pm 2.7$ \\ - DeiT-L & \cmark & $51.9 \pm 0.7$ & $46.6 \pm 0.5$ & $21.5 \pm 1.3$ & $19.0 \pm 1.2$ & $66.6 \pm 0.6$ & $64.1 \pm 0.7$ & $45.3 \pm 1.3$ & $43.6 \pm 1.1$ & $75.6 \pm 0.4$ & $77.3 \pm 0.4$ & $63.8 \pm 0.8$ & $65.4 \pm 0.6$ \\ - & & \grntxt{$+12.8$} & 
\grntxt{$+14.0$} & \grntxt{$+11.0$} & \grntxt{$+9.9$} & \grntxt{$+11.0$} & \grntxt{$+13.1$} & \grntxt{$+15.0$} & \grntxt{$+14.1$} & \grntxt{$+7.1$} & \grntxt{$+9.2$} & \grntxt{$+12.1$} & \grntxt{$+13.4$} \\ - \cmidrule(r){1-2} - Swin-Ti & \xmark & $41.2 \pm 1.8$ & $32.5 \pm 0.3$ & $17.4 \pm 2.6$ & $12.2 \pm 0.2$ & $60.0 \pm 1.6$ & $51.4 \pm 0.2$ & $39.6 \pm 2.6$ & $34.8 \pm 0.9$ & $71.7 \pm 0.8$ & $66.1 \pm 0.7$ & $58.2 \pm 1.1$ & $53.6 \pm 1.2$ \\ - Swin-Ti & \cmark & $49.8 \pm 0.6$ & $42.8 \pm 0.7$ & $24.2 \pm 0.7$ & $21.4 \pm 0.9$ & $66.4 \pm 0.6$ & $60.5 \pm 0.2$ & $47.8 \pm 0.5$ & $44.6 \pm 0.5$ & $76.0 \pm 0.3$ & $72.7 \pm 0.2$ & $65.7 \pm 0.5$ & $62.1 \pm 0.3$ \\ - & & \grntxt{$+8.5$} & \grntxt{$+10.3$} & \grntxt{$+6.8$} & \grntxt{$+9.2$} & \grntxt{$+6.4$} & \grntxt{$+9.2$} & \grntxt{$+8.2$} & \grntxt{$+9.8$} & \grntxt{$+4.3$} & \grntxt{$+6.5$} & \grntxt{$+7.5$} & \grntxt{$+8.5$} \\ - \cmidrule(r){1-2} - Swin-S & \xmark & $41.3 \pm 0.6$ & $33.0 \pm 0.1$ & $18.4 \pm 0.7$ & $13.3 \pm 0.5$ & $59.2 \pm 0.1$ & $51.2 \pm 0.5$ & $39.1 \pm 0.2$ & $35.9 \pm 0.3$ & $71.5 \pm 0.2$ & $65.6 \pm 0.1$ & $56.8 \pm 0.5$ & $53.2 \pm 0.2$ \\ - Swin-S & \cmark & $48.6 \pm 0.7$ & $39.9 \pm 1.6$ & $22.2 \pm 0.9$ & $16.8 \pm 1.1$ & $64.4 \pm 0.9$ & $57.9 \pm 1.5$ & $43.8 \pm 1.1$ & $42.3 \pm 1.0$ & $75.7 \pm 0.2$ & $71.8 \pm 0.8$ & $63.2 \pm 0.4$ & $60.6 \pm 0.6$ \\ - & & \grntxt{$+7.3$} & \grntxt{$+7.0$} & \grntxt{$+3.8$} & \grntxt{$+3.6$} & \grntxt{$+5.1$} & \grntxt{$+6.7$} & \grntxt{$+4.7$} & \grntxt{$+6.4$} & \grntxt{$+4.2$} & \grntxt{$+6.2$} & \grntxt{$+6.4$} & \grntxt{$+7.4$} \\ - \cmidrule(r){1-2} - ResNet50 & \xmark & $48.6 \pm 0.6$ & $35.1 \pm 0.4$ & $23.0 \pm 0.7$ & $13.0 \pm 0.3$ & $65.8 \pm 0.4$ & $58.2 \pm 0.3$ & $44.4 \pm 0.6$ & $38.1 \pm 0.5$ & $73.2 \pm 0.2$ & $69.9 \pm 0.2$ & $56.9 \pm 0.1$ & $56.9 \pm 0.1$ \\ - ResNet50 & \cmark & $52.3 \pm 0.6$ & $39.5 \pm 0.1$ & $27.4 \pm 0.6$ & $17.6 \pm 0.1$ & $68.5 \pm 0.3$ & $61.9 \pm 0.1$ & $48.5 \pm 0.4$ & $43.7 \pm 0.3$ 
& $75.2 \pm 0.1$ & $72.4 \pm 0.1$ & $61.7 \pm 0.3$ & $61.7 \pm 0.3$ \\ - & & \grntxt{$+3.7$} & \grntxt{$+4.4$} & \grntxt{$+4.4$} & \grntxt{$+4.6$} & \grntxt{$+2.8$} & \grntxt{$+3.8$} & \grntxt{$+4.2$} & \grntxt{$+5.5$} & \grntxt{$+2.0$} & \grntxt{$+2.5$} & \grntxt{$+4.8$} & \grntxt{$+4.8$} \\ - \cmidrule(r){1-2} - ResNet101 & \xmark & $47.8 \pm 0.7$ & $37.2 \pm 0.5$ & $20.4 \pm 1.2$ & $14.2 \pm 0.3$ & $64.9 \pm 0.2$ & $58.6 \pm 0.5$ & $41.1 \pm 0.5$ & $38.3 \pm 0.7$ & $73.6 \pm 0.3$ & $70.5 \pm 0.3$ & $56.2 \pm 0.4$ & $57.0 \pm 0.5$ \\ - ResNet101 & \cmark & $52.3 \pm 0.1$ & $42.2 \pm 0.1$ & $24.7 \pm 0.1$ & $19.2 \pm 0.4$ & $68.8 \pm 0.6$ & $62.9 \pm 0.3$ & $46.4 \pm 1.5$ & $44.3 \pm 0.9$ & $76.0 \pm 0.4$ & $73.7 \pm 0.3$ & $61.0 \pm 1.2$ & $62.6 \pm 0.5$ \\ - & & \grntxt{$+4.4$} & \grntxt{$+5.0$} & \grntxt{$+4.3$} & \grntxt{$+5.0$} & \grntxt{$+3.9$} & \grntxt{$+4.3$} & \grntxt{$+5.3$} & \grntxt{$+6.0$} & \grntxt{$+2.4$} & \grntxt{$+3.2$} & \grntxt{$+4.7$} & \grntxt{$+5.7$} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} & + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} & + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & 
\includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\ + \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\ \bottomrule - \end{tabular} - } -\end{table} - -\Cref{tab:corner-cases} reports accuracy on the corner-cases dataset~\cite{Fatima2025} for models trained with and without \schemename. -The dataset is constructed by pasting objects cropped by their full bounding boxes (which are available for the ImageNet validation set) onto 224$\times$224 infilled backgrounds. -The dataset has three factors: foreground size (56, 84, 112 pixels), spatial position (center, CeX, vs.\ corner, CoX), and background type (original image background, XxO, vs.\ a random background, XxR), yielding $3 \times 2 \times 2$ controlled configurations per model. 
- -Across all architectures, training with \schemename consistently improves robustness to these composition shifts. -For ViT-S/B/L, gains range from roughly $+8$ to over $+27$ percentage points, with the largest improvements occurring in the most challenging settings with foregrounds placed on random backgrounds (e.g., CeR and CoR). -Swin and ResNet models also benefit across all configurations, with increases typically between $+3$ and $+10$ points. -DeiT-S shows small drops on some same-background center cases (CeO/CoO), but still improves notably on random-background conditions (XxR), while DeiT-B/L gain across nearly all settings. - -Three trends are apparent. -First, all baselines perform substantially worse when moving from original to random backgrounds and from centered to corner placements, indicating strong background and center biases. -Second, \schemename reduces this sensitivity: the absolute gap between center and corner, and between original and random backgrounds, shrinks for almost all models and sizes. -Third, the relative improvements are especially pronounced for smaller objects and off-center placements, suggesting that \schemename makes models more foreground-focused and less reliant on canonical object scale and position. - - -\section{\schemename Segmentation Samples} -\begin{figure}[t!] 
- \centering - \begin{subfigure}{.49\textwidth} - - \includegraphics[width=\textwidth]{img/masked_image_examples_train.pdf} - \end{subfigure} - \hfill - \begin{subfigure}{.49\textwidth} - - \includegraphics[width=\textwidth]{img/masked_image_examples.pdf} - \end{subfigure} - \caption{ImageNet training samples (left) and validation samples (right) of our segmentation masks with annotated bounding boxes.} - \label{fig:mask-examples} -\end{figure} -We show examples of the automatically generated segmentation masks for a diverse subset of object categories (``ant,'' ``busby,'' ``bell cote,'' ``pickelhaube,'' ``snorkel,'' ``stove,'' ``tennis ball,'' and ``volleyball''). -Note that ``busby,'' ``bell cote,'' ``pickelhaube,'' and ``snorkel'' are the four classes with the \textbf{worst} mean box precision and box-to-box IoU on the validation set. -\Cref{fig:mask-examples} (right) illustrates masks from the evaluation split, while \Cref{fig:mask-examples} (left) shows examples from the training split. -Across both sets, the masks accurately isolate foreground objects with clean boundaries, despite large variations in object scale, shape, and appearance, supporting their use for background removal and resampling in our training pipeline. -We find that the main failure cases are: -(\textit{i}) When the ground-truth annotation corresponds to only a part of an object, the predicted mask often expands to cover the entire object rather than the annotated region. -See for example ``busby'' or ``bell cote''. -(\textit{ii}) In images containing multiple instances, some objects may be missed, resulting in incomplete foreground coverage. -This is especially visible for ``busby'' and ``pickelhaube''. -However, note that especially for ``pickelhaube'' the training distribution is noticeably different from the validation distribution, showing many images with just the head instead of groups of people wearing it. 
-(\textit{iii}) In rare cases, the predicted mask degenerates and covers nearly the entire image, effectively eliminating the background. -This happens in $<10\%$ of all training images, and we do not use the resulting backgrounds for recombination (see \Cref{apdx:infill-ratio}). - -\section{\schemename Sample Images} -\begin{table*}[t!] - \centering - \caption{Sample Images from using \schemename on ImageNet.} - \label{tbl:example-images} - \resizebox{.93\textwidth}{!}{ - \begin{tabular}{ccccc} - \toprule - Class & \makecell{Original \\Image} & \makecell{Extracted \\Foreground} & \makecell{Infilled \\Background} & \schemename's Recombinations \\ - \midrule - \makecell{n01531178 \\Goldfinch} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_v0_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_v0_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v11.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v13.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v14.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v18.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01531178_4963_recombined_v26.JPEG} \\ - \makecell{n01818515 \\Macaw} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507.JPEG} & \includegraphics[max 
width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_v1_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_v1_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v0.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v10.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v12.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v16.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_recombined_v28.JPEG} \\ - \makecell{n01943899 \\Conch} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_recombined_v0.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_recombined_v1.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_recombined_v10.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_recombined_v27.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, 
valign=c]{img/appendix_examples/n01943899_20070_recombined_v18.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_recombined_v15.JPEG} \\ - \makecell{n01986214 \\ Hermit Crab} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v12.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v18.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v21.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v9.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01986214_4117_recombined_v8.JPEG} \\ - \makecell{n02190166 \\Fly} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v1.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v18.JPEG} 
\includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v23.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v7.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02190166_1208_recombined_v9.JPEG} \\ - \makecell{n02229544 \\Cricket} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v1.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v17.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v18.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v19.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v25.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_recombined_v5.JPEG} \\ - \makecell{n02443484 \\Black-Footed \\Ferret} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, 
valign=c]{img/appendix_examples/n02443484_5430_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v16.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v24.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v27.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v3.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02443484_5430_recombined_v4.JPEG} \\ - \makecell{n03201208 \\Dining Table} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v0.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v11.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v15.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v19.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03201208_21000_recombined_v21.JPEG} \\ - 
\makecell{n03424325 \\Gasmask} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v10.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v11.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v12.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v13.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v15.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03424325_21435_recombined_v26.JPEG} \\ - \makecell{n03642806 \\Laptop} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_recombined_v11.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_recombined_v12.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_recombined_v15.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, 
valign=c]{img/appendix_examples/n03642806_3615_recombined_v17.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_recombined_v25.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_recombined_v29.JPEG} \\ - \makecell{n04141975 \\Scale} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v10.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v13.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v14.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v23.JPEG}\includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n04141975_11426_recombined_v25.JPEG} \\ - \makecell{n07714990 \\Broccoli} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v1.JPEG} 
\includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v13.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v15.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v17.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v27.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07714990_7596_recombined_v29.JPEG} \\ - \makecell{n07749582 \\Lemon} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v1.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v15.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v17.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v20.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v24.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n07749582_17601_recombined_v26.JPEG} \\ - \makecell{n09332890 \\Lakeside} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898.JPEG} & \includegraphics[max width=.1\columnwidth, max 
height=2cm, valign=c]{img/appendix_examples/n09332890_27898_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_bg.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v0.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v12.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v13.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v14.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v18.JPEG} \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n09332890_27898_recombined_v20.JPEG} \\ - \bottomrule - \end{tabular} - } -\end{table*} -We show some example images of \schemename's recombinations for 14 random classes of ImageNet \cite{Deng2009} in \Cref{tbl:example-images}. -% \schemename visibly varies the background, size, and position of the objects. -The recombined samples display substantial visual diversity, with each extracted foreground appearing in multiple, clearly different background contexts. -Foreground objects remain sharp and well‑preserved across recombinations, while backgrounds vary in texture, color, and scene type. -Images show a broad range of spatial placements and scales for the same object, resulting in noticeably different overall layouts. - - -\FloatBarrier -\section{Infill Model Comparison} -\begin{table*}[h!] - \centering + \end{tabular}} \caption{Example infills of LaMa and Attentive Eraser.} - \label{tab:infill-examples} - \resizebox{.9\textwidth}{!}{ - \begin{tabular}{cc@{\hskip 0.3in}cc} - \toprule - LaMa & Att. 
Eraser \\ - \midrule - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00000090.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00000090.JPEG} & - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00000890.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00000890.JPEG} \\ - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00002106.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00002106.JPEG} & - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00005045.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00005045.JPEG} \\ - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00007437.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00008542.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00008542.JPEG} \\ - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00009674.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00002743.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00002743.JPEG} \\ - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00003097.JPEG} & \includegraphics[width=.23\columnwidth, 
valign=c]{img/lama_infills/comp/ILSVRC2012_val_00011629.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00011629.JPEG} \\ - \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00000547.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/lama_infills/comp/ILSVRC2012_val_00025256.JPEG} & \includegraphics[width=.23\columnwidth, valign=c]{img/att_err_infills/comp/ILSVRC2012_val_00025256.JPEG} \\ - \bottomrule - \end{tabular} - } -\end{table*} -We visualize example infilled images for both LaMa \cite{Suvorov2022} and Attentive Eraser \cite{Sun2025} in \Cref{tab:infill-examples}. -The side‑by‑side examples show that both methods generally produce visually consistent infills, with many pairs appearing extremely similar at a glance. -We qualitatively find that Attentive Eraser yields slightly sharper textures or more coherent local structure, while LaMa sometimes produces smoother or more homogenized regions. -Across the table, fine‑detail areas such as foliage, bark, and ground textures reveal the most noticeable differences between the two methods. -% We qualitatively find that while LaMa often leaves repeated textures of blurry spots where the object was erased, Attentive Eraser produces slightly cleaner and more coherent infills of the background. +\end{table} -\FloatBarrier -\newpage -\section{Image Infill Ratio} -\label{apdx:infill-ratio} -\begin{table*}[h!] +\section{Images with High Infill Ratio} +\begin{table}[h] \centering + \begin{tabular}{ccc} + \toprule + Infill Ratio & LaMa & Att. 
Eraser \\ + \midrule + 93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\ \\ + 95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} \\ \\ + 83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\ \\ + 88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} + \end{tabular} \caption{Example infills with a large relative foreground area size that is infilled (infill ratio).} \label{tbl:high-rat} - \resizebox{.8\textwidth}{!}{ - \begin{tabular}{ccc} - \toprule - Infill Ratio & LaMa & Att. 
Eraser \\ - \midrule - 83.7 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00022522.JPEG}} \\ \\ - 88.2 & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} & \raisebox{-50pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00026530.JPEG}} \\ \\ - 93.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00003735.JPEG}} \\ \\ - 95.7 & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/lama_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} & \raisebox{-60pt}{\includegraphics[width=.3\columnwidth]{img/att_err_infills/high_rat/ILSVRC2012_val_00012151.JPEG}} - \end{tabular}} -\end{table*} - -\begin{figure} - \centering - \includegraphics[width=.9\textwidth]{img/infill_distr.pdf} - \caption{We plot the distribution of the relative size of the detected foreground object that is infilled in our Segmentation step of ImageNet. - While most images contain objects of smaller size, there is a peak where Grounded~SAM~\cite{Ren2024} detects almost the whole image as the foreground object. For examples of such large infills, see \Cref{tbl:high-rat}. - } - \label{fig:infill-distr} -\end{figure} - -\Cref{tbl:high-rat} shows infills for images where Grounded SAM \cite{Ren2024} marks a high percentile of the image as the foreground object (Infill Ratio), that has to be erased by the infill models. -The examples show that when the infilled region becomes large, both methods begin to lose coherent global structure, with outputs dominated by repetitive or texture‑like patterns. 
-LaMa tends to produce smoother, more uniform surfaces, as we saw in \Cref{tab:infill-examples}, while Attentive Eraser often generates denser, more regular texture patterns. -Across the rows, increasing infill ratio corresponds to increasingly homogeneous results, with only faint hints of original scene cues remaining. -% While LaMa tends to fill those spots with mostly black or gray and textures similar to what we saw in \Cref{tab:infill-examples}, Attentive Eraser tends to create novel patterns by copying what is left of the background all over the rest of the image. -% We filter out such mostly infilled background using our background pruning hyperparameter $t_\text{prune} = 0.8$. -\Cref{fig:infill-distr} plots the distribution of infill ratios in \schemename. -While there is a smooth curve of the number of detections decreasing with the infill ratio until $\approx 90\%$, there is an additional peak at $\approx 100\%$ infill ratio. -We hypothesize that this peak is made up of failure cases of Grounded~SAM. - -We filter out all backgrounds that have an infill ratio larger than our pruning threshold $t_\text{prune} = 0.8$, which translates to $10\%$ of backgrounds. +\end{table} diff --git a/sec/conclusion.tex b/sec/conclusion.tex index a0df377..27e501c 100644 --- a/sec/conclusion.tex +++ b/sec/conclusion.tex @@ -1,21 +1,11 @@ % !TeX root = ../main.tex -\section{Conclusion \& Future Work} +\section{Discussion \& Conclusion} \label{sec:conclusion} -% We introduce \schemename, a novel data augmentation scheme that facilitates improved Transformer training for image classification. -% By explicitly separating and recombining foreground objects and backgrounds, \schemename enables controlled data augmentation beyond existing image compositions, leading to significant performance gains on ImageNet and downstream fine-grained classification tasks.
-% Furthermore, \schemename provides a powerful framework for analyzing model behavior and quantifying biases, including background robustness, foreground focus, center bias, and size bias. -% Our experiments demonstrate that training using \schemename not only boosts accuracy but also significantly reduces these biases, resulting in more robust and generalizable models. -% In the future, we see \schemename be also applied to other datasets and tasks, like video recognition or segmentation. -% \schemename's ability to both improve performance and provide insights into model behavior makes it a valuable tool for advancing CV research and developing more reliable AI systems. - - -We introduced \schemename, a controlled composition augmentation scheme that factorizes images into foreground objects and backgrounds and recombines them with explicit control over background identity, object position, and object scale. -% Empirically, \schemename consistently improves clean accuracy and robustness across architectures and scales. -Across diverse architectures, training with \schemename on top of standard strong augmentations yields substantial gains on ImageNet (up to $+6$ p.p.) and fine-grained downstream tasks (up to $+7.3$ p.p.), and consistently improves robustness on well-recognized benchmarks (up to $+19$ p.p.). -\schemename's compositional controls additionally provide a framework for analyzing model behavior and quantify biases, including background robustness, foreground focus, center bias, and size bias. -This dual role of \schemename as both a training mechanism and an evaluation tool highlights the value of explicit compositional factorization in understanding and improving image classifiers. -In future work, we aim to extend controlled composition beyond classification to multi-object and dense prediction settings, including detection, segmentation, and video recognition. 
-% By coupling performance gains with interpretable, controllable evaluations, \schemename offers a practical data-centric tool for advancing robust and reliable computer vision systems. -More generally, we believe that designing augmentations around explicitly controllable and interpretable generative setups is a promising direction for building robust and reliable vision systems. \ No newline at end of file +We introduce \schemename, a novel data augmentation scheme that facilitates improved Transformer training for image classification. +By explicitly separating and recombining foreground objects and backgrounds, \schemename enables controlled data augmentation, leading to significant performance gains on ImageNet and downstream fine-grained classification tasks. +Furthermore, \schemename provides a powerful framework for analyzing model behavior and quantifying biases, including background robustness, foreground focus, center bias, and size bias. +Our experiments demonstrate that training on \name, the instantiation of \schemename on ImageNet, not only boosts accuracy but also significantly reduces these biases, resulting in more robust and generalizable models. +In the future, we see \schemename also being applied to other datasets and tasks, such as video recognition or segmentation. +\schemename's ability to both improve performance and provide insights into model behavior makes it a valuable tool for advancing CV research and developing more reliable AI systems. \ No newline at end of file diff --git a/sec/experiments.tex b/sec/experiments.tex index 3908008..faf274e 100644 --- a/sec/experiments.tex +++ b/sec/experiments.tex @@ -1,525 +1,417 @@ % !TeX root = ../main.tex - -\begin{figure}[t] - \begin{minipage}[t]{.62\textwidth} - \captionof{table}{ImageNet results when training ViTs with different data augmentation pipelines.
- \schemename consistently improves performance in low- and mid-augmentation regimes and remains complementary to strong augmentation pipelines, with larger gains for larger models. - } - \label{tab:imagenet-pipelines} - \centering - \resizebox{\textwidth}{!}{ - \begin{tabular}{lccccc} - \toprule - \multirow{2.5}{*}{Augmentation} & \multirow{2.5}{*}{MixUp} & \multirow{2.5}{*}{CutMix} & \multicolumn{3}{c}{Accuracy [\%] using} \\ - \cmidrule(l){4-6} - & & & ViT-S & ViT-B & ViT-L \\ - \midrule - Basic & \xmark & \xmark & $71.9 \pm 0.1$ & $69.5 \pm 0.2$ & $68.3 \pm 0.4$ \\ - Basic + \schemename & \xmark & \xmark & $75.7 \pm 0.2$ & $75.5 \pm 0.6$ & $73.1 \pm 1.7$ \\ - & & & \grntxt{$+3.8$} & \grntxt{$+6.0$} & \grntxt{$+4.8$} \\ - \midrule - RandAugment & \xmark & \xmark & $76.3 \pm 0.5$ & $75.5 \pm 0.2$ & $74.7 \pm 0.4$ \\ - RandAugment + \schemename & \xmark & \xmark & $78.0 \pm 0.1$ & $77.8 \pm 0.1$ & $78.0 \pm 0.6$ \\ - & & & \grntxt{$+1.7$} & \grntxt{$+2.3$} & \grntxt{$+3.3$} \\ - \midrule - Basic & \cmark & \cmark & $79.8 \pm 0.3$ & $78.6 \pm 0.4$ & $78.1 \pm 1.6$ \\ - Basic + \schemename & \cmark & \cmark & $79.8 \pm 0.3$ & $81.6 \pm 0.5$ & $81.0 \pm 0.4$ \\ - & & & \gtxt{$\pm 0.0$} & \grntxt{$+3.0$} & \grntxt{$+2.9$} \\ - \midrule - 3-Augment & \xmark & \cmark & $79.1 \pm 0.1$ & $77.6 \pm 0.2$ & $75.3 \pm 0.4$ \\ - 3-Augment + \schemename & \xmark & \cmark & $81.4 \pm 0.1$ & $81.1 \pm 0.4$ & $79.8 \pm 0.1$ \\ - & & & \grntxt{$+2.3$} & \grntxt{$+3.5$} & \grntxt{$+4.5$} \\ - \midrule - RandAugment & \cmark & \cmark & $80.1 \pm 0.1$ & $81.9 \pm 0.3$ & $79.3 \pm 2.3$ \\ - RandAugment + \schemename & \cmark & \cmark & $80.0 \pm 0.3$ & $81.9 \pm 0.2$ & $82.4 \pm 0.1$ \\ - & & & \gtxt{$-0.1$} & \gtxt{$\pm 0.0$} & \grntxt{$+3.1$} \\ - \bottomrule - \end{tabular} - } - \end{minipage} - \hfill - \begin{minipage}[t]{.37\textwidth} - \captionof{table}{ImageNet results of models trained on ImageNet with and without \schemename. 
\schemename improves the performance of most models, with a larger gain for larger models.} - \label{tab:imagenet-results} - \resizebox{\textwidth}{!}{\begin{tabular}{lccc} - \toprule - \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Accuracy [\%]}} & \multirow{2.5}{*}{Delta} \\ - \cmidrule(lr){2-3} - & w/o \schemename & w/ \schemename & \\ - \midrule - ViT-S & $79.1\pm0.1$ & $81.4\pm0.1$ & \grntxt{$+2.3$} \\ - ViT-B & $77.6\pm0.2$ & $81.1\pm0.4$ & \grntxt{$+3.5$} \\ - ViT-L & $75.3\pm0.4$ & $79.8\pm0.1$ & \grntxt{$+4.5$} \\ - \midrule - DeiT-S & $80.1 \pm 0.1$ & $80.0\pm0.3$ & \gtxt{$-0.1$} \\ - DeiT-B & $81.9 \pm 0.3$ & $81.9\pm0.2$ & \gtxt{$\pm0.0$} \\ - DeiT-L & $79.3\pm2.3$ & $82.4\pm0.1$ & \grntxt{$+3.1$} \\ - \midrule - Swin-Ti & $77.9\pm0.2$ & $79.7\pm0.1$ & \grntxt{$+1.8$} \\ - Swin-S & $79.4\pm0.1$ & $80.6\pm0.1$ & \grntxt{$+1.2$} \\ - \midrule - ResNet-50 & $78.3\pm0.1$ & $78.8\pm0.1$ & \grntxt{$+0.5$} \\ - ResNet-101 & $79.4\pm0.1$ & $80.4\pm0.1$ & \grntxt{$+1.0$} \\ - \bottomrule - \end{tabular}} - \end{minipage} -\end{figure} - -% \begin{table}[t] -% \caption{ImageNet results of models trained on ImageNet with and without \schemename. 
\schemename improves the performance of most models, with a larger gain for larger models.} -% \label{tab:imagenet-results} -% \centering -% \begin{subfigure}{.41\textwidth} -% \resizebox{\textwidth}{!}{\begin{tabular}{lccc} -% \toprule -% \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{ImageNet Accuracy [\%]}} & \multirow{2.5}{*}{Delta} \\ -% \cmidrule(lr){2-3} -% & w/o \schemename & w/ \schemename & \\ -% \midrule -% ViT-S & $79.1\pm0.1$ & $81.4\pm0.1$ & \grntxt{$+2.3$} \\ -% ViT-B & $77.6\pm0.2$ & $81.1\pm0.4$ & \grntxt{$+3.5$} \\ -% ViT-L & $75.3\pm0.4$ & $79.8\pm0.1$ & \grntxt{$+4.5$} \\ -% \midrule -% Swin-Ti & $77.9\pm0.2$ & $79.7\pm0.1$ & \grntxt{$+1.8$} \\ -% Swin-S & $79.4\pm0.1$ & $80.6\pm0.1$ & \grntxt{$+1.2$} \\ -% \bottomrule -% \end{tabular}} -% \end{subfigure} -% \hspace{5pt} -% \begin{subfigure}{.448\textwidth} -% \resizebox{\textwidth}{!}{\begin{tabular}{lccc} -% \toprule -% \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{ImageNet Accuracy [\%]}} & \multirow{2.5}{*}{Delta} \\ -% \cmidrule(lr){2-3} -% & w/o \schemename & w/ \schemename & \\ -% \midrule -% DeiT-S & $80.1 \pm 0.1$ & $80.0\pm0.3$ & \gtxt{$-0.1$} \\ -% DeiT-B & $81.9 \pm 0.3$ & $81.9\pm0.2$ & \gtxt{$\pm0.0$} \\ -% DeiT-L & $79.3\pm2.3$ & $82.4\pm0.1$ & \grntxt{$+3.1$} \\ -% \midrule -% ResNet-50 & $78.3\pm0.1$ & $78.8\pm0.1$ & \grntxt{$+0.5$} \\ -% ResNet-101 & $79.4\pm0.1$ & $80.4\pm0.1$ & \grntxt{$+1.0$} \\ -% \bottomrule -% \end{tabular}} -% \end{subfigure} -% \end{table} - \section{Experiments} \label{sec:experiments} -We conduct a comprehensive suit of experiments to validate the effectiveness of our approach, -comparing ImageNet training with and without \schemename for 10 different models and 5 data augmentation pipelines. -Furthermore, we assess the impact of using \schemename for pretraining on multiple fine-grained downstream datasets. -Finally, we exploit \schemename's control over the image distribution to quantify model behaviors and biases. 
-We always report the mean and standard deviation of three independent training runs. +% \begin{itemize} +% \item [1.] Training on RecombiNet +% \item ImageNet results (large) +% \item Ablation (TinyImageNet): Foreground position +% \item Ablation (TinyImageNet): Which background (or part of other ablation table?) +% \item Ablation (TinyImageNet+ImageNet For edge blur): Design decisions: Which infill model, pruning threshold, p$\to$t /t$\to$p, foreground rotation range (?), edge blur, original image probability/schedule, Foreground size +% \item With other Data Augmentations +% \item [2.] More evaluation metrics +% \item Background accuracy (how to frame/sell? Background bias?) / Background robustness (= foreground with all background)? +% \item Foreground focus +% \item Position bias +% \item Size bias +% \end{itemize} + +We conduct a comprehensive suite of experiments to validate the effectiveness of our approach. +We compare training on \name, the ImageNet instantiation of \schemename, to training on ImageNet for 7 different models. +Furthermore, we assess the impact of using \name for pretraining on multiple fine-grained downstream datasets. +Additionally, we use \schemename's control over the image distribution to quantify some model behaviors and biases. + +\subsection{Design Choices of \schemename} +\label{sec:ablation} + +We start by ablating the design choices of \schemename. +For this, we resort to TinyImageNet \cite{Le2015}, a subset of ImageNet containing 200 categories with 500 images each, and Tiny\name, a version of \schemename derived from TinyImageNet. +\Cref{tab:ablation} presents the results of these ablations. + +\begin{table*}[t] + \centering + \resizebox{\textwidth}{!}{ + \begin{tabular}{lccccccccccccc} + \toprule + \multirow{2}{*}{Dataset} & Detect. & Infill & FG. & Augmentation & BG. & BG.
& edge & original & \multicolumn{2}{c}{TinyImageNet Accuracy} \\ + & prompt & Model & size & Order & strategy & pruning & smoothing & image mixing & ViT-Ti [\%] & ViT-S [\%] \\ + \cmidrule(r){1-1} \cmidrule(lr){2-9} \cmidrule(l){10-11} + TinyImageNet & & & & & & & & & $66.1\pm0.5$ & $68.3\pm0.7$ \\ + Tiny\name & specific & LaMa \cite{Suvorov2021} & mean & crop$\to$paste$\to$color & same & - & - & \gtxt{-} & $64.6\pm0.5$ & $70.0\pm0.6$ \\ + \gtxt{Tiny\name} & \gtxt{specific} & \gtxt{LaMa \cite{Suvorov2021}} & range & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $65.5\pm0.4$ & $71.2\pm0.5$ \\ + \gtxt{Tiny\name} & general & \gtxt{LaMa \cite{Suvorov2021}} & \gtxt{range} & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $66.4\pm0.6$ & $72.9\pm0.6$ \\ + \gtxt{Tiny\name} & \gtxt{general} & Att. Eraser \cite{Sun2024} & \gtxt{range} & \gtxt{crop$\to$paste$\to$color} & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $67.5\pm1.2$ & $72.4\pm0.5$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & paste$\to$crop$\to$color & \gtxt{same} & \gtxt{-} & \gtxt{-} & \gtxt{-} & $67.1\pm1.2$ & $72.9\pm0.5$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 1.0 & \gtxt{-} & \gtxt{-} & $67.0\pm1.2$ & $73.0\pm0.3$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 0.8 & \gtxt{-} & \gtxt{-} & $67.2\pm1.2$ & $72.9\pm0.8$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & 0.6 & \gtxt{-} & \gtxt{-} & $67.5\pm1.0$ & $72.8\pm0.7$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. 
Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 2.0$ & \gtxt{-} & $67.2\pm0.4$ & $72.9\pm0.5$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{-} & $65.9\pm0.5$ & $72.4\pm0.6$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.2$ & $69.8\pm0.5$ & $75.0\pm0.3$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.33$ & $69.5\pm0.4$ & $75.2\pm1.0$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & $p=0.5$ & $70.3\pm1.0$ & $74.2\pm0.2$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & linear & $70.1\pm0.7$ & $74.9\pm0.8$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & reverse lin. & $67.6\pm0.2$ & $73.2\pm0.3$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & cos & $71.3\pm1.0$ & $75.7\pm0.8$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{cos} & $70.0\pm0.8$ & $75.5\pm0.7$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & orig. 
& \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & $67.2\pm0.9$ & $69.9\pm1.0$ \\ + \gtxt{Tiny\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & all & \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & $70.1\pm0.7$ & $77.5\pm0.6$ \\ + \midrule + \name & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & \gtxt{-} & \gtxt{cos} & - & $80.5\pm0.1$ \\ + \gtxt{\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & \gtxt{same} & \gtxt{0.8} & $\sigma_\text{max} = 4.0$ & \gtxt{cos} & - & $80.7\pm0.1$ \\ + \gtxt{\name} & \gtxt{general} & \gtxt{Att. Eraser \cite{Sun2024}} & \gtxt{range} & \gtxt{paste$\to$crop$\to$color} & all & \gtxt{0.8} & \gtxt{$\sigma_\text{max} = 4.0$} & \gtxt{cos} & - & $81.3\pm0.1$ \\ + \bottomrule + \end{tabular}} + \caption{Ablation of design decisions of Tiny\name on TinyImageNet and \name on ImageNet.} + \label{tab:ablation} +\end{table*} + +\textbf{Prompt.} +% We present the ablation of our main design decisions in \Cref{tab:ablation}. +First, we evaluate the type of prompt used to detect the foreground object. +Here, the \emph{general} prompt, which contains the class and the more general object category, outperforms only having the class name (\emph{specific}). + +\textbf{Inpainting.} Attentive Eraser \cite{Sun2024} produces superior results compared to LaMa \cite{Suvorov2021} (see the supplementary for examples). +% When comparing the infill models, the GAN-based LaMa \cite{Suvorov2021} gets outperformed by the Attentive Eraser \cite{Sun2024}. + +\textbf{Foreground size} +% We observe that LaMa's often infills unnatural textures compared to Attentive Eraser. +% The size of foreground objects during training has a significant impact on the performance. 
+% Here, using the greater variability of the \emph{range} strategy increases the performance by $\approx 1\%$ compared to the \emph{mean} strategy. +significantly impacts performance. +Employing a \emph{range} of sizes during recombination, rather than a fixed \emph{mean} size, boosts accuracy by approximately 1 p.p. +This suggests that the added variability is beneficial. + +\textbf{Order of data augmentation.} +% (1) Applying the image crop related augmentations \emph{before} pasting the foreground object and the color-based ones \emph{after} pasting or (2) applying all data augmentations after pasting the foreground object. +% While results are ambiguous, we choose the second strategy, as it improves the performance of ViT-S, although not the one of ViT-Ti. +Applying all augmentations after foreground-background recombination (\emph{paste$\to$crop$\to$color}) slightly improves ViT-S's performance compared to applying crop-related augmentations before pasting (\emph{crop$\to$paste$\to$color}). +For ViT-Ti, the results are ambiguous. + +\textbf{Background pruning.} +When it comes to the choice of backgrounds to use, we test two pruning thresholds ($t_\text{prune}$) to exclude backgrounds with excessive inpainting. +% and only use backgrounds with an relative size of the infilled region of at most $t_\text{prune}$ (exclusive). +A threshold of $t_\text{prune}=1.0$ means that we use all backgrounds that are not fully infilled. +% We find that the background pruning does not significantly impact the models' performance. +% We choose $t_\text{prune}=0.8$ for the following experiments to exclude backgrounds that are mostly artificial. +Varying $t_\text{prune}$ has minimal impact. +Therefore, we choose $t_\text{prune} = 0.8$ to exclude predominantly artificial backgrounds. +Similarly, applying edge smoothing to foreground masks with Gaussian blurring actually hurts performance on Tiny\name, but slightly improves it on \name. 
+ +% One of the most important design decisions is the mixing of the original dataset with \name. +\textbf{Mixing} \name with the original ImageNet data proves crucial. +While constant and linear mixing schedules improve performance over no mixing by $2-3$ p.p. compared to only using Tiny\name, the cosine annealing schedule yields the best results, boosting accuracy by another $0.5-1$ p.p. + +\textbf{Background strategy.} +Another point is the allowed choice of background image for each foreground object. +% We evaluate three different strategies. +% (1) Picking the background from which that specific foreground was originally extracted. +% The major difference to ImageNet when using this setup is the variability in size and position of the foreground object. +% (2) Picking a background that originally had a foreground object of the same class in it. +% Here, we have backgrounds where objects of this type can typically appear while also creating a wider variety of samples due to pairing each foreground object with different backgrounds each time. +% (3) Picking any background. +% This choice has the largest variety of backgrounds, but the backgrounds are not semantically related to the foreground object anymore. +% We find in \Cref{fig:bg-strategy} that choosing only a foreground's original background is the worst choice. +We compare using the original background, a background from the same class, and any background. +These strategies go from low diversity and high shared information content between the foreground and background to high diversity and low shared information content. +For \emph{ViT-Ti}, the latter two strategies perform comparably, while \emph{ViT-S} benefits from the added diversity of using any background. +The same is true when training on the full (ImageNet) version of \name. 
+ +\begin{figure} + \centering + \includegraphics[width=.7\columnwidth]{img/bates.pdf} + \caption{Plot of the probability distribution function (PDF) of the extended Bates distribution for different parameters $\eta$. Higher values of $\eta$ concentrate the distribution around the center.} + \label{fig:bates-pdf} +\end{figure} + +\begin{table} + \centering + \resizebox{\columnwidth}{!}{ + \begin{tabular}{ccccccc} + \toprule + \multirow{2.5}{*}{\makecell{Training Set/ \\ Bates Parameter}} & \multirow{2.5}{*}{TIN} & \multicolumn{5}{c}{Tiny\name} \\ + \cmidrule(l){3-7} + & & $\eta=-3$ & $-2$ & $1/-1$ & $2$ & $3$ \\ + \midrule + TinyImageNet & 68.9 & 60.5 & 60.2 & 60.8 & 62.6 & 63.1 \\ + $\eta=-3$ & 71.3 & 79.3 & 79.5 & 79.1 & 79.3 & 79.1 \\ + $\eta=-2$ & 71.5 & 80.0 & 78.7 & 79.3 & 79.1 & 78.8 \\ + $\eta=1/-1$ & 72.3 & 79.5 & 78.9 & 80.2 & 79.7 & 80.4 \\ + $\eta=2$ & 71.3 & 78.2 & 77.8 & 79.1 & 79.6 & 79.9 \\ + $\eta=3$ & 71.4 & 77.2 & 76.9 & 78.6 & 79.6 & 79.7 \\ + \bottomrule + \end{tabular}} + \caption{Accuracy of ViT-S trained on TinyImageNet (TIN) and Tiny\name with different foreground position distributions by varying the parameter of a Bates distribution $\eta$. + The best performance is achieved using the uniform distribution ($\eta=1$).} +\end{table} + +\textbf{Foreground position.} +Finally, we analyze the foreground object's positioning in the image. +We utilize an extended Bates distribution to sample the position of the foreground object. +The Bates distribution~\cite{Bates1955} with parameter $\eta \geq 1$ is the mean of $\eta$ independent uniformly distributed random variables \cite{Jonhson1995}. +Therefore, the larger $\eta$, the more concentrated the distribution is around the center. 
+We extend this concept to $\eta \leq -1$ by defining ${X \sim \text{Bates}(\eta) :\Leftrightarrow s(X) \sim \text{Bates}(-\eta)}$ for $\eta \leq -1$ with $s$ being the sawtooth function on $[0, 1]$: +\begin{align} + s(x) = \begin{cases} + x + 0.5 & \text{if } 0 \leq x < 0.5 \\ + x - 0.5 & \text{if } 0.5 \leq x \leq 1 + \end{cases} +\end{align} +Note that $s \circ s = \id$ on $[0, 1]$. +This way, distributions with $\eta \leq -1$ are more concentrated around the borders. +$\eta = 1$ and $\eta = -1$ both correspond to the uniform distribution. +The PDF of this extended Bates distribution is visualized in \Cref{fig:bates-pdf}. + +When sampling more towards the center of the image, the difficulty of the task is reduced, which then reduces the performance on TinyImageNet. +This is reflected in the performance when evaluating on Tiny\name with $\eta=2$ and $\eta=3$ compared to $\eta=-1/1$. +We observe a similar reduction for $\eta < -1$. +This experiment is conducted using the LaMa infill model. + +\begin{table} + \centering + \small + \begin{tabular}{lccc} + \toprule + Dataset & Classes & \makecell{Training \\ Images} & \makecell{Validation \\ Images} \\ + \midrule + TinyImageNet & 200 & 100,000 & 10,000 \\ + Tiny\name & 200 & 99,404 & 9,915 \\ + ImageNet & 1,000 & 1,281,167 & 50,000 \\ + \name & 1,000 & 1,274,557 & 49,751 \\ + \bottomrule + \end{tabular} + \caption{Dataset statistics for TinyImageNet, Tiny\name, ImageNet, and \name. For \name and Tiny\name we report the number of foreground/background pairs.} + \label{tab:dataset-stats} +\end{table} +After fixing the optimal design parameters in \Cref{tab:ablation} (last row), we construct the full \name dataset using the entire ImageNet dataset. +\Cref{tab:dataset-stats} compares the dataset statistics of ImageNet and \name. +% The slightly lower number of images in \name is due to \emph{Grounded SAM} returning no or invalid detections for some images.
+The slightly reduced image count in \name is due to instances where Grounded SAM failed to produce valid object detections. \subsection{Image Classification Results} -\textbf{ImageNet training.} -\Cref{tab:imagenet-pipelines} analyzes the effect of \schemename under different data augmentation pipelines: -A \emph{basic} pipeline with RandomResizedCrop, Flip and ColorJitter, the \emph{3-Augment} pipeline from \cite{Touvron2022,Nauen2025} that also includes Grayscale, Solarization and GaussianBlur, as well as the widely used \emph{RandAugment}~\cite{Cubuk2020} based pipeline from DeiT~\cite{Touvron2021b}. -Additionally, we include MixUp~\cite{Zhang2018a} and CutMix~\cite{Yun2019} augmentations. -% We also include Mixup and CutMix. -We find that the effectiveness of \schemename depends on the interplay between model capacity and baseline augmentation strength. -When the baseline augmentation is weak or moderate, \schemename consistently improves ImageNet accuracy, with gains increasing for larger ViT models (up to $+6.0$ p.p.\ for ViT-B). -As the augmentation pipeline becomes stronger (e.g., RandAugment with MixUp and CutMix), ImageNet improvements diminish for smaller models, indicating that the baseline augmentation already saturates their capacity. -Importantly, even in cases where ImageNet accuracy does not improve, we consistently observe gains during downstream fine-tuning (see \Cref{tab:downstream-results}), suggesting that \schemename enhances representation quality beyond what is reflected by ImageNet accuracy. - -\Cref{tab:imagenet-results} additionally compares performance of different model architectures. -ViT~\cite{Dosovitskiy2021}, Swin~\cite{Liu2021} and ResNet~\cite{He2016} (representing CNNs) are trained using the ``3-augment'' strategy, while DeiT~\cite{Touvron2021b} is trained using the ``RandAugment'' strategy. 
-Notably, \schemename improves performance across all tested architectures, including the ResNet models, % (up to $1$ p.p.), -demonstrating benefits beyond Transformers. -% We find that \schemename's improvements counteract the drop in performance for increasing model sizes. -% Without \schemename this drop is $3.8$ p.p. (ViT-S to L), while with \schemename it is reduced to $1.6$ p.p. -% For DeiT there is a drop of $0.8$ p.p. from small to large while when using \schemename there is a \emph{gain} of $2.4$ p.p. - -\begin{table}[t] - \caption{Downstream accuracy in percent when finetuning on other datasets. Models are pretrained on ImageNet with and without \schemename. Pretraining using \schemename increases transformer downstream accuracy. - % on all datasets. - } - \label{tab:downstream-results} - \begin{subfigure}{.48\columnwidth} - \resizebox{\textwidth}{!}{\begin{tabular}{lcccccc} +\begin{table} + \centering + \begin{tabular}{lccc} \toprule - Model & \schemename & Aircraft & Cars & Flowers & Food & Pets \\ + \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{ImageNet Accuracy \\ when trained on}} & \multirow{2.5}{*}{Delta} \\ + \cmidrule(lr){2-3} + & ImageNet & \name & \\ \midrule - ViT-S & \xmark & $72.4\pm1.0$ & $89.8\pm0.3$ & $94.5\pm0.2$ & $89.1\pm0.1$ & $93.8\pm0.2$ \\ - ViT-S & \cmark & $78.6\pm0.5$ & $92.2\pm0.2$ & $95.5\pm0.2$ & $89.6\pm0.1$ & $94.5\pm0.2$ \\ - & & \grntxt{$+6.2$} & \grntxt{$+2.4$} & \grntxt{$+1.0$} & \grntxt{$+0.5$} & \grntxt{$+0.7$} \\ + ViT-S & $79.1\pm0.1$ & $81.4\pm0.1$ & \grntxt{+2.3} \\ + ViT-B & $77.6\pm0.2$ & $81.1\pm0.4$ & \grntxt{+3.5} \\ + ViT-L & $75.3\pm0.4$ & $79.8\pm0.1$ & \grntxt{+4.5} \\ \midrule - ViT-B & \xmark & $71.7\pm0.5$ & $90.0\pm0.2$ & $94.8\pm0.4$ & $89.8\pm0.2$ & $94.1\pm0.4$ \\ - ViT-B & \cmark & $79.0\pm2.2$ & $93.3\pm0.1$ & $ 96.5\pm0.1$ & $90.9\pm0.1$ & $95.1\pm0.4$ \\ - & & \grntxt{$+7.3$} & \grntxt{$+3.3$} & \grntxt{$+1.7$} & \grntxt{$+1.1$} & \grntxt{$+1.0$} \\ + Swin-Ti & $77.9\pm0.2$ & 
$79.7\pm0.1$ & \grntxt{+1.8} \\ + Swin-S & $79.4\pm0.1$ & $80.6\pm0.1$ & \grntxt{+1.2} \\ \midrule - ViT-L & \xmark & $72.1\pm1.0$ & $88.8\pm0.3$ & $94.4\pm0.3$ & $90.1\pm0.2$ & $94.2\pm0.4$ \\ - ViT-L & \cmark & $77.6\pm1.2$ & $89.1\pm0.2$ & $96.6\pm0.1$ & $91.3\pm0.1$ & $95.1\pm0.1$ \\ - & & \grntxt{$+5.5$} & \grntxt{$+0.3$} & \grntxt{$+2.2$} & \grntxt{$+1.2$} & \grntxt{$+0.9$} \\ - \midrule - Swin-Ti & \xmark & $77.0\pm0.1$ & $91.3\pm0.6$ & $95.9\pm0.1$ & $90.0\pm0.2$ & $94.2\pm0.1$ \\ - Swin-Ti & \cmark & $81.1\pm0.8$ & $92.8\pm0.4$ & $96.2\pm0.1$ & $90.4\pm0.3$ & $94.8\pm0.5$ \\ - & & \grntxt{$+4.1$} & \grntxt{$+2.5$} & \grntxt{$+0.3$} & \grntxt{$+0.4$} & \grntxt{$+0.6$} \\ - \midrule - Swin-S & \xmark & $75.7\pm1.4$ & $91.0\pm0.3$ & $95.9\pm0.5$ & $91.1\pm0.2$ & $94.4\pm0.1$ \\ - Swin-S & \cmark & $81.4\pm0.2$ & $93.1\pm0.2$ & $96.3\pm0.3$ & $91.2\pm0.2$ & $94.9\pm0.3$ \\ - & & \grntxt{$+5.7$} & \grntxt{$+2.1$} & \grntxt{$+1.4$} & \gtxt{$+0.1$} & \grntxt{$+0.5$} \\ + ResNet-50 & $78.3\pm0.1$ & $78.8\pm0.1$ & \grntxt{+0.5} \\ + ResNet-101 & $79.4\pm0.1$ & $80.4\pm0.1$ & \grntxt{+1.0} \\ \bottomrule - \end{tabular}} - \end{subfigure} - \hfill - \begin{subfigure}{.505\columnwidth} - \resizebox{\textwidth}{!}{\begin{tabular}{lcccccc} - \toprule - Model & \schemename & Aircraft & Cars & Flowers & Food & Pets \\ - \midrule - DeiT-S & \xmark & $75.3\pm0.4$ & $91.1\pm0.2$ & $94.8\pm0.4$ & $89.2\pm0.2$ & $92.4\pm0.2$ \\ - DeiT-S & \cmark & $76.8\pm0.8$ & $91.9\pm0.2$ & $95.2\pm0.3$ & $89.1\pm0.2$ & $92.3\pm0.4$ \\ - & & \grntxt{$+1.5$} & \grntxt{$+0.8$} & \grntxt{$+0.4$} & \gtxt{$-0.1$} & \gtxt{$-0.1$} \\ - \midrule - DeiT-B & \xmark & $77.0\pm1.2$ & $92.9\pm0.2$ & $96.1\pm0.2$ & $91.2\pm0.1$ & $93.3\pm0.4$ \\ - DeiT-B & \cmark & $79.3\pm0.3$ & $93.1\pm0.1$ & $96.4\pm0.2$ & $91.3\pm0.1$ & $93.3\pm0.1$ \\ - & & \grntxt{$+2.3$} & \gtxt{$+0.2$} & \grntxt{$+0.3$} & \gtxt{$+0.1$} & \gtxt{$\pm0.0$} \\ - \midrule - DeiT-L & \xmark & $72.8\pm5.5$ & $92.8\pm1.0$ & $95.8\pm1.5$ 
& $90.5\pm2.6$ & $92.4\pm2.0$ \\ - DeiT-L & \cmark & $78.8\pm0.8$ & $93.8\pm0.2$ & $97.0\pm0.2$ & $92.0\pm0.2$ & $93.5\pm0.2$ \\ - & & \grntxt{$+6.0$} & \grntxt{$+1.0$} & \grntxt{$+1.2$} & \grntxt{$+1.5$} & \grntxt{$+1.1$} \\ - \midrule - ResNet-50 & \xmark & $78.2\pm0.5$ & $89.8\pm0.2$ & $91.7\pm0.4$ & $84.4\pm0.2$ & $93.7\pm0.3$ \\ - ResNet-50 & \cmark & $80.3\pm0.4$ & $90.4\pm0.2$ & $91.7\pm0.2$ & $84.5\pm0.2$ & $93.7\pm0.3$ \\ - & & \grntxt{$+2.1$} & \grntxt{$+0.6$} & \gtxt{$\pm0.0$} & \gtxt{$+0.1$} & \gtxt{$\pm0.0$} \\ - \midrule - ResNet-101 & \xmark & $78.4\pm0.6$ & $90.3\pm0.1$ & $91.2\pm0.5$ & $86.0\pm0.2$ & $94.3\pm0.2$ \\ - ResNet-101 & \cmark & $81.4\pm0.5$ & $91.3\pm0.1$ & $92.9\pm0.2$ & $86.3\pm0.1$ & $94.0\pm0.3$ \\ - & & \grntxt{$+3.0$} & \grntxt{$+1.3$} & \grntxt{$+1.7$} & \grntxt{$+0.3$} & \textcolor{red}{$-0.3$} \\ - \bottomrule - \end{tabular}} - \end{subfigure} + \end{tabular} + \caption{ImageNet results of models trained on \name and on ImageNet directly. \name improves the performance of all models in our test.} + \label{tab:imagenet-results} \end{table} -\textbf{Downstream tasks.} To assess the transferability of \schemename-trained models, we finetune models pretrained on ImageNet with and without \schemename on five fine-grained datasets: +\Cref{tab:imagenet-results} compares the ImageNet performance of models trained on \name and ones trained directly on ImageNet. +We adopt the training setup of \cite{Nauen2023} and \cite{Touvron2022} (details in the supplementary material) for training ViT \cite{Dosovitskiy2021}, Swin \cite{Liu2021} and ResNet \cite{He2016} models. +Notably, \name improves performance across all tested architectures, including the ResNet models (up to $1$ p.p.), demonstrating benefits beyond Transformers. +For Transformer models, we observe improvements from $1.2$ p.p. to $4.5$ p.p. +This improvement is more substantial for the larger models, with ViT-L gaining $4.5$ p.p. in accuracy. 
+\name's improvements mostly counteract the drop in performance due to overfitting for large models. +When training on ImageNet, this drop is $3.8$ p.p. from ViT-S to ViT-L, while for \name it is reduced to $1.6$ p.p. + + +\begin{table} + \centering + \resizebox{\columnwidth}{!}{\begin{tabular}{lccccc} + \toprule + Model & Aircraft & Cars & Flowers & Food & Pets \\ + \midrule + ViT-S @ ImageNet & $72.4\pm1.0$ & $89.8\pm0.3$ & $94.5\pm0.2$ & $89.1\pm0.1$ & $93.8\pm0.2$ \\ + ViT-S @ \name & $78.6\pm0.5$ & $92.2\pm0.2$ & $95.5\pm0.2$ & $89.6\pm0.1$ & $94.5\pm0.2$ \\ + & \grntxt{+6.2} & \grntxt{+2.4} & \grntxt{+1.0} & \grntxt{+0.5} & \grntxt{+0.7} \\ + \cmidrule(r){1-1} + ViT-B @ ImageNet & $71.7\pm0.5$ & $90.0\pm0.2$ & $94.8\pm0.4$ & $89.8\pm0.2$ & $94.1\pm0.4$ \\ + ViT-B @ \name & $79.0\pm2.2$ & $93.3\pm0.1$ & $ 96.5\pm0.1$ & $90.9\pm0.1$ & $95.1\pm0.4$ \\ + & \grntxt{+7.3} & \grntxt{+3.3} & \grntxt{+1.7} & \grntxt{+1.1} & \grntxt{+1.0} \\ + \cmidrule(r){1-1} + ViT-L @ ImageNet & $72.1\pm1.0$ & $88.8\pm0.3$ & $94.4\pm0.3$ & $90.1\pm0.2$ & $94.2\pm0.4$ \\ + ViT-L @ \name & $77.6\pm1.2$ & $89.1\pm0.2$ & $96.6\pm0.1$ & $91.3\pm0.1$ & $95.1\pm0.1$ \\ + & \grntxt{+5.5} & \grntxt{+0.3} & \grntxt{+2.2} & \grntxt{+1.2} & \grntxt{+0.9} \\ + \midrule + Swin-Ti @ ImageNet & $77.0\pm0.1$ & $91.3\pm0.6$ & $95.9\pm0.1$ & $90.0\pm0.2$ & $94.2\pm0.1$ \\ + Swin-Ti @ \name & $81.1\pm0.8$ & $92.8\pm0.4$ & $96.2\pm0.1$ & $90.4\pm0.3$ & $94.8\pm0.5$ \\ + & \grntxt{+4.1} & \grntxt{+2.5} & \grntxt{+0.3} & \grntxt{+0.4} & \grntxt{+0.6} \\ + \cmidrule(r){1-1} + Swin-S @ ImageNet & $75.7\pm1.4$ & $91.0\pm0.3$ & $95.9\pm0.5$ & $91.1\pm0.2$ & $94.4\pm0.1$ \\ + Swin-S @ \name & $81.4\pm0.2$ & $93.1\pm0.2$ & $96.3\pm0.3$ & $91.2\pm0.2$ & $94.9\pm0.3$ \\ + & \grntxt{+5.7} & \grntxt{+2.1} & \grntxt{+1.4} & \grntxt{+0.1} & \grntxt{+0.5} \\ + \midrule + ResNet-50 @ ImageNet & $78.2\pm0.5$ & $89.8\pm0.2$ & $91.7\pm0.4$ & $84.4\pm0.2$ & $93.7\pm0.3$ \\ + ResNet-50 @ \name & $80.3\pm0.4$ & $90.4\pm0.2$ 
& $91.7\pm0.2$ & $84.5\pm0.2$ & $93.7\pm0.3$ \\ + & \grntxt{+2.1} & \grntxt{+0.6} & \gtxt{$\pm$0} & \grntxt{+0.1} & \gtxt{$\pm$0} \\ + \cmidrule(r){1-1} + ResNet-101 @ ImageNet & $78.4\pm0.6$ & $90.3\pm0.1$ & $91.2\pm0.5$ & $86.0\pm0.2$ & $94.3\pm0.2$ \\ + ResNet-101 @ \name & $81.4\pm0.5$ & $91.3\pm0.1$ & $92.9\pm0.2$ & $86.3\pm0.1$ & $94.0\pm0.3$ \\ + & \grntxt{+3.0} & \grntxt{+1.3} & \grntxt{+1.7} & \grntxt{+0.3} & \textcolor{red}{-0.3} \\ + \bottomrule + \end{tabular}} + \caption{Downstream accuracy in percent when finetuning on other datasets. Models were pretrained on \name and ImageNet. Pretraining on \name increases Transformer downstream accuracy on all datasets.} +\end{table} + +To assess the transferability of \name-trained models, we finetune models pretrained on ImageNet and \name on five fine-grained datasets: FGVC-Aircraft \cite{Maji2013}, Stanford Cars~\cite{Dehghan2017}, Oxford Flowers \cite{Nilsback2008}, Food-101 \cite{Kaur2017}, and Oxford-IIIT Pets \cite{Parkhi2012}. -% While for ResNets, the performance of both training datasets is about the same, -In \Cref{tab:downstream-results} we see transformer accuracies improve on all these datasets by up to 7.3 p.p. -% and a reduction of error rate of up to $39.3\%$. -% Notably, training with \name increases the downstream performance of DeiT-S and DeiT-B, even though the ImageNet results were the same. -% This demonstrates that the improved representations from training on \name translate to superior performance beyond gains from better ImageNet performance. -Notably, training with \schemename boosts the downstream performance of DeiT-S and DeiT-B, despite similar ImageNet accuracy. -This shows, that the improved representations from training with \schemename translate to gains beyond better ImageNet scores. -% not only on ImageNet, but also on fine-grained image classification tasks. 
+While for ResNets, the performance of both training datasets is about the same, for every Transformer, we see the accuracy improve on all downstream datasets by up to 7.3 p.p. and a reduction of error rate of up to $39.3\%$. +In summary, these results demonstrate that the improved representation learning achieved by training on \name translates to superior performance not only on ImageNet, but also on a variety of fine-grained image classification tasks. -\begin{table}[t] -    \caption{Evaluation of models trained on ImageNet with and without \schemename. \schemename generally increases models' robustness to different image distribution shifts. Note that ViT-S \emph{with} \schemename outperforms DeiT-S, the only model where \schemename does not increase robustness.} -    \label{tab:robustness-datasets} -    \begin{subfigure}{.485\textwidth} -        \resizebox{\textwidth}{!}{ -        \begin{tabular}{lccccccc} +\subsection{Further Model Evaluation} +% Additional to just using \name for training, its special properties and possibilities for adjustment of the data distribution make it a valuable tool for evaluating other model properties and biases. +Beyond its use for training, \name's unique properties and controlled data generation capabilities make it a powerful tool for analyzing model behavior and biases. 
+ +\paragraph*{Background Robustness} +\begin{table} + \centering + \begin{tabular}{lccc} \toprule - Model & w/ \schemename & IN-Hard & IN-A & IN-C & IN-R & IN-V2 \\ + \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Background Robustness \\ when trained on}} & \multirow{2.5}{*}{Delta} \\ + \cmidrule(lr){2-3} + & ImageNet & \name & \\ \midrule - ViT-S & \xmark & $18.1 \pm 0.6$ & $18.8 \pm 0.2$ & $44.7 \pm 0.8$ & $41.6 \pm 0.6$ & $67.3 \pm 0.4$ \\ - ViT-S & \cmark & $21.0 \pm 0.4$ & $26.5 \pm 0.4$ & $52.6 \pm 0.6$ & $49.8 \pm 0.3$ & $70.6 \pm 0.1$ \\ - & & \grntxt{$+2.9$} & \grntxt{$+7.7$} & \grntxt{$+7.9$} & \grntxt{$+8.1$} & \grntxt{$+3.3$} \\ + ViT-S & $0.73\pm0.01$ & $0.99\pm0.01$ & \grntxt{+0.26} \\ + ViT-B & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\ + ViT-L & $0.70\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.30} \\ \midrule - ViT-B & \xmark & $17.0 \pm 0.4$ & $15.8 \pm 0.7$ & $40.4 \pm 0.8$ & $38.4 \pm 0.7$ & $65.1 \pm 0.6$ \\ - ViT-B & \cmark & $22.0 \pm 0.9$ & $31.9 \pm 1.5$ & $51.6 \pm 1.8$ & $48.7 \pm 1.7$ & $70.3 \pm 0.9$ \\ - & & \grntxt{$+5.0$} & \grntxt{$+16.0$} & \grntxt{$+11.2$} & \grntxt{$+10.3$} & \grntxt{$+5.2$} \\ + Swin-Ti & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\ + Swin-S & $0.72\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.28} \\ \midrule - ViT-L & \xmark & $15.6 \pm 0.4$ & $11.3 \pm 0.9$ & $38.4 \pm 1.0$ & $36.8 \pm 0.8$ & $61.6 \pm 0.8$ \\ - ViT-L & \cmark & $20.6 \pm 0.1$ & $30.4 \pm 0.5$ & $48.2 \pm 0.7$ & $46.0 \pm 0.4$ & $68.7 \pm 0.3$ \\ - & & \grntxt{$+5.0$} & \grntxt{$+19.0$} & \grntxt{$+9.8$} & \grntxt{$+9.3$} & \grntxt{$+7.1$} \\ - \midrule - Swin-Ti & \xmark & $16.2 \pm 0.4$ & $15.0 \pm 0.3$ & $36.0 \pm 0.8$ & $36.6 \pm 0.2$ & $65.5 \pm 0.4$ \\ - Swin-Ti & \cmark & $18.3 \pm 0.3$ & $20.3 \pm 0.4$ & $41.4 \pm 0.8$ & $41.4 \pm 0.2$ & $68.2 \pm 0.4$ \\ - & & \grntxt{$+2.2$} & \grntxt{$+5.4$} & \grntxt{$+5.4$} & \grntxt{$+4.8$} & \grntxt{$+2.7$} \\ - \midrule - Swin-S & \xmark & $18.2 \pm 0.3$ & $19.4 \pm 0.3$ & $39.0 \pm 
0.7$ & $39.1 \pm 0.2$ & $67.5 \pm 0.1$ \\ - Swin-S & \cmark & $20.5 \pm 0.1$ & $27.7 \pm 0.4$ & $45.6 \pm 0.8$ & $44.1 \pm 0.3$ & $69.6 \pm 0.1$ \\ - & & \grntxt{$+2.2$} & \grntxt{$+8.4$} & \grntxt{$+6.6$} & \grntxt{$+5.0$} & \grntxt{$+2.2$} \\ + ResNet-50 & $0.79\pm0.01$ & $0.99\pm0.01$ & \grntxt{+0.20} \\ + ResNet-101 & $0.79\pm0.01$ & $1.00\pm0.01$ & \grntxt{+0.21} \\ \bottomrule - \end{tabular} - } - \end{subfigure} - \hfill - \begin{subfigure}{.505\textwidth} - \resizebox{\textwidth}{!}{ - \begin{tabular}{lccccccc} - \toprule - Model & w/ \schemename & IN-Hard & IN-A & IN-C & IN-R & IN-V2 \\ - \midrule - DeiT-S & \xmark & $19.5 \pm 0.2$ & $18.4 \pm 0.3$ & $58.8 \pm 0.7$ & $43.0 \pm 0.1$ & $68.8 \pm 0.2$ \\ - DeiT-S & \cmark & $18.5 \pm 0.5$ & $17.3 \pm 1.0$ & $57.0 \pm 0.9$ & $43.8 \pm 0.2$ & $68.7 \pm 0.6$ \\ - & & \rdtxt{$-1.0$} & \rdtxt{$-1.1$} & \rdtxt{$-1.8$} & \grntxt{$+0.8$} & \gtxt{$-0.1$} \\ - \midrule - DeiT-B & \xmark & $22.6 \pm 0.2$ & $26.0 \pm 0.2$ & $62.1 \pm 1.0$ & $45.6 \pm 1.9$ & $70.6 \pm 0.9$ \\ - DeiT-B & \cmark & $22.6 \pm 0.2$ & $25.0 \pm 0.3$ & $62.8 \pm 0.6$ & $47.7 \pm 0.8$ & $70.8 \pm 0.5$ \\ - & & \gtxt{$\pm 0.0$} & \rdtxt{$-1.0$} & \grntxt{$+0.8$} & \grntxt{$+2.0$} & \gtxt{$+0.2$} \\ - \midrule - DeiT-L & \xmark & $21.2 \pm 2.0$ & $20.2 \pm 3.4$ & $59.3 \pm 4.3$ & $41.3 \pm 2.7$ & $66.9 \pm 2.8$ \\ - DeiT-L & \cmark & $23.4 \pm 0.3$ & $28.8 \pm 2.0$ & $63.4 \pm 0.7$ & $47.8 \pm 0.6$ & $71.6 \pm 0.5$ \\ - & & \grntxt{$+2.2$} & \grntxt{$+8.7$} & \grntxt{$+4.1$} & \grntxt{$+6.5$} & \grntxt{$+4.7$} \\ - \midrule - ResNet50 & \xmark & $16.1 \pm 0.2$ & $9.7 \pm 0.1$ & $38.0 \pm 1.0$ & $40.5 \pm 0.6$ & $66.8 \pm 0.4$ \\ - ResNet50 & \cmark & $17.2 \pm 0.1$ & $10.8 \pm 0.4$ & $41.0 \pm 0.7$ & $43.7 \pm 0.3$ & $67.5 \pm 0.1$ \\ - & & \grntxt{$+1.1$} & \grntxt{$+1.1$} & \grntxt{$+3.0$} & \grntxt{$+3.2$} & \grntxt{$+0.7$} \\ - \midrule - ResNet101 & \xmark & $18.2 \pm 0.4$ & $14.3 \pm 0.1$ & $41.7 \pm 0.7$ & $42.3 \pm 0.1$ & $67.7 \pm 0.5$ \\ 
- ResNet101 & \cmark & $19.9 \pm 0.2$ & $17.6 \pm 0.5$ & $46.3 \pm 0.6$ & $46.3 \pm 0.3$ & $69.5 \pm 0.3$ \\ - & & \grntxt{$+1.7$} & \grntxt{$+3.2$} & \grntxt{$+4.6$} & \grntxt{$+4.0$} & \grntxt{$+1.8$} \\ - \bottomrule - \end{tabular} - } - \end{subfigure} + \end{tabular} + \caption{Evaluation of the background robustness of models trained on \name and on ImageNet directly. Training on \name improves the background robustness of all model to $\approx1.00$, meaning the model is indifferent to the choice of background.} + \label{tab:background-robustness} \end{table} -\subsection{Bias and Robustness Evaluation} -Beyond its use for training, \schemename's unique properties and controlled data generation capabilities make it a powerful tool for analyzing behavior and biases of black-box models. -We exploit this in two complementary ways. -First, we ask whether \schemename-trained models are more robust on \emph{external} ImageNet robustness benchmarks that are not generated by our pipeline. -Second, we use \schemename's fine-grained control for targeted evaluation of specific dimensions of model bias, such as background reliance and center/size bias. -% Together, these experiments allow us to both \emph{probe} and \emph{improve} robustness along clearly defined axes. -% This combination of standard benchmarks and controlled probes allows us to both quantify robustness improvements and attribute them to changes in particular model behaviors. - -\textbf{Robustness on External Distribution Shifts.} -\Cref{tab:robustness-datasets} summarizes accuracy on five widely used ImageNet robustness benchmarks: ImageNet-Hard~\cite{Taesiri2023}, ImageNet-A~\cite{Hendrycks2021}, ImageNet-C~\cite{Hendrycks2019}, ImageNet-R~\cite{Hendrycks2021a}, and ImageNetV2~\cite{Recht2019}. -Across ViTs, Swin Transformers, and ResNets, incorporating \schemename during training generally improves robustness to all considered distribution shifts. 
-For ViTs, the gains are substantial: for example, ViT-B improves from $15.8\%$ to $31.9\%$ accuracy on ImageNet-A ($+16.0$ p.p.) and from $40.4\%$ to $51.6\%$ on ImageNet-C ($+11.2$ p.p.), with similar improvements for ViT-S and ViT-L. -Swin also benefits consistently, with increases of roughly $2$--$8$ p.p. on most benchmarks, and ResNet sees smaller but steady gains (e.g., up to $+4.6$ points on ImageNet-C). - -For DeiT, the picture is more nuanced: DeiT-B and DeiT-L still enjoy robustness improvements, whereas DeiT-S exhibits small decreases on several benchmarks. -Interestingly, however, ViT-S trained with \schemename outperforms the DeiT-S baseline. -This suggests that controlled composition can partially close the robustness gap between lightly and heavily regularized models. -Overall, the consistent improvements on corruption-based, natural and hard examples indicate that the compositional invariances induced by \schemename extend beyond the specific foreground/background manipulations used in its construction. - -\begin{figure*}[t] - \centering - \includegraphics[width=\textwidth]{img/bg_robustness.pdf} - \caption{Evaluation of background robustness on ImageNet + \schemename, ImageNet9~\cite{Xiao2020} and CounterAnimal~\cite{Wang2024f}. - We plot the in-distribution (top of arrow) and the out-of-distribution (bottom of arrow) accuracy when training with and without \schemename. - We annotate each arrow with its length $\Delta$. - Training with \schemename improves the background robustness of all transformers by mostly boosting the out-of-distribution accuracy. - } - \label{fig:background-robustness} -\end{figure*} - -\textbf{Background Robustness.} % By adjusting the background distribution from using a background from an image of the same class as the foreground to using any background, we can evaluate the robustness of models to shifts in the background distribution. 
% We assess background robustness by changing the background distribution, comparing accuracy with backgrounds of the same class as the foreground to using any background. We assess the robustness of models to shifts in the background distribution from a class-related background to any background. % We define the background robustness coefficient to be the accuracy of a model on \name when using the same class background divided by the accuracy when using any background: -% Background robustness is defined to be the ratio of accuracy on \name with same-class backgrounds to accuracy with any background: -% \begin{align} -% \text{Background Robustness} = \frac{\text{Acc}(\name_\text{all})}{\text{Acc}(\name_\text{same})} -% \end{align} -% It represents the relative drop in performance under a background distribution shift. -\Cref{fig:background-robustness} presents the background robustness results for three datasets: ImageNet with \schemename (all backgrounds vs. backgrounds of same class), ImageNet9~\cite{Xiao2020} (random backgrounds vs. original backgrounds), and CounterAnimal~\cite{Wang2024f} (counter vs. common background). -The top triangle of each arrow represents the in-distribution backgrounds and the bottom triangle represents the out-of-distribution ones. -We follow ImageNet9 and CounterAnimal and assess the background robustness in terms of the accuracy gap when evaluating a model on images of normal background distribution compared to out-of-distribution backgrounds (length of each arrow; $\Delta$). -% When trained on ImageNet, smaller models generally exhibit greater robustness to changes in the background distribution than larger models and ResNet is more robust than the tested Transformer models. -Crucially, \schemename improves the background robustness of all models and across datasets, reducing the background-gap by boosting the performance on the out-of-background-distribution samples more than the in-distribution ones. 
-We find a similar trend for the Corner-Cases~\cite{Fatima2025} dataset (see supplementary), highlighting the generalization benefits of \schemename to unusual image compositions. - -\begin{figure*}[t] - \centering - \includegraphics[width=\textwidth]{img/fg_focus.pdf} - \caption{Evaluation of the foreground focus (\Cref{eq:fg-focus}) using GradCam, GradCam++ and IntegratedGradients (IG) of models trained on ImageNet. Training with \schemename improves the foreground focus of almost all models.} - \label{fig:foreground-focus} -\end{figure*} - -\textbf{Foreground Focus.} -Leveraging our inherent knowledge of the foreground masks when using \schemename, as well as common XAI techniques~\cite{Selvaraju2016,Chattopadhay2018,Sundararajan2017}, we can evaluate a model's focus on the foreground object. -% I.e. we measure how much the model's decision depends on the foreground. -We can directly evaluate ImageNet-trained models, but this technique can also be extended to other datasets without relying on manually annotated foreground masks. -To evaluate the foreground focus, we employ Grad-CAM \cite{Selvaraju2016}, Grad-CAM++ \cite{Chattopadhay2018} and IntegratedGradients (IG) \cite{Sundararajan2017} to compute the per-pixel importance of an image for the model's prediction. -The foreground focus is defined to be the ratio of the foreground's relative importance to its relative size in the image: -\begin{align} \label{eq:fg-focus} - \text{FG Focus}(\text{img}) = \frac{\text{Area}(\text{img}) \hspace{3pt} \text{Importance}(\text{fg})}{\text{Area}(\text{fg}) \hspace{3pt} \text{Importance}(\text{img})} +Background robustness is defined to be the ratio of accuracy on \name with same-class backgrounds to accuracy with any background: +\begin{align} + \text{Background Robustness} = \frac{\text{Acc}(\name_\text{all})}{\text{Acc}(\name_\text{same})} \end{align} -If all pixels uniformly receive the same importance value, the foreground focus is one. 
-The foreground focus of a model is its average focus over all test images. -\Cref{fig:foreground-focus} presents our findings. -Using \schemename significantly increases the foreground focus of ViT, DeiT and ResNet across all XAI metrics. -% I.e. \schemename-trained models base their decision more on the foreground object compared to the background than models trained without \schemename. -% For Swin, the foreground focus stagnates when measured using GradCam and GradCam++, but almost doubles when using IG. -% We hypothesize that Swin's below-uniform foreground focus reported with GradCam is due to its specific implementation for Swin. -We hypothesize Swin's below-uniform foreground focus with GradCam is due to its specific implementation. +It represents the relative drop in performance under a background distribution shift. +\Cref{tab:background-robustness} presents the background robustness of various models. +When trained on ImageNet, smaller models generally exhibit greater robustness to changes in the background distribution than larger models and ResNet is more robust than the tested Transformer models. +Crucially, training on \name instead of ImageNet improves the background robustness of all models to $\approx1.00$, meaning that these models are agnostic to the choice of background and only classify based on the foreground. +These findings highlight the generalization benefits of \name. 
+ +\paragraph*{Foreground Focus} +\begin{table} +    \centering +    \resizebox{\columnwidth}{!}{ +    \begin{tabular}{lcccccc} +        \toprule +        \multirow{4}{*}{Model} & \multicolumn{6}{c}{Foreground Focus when trained on} \\ +        \cmidrule(l){2-7} +        & IN & FN & IN & FN & IN & FN \\ +        \cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(l){6-7} +        & \multicolumn{2}{c}{GradCam} & \multicolumn{2}{c}{GradCam++} & \multicolumn{2}{c}{IG} \\ +        \midrule +        ViT-S & $1.2\pm0.1$ & $2.3\pm0.3$ & $1.2\pm0.1$ & $2.1\pm0.4$ & $1.9\pm0.1$ & $2.7\pm0.1$ \\ +        ViT-B & $1.2\pm0.1$ & $2.4\pm0.7$ & $1.1\pm0.1$ & $2.1\pm0.1$ & $1.7\pm0.1$ & $2.7\pm0.1$ \\ +        ViT-L & $1.3\pm0.1$ & $1.6\pm0.1$ & $1.1\pm0.1$ & $1.3\pm0.1$ & $1.3\pm0.1$ & $2.6\pm0.1$ \\ +        \midrule +        Swin-Ti & $0.9\pm0.1$ & $0.7\pm0.1$ & $1.0\pm0.3$ & $0.7\pm0.3$ & $2.5\pm0.1$ & $4.8\pm0.3$ \\ +        Swin-S & $0.8\pm0.1$ & $0.7\pm0.1$ & $0.7\pm0.1$ & $0.7\pm0.4$ & $2.4\pm0.1$ & $4.6\pm0.3$ \\ +        \midrule +        ResNet-50 & $2.2\pm0.1$ & $2.7\pm0.1$ & $2.0\pm0.1$ & $2.9\pm0.1$ & $3.2\pm0.1$ & $4.9\pm0.2$ \\ +        ResNet-101 & $2.3\pm0.1$ & $2.8\pm0.1$ & $2.2\pm0.1$ & $3.0\pm0.1$ & $3.2\pm0.1$ & $4.8\pm0.1$ \\ +        \bottomrule +    \end{tabular}} +    \caption{Evaluation of the foreground focus using GradCam, GradCam++ and IntegratedGradients of models trained on \name (FN) and on ImageNet (IN) directly. Training on \name improves the foreground focus of almost all models.} +    \label{tab:foreground-focus} +\end{table} + +Leveraging our inherent knowledge of the foreground masks when using \name, as well as common XAI techniques~\cite{Selvaraju2016,Chattopadhay2018,Sundararajan2017}, we can evaluate a model's focus on the foreground object. +We can directly evaluate ImageNet trained models, but this technique can also be extended to other datasets without relying on manually annotated foreground-masks. 
+To evaluate the foreground focus, we employ Grad-CAM \cite{Selvaraju2016}, Grad-CAM++ \cite{Chattopadhay2018} or IntegratedGradients (IG) \cite{Sundararajan2017} to compute the per-pixel importance of an image for the model's prediction. +The foreground focus is defined to be the ratio of the foreground's relative importance to its relative size in the image: +\begin{align} +    \text{FG Focus}(\text{img}) = \frac{\text{Area}(\text{img}) \hspace{3pt} \text{Importance}(\text{fg})}{\text{Area}(\text{fg}) \hspace{3pt} \text{Importance}(\text{img})} +\end{align} +The foreground focus of a model is its average foreground focus over all test images. +\Cref{tab:foreground-focus} presents our findings. +Training on \name significantly increases the foreground focus of ViT and ResNet across all metrics used. +For Swin, the foreground focus stagnates when measured using GradCam and GradCam++, but almost doubles when using IG. % These differences might be due to the way GradCam is calculated for Swin \todo{cite package website where this is from} and the \todo{common critique of GradCam}. -\begin{table}[t] -    \caption{ -    % Evaluation of the center bias. -    Accuracy relative to the center accuracy of multiple instantiations of the models when the foreground objects is in different cells of a $3 \times 3$ grid. -    We calculate center bias according to \Cref{eq:center-bias}. 
- Using \schemename significantly reduces models' center bias.} - \label{tab:center-bias} - \begin{subfigure}{.48\columnwidth} - \resizebox{\textwidth}{!}{ - \begin{tabular}{lccc} - \toprule - \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Center Bias [\%] when trained}} & \multirow{2.5}{*}{Delta} \\ - \cmidrule(lr){2-3} - & w/o \schemename & w/ \schemename \\ - \midrule - ViT-S & \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_ImageNet_v3.pdf} & \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-S_RecombNet_all_v3.pdf} \\ - & $25.5\pm0.8$ & $22.0\pm0.3$ & \grntxt{$-3.5$} \\ - ViT-B & {\includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_ImageNet_v3.pdf}} & \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-B_RecombNet_all_v3.pdf} \\ - & $25.4\pm0.4$ & $19.0\pm0.2$ & \grntxt{$-6.4$} \\ - ViT-L & \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_ImageNet_v3.pdf} & \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ViT-L_RecombNet_all_v3.pdf} \\ - & $24.3\pm1.1$ & $11.7\pm0.7$ 
& \grntxt{$-12.6$} \\ - \midrule - Swin-Ti & {\includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_ImageNet_v3.pdf}} & {\includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-Ti_RecombNet_all_v3.pdf}} \\ - & $25.0\pm0.7$ & $16.5\pm0.2$ & \grntxt{$-8.5$} \\ - Swin-S & {\includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_ImageNet_v3.pdf}} & {\includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/Swin-S_RecombNet_all_v3.pdf}} \\ - & $23.2\pm0.1$ & $15.6\pm0.2$ & \grntxt{$-7.6$} \\ - \bottomrule - \end{tabular} } - \end{subfigure} - \hfill - \begin{subfigure}{.497\columnwidth} - \resizebox{\textwidth}{!}{ - \begin{tabular}{lccc} - \toprule - \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Center Bias [\%] when trained}} & \multirow{2.5}{*}{Delta} \\ - \cmidrule(lr){2-3} - & w/o \schemename & w/ \schemename \\ - \midrule - DeiT-S & {\includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-S_ImageNet_vNone.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-S_ImageNet_v3.pdf} } & {\includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-S_fornet_all_linear_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-S_fornet_all_linear_v2.pdf} \includegraphics[width=.08\columnwidth, 
valign=c]{img/DeiT-S_fornet_all_linear_v3.pdf}} \\ - & $20.4 \pm 0.2$ & $21.2 \pm 0.1$ & \gtxt{$+0.8$} \\ - DeiT-B & {\includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_ImageNet_vNone.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_ImageNet_v3.pdf} } & {\includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_fornet_all_cos_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_fornet_all_cos_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-B_fornet_all_cos_v3.pdf}} \\ - & $19.0 \pm 0.7$ & $19.0 \pm 0.2$ & \gtxt{$\pm0.0$} \\ - DeiT-L & { \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_ImageNet_v3.pdf} } & { \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_fornet_all_cos_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_fornet_all_cos_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/DeiT-L_fornet_all_cos_v3.pdf} } \\ - & $21.2 \pm 0.2$ & $18.0 \pm 0.2$ & \grntxt{$-3.2$} \\ - \midrule - ResNet50 & {\includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_ImageNet_v3.pdf}} & {\includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet50_RecombNet_all_v3.pdf}} \\ - & $26.3\pm0.3$ & $19.7\pm0.3$ & \grntxt{$-6.6$} \\ - ResNet101 & {\includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_ImageNet_v2.pdf} 
\includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_ImageNet_v3.pdf}} & {\includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_RecombNet_all_v1.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_RecombNet_all_v2.pdf} \includegraphics[width=.08\columnwidth, valign=c]{img/ResNet101_RecombNet_all_v3.pdf}} \\ - & $23.0\pm0.3$ & $19.9\pm0.2$ & \grntxt{$-3.1$} \\ - \bottomrule - \end{tabular} } - \end{subfigure} - \centering - \includegraphics[width=.5\columnwidth]{img/colorbar_horizontal.pdf} +\paragraph*{Center Bias} +\begin{table} + \centering + \resizebox{\columnwidth}{!}{ + \begin{tabular}{lccc} + \toprule + \multirow{2.5}{*}{Model} & \multicolumn{2}{c}{\makecell{Center Bias when trained on}} & \multirow{2.5}{*}{Delta} \\ + \cmidrule(lr){2-3} + & ImageNet & \name \\ + \midrule + ViT-S & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-S_RecombNet all_v3.pdf}} \\ + & $0.255\pm0.008$ & $0.220\pm0.003$ & \grntxt{-0.035} \\ + ViT-B & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-B_RecombNet all_v3.pdf}} \\ + & $0.254\pm0.004$ & $0.190\pm0.002$ & \grntxt{-0.064} \\ + ViT-L & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v1.pdf} 
\includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ViT-L_RecombNet all_v3.pdf}} \\ + & $0.243\pm0.011$ & $0.117\pm0.007$ & \grntxt{-0.126} \\ + \midrule + Swin-Ti & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-Ti_RecombNet all_v3.pdf}} \\ + & $0.250\pm0.007$ & $0.165\pm0.002$ & \grntxt{-0.085} \\ + Swin-S & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/Swin-S_RecombNet all_v3.pdf}} \\ + & $0.232\pm0.001$ & $0.156\pm0.002$ & \grntxt{-0.076} \\ + \midrule + ResNet50 & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet50_RecombNet 
all_v3.pdf}} \\ + & $0.263\pm0.003$ & $0.197\pm0.003$ & \grntxt{-0.066} \\ + ResNet101 & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_ImageNet_v3.pdf}} & \raisebox{-6pt}{\includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNet all_v1.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNet all_v2.pdf} \includegraphics[width=.08\columnwidth]{img/ResNet101_RecombNet all_v3.pdf}} \\ + & $0.230\pm0.003$ & $0.199\pm0.002$ & \grntxt{-0.031} \\ + \bottomrule + \end{tabular} } + \includegraphics[width=.75\columnwidth]{img/colorbar_horizontal.pdf} + \caption{Evaluation of the position bias. We plot the accuracy relative to the center accuracy of multiple instantiations of the models when the foreground object is in different cells of a $3 \times 3$ grid. + Training on \name significantly reduces a model's center bias.} + \label{tab:center-bias} \end{table} - -\textbf{Center Bias.} -With \schemename we have unique control over the position of the foreground object in the image. -This lets us quantify the center bias of models trained with and without \schemename. -We divide the image into a $3 \times 3$ grid and evaluate model accuracy when the (scaled-down) foreground object is in each of the $9$ grid cells. +With \name we have unique control over the position of the foreground object in the image. +This lets us quantify the center bias of ImageNet- and \name-trained models. +We divide the image into a $3 \times 3$ grid and evaluate model accuracy when the foreground object is in each of the $9$ grid cells. Each cell's accuracy is divided by the accuracy in the center cell for normalization, which gives us the relative performance drop when the foreground is in each part of the image. 
The center bias is calculated as one minus the average of the minimum performance of a corner cell and the minimum performance of a side cell: -% \begin{align} -% \begin{split} -% & \text{Center Bias} = \\ -% & \hspace{7pt} 1 - \frac{\min\limits_{a, b \in \{0, 2\}} \text{Acc}(\text{cell}_{(a, b)}) + \min\limits_{\substack{a=1 \text{ or } b=1 \\ a \neq b}} \text{Acc}(\text{cell}_{(a, b)})}{2 \text{Acc}(\text{cell}_{(1, 1)})} -% \end{split} -% \end{align} -\begin{align} \label{eq:center-bias} - \text{Center Bias} = 1 - \frac{\min\limits_{c \in \text{sides}} \text{Acc}(c) + \min\limits_{c \in \text{corners}} \text{Acc}(c)}{2 \text{Acc}(c_\text{center})} +\begin{align} + \begin{split} + & \text{Center Bias} = \\ + & \hspace{7pt} 1 - \frac{\min\limits_{a, b \in \{0, 2\}} \text{Acc}(\text{cell}_{(a, b)}) + \min\limits_{\substack{a=1 \text{ or } b=1 \\ a \neq b}} \text{Acc}(\text{cell}_{(a, b)})}{2 \text{Acc}(\text{cell}_{(1, 1)})} + \end{split} \end{align} \Cref{tab:center-bias} visualizes the center bias of three instantiations of each model. -Performance is generally highest in the center and lowest in the four corners. +Performance is generally highest in the center and the center top and bottom and center left and right cells, and lowest in the four corners. Interestingly, ImageNet-trained models perform slightly better when the foreground object is on the right side of the image, compared to the left side, despite our use of random flipping with a probability of $0.5$ during training. % Training on \name reduces the center bias of all models by at least half. -Using \schemename significantly reduces center bias across models, with a more uniform performance especially across the middle row. -% On corner-cases (see supplementary) we find that -% Their accuracy is higher in the center left and right cells than in the center top and bottom ones, which is not the case for ImageNet-trained models. 
-% This demonstrates that \schemename promotes a more uniform spatial attention distribution, counteracting the center-bias of ImageNet. -Thus, \schemename makes the model recognize objects across a wider spatial distribution, counteracting the center-bias of ImageNet. +Training on \name significantly reduces center bias across all models. +This demonstrates that \name promotes a more uniform spatial attention distribution. +Their accuracy is higher in the center left and right cells than in the center top and bottom ones, which is not the case for ImageNet-trained models. -\begin{figure}[t!] - \centering - \includegraphics[width=\columnwidth]{img/size_bias_wide.pdf} - \caption{Evaluation of the size bias of models trained on ImageNet. We plot the accuracy relative to the accuracy when using the default size ($f_\text{size} = 1.0$).} - \label{fig:size-bias} +\paragraph*{Size Bias} +\begin{figure} + \centering + \includegraphics[width=.9\columnwidth]{img/size_bias.pdf} + \caption{Evaluation of the size bias of models trained on \name. We plot the accuracy relative to the accuracy when using the mean foreground size.} + \label{fig:size-bias} \end{figure} - -\textbf{Size Bias.} -Finally, we evaluate the impact of different sized foreground objects on the accuracy. +Finally, we evaluate the impact of different-sized foreground objects on the accuracy. For this evaluation, we use the \emph{mean} foreground size strategy. We introduce a size factor $f_\text{size}$ by which we additionally scale the foreground object before pasting it onto the background. -Results are normalized by the accuracy when using $f_\text{size} = 1.0$. -\Cref{fig:size-bias} shows the size bias curves of models trained with and without \schemename. +Results are again normalized by the accuracy when using the mean foreground size ($f_\text{size} = 1.0$). +\Cref{fig:size-bias} shows the size bias curves of ViT-S and ViT-B when trained on ImageNet and \name. 
% When training on \name, the resulting model keeps it's good performance on smaller foreground objects, while models trained on ImageNet fall of faster and lower. -Models trained using \schemename perform better, especially with smaller foreground objects. -%, when ImageNet-trained models exhibit a more rapid performance decline. -Therefore, \schemename-training improves robustness to variations in object scale, especially for larger models. - - -\subsection{Design Choices of \schemename} -We next analyze key components of \schemename, focusing on three questions: how it compares to simple copy-paste, how background choice affects performance, and how reliably labels are preserved after recomposition. -Additional ablations over variants and hyperparameters are provided in the supplementary material. - -\begin{table}[t] - \caption{Comparison of \schemename and simple Copy-Paste methods. We train ViT-S on ImageNet using the same 3-augment data augmentation on top of the copy-paste augmentation.} - \label{tab:copy-paste-comparison} - \centering - \resizebox{.66\columnwidth}{!}{ - \begin{tabular}{lcc S[table-format=+2.1,retain-explicit-plus,detect-inline-weight=math,detect-weight=true]} - \toprule - Augmentation & labels & \makecell{ Accuracy [\%]} & {\makecell{Delta \\to Prev.}} \\ - \midrule - % Baseline & & $79.1 \pm 0.1$ \\ - 3-Augment + \textbf{Simple Copy-Paste} & bg & $31.3 \pm 0.6$ & \\ - + mixed labels & fg + bg & $32.0 \pm 0.8$ & +0.7 \\ - + fg labels & fg & $31.6 \pm 0.9$ & -0.4 \\ - + \emph{range} foreground size variation & \gtxt{fg} & $43.0 \pm 1.2$ & \bfseries +11.4 \\ - + infilled backgrounds & \gtxt{fg} & $68.7 \pm 0.2$ & \bfseries +25.7 \\ - + \emph{cos} mixing strategy & \gtxt{fg} & $81.2 \pm 0.1$ & \bfseries +12.5 \\ - + edge smoothing & \gtxt{fg} & $81.3 \pm 0.1$ & +0.1 \\ - + background pruning$=$ \textbf{\schemename} & \gtxt{fg} & $81.4 \pm 0.1$ & +0.1 \\ - \bottomrule - \end{tabular}} -\end{table} -\textbf{Comparison to Simple Copy-Paste.} -We 
compare \schemename to a simple adaption of the Copy-Paste augmentation inspired by \cite{Ge2023,Ghiasi2021,Shermaine2025} in \Cref{tab:copy-paste-comparison}. -Contrary to semantic segmentation we do not have foreground masks available. -Thus, we paste the extracted objects from \textbf{\schemename's segmentation stage} onto normal ImageNet images. -% Since such images do not have straight forward classification labels, we test multiple possibilities. -We observe 3 large jumps in accuracy: (\textbf{1}) From our \emph{range} foreground size variation (+11.4\%), (\textbf{2}) from using our infilled backgrounds instead of images from the dataset (+25.7\%), and (\textbf{3}) from our \emph{cos} mixing strategy with non-augmented images (+12.5\%). -\schemename's changes to the naive copy-paste augmentation are thus imperative for good classification performance. - -\begin{figure}[t] - \begin{minipage}[c]{.49\textwidth} - \centering - \includegraphics[width=\textwidth]{img/strategy.pdf} - \captionof{figure}{We compare Original, Same-class, and All-classes background selection using ViT-Ti and ViT-S backbones on TinyImageNet. - Increasing background diversity consistently improves classification accuracy. - } - \label{fig:background-strategy} - \end{minipage} - \hfill - \begin{minipage}[c]{.49\textwidth} - \centering - \includegraphics[width=\textwidth]{img/mask_expansion.pdf} - \captionof{figure}{ - We vary the foreground mask area for TinyImageNet by shrinking or expanding masks relative to the original outline and report accuracy when training on $100\%$ augmented samples. - Performance is stable for expanded masks and degrades rapidly after shrinking masks. - } - \label{fig:mask-expansion} - \end{minipage} -\end{figure} - -\textbf{Background Choice Strategy.} -\Cref{fig:background-strategy} shows the effect of background selection on TinyImageNet accuracy, where we trade off diversity against context plausibility. 
-% Using the original inpainted background yields the lowest accuracy, indicating limited regularization from contextual cues. -% Sampling backgrounds from the same class provides a modest but consistent improvement, suggesting that mild context variation encourages robustness while preserving semantic plausibility. -The best performance is achieved by sampling backgrounds from all classes, which introduces substantial context shifts, but leads to the strongest accuracy gains for both ViT-Ti and ViT-S. -Thus, aggressive background diversification is more important than context plausibility and acts as an effective form of context-based regularization rather than introducing harmful noise. - -\textbf{Label Integrity.} -% We assess the label integrity of \schemename, i.e., whether object labels remain correct after recombination, by verifying that the intended object is accurately extracted. -% To this end, we leverage the object bounding box annotations provided in the ImageNet validation set. -% Specifically, we compute the \emph{box precision}, defined as the fraction of the predicted mask area that lies within the ground-truth bounding box, obtaining a mean value of $91\%$. -% In addition, we measure the \emph{box-to-box IoU}, computed as the IoU between the tight bounding box enclosing the predicted mask and the tight bounding box of the ground-truth annotation, which yields a high $76.1\%$. -% Qualitative examples of the predicted masks and bounding boxes are provided in the supplementary material. -% We additionally test label integrity under systematic mask perturbations by expanding or shrinking the foreground masks before composition. -% Concretely, starting from the original outline, we erode or dilate the mask such that the foreground area changes by some percentage. 
-% \Cref{fig:mask-expansion} shows that accuracy is relatively stable for expanded masks, but drops off significantly for eroded masks, consistent with cropping away semantically important object parts. -% This experiment suggests, that \schemename is relatively robust to artifacts from including an object's original background in the foreground mask. -% Overall, these results indicate that the segmentation stage of \schemename reliably isolates the target class object, thereby preserving label correctness after recombination. -To quantify whether recombined images still depict the intended class, we evaluate the segmentation stage of \schemename on ImageNet validation boxes. -Our predicted masks achieve a mean box precision of $91.0\%$ (fraction of mask area inside the ground-truth bounding boxes of the ImageNet validation set) and a high box-to-box IoU of $76.1\%$, indicating that they tightly capture the target object. -Qualitative examples of the predicted masks and bounding boxes are provided in the supplementary material. -We further probe robustness to mask imprecision by eroding or dilating masks such that the foreground area changes by a fixed percentage before composition. -As shown in \Cref{fig:mask-expansion}, accuracy remains stable for expansions but drops sharply under erosion, consistent with removing semantically important object parts. -Together, these results suggest that (\textit{i}) \schemename reliably isolates the target objects and preserves label integrity and that (\textit{ii}) \schemename is robust to artifacts from an object's original background and degrades mainly when the foreground no longer contains the full object. - +Models trained on \name maintain better performance even with smaller foreground objects, whereas ImageNet-trained models exhibit a more rapid performance decline. +Therefore, \name-training improves robustness to variations in object scale. 
diff --git a/sec/intro.tex b/sec/intro.tex index 31d5639..b6f700d 100644 --- a/sec/intro.tex +++ b/sec/intro.tex @@ -3,69 +3,58 @@ \section{Introduction} \label{sec:intro} -% \begin{figure} -% \centering -% \includegraphics[width=.5\columnwidth]{img/fig-1.pdf} -% \caption{\schemename factorizes each training image into a foreground object and a background, then recombines them on the fly while controlling background identity, object position, and object scale. Standard, strong augmentations are applied afterwards.} -% \label{fig:fig-1} -% \end{figure} -\begin{table}[t] - \caption{Examples of \schemename generated images (center cropped) from ImageNet. - We successfully segment even multiple objects (\textit{Macaw}) and complex shapes (\textit{Cricket}).} - \label{tab:foraug-examples} - \centering - \resizebox{.9\textwidth}{!}{ - \begin{tabular}{ccccc} - \toprule - Class & \makecell{Original \\Image} & \makecell{Extracted \\Foreground} & \makecell{Infilled \\Background} & Recombined Examples \\ - \midrule - Macaw & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_v1_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01818515_31507_v1_bg.JPEG} & \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01818515_31507_recombined_v12.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01818515_31507_recombined_v15.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01818515_31507_recombined_v18.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01818515_31507_recombined_v3.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01818515_31507_recombined_v4.JPEG} \includegraphics[width=.1\columnwidth, 
valign=c]{img/foraug_examples/n01818515_31507_recombined_v6.JPEG} \\ - % Conch & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n01943899_20070_bg.JPEG} & \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v9.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v10.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v11.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v12.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v17.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n01943899_20070_recombined_v8.JPEG} \\ - Cricket & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n02229544_6170_bg.JPEG} & \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n02229544_6170_recombined_v0.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n02229544_6170_recombined_v10.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n02229544_6170_recombined_v15.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n02229544_6170_recombined_v16.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n02229544_6170_recombined_v2.JPEG} \includegraphics[width=.1\columnwidth, 
valign=c]{img/foraug_examples/n02229544_6170_recombined_v6.JPEG} \\ - Laptop & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615.JPEG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_fg.PNG} & \includegraphics[max width=.1\columnwidth, max height=2cm, valign=c]{img/appendix_examples/n03642806_3615_bg.JPEG} & \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v0.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v1.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v11.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v14.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v15.JPEG} \includegraphics[width=.1\columnwidth, valign=c]{img/foraug_examples/n03642806_3615_recombined_v2.JPEG} \\ - \bottomrule - \end{tabular} - } -\end{table} +% \begin{itemize} +% \item General Into Image classification +% \item ImageNet +% \item CNNs $\to$ Transformers +% \item Traditional Data Augmentation: CNNs +% \item Problems with that: Other model properties of Transformers +% \item Our approach: Recombining ImageNet forgrounds and backgrounds +% \end{itemize} +\begin{figure} + \centering + \includegraphics[width=\columnwidth]{img/fig-1.pdf} + \caption{Comparison of \name and ImageNet. \name recombines foreground objects with different backgrounds each epoch, thus creating a more diverse training set. 
We still apply traditional data augmentation afterwards.} + \label{fig:fig-1} +\end{figure} -Large-scale image classification is a central driver of modern computer vision: it benchmarks progress in computer vision~\cite{Khan2022,Rangel2024}, powers model pretraining~\cite{Dosovitskiy2021,Liu2021,Touvron2021b}, and yields representations that transfer broadly and underpin applications like medical diagnosis~\cite{Sanderson2022,Vezakis2024}, autonomous driving~\cite{Wang2023a}, and object recognition~\cite{Carion2020,He2017,Girshick2014}. -However, classification supervision is weak in an important sense: the label does not specify \emph{how} the class-object should appear. -In ImageNet~\cite{Deng2009} for example, objects often occur at characteristic positions and scales and co-occur with correlated scene context~\cite{Fatima2025,Barbu2019}. -% In datasets such as ImageNet, objects often occur at characteristic positions and scales and co-occur with correlated scene context~\cite{Fatima2025,Barbu2019}. -As a result, models rely on shortcuts like background cues, center bias, or size bias, that boost in-distribution accuracy but hurt robustness and transfer~\cite{Geirhos2020,Fatima2025,Barbu2019}. +Image classification, a fundamental task in computer vision (CV), involves assigning a label to an image from a predefined set of categories. +This seemingly simple task underpins a wide range of applications, including medical diagnosis~\cite{Sanderson2022,Vezakis2024}, autonomous driving~\cite{Wang2022b}, and object recognition~\cite{Carion2020,He2017,Girshick2013}. +Furthermore, image classification is used for large-scale pretraining of vision models~\cite{Dosovitskiy2021,Liu2021,Touvron2021b} and to judge the progress of the field of CV \cite{Khan2022, Rangel2024}. 
+The advent of large-scale datasets, particularly ImageNet \cite{Deng2009}, containing millions of labeled images across thousands of categories, has been instrumental in driving significant progress in this field. +ImageNet served as a catalyst for the rise of large-scale CV models~\cite{Krizhevsky2012, He2016} and remains the most important CV benchmark for more than a decade \cite{Krizhevsky2012,Touvron2022, Wortsman2022, He2016}. +% It is used to train and evaluate the best models in the field. -Here, data augmentation is the default defense. -Standard transformations (crop/flip/color jitter) and stronger policies such as MixUp~\cite{Zhang2018a}/CutMix~\cite{Yun2019} and automated augmentation search~\cite{Cubuk2019,Cubuk2020} expand appearance diversity~\cite{Shorten2019,Xu2023d}. % , yet they largely preserve the original \emph{composition} of each image~\cite{Shorten2019,Xu2023d}. -However, their ability to teach spatial and compositional invariances is limited. -This constraint matters especially for Vision Transformers (ViTs)~\cite{Dosovitskiy2021}: with weaker built-in spatial inductive biases than Convolutional Neural Networks (CNNs), ViTs must learn key equivariances (e.g., translation and scale robustness) primarily from data. -Copy-paste style augmentations~\cite{Ghiasi2021,Kang2022} alter composition more aggressively by overlaying segmented objects onto other images. -These are typically designed for detection or instance segmentation and rely on dense human annotations available for these tasks or use unconstrained dataset images as backgrounds. -As a result, they do not offer fine-grained control of object position and scale, and they do not explicitly enforce that the pasted background is semantically neutral, creating ambiguous labels for classification. 
+While traditionally, convolutional neural networks (CNNs) have been the go-to architecture for image classification, Transformers \cite{Vaswani2017}, particularly the Vision Transformer (ViT) \cite{Dosovitskiy2021}, have emerged as a powerful alternative. +These attention-based models have demonstrated superior performance in various vision tasks, including image classification \cite{Wortsman2022,Yu2022,Carion2020,Zong2022,Wang2022a}. -To encode compositional invariances directly in the training data, we propose \emph{Foreground-Background Augmentation} (\schemename), a controlled composition augmentation that \emph{explicitly factorizes each image into foreground and background, then recombines them for label-preserving, interpretable distribution shifts}. -Concretely, \schemename uses off-the-shelf segmentation and inpainting models to (i) extract a foreground object and synthesize a class-consistent, semantically neutral background, and (ii) paste the foreground onto diverse neutral backgrounds while controlling its position and scale (see \Cref{tab:foraug-examples}). -Unlike prior copy-paste methods that simply overlay objects onto arbitrary scenes~\cite{Ghiasi2021,Ghiasi2021,Kang2022}, \schemename first removes and neutralizes the original background, then samples from well-defined distributions of backgrounds, object positions, and object sizes. -This explicit factorization preserves a clean label for the recombined image while providing direct control over compositions, enabling us to break spurious correlations while still fitting seamlessly into modern strong augmentation pipelines. % (see \Cref{fig:fig-1}). -% Throughout, we apply \schemename on top of strong augmentation pipelines (RandAugment, Mixup, CutMix), so any gains are complementary to these widely used techniques. -% As it is important that any gains are complementary to strong augmentation pipelines (RandAugment, MixUp, CutMix), we apply \schemename on top of these widely used techniques. 
-To ensure that all gains are complementary to strong augmentation pipelines (RandAugment, MixUp, CutMix), we apply \schemename on top of these widely used techniques. +Data augmentation is a key technique for training image classification models. +% A key technique for training image classification models, especially with limited data, is data augmentation. +Traditional data augmentation methods, such as random cropping, flipping, and color jittering, are commonly employed to increase the diversity of the training data and improve the model's performance~\cite{Xu2023d, Shorten2019}. +These basic transformations, originally designed for CNNs, change the input images in a way that preserves their semantic meaning~\cite{Alomar2023}. +However, the architectural differences of CNNs and Transformers suggest that the latter might benefit from different data augmentation strategies. +In particular, the Transformer's self-attention mechanism is not translation equivariant~\cite{RojasGomez2023,Ding2023a}, meaning that the model does not inherently understand the spatial relationships between pixels. +% This creates the need for novel data augmentation strategies tailored to the Transformer architecture. +% This fact opens a new design space for data augmentation strategies to help Transformers understand the basic invariances of image classification. -Empirically, \schemename yields consistent accuracy gains across architectures, improving ImageNet top-1 accuracy by up to 6 p.p. and fine-grained downstream accuracy by up to 7.3 p.p., and even improving transfer when ImageNet accuracy is matched. -Beyond accuracy, training with \schemename substantially improves robustness on standard distribution-shift benchmarks, where we observe gains of roughly $2-19$ p.p. across ViT, Swin, and ResNet architectures. 
+Inspired by this inductive bias of CNNs, that is not inherent to ViTs, we propose \schemename, a novel data augmentation scheme for image classification which makes the translation equivariance of CNNs explicit in the training data by recombining foreground objects at varying positions with different backgrounds. +% In this paper, we address the challenge of effectively training Transformers for image classification by proposing \schemename, a novel data augmentation scheme for image classification, which combines foreground objects with different backgrounds. +Applying \schemename to ImageNet gives rise to \name, a novel dataset that enables this data augmentation with fine-grained control over the image composition. +Recognizing that Transformers need to learn the spatial relationships from data, since they are not inherently translation invariant, and in general are usually trained on larger datasets~\cite{Kolesnikov2020}, we separate the foreground objects in ImageNet from their backgrounds, using an open-world object detector~\cite{Ren2024}, and fill in the background in a plausible way using an object removal model~\cite{Sun2024,Suvorov2021}. +This allows us to recombine any foreground object with any background on the fly, creating a highly diverse training set. +During recombination, we can control important parameters, like the size and position of the foreground object, to help the model learn the spatial invariances necessary for image classification. +We show that training on \name instead of ImageNet increases the model accuracy of Transformers by up to 4.5 p.p. on ImageNet and reduces the error rate by up to $39.3\%$ on downstream tasks. -Finally, the same control knobs enable \schemename to become a targeted diagnostic tool of shortcut reliance and model robustness.
-We quantify background reliance via controlled background swaps, and probe center and size biases through systematic position and scale sweeps, showing that training with \schemename reduces model biases. +Additionally, \schemename is a useful tool for analyzing model behavior and biases, when used during the evaluation phase. +We utilize our control over the image distribution to quantify a model's background robustness (by varying the choice of background), foreground focus (by leveraging our knowledge about the placement of the foreground object), center bias (by controlling the object's position), and size bias (by controlling object size). +These analyses provide insights into model behavior and biases, which is crucial for model deployment and future robustness optimizations. +We show that training on \name, instead of ImageNet, significantly reduces all of these biases, completely removing the models' dependence on the background distribution. +We make our code for \schemename and the \name-dataset publicly available\footnote{Link will go here.} to facilitate further research. -\medskip -\noindent -\textbf{Contributions} -\begin{itemize}[topsep=0pt] - \item \textbf{Controlled composition augmentation for classification.} - We introduce \schemename, a foreground-background factorization and recombination scheme for image classification that creates label-preserving training samples with explicit control over background identity, object position, and object scale. - \item \textbf{Accuracy and transfer gains.} - Training with \schemename, in addition to standard strong augmentation pipelines, improves ImageNet top-1 accuracy by up to 6 p.p., boosts fine-grained downstream accuracy by up to 7.3 p.p. and increases accuracy on shifted distributions by up to $19$ p.p. 
- \item \textbf{Controlled bias diagnostics and mitigation.} - Using the same controls during evaluation, we measure background reliance, foreground focus, and position/scale biases through targeted distribution shifts. - \schemename systematically reduces shortcut behaviors and model biases. +\subsection*{Contributions} +\begin{itemize} + \item We propose \schemename, a novel data augmentation scheme, that recombines objects and backgrounds to train Transformers for image classification. + \item We show that training on \name, the ImageNet instantiation of \schemename, leads to 4.5 p.p. improved accuracy on ImageNet and 7.3 p.p. on downstream tasks. + \item We propose novel \schemename-based metrics to analyze and quantify fine-grained biases of trained models: Background Robustness, Foreground Focus, Center Bias, and Size Bias. Training on \name, instead of ImageNet, significantly reduces these biases. \end{itemize} \ No newline at end of file diff --git a/sec/intro_old.tex b/sec/intro_old.tex deleted file mode 100644 index 809647a..0000000 --- a/sec/intro_old.tex +++ /dev/null @@ -1,73 +0,0 @@ -% !TeX root = ../main.tex - -\section{Introduction} -\label{sec:intro} - -% \begin{itemize} -% \item General Into Image classification -% \item ImageNet -% \item CNNs $\to$ Transformers -% \item Traditional Data Augmentation: CNNs -% \item Problems with that: Other model properties of Transformers -% \item Our approach: Recombining ImageNet forgrounds and backgrounds -% \end{itemize} - -\begin{figure} - \centering - \includegraphics[width=.5\columnwidth]{img/fig-1.pdf} - \caption{Comparison of traditional image classification training and training when using \schemename. \schemename recombines foreground objects with different backgrounds each epoch, thus creating a more diverse training set.
We still apply strong traditional data augmentation afterwards.} - \label{fig:fig-1} -\end{figure} - -Image classification, a fundamental task in computer vision (CV), involves assigning labels to images from a set of categories. -It underpins a wide range of applications, like medical diagnosis~\cite{Sanderson2022,Vezakis2024}, autonomous driving~\cite{Wang2023a}, and object recognition~\cite{Carion2020,He2017,Girshick2014} and facilitates large-scale pretraining~\cite{Dosovitskiy2021,Liu2021,Touvron2021b}, and progress evaluation in CV~\cite{Khan2022, Rangel2024}. -% Furthermore, image classification is used for large-scale pretraining of vision models~\cite{Dosovitskiy2021,Liu2021,Touvron2021b} and to judge the progress of the field of CV \cite{Khan2022, Rangel2024}. -The advent of large-scale datasets, particularly ImageNet~\cite{Deng2009}, served as a catalyst for the rise of large-scale CV models~\cite{Krizhevsky2012, He2016} and remains the most important CV benchmark for more than a decade \cite{Krizhevsky2012,Touvron2022, Wortsman2022, He2016}. -% containing millions of labeled images across thousands of categories, has been instrumental in driving significant progress in this field. -% ImageNet served as a catalyst for the rise of large-scale CV models~\cite{Krizhevsky2012, He2016} and remains the most important CV benchmark for more than a decade \cite{Krizhevsky2012,Touvron2022, Wortsman2022, He2016}. -% It is used to train and evaluate the best models in the field. -While traditionally, convolutional neural networks (CNNs) have been the go-to architecture in CV, Transformers \cite{Vaswani2017}, particularly the Vision Transformer (ViT) \cite{Dosovitskiy2021}, have emerged as a powerful alternative and go-to architecture, demonstrating -% These attention-based models have demonstrated -superior performance in various vision tasks, including image classification \cite{Wortsman2022,Yu2022,Carion2020,Zong2023,Wang2023b}. 
- - - -Data augmentation is a key technique for training image classification models. -% A key technique for training image classification models, especially with limited data, is data augmentation. -Traditional augmentation methods, such as cropping, flipping, or color shifts, are commonly employed to increase data diversity~\cite{Xu2023d, Shorten2019}, but remain bound to existing image compositions. -While these preserve the images' semantic meaning, their ability to teach spatial invariances is limited. -% the diversity of the training data and improve the model's performance~\cite{Xu2023d, Shorten2019}. -% These basic transformations, originally designed for CNNs, change the input images in a way that preserves their semantic meaning~\cite{Alomar2023}, but are limited to existing image compositions. -While combinations of these data augmentations are still used today, they originally were proposed to benefit CNNs. -However, the architectural differences of CNNs and Transformers suggest that the latter might benefit from different data augmentation strategies. -In particular, the self-attention mechanism, unlike a CNN, is not translation equivariant~\cite{RojasGomez2023,Ding2023a}, meaning that the model is not designed to understand the spatial relationships between pixels. -% This creates the need for novel data augmentation strategies tailored to the Transformer architecture. -% This fact opens a new design space for data augmentation strategies to help Transformers understand the basic invariances of image classification. -% Note that these traditional data augmentations are also limited by existing image compositions. - -Recognizing that Transformers need to learn spatial relationships directly from data, -% and in general are usually trained on larger datasets~\cite{Kolesnikov2020}, -we propose \schemename, a data augmentation method that makes these relationships explicit by recombining foreground objects with diverse backgrounds. 
-Thus, \schemename goes beyond existing image compositions and encodes desired invariances directly into the training data (see \Cref{fig:fig-1}). -% Inspired by this inductive bias of CNNs, that is not inherent to ViTs, we propose \schemename, a novel data augmentation scheme for image classification which makes the translation equivariance of CNNs explicit in the training data by recombining foreground objects at varying positions with different backgrounds. -% In this paper, we address the challenge of effectively training Transformers for image classification by proposing \schemename, a novel data augmentation scheme for image classification, which combines foreground objects with different backgrounds. -% Applying \schemename to ImageNet gives rise to \name, a novel dataset that enables this data augmentation with with fine-grained control over the image composition. -Applying \schemename to a dataset like ImageNet is a two-step process: -(1)~We separate the foreground objects in ImageNet from their backgrounds, using an open-world object detector~\cite{Ren2024} and fill in the background in a neutral way using an object removal model~\cite{Sun2025,Suvorov2022}. -(2)~This allows us to then recombine any foreground object with any background on the fly, creating a highly diverse training set. -% During recombination, we can control important parameters, like the size and position of the foreground object, to help the model learn the spatial invariances necessary for image classification. -By exploiting the control over foreground size and position during recombination, \schemename explicitly teaches spatial invariances that image classification models typically must learn implicitly. -We show that using \schemename additionally to strong traditional data augmentation increases the model accuracy of Transformers by up to 4.5 p.p. on ImageNet and reduces the error rate by up to $7.3$ p.p. in downstream tasks. 
- -Beyond training, \schemename becomes a diagnostic tool for analyzing model behavior and biases, when used during evaluation. -We utilize our control over the image distribution to measure a model's background robustness (by varying the choice of background), foreground focus (by leveraging our knowledge about the placement of the foreground object), center bias (by controlling position), and size bias (by controlling size). -These analyses provide valuable insights into model behavior and biases, which is crucial for model deployment and future robustness optimizations. -We show that training using \schemename significantly reduces all of these biases. -We make our code for \schemename and the output of \schemename's segmentation phase on ImageNet publicly available\footnote{Link will go here.} to facilitate further research. - -\subsection*{Contributions} -\begin{itemize} - \item We propose \schemename, a novel data augmentation scheme, that recombines objects and backgrounds. \schemename allows us to move beyond the (possibly biased) image compositions in the dataset while preserving label integrity. - \item We show that training a standard ViT using \schemename leads to up to 4.5 p.p. improved accuracy on ImageNet-1k and 7.3 p.p. on downstream tasks. - \item We propose novel \schemename-based metrics to analyze and quantify fine-grained biases of trained models: Background Robustness, Foreground Focus, Center Bias, and Size Bias. We show that \schemename significantly reduces these biases by encoding invariance that benefits ViT into the training data. -\end{itemize} \ No newline at end of file diff --git a/sec/method.tex b/sec/method.tex index ba6cd4a..fa10484 100644 --- a/sec/method.tex +++ b/sec/method.tex @@ -1,22 +1,6 @@ % !TeX root = ../main.tex -%\begin{figure*}[ht!] -% \centering -% \includegraphics[width=.9\textwidth]{img/fig-2.pdf} -% \caption{Overview of \name. 
The data creation consists of two stages: (1, offline) Segmentation, where we segment the foreground objects from the background and fill in the background. (3, online) Recombination, where we combine the foreground objects with different backgrounds to create new samples. After recombination, we apply strong, commonly used augmentation policies.} -% \label{fig:method} -%\end{figure*} - -\begin{figure*}[t] - \centering - \includegraphics[width=\textwidth]{img/fig-2.pdf} - \caption{Overview of \schemename. - We segment the foreground object and inpaint the removed region to obtain a neutral background (Offline, \Cref{sec:segmentation}). - We then paste the foreground onto a sampled background while controlling position and scale, then apply standard strong traditional augmentations (Online, \Cref{sec:recombination}).} - \label{fig:method} -\end{figure*} - -\section{\schemename} +\section{\schemename (Method)} \label{sec:method} % \begin{itemize} @@ -35,103 +19,83 @@ % \item Dealing with other data augmentations/transformations % \end{itemize} +\begin{figure*} + \centering + \includegraphics[width=\textwidth]{img/fig-2.pdf} + \caption{Overview of \name. The data creation consists of two stages: (1, offline) Segmentation, where we segment the foreground objects from the background and fill in the background. (2, online) Recombination, where we combine the foreground objects with different backgrounds to create new samples.} + \label{fig:method} +\end{figure*} + % We propose a novel dataset, called \name, that improves image classification performance by explicitly separating and recombining foreground objects and plain backgrounds. % \name consists of two stages: Segmentation and recombination. Both are visualized in \Cref{fig:method}. -% We introduce \schemename, a data augmentation scheme designed to enhance Transformer training by explicitly separating and recombining foreground objects and backgrounds. 
-% \schemename enhances transformer training by explicitly encoding spatial invariances that these need to learn explicitly in the data. -% \schemename involves two stages: Segmentation and Recombination, both visualized in \Cref{fig:method}. -We introduce \schemename, a data augmentation designed to enhance training by embedding spatial invariances, which Transformers would otherwise need to learn implicitly, directly into the training data. -% It operates by explicitly segmenting and recombining foreground objects and backgrounds. -\schemename comprises two distinct stages: Segmentation and Recombination. Both are illustrated in \Cref{fig:method}. +We introduce \schemename, a data augmentation scheme designed to enhance Transformer training by explicitly separating and recombining foreground objects and backgrounds. +\schemename involves two stages: Segmentation and Recombination, both visualized in \Cref{fig:method}. - -\subsection{Segmentation} -\label{sec:segmentation} -The offline segmentation stage produces reusable assets for recombination. -% The segmentation stage isolates the foreground objects and their corresponding backgrounds. -For each labeled training image, we create a pair $(\mathrm{fg},\mathrm{bg})$ consisting of (\textit{i}) a foreground cut-out $\mathrm{fg}$ with an alpha mask and (\textit{ii}) an inpainted background image $\mathrm{bg}$ where the foreground region has been removed. +\subsubsection*{Segmentation} +The segmentation stage isolates the foreground objects and their corresponding backgrounds. +We then fill in the background in a visually plausible way~\cite{Sun2024} using a pretrained object-removal model. This stage is computed once offline and the results are stored for the recombination stage. -\textbf{Generate candidate foreground masks.} -We obtain foreground candidates with Grounded SAM~\cite{Ren2024} (Grounding DINO~\cite{Liu2024a} + SAM~\cite{Kirillov2023}). 
-We leverage the dataset label by prompting the model with ``\code{a , a type of }''. -Here \code{} is the immediate WordNet hypernym of the class (e.g., ``sorrel'' $\rightarrow$ ``horse''), which improves robustness when the class name is rare or overly specific. -This can be the case with prompts like ``sorrel'' or ``guenon'', where the more general name ``horse'' or ``monkey'' is more ubiquitous. -To increase recall, we generate up to $N=3$ masks per image by iteratively moving one level up the hypernym chain (e.g., ``sorrel'' $\rightarrow$ ``horse'' $\rightarrow$ ``equine'' $\dots$). -We merge near-duplicate masks with pairwise IoU $\ge 0.9$, yielding a small set of $n_i, a type of }'', where \code{} is the specific name of the objects class as defined by the dataset and \code{} is a the broader category of the object. +The \code{} guides the segmentation model towards the correct object in case the \code{} alone is too specific. +This can be the case with prompts like ``sorrel'' or ``guenon'', where the more general name ``horse'' or ``monkey'' is more helpful. +We derive the \code{} from the WordNet hierarchy, using the immediate hypernym. + +We iteratively extract up to $n$ foreground masks for each dataset-image, using different more and more general prompts based on the more general synsets of WordNet (e.g. ``a sorrel, a type of horse'', ``a horse, a type of equine'', ...). +Masks that are very similar, with a pairwise IoU of at least $0.9$, are merged. +The output is a set of masks delineating the foreground objects and the backgrounds. We select the best mask per image (according to \Cref{eq:filtering-score}) in a later filtering step, described below. -\textbf{Create neutral backgrounds via object removal.} -Given a candidate mask, we remove the masked region and inpaint it using an object-removal model (LaMa~\cite{Suvorov2022} or Attentive Eraser~\cite{Sun2025}). 
-This produces a visually plausible, ``neutral'' candidate background that can be paired with many foregrounds. -For an image $i$ we now have $n_i$ foreground objects, extracted from $i$ by cutting out the masked region, each paired with a background where the same mask has been infilled. - -\textbf{Select a high-quality pair.} -Different masks can trade off including the full object versus leaking class cues into the background. -We therefore score each candidate pair using an ensemble $E$ of six pretrained classifiers (ViT/ResNet/Swin) trained on the original dataset. -Intuitively, we prefer (\textit{i}) foregrounds that strongly support the ground-truth class and (\textit{ii}) backgrounds that do \emph{not} support the ground-truth class, while (\textit{iii}) discouraging overly large foreground regions. -For each model $m \in E$, we compute the class scores of the ground truth class $c$, $\P[m(\mathrm{fg})=c]$ on the foreground (with solid-gray background) and $\P[m(\mathrm{bg})=c]$ on the background and combine them with a prior $\operatorname{size}(\cdot)$ (pixel count): - +An inpainting model that is specifically optimized to remove objects from images, such as LaMa~\cite{Suvorov2021} or Attentive Eraser~\cite{Sun2024}, is used to inpaint the foreground regions in the backgrounds. +To ensure the quality of the foreground and background images (for each dataset-image), we select a foreground/background pair from the $\leq n$ variants we have extracted and infilled in the previous steps. 
+Using an ensemble of six ViT, ResNet, and Swin Transformer models pretrained on the original dataset, we select the foreground/background pair that maximizes foreground performance while minimizing the performance on the background and size of the foreground according to: \begin{align} \begin{split} \label{eq:filtering-score} - \text{score}(\mathrm{fg}, \mathrm{bg}, c) &= \log \left( \sum_{m \in E} \frac{\P[m(\mathrm{fg}) = c]}{\abs{E}} \right) - + \log \left( 1 - \sum_{m \in E} \frac{\P[m(\mathrm{bg}) = c]}{\abs E} \right) \\ + \text{score}(\mathrm{fg}, \mathrm{bg}, c) &= \log \left( \frac{1}{\abs{E}} \sum_{m \in E} \P[m(\mathrm{fg}) = c] \right) \\ + & + \log \left( 1 - \frac{1}{\abs E} \sum_{m \in E} \P[m(\mathrm{bg}) = c] \right) \\ & + \lambda \log \left( 1 - \abs{\frac{\operatorname{size}(\mathrm{fg})}{\operatorname{size}(\mathrm{bg})} - \eps} \right). \end{split} \end{align} -% We set $\lambda = 2$ and $\eps = 0.1$ via a small hyperparameter search on a manually annotated subset. -We run a hyperparameter search using a manually annotated subset of foreground/background variants to find the factors in \Cref{eq:filtering-score}: $\lambda = 2$ and $\eps = 0.1$. -For each image, we keep the candidate mask with the highest score. +Here, $E$ is the ensemble of models and $m$ is a pretrained model, $c$ is the correct foreground class, $\mathrm{fg}$, and $\mathrm{bg}$ are the foreground and background and $\operatorname{size}(\cdot)$ is the size in number of pixels. +We ran a hyperparameter search using a manually annotated subset of foreground/background variants to find the factors in \Cref{eq:filtering-score}: $\lambda = 2$ and $\eps = 0.1$. +The \textit{optimal foreground size} of $10\%$ of the full image balances the smallest possible foreground size that encompasses all the respective class information in the image with still conveying the foreground information after pasting it onto another background. 
+This filtering step ensures we segment all the relevant foreground objects. -\textbf{Filter low-quality backgrounds.} -Finally, we discard backgrounds that are heavily ($\geq 80\%$) inpainted, as they tend to look synthetic and provide little useful diversity (see supplementary). -This step filters out $10\%$ of backgrounds. +Finally, we filter out backgrounds that are more than $80\%$ infilled, as these tend to be overly synthetic, plain and don't carry much information (see the supplementary material). +We ablate this choice in \Cref{sec:ablation}. +In summary, we factorize the dataset into a set of foreground objects with a transparent background and a set of diverse backgrounds per class. +The next step is to recombine them as data augmentation before applying common data augmentation operations during training. -Although segmentation is the main computational overhead, it is performed once offline and reused across all training runs. -On NVIDIA H100 GPUs, the segmentation stage computes at a rate of $5 338.6 \frac{\text{img}}{\text{GPU} \times \text{h}}$ when inpainting with LaMa. -For ImageNet this comes down to just under $30$ hours on a single node. -At roughly twice the cost of a single ViT-B training run ($\approx 14$ hours), this is a modest investment that is amortized over every subsequent experiment the dataset is used in. -For details see the supplementary material. -% Compare this to $\approx 14$ hours for training ViT-B on ImageNet once. -The output of the segmentation stage is a collection of foreground cut-outs (with transparency) and a pool of diverse, neutral backgrounds, which we use in the online recombination stage. -For ImageNet, we provide pre-computed segmentation output\footnote{\code{URL will go here}}. +\subsubsection*{Recombination} +The recombination stage, which is performed online, combines the foreground objects with different backgrounds to create new training samples. 
+For each object, we follow the pipeline of: Pick an appropriate background, resize it to a fitting size, place it in the background image, smooth the transition edge, and apply other data augmentations. -\subsection{Recombination} -\label{sec:recombination} -In each epoch, the recombination stage generates a recombined training sample for each foreground by (\textit{i}) choosing a background, (\textit{ii}) choosing a target foreground size, (\textit{iii}) sampling a placement, and (\textit{iv}) pasting the foreground using its alpha mask. -This exposes the model to controlled changes in context and spatial layout that are largely absent from standard augmentation. +For each foreground object, we sample a background using one of the following strategies: +(1) the original image background, (2) the set of backgrounds from the same class, or (3) the set of all possible backgrounds. +These sets are trading off the amount of information the model can learn from the background against the diversity of new images created. +In each epoch, each foreground object is seen exactly once, but a background may appear multiple times. -\textbf{Background sampling.} -For each foreground object, we draw a background using one of three increasingly challenging strategies: -(\textit{i}) \textit{Original}: use the object's own inpainted background (no context shift); -(\textit{ii}) \textit{Same-class}: sample a background from the pool of backgrounds belonging to the same class (slight, but plausible context shift); -(\textit{iii}) \textit{All-classes}: sample from the pool of all inpainted backgrounds (large context shift). -These strategies trade off context diversity against semantic plausibility. -We ensure that each foreground is used exactly once per epoch; backgrounds may repeat. +The selected foreground is resized based on its relative size within its original image and the relative size of the original foreground in the selected background image. 
+The final size is randomly selected from a 30\% range around upper and lower limits ($s_u$ and $s_l$), based on the original sizes: +\begin{align} + s \sim \mathcal U \left[ (1 - 0.3) s_l, (1 + 0.3) s_u \right]. +\end{align} +To balance the size of the foreground and that of the background's original foreground, the upper and lower limits $s_u$ and $s_l$ are set to the mean or range of both sizes, depending on the foreground size strategy: \emph{mean} or \emph{range}. -\textbf{Foreground scaling.} -Let $r_{\text{fg}}$ denote the relative foreground area in the source image of the foreground, and $r_{\text{bg}}$ the relative foreground area in the source image of the background. % of the \emph{original} foreground (before inpainting) in the chosen background image. -We compute the lower/upper size limits $(s_l, s_u)$ from these two ratios using one of two variants: -(\textit{i}) \emph{mean} sets $(s_l, s_u)$ using the mean of $r_{\text{fg}}$ and $r_{\text{bg}}$, while -(\textit{ii}) \emph{range} uses the min/max to preserve a wider scale range. -Then, we sample the final scale from a $\pm 30\%$ interval around them and resize the foreground to this scale, while keeping the aspect ratio. +The resized foreground is then placed at a random position within the background image. +This position is sampled from a generalization of the Bates distribution~\cite{Bates1955} with parameter $\eta \in \Z$, visualized in \Cref{fig:bates-pdf}. +We choose the Bates distribution, as it presents an easy way to sample from a bounded domain with just one hyperparameter that controls the concentration of the distribution. +$\eta = 1$ corresponds to the uniform distribution; $\eta > 1$ concentrates the distribution around the center; and for $\eta < -1$, the distribution is concentrated at the borders.
+To more seamlessly integrate the foreground, we apply a Gaussian blur with ${\sigma \in [\frac{\sigma_{\text{max}}}{10}, \sigma_{\text{max}}]}$, inspired by the standard range for the Gaussian blur operation in \cite{Touvron2022}, to the foreground's alpha-mask. + +We can apply standard data augmentation techniques in two modes: +Either we apply all augmentations to the recombined image, or we apply the cropping and resizing to the background only and then apply the other augmentations after recombination. +% While for the second mode, the foreground object will always be fully visible, the first mode uses the data augmentations in the same way they would be used for the baseline dataset. +The second mode ensures the foreground object remains fully visible, while the first mode mirrors standard data augmentation practices. -\textbf{Placement and boundary smoothing.} -We paste the resized foreground at a uniformly random location within the background. -To reduce cut-and-paste artifacts, we slightly soften the alpha mask boundary by applying a Gaussian blur with $\sigma \in [\frac{\sigma_{\text{max}}}{10}, \sigma_{\text{max}}]$, following the range used in modern augmentation~\cite{Touvron2022}. -% For example recombined images see \Cref{tab:foraug-examples}. - -% \textbf{Interaction with standard augmentation.} -% We support two augmentation orders: -% (\textit{i}) apply the full augmentation pipeline after recombination; or -% (\textit{ii}) apply crop+resize to the background first (to keep the full foreground visible), then recombine, then apply the remaining augmentations. -% The former matches standard training exactly; the latter isolates composition changes from random cropping. - -\textbf{Mixing with original images.} -We optionally mix recombined samples with unmodified dataset images. -A mixing ratio $p$ acts as the probability of drawing the original image; otherwise we use its foreground and apply \schemename. 
-We consider constant $p$ as well as linear/cosine schedules that increase $p$ over training. -Finally, we apply standard data augmentation techniques on the resulting images. - -The online recombination is CPU-parallel and does not measurably increase training time. -We find a $\approx 1\%$ increase in average step-time (see supplementary). +We experiment with a constant mixing ratio, or a linear or cosine annealing schedule that increases the number of images from the original dataset over time. +The mixing ratio acts as a probability of selecting an image from the original dataset; +otherwise, an image with the same foreground is recombined using \schemename. +Thus, we still ensure each foreground is seen once per epoch. diff --git a/sec/method_old.tex b/sec/method_old.tex deleted file mode 100644 index 7708824..0000000 --- a/sec/method_old.tex +++ /dev/null @@ -1,120 +0,0 @@ -% !TeX root = ../main.tex - -%\begin{figure*}[ht!] -% \centering -% \includegraphics[width=.9\textwidth]{img/fig-2.pdf} -% \caption{Overview of \name. The data creation consists of two stages: (1, offline) Segmentation, where we segment the foreground objects from the background and fill in the background. (2, online) Recombination, where we combine the foreground objects with different backgrounds to create new samples.
After recombination, we apply strong, commonly used augmentation policies.} - \label{fig:method} -\end{figure*} - -\section{\schemename (Method)} -\label{sec:method} - -% \begin{itemize} -% \item[1.] Segment ImageNet -% \item Detect and Cutout Foreground -% \item Multiple foreground possibilities -% \item Foreground mask merging -% \item Background infills -% \item Foreground/Background Filtering -% \item [2.] Recombination -% \item Which foreground \& Background -% \item Background pruning -% \item size -% \item positioning -% \item Border smoothing -% \item Dealing with other data augmentations/transformations -% \end{itemize} - -% We propose a novel dataset, called \name, that improves image classification performance by explicitly separating and recombining foreground objects and plain backgrounds. -% \name consists of two stages: Segmentation and recombination. Both are visualized in \Cref{fig:method}. -% We introduce \schemename, a data augmentation scheme designed to enhance Transformer training by explicitly separating and recombining foreground objects and backgrounds. -% \schemename enhances transformer training by explicitly encoding spatial invariances that these need to learn explicitly in the data. -% \schemename involves two stages: Segmentation and Recombination, both visualized in \Cref{fig:method}. -We introduce \schemename, a data augmentation designed to enhance Transformer training by embedding spatial invariances--which Transformers would otherwise need to learn implicitly--directly into the training data. -% It operates by explicitly segmenting and recombining foreground objects and backgrounds. -\schemename comprises two distinct stages: Segmentation and Recombination. Both are illustrated in \Cref{fig:method}. - - -\subsection{Segmentation} -\label{sec:segmentation} -The segmentation stage isolates the foreground objects and their corresponding backgrounds. 
-% We then fill in the background in a visually plausible way~\cite{Sun2025} using a pretrained object-removal model. -We then fill the background using a pretrained object-removal model, producing visually plausible~\cite{Sun2025}, neutral scenes ready for recombination. -This stage is computed once offline and the results are stored for the recombination stage. - -First, foreground objects are detected and segmented from their backgrounds using a prompt-based segmentation model to exploit the classification datasets labels. -We use the state-of-the-art Grounded SAM~\cite{Ren2024}, which is based on Grounding DINO~\cite{Liu2024a} and SAM~\cite{Kirillov2023}. -The prompt we use is ``\code{a , a type of }'', where \code{} is the specific name of the objects class as defined by the dataset and \code{} is a the broader category of the object. -The \code{} guides the segmentation model towards the correct object in case the \code{} alone is too specific. -This can be the case with prompts like ``sorrel'' or ``guenon'', where the more general name ``horse'' or ``monkey'' is more helpful. -We derive the \code{} from the WordNet hierarchy, using the immediate hypernym. - -% We iteratively extract up to $n$ foreground masks for each dataset-image, using different more and more general prompts based on the more general synsets of WordNet (e.g. ``a sorrel, a type of horse'', ``a horse, a type of equine'', ...). -We iteratively extract $n$ foreground masks for each dataset-image, creating prompts by going one hypernym up the WordNet-tree each step (e.g. ``a sorrel, a type of horse'', ``a horse, a type of equine'', ...). -Masks that are very similar, with a pairwise IoU of at least $0.9$, are merged. -The output is a set of masks delineating the foreground objects and the backgrounds. -We select the best mask per image (according to \Cref{eq:filtering-score}) in a later filtering step, described below. 
- -First, an inpainting model that is specifically optimized to remove objects from images, such as LaMa~\cite{Suvorov2022} or Attentive Eraser~\cite{Sun2025}, is used to inpaint the foreground regions in the backgrounds. -Then, to ensure the quality of the foregrounds and the neutral background images, we select a foreground/background pair (for each dataset-image) from the $\leq n$ variants we have extracted and infilled in the previous steps. -Using an ensemble $E$ of six ViT, ResNet, and Swin Transformer models pretrained on the original dataset, we select the foreground/background pair that maximizes foreground performance while minimizing the performance on the background and size of the foreground. -For each model $m \in E$, we predict the score of the ground truth class $c$ on the foreground $\mathrm{fg}$ and background $\mathrm{bg}$ and weigh these with the size $\operatorname{size}(\cdot)$ in number of pixels according to: -% $c$ is the correct foreground class, $\mathrm{fg}$, and $\mathrm{bg}$ are the foreground and background and $\operatorname{size}(\cdot)$ is the size in number of pixels. -\begin{align} \begin{split} \label{eq:filtering-score} - \text{score}(\mathrm{fg}, \mathrm{bg}, c) &= \log \left( \sum_{m \in E} \frac{\P[m(\mathrm{fg}) = c]}{\abs{E}} \right) - + \log \left( 1 - \sum_{m \in E} \frac{\P[m(\mathrm{bg}) = c]}{\abs E} \right) \\ - & + \lambda \log \left( 1 - \abs{\frac{\operatorname{size}(\mathrm{fg})}{\operatorname{size}(\mathrm{bg})} - \eps} \right). - \end{split} \end{align} -% We use $E$ is the ensemble of models and $m$ is a pretrained model, $c$ is the correct foreground class, $\mathrm{fg}$, and $\mathrm{bg}$ are the foreground and background and $\operatorname{size}(\cdot)$ is the size in number of pixels. -We run a hyperparameter search using a manually annotated subset of foreground/background variants to find the factors in \Cref{eq:filtering-score}: $\lambda = 2$ and $\eps = 0.1$. 
-% The \textit{optimal foreground size} of $10\%$ of the full image balances the smallest possible foreground size that encompasses all the respective class information in the image with still conveying the foreground information after pasting it onto another background. -% This filtering step ensures we segment all the relevant foreground objects. - -Finally, we filter out backgrounds that are largely infilled, as these tend to be overly synthetic and do not carry much information (see the supplementary material). -% We ablate this choice in \Cref{sec:ablation}. -% While the computational cost for the segmentation stage is significant, this is a one-time calculation whose results can be reused in subsequent experiments (see the supplementary material for details). -Although the segmentation stage is computational overhead, it is a one-time cost with results that can be reused across experiments (see the supplementary material for details). -In summary, we factorize the dataset into a set of foreground objects with a transparent background and a set of diverse backgrounds per class. -The next step is to recombine these, before applying other common data augmentation operations during training. - -\subsection{Recombination} -\label{sec:recombination} -The recombination stage, performed online during training, combines the foreground objects with different backgrounds to create new training samples. -For each object, we follow the pipeline of: Pick an appropriate background, resize it to a fitting size, and place it in the background image. -Through this step, we expose the model to variations beyond the image compositions of the dataset. - -For each foreground object, we sample a background using one of the following strategies: -(1) the original image background, (2) the set of backgrounds from the same class, or (3) the set of all possible backgrounds. 
-These sets are trading off the amount of information the model can learn from the background against the diversity of new images created. -In each epoch, each foreground object is seen exactly once, but a background may appear multiple times. - -The selected foreground is resized based on its relative size within its original image and the relative size of the original foreground in the selected background image. -The final size is randomly selected from a 30\% range around upper and lower limits ($s_u$ and $s_l$), based on the original sizes. -% \begin{align} -% s \sim \mathcal U \left[ (1 - 0.3) s_l, (1 + 0.3) s_u \right]. -% \end{align} -To balance the size of the foreground and that of the backgrounds original foreground, the upper and lower limit $s_u$ and $s_l$ are set to the mean or range of both sizes, depending on the foreground size strategy: \emph{mean} or \emph{range}. - -The resized foreground is then placed at a random position within the background image. -To more seamlessly integrate the foreground, we apply a Gaussian blur with ${\sigma \in [\frac{\sigma_{\text{max}}}{10}, \sigma_{\text{max}}]}$, inspired by the standard range for the Gaussian blur operation in \cite{Touvron2022}, to the foreground's alpha-mask. - -We can apply standard data augmentation techniques in two modes: -Either we apply all augmentations to the recombined image, or we apply the cropping and resizing to the background only and then apply the other augmentations after recombination. -% While for the second mode, the foreground object will always be fully visible, the first mode uses the data augmentations in the same way they would be used for the baseline dataset. -% The second mode ensures the foreground object remains fully visible, while the first mode mirrors standard data augmentation practices. -The first mode mirrors standard augmentation practice, whereas the second one ensures the foreground object remains fully visible. 
- -We experiment with a constant mixing ratio, or a linear or cosine annealing schedule that increases the amount of images from the original dataset over time. -The mixing ratio acts as a probability of selecting an image from the original dataset; -otherwise, an image with the same foreground is recombined using \schemename, ensuring each object is seen once per epoch. -% Thus, we still ensure each foreground is seen once per epoch. -The recombination stage is designed to be parallelized on the CPU during training and thus does not impact training time (see supplementary material for details). - diff --git a/sec/related_work.tex b/sec/related_work.tex index ce22762..70895ae 100644 --- a/sec/related_work.tex +++ b/sec/related_work.tex @@ -3,44 +3,34 @@ \section{Related Work} \label{sec:related_work} -\textbf{Data Augmentation for Image Classification.} -Data augmentation is a crucial technique for improving the model performance and generalization. -Traditional augmentation strategies rely on simple geometric or color-space transformations like cropping, flipping, rotation, blurring, color jittering, or random erasing~\cite{Zhong2020} to increase training data diversity without changing the semantic meaning. -With the advent of ViTs~\cite{Dosovitskiy2021}, new data augmentation operations like PatchDropout~\cite{Liu2022d} have been proposed. -Other transformations like MixUp~\cite{Zhang2018a}, CutMix~\cite{Yun2019}, or random cropping and patching~\cite{Takahashi2018} combine multiple input images. -These simple transformations are usually bundled to form more complex augmentation policies like AutoAugment~\cite{Cubuk2019} and RandAugment~\cite{Cubuk2020}, or 3-Augment~\cite{Touvron2022}. %, which is optimized to train a ViT. -For a general overview of data augmentation for image classification, we refer to Shorten et al.~\cite{Shorten2019} and Xu et al.~\cite{Xu2023d}. 
+\paragraph{Data Augmentation for Image Classification} +Data augmentation is a crucial technique for improving the performance and generalization of image classification models. +Traditional augmentation strategies rely on simple geometric or color-space transformations like cropping, flipping, rotation, blurring, color jittering, or random erasing \cite{Zhong2017} to increase the diversity of the training data without changing their semantic meaning. +With the advent of Transformers, new data augmentation operations like PatchDropout \cite{Liu2022d} have been proposed. +Other transformations like Mixup \cite{Zhang2018a}, CutMix \cite{Yun2019}, or random cropping and patching \cite{Takahashi2018} combine multiple input images. +These simple transformations are usually bundled to form more complex augmentation policies like AutoAugment \cite{Cubuk2018} and RandAugment \cite{Cubuk2019}, which automatically search for optimal augmentation policies, or 3-Augment \cite{Touvron2022}, which is optimized to train a ViT. +For a general overview of data augmentation techniques for image classification, we refer to \cite{Shorten2019, Xu2023d}. -We advance these general augmentations by introducing \schemename to explicitly separate objects and backgrounds for image classification, allowing us to move beyond image compositions from the dataset. -Thus, \schemename unlocks performance improvements and bias reduction not possible with traditional data augmentation. -% \schemename is used additionally to traditional augmentation techniques to improve performance and reduce biases. +We build upon these general augmentation techniques by introducing a novel approach to explicitly separate and recombine foregrounds and backgrounds for image classification. +Our approach is used in tandem with traditional data augmentation techniques to improve model performance and reduce biases. 
-\textbf{Copy-Paste Augmentation.} -The copy-paste augmentation~\cite{Ghiasi2021}, which is used only for object detection~\cite{Shermaine2025,Ghiasi2021} and instance segmentation~\cite{Werman2022,Ling2022}, involves copying segmented objects from one image and pasting them onto another. -While typically human annotated segmentation masks are used to extract the foreground objects, other foreground sources have been explored, like 3D models~\cite{Hinterstoisser2019} and pretrained object-detection models for use on objects on white background~\cite{Dwibedi2017} or synthetic images~\cite{Ge2023}. -Kang et al.~\cite{Kang2022} apply copy-paste as an alternative to CutMix in image classification, but they do not shift the size or position of the foregrounds and use dataset images (with object) as backgrounds. +\paragraph{Copy-Paste Augmentation} +The copy-paste augmentation \cite{Ghiasi2020}, which is used for object detection \cite{Shermaine2025,Ghiasi2020} and instance segmentation \cite{Werman2021,Ling2022}, involves copying segmented objects from one image and pasting them onto another. +While typically human-annotated segmentation masks are used to extract the foreground objects, other foreground sources have been explored, like 3D models \cite{Hinterstoisser2019} and pretrained object-detection models for use on objects on white background \cite{Dwibedi2017} or synthetic images \cite{Ge2023}. +DeePaste \cite{Werman2021} focuses on using inpainting for a more seamless integration of the pasted object. -Unlike these methods, \name focuses on image classification. 
+While for detection and segmentation, objects are pasted onto another image (with a different foreground) or on available or rendered background images of the target scene, we extract foreground objects and fill in the resulting holes in the background in a semantically neutral way. +This way, we can recombine any foreground object with a large variety of neutral backgrounds from natural images, enabling a controlled and diverse manipulation of image composition. -\textbf{Generative data augmentation.} -Recent work uses generative models to synthesize additional training images, e.g., via GANs or diffusion models driven by text prompts or attribute labels~\cite{Lu2022,Trabucco2024,Islam2024}. -Concurrently to our work, AGA~\cite{Rahat2025} combines LLMs, diffusion models, and segmentation to generate fully synthetic backgrounds from text prompts, onto which real foregrounds are pasted. -These synthetic images are appended to the original training set. -While AGA focuses on increasing diversity via prompt-driven background synthesis, \schemename uses generative models differently: -We apply inpainting only to locally neutralize the original object region, yielding semi-synthetic backgrounds that preserve the global layout, style, and characteristics of real dataset images. -% AGA's focus on synthetic background is likely to produce a shifted, or even collapsed background image distribution~\cite{Zverev2025,Shumailov2024,Adamkiewicz2026}. -Fully synthetic, prompt-generated backgrounds are likely to change, the effective background distribution, especially when prompts or generators are biased~\cite{Zverev2025,Shumailov2024,Adamkiewicz2026}. -We then do online recombination of real foregrounds with these neutralized, dataset-consistent backgrounds under explicit control of object position and scale. -Thus, \schemename acts as a dynamic large-scale augmentation method while AGA is statically expanding small-scale training sets with synthetic data. 
- -\textbf{Model robustness evaluation.} +\paragraph{Model robustness evaluation} Evaluating model robustness to various image variations is critical for understanding and improving model generalization. -Datasets like ImageNet-A~\cite{Hendrycks2021}, ImageNet-C~\cite{Hendrycks2019} and ImageNet-P~\cite{Hendrycks2019} introduce common corruptions and perturbations. -ImageNet-E~\cite{Li2023e} evaluates model robustness against a collection of distribution shifts. -Other datasets, such as ImageNet-D~\cite{Zhang2024f} and ImageNet-R~\cite{Hendrycks2021a}, focus on varying background, texture, and material, but rely on synthetic data. -Stylized ImageNet~\cite{Geirhos2019} investigates the impact of texture changes. -ImageNet-9~\cite{Xiao2020} explores background variations using segmented images for a 9-class subset of ImageNet with artificial backgrounds. +Datasets like ImageNet-C \cite{Hendrycks2019} and ImageNet-P \cite{Hendrycks2019} introduce common corruptions and perturbations. +ImageNet-E \cite{Li2023e} evaluates model robustness against a collection of distribution shifts. +Other datasets, such as ImageNet-D \cite{Zhang2024f}, focus on varying background, texture, and material, but rely on synthetic data. +Stylized ImageNet \cite{Geirhos2018} investigates the impact of texture changes. +ImageNet-9 \cite{Xiao2020} explores background variations using segmented images, but the backgrounds are often artificial. -In contrast to these existing datasets, which are used only for evaluation, \schemename provides fine-grained control over foreground object placement, size, and background selection, enabling a precise and comprehensive analysis of specific model biases within the context of a large-scale, real-world image distribution. -As \schemename also provides controllable training data generation, it goes beyond simply measuring robustness to actively improving it through training. 
+In contrast to these existing datasets, which are used only for evaluation, \name provides fine-grained control over foreground object placement, size, and background selection, enabling a precise and comprehensive analysis of specific model biases within the context of a large-scale, real-world image distribution. +As \name also provides controllable training set generation, it goes beyond simply measuring robustness to actively improving it through training. diff --git a/sec/reproducability.tex b/sec/reproducability.tex deleted file mode 100644 index 853f516..0000000 --- a/sec/reproducability.tex +++ /dev/null @@ -1,228 +0,0 @@ -% !TeX root = ../main.tex - -\makeatletter -% \@ifundefined{isChecklistMainFile}{ -% % We are compiling a standalone document -% \newif\ifreproStandalone -% \reproStandalonetrue -% }{ - % We are being \input into the main paper - \newif\ifreproStandalone - \reproStandalonefalse -% } -\makeatother - -\ifreproStandalone -\documentclass[letterpaper]{article} -\usepackage[submission]{aaai2026} -\setlength{\pdfpagewidth}{8.5in} -\setlength{\pdfpageheight}{11in} -\usepackage{times} -\usepackage{helvet} -\usepackage{courier} -\usepackage{xcolor} -\frenchspacing - -\begin{document} -\fi -\setlength{\leftmargini}{20pt} -\makeatletter\def\@listi{\leftmargin\leftmargini \topsep .5em \parsep .5em \itemsep .5em} -\def\@listii{\leftmargin\leftmarginii \labelwidth\leftmarginii \advance\labelwidth-\labelsep \topsep .4em \parsep .4em \itemsep .4em} -\def\@listiii{\leftmargin\leftmarginiii \labelwidth\leftmarginiii \advance\labelwidth-\labelsep \topsep .4em \parsep .4em \itemsep .4em}\makeatother - -\setcounter{secnumdepth}{0} -\renewcommand\thesubsection{\arabic{subsection}} -\renewcommand\labelenumi{\thesubsection.\arabic{enumi}} - -\newcounter{checksubsection} -\newcounter{checkitem}[checksubsection] - -\newcommand{\checksubsection}[1]{% - \refstepcounter{checksubsection}% - \paragraph{\arabic{checksubsection}. 
#1}% - \setcounter{checkitem}{0}% -} - -\newcommand{\checkitem}{% - \refstepcounter{checkitem}% - \item[\arabic{checksubsection}.\arabic{checkitem}.]% -} -\newcommand{\question}[2]{\normalcolor\checkitem #1 #2 \color{blue}} -\newcommand{\ifyespoints}[1]{\makebox[0pt][l]{\hspace{-15pt}\normalcolor #1}} - -\section*{Reproducibility Checklist} - -\vspace{1em} -\hrule -\vspace{1em} - -\textbf{Instructions for Authors:} - -This document outlines key aspects for assessing reproducibility. Please provide your input by editing this \texttt{.tex} file directly. - -For each question (that applies), replace the ``Type your response here'' text with your answer. - -\vspace{1em} -\noindent -\textbf{Example:} If a question appears as -% -\begin{center} - \noindent - \begin{minipage}{.9\linewidth} - \ttfamily\raggedright - \string\question \{Proofs of all novel claims are included\} \{(yes/partial/no)\} \\ - Type your response here - \end{minipage} -\end{center} -you would change it to: -\begin{center} - \noindent - \begin{minipage}{.9\linewidth} - \ttfamily\raggedright - \string\question \{Proofs of all novel claims are included\} \{(yes/partial/no)\} \\ - yes - \end{minipage} -\end{center} -% -Please make sure to: -\begin{itemize}\setlength{\itemsep}{.1em} - \item Replace ONLY the ``Type your response here'' text and nothing else. - \item Use one of the options listed for that question (e.g., \textbf{yes}, \textbf{no}, \textbf{partial}, or \textbf{NA}). - \item \textbf{Not} modify any other part of the \texttt{\string\question} command or any other lines in this document.\\ -\end{itemize} - -You can \texttt{\string\input} this .tex file right before \texttt{\string\end\{document\}} of your main file or compile it as a stand-alone document. Check the instructions on your conference's website to see if you will be asked to provide this checklist with your paper or separately. 
- -\vspace{1em} -\hrule -\vspace{1em} - -% The questions start here - -\checksubsection{General Paper Structure} -\begin{itemize} - - \question{Includes a conceptual outline and/or pseudocode description of AI methods introduced}{(yes/partial/no/NA)} - yes - - \question{Clearly delineates statements that are opinions, hypothesis, and speculation from objective facts and results}{(yes/no)} - yes - - \question{Provides well-marked pedagogical references for less-familiar readers to gain background necessary to replicate the paper}{(yes/no)} - yes - -\end{itemize} -\checksubsection{Theoretical Contributions} -\begin{itemize} - - \question{Does this paper make theoretical contributions?}{(yes/no)} - no - - \ifyespoints{\vspace{1.2em}If yes, please address the following points:} - \begin{itemize} - - \question{All assumptions and restrictions are stated clearly and formally}{(yes/partial/no)} - Type your response here - - \question{All novel claims are stated formally (e.g., in theorem statements)}{(yes/partial/no)} - Type your response here - - \question{Proofs of all novel claims are included}{(yes/partial/no)} - Type your response here - - \question{Proof sketches or intuitions are given for complex and/or novel results}{(yes/partial/no)} - Type your response here - - \question{Appropriate citations to theoretical tools used are given}{(yes/partial/no)} - Type your response here - - \question{All theoretical claims are demonstrated empirically to hold}{(yes/partial/no/NA)} - Type your response here - - \question{All experimental code used to eliminate or disprove claims is included}{(yes/no/NA)} - Type your response here - - \end{itemize} -\end{itemize} - -\checksubsection{Dataset Usage} -\begin{itemize} - - \question{Does this paper rely on one or more datasets?}{(yes/no)} - yes - - \ifyespoints{If yes, please address the following points:} - \begin{itemize} - - \question{A motivation is given for why the experiments are conducted on the selected 
datasets}{(yes/partial/no/NA)} - yes - - \question{All novel datasets introduced in this paper are included in a data appendix}{(yes/partial/no/NA)} - no - - \question{All novel datasets introduced in this paper will be made publicly available upon publication of the paper with a license that allows free usage for research purposes}{(yes/partial/no/NA)} - yes - - \question{All datasets drawn from the existing literature (potentially including authors' own previously published work) are accompanied by appropriate citations}{(yes/no/NA)} - yes - - \question{All datasets drawn from the existing literature (potentially including authors' own previously published work) are publicly available}{(yes/partial/no/NA)} - yes - - \question{All datasets that are not publicly available are described in detail, with explanation why publicly available alternatives are not scientifically satisficing}{(yes/partial/no/NA)} - NA - - \end{itemize} -\end{itemize} - -\checksubsection{Computational Experiments} -\begin{itemize} - - \question{Does this paper include computational experiments?}{(yes/no)} - yes - - \ifyespoints{If yes, please address the following points:} - \begin{itemize} - - \question{This paper states the number and range of values tried per (hyper-) parameter during development of the paper, along with the criterion used for selecting the final parameter setting}{(yes/partial/no/NA)} - yes - - \question{Any code required for pre-processing data is included in the appendix}{(yes/partial/no)} - yes - - \question{All source code required for conducting and analyzing the experiments is included in a code appendix}{(yes/partial/no)} - yes - - \question{All source code required for conducting and analyzing the experiments will be made publicly available upon publication of the paper with a license that allows free usage for research purposes}{(yes/partial/no)} - yes - - \question{All source code implementing new methods have comments detailing the implementation, with 
references to the paper where each step comes from}{(yes/partial/no)} - yes - - \question{If an algorithm depends on randomness, then the method used for setting seeds is described in a way sufficient to allow replication of results}{(yes/partial/no/NA)} - yes - - \question{This paper specifies the computing infrastructure used for running experiments (hardware and software), including GPU/CPU models; amount of memory; operating system; names and versions of relevant software libraries and frameworks}{(yes/partial/no)} - yes - - \question{This paper formally describes evaluation metrics used and explains the motivation for choosing these metrics}{(yes/partial/no)} - yes - - \question{This paper states the number of algorithm runs used to compute each reported result}{(yes/no)} - yes - - \question{Analysis of experiments goes beyond single-dimensional summaries of performance (e.g., average; median) to include measures of variation, confidence, or other distributional information}{(yes/no)} - yes - - \question{The significance of any improvement or decrease in performance is judged using appropriate statistical tests (e.g., Wilcoxon signed-rank)}{(yes/partial/no)} - no - - \question{This paper lists all final (hyper-)parameters used for each model/algorithm in the paper’s experiments}{(yes/partial/no/NA)} - yes - - - \end{itemize} -\end{itemize} -\ifreproStandalone -\end{document} -\fi \ No newline at end of file diff --git a/splncs04.bst b/splncs04.bst deleted file mode 100644 index 3be8de3..0000000 --- a/splncs04.bst +++ /dev/null @@ -1,1548 +0,0 @@ -%% BibTeX bibliography style `splncs03' -%% -%% BibTeX bibliography style for use with numbered references in -%% Springer Verlag's "Lecture Notes in Computer Science" series. -%% (See Springer's documentation for llncs.cls for -%% more details of the suggested reference format.) Note that this -%% file will not work for author-year style citations. 
-%% -%% Use \documentclass{llncs} and \bibliographystyle{splncs03}, and cite -%% a reference with (e.g.) \cite{smith77} to get a "[1]" in the text. -%% -%% This file comes to you courtesy of Maurizio "Titto" Patrignani of -%% Dipartimento di Informatica e Automazione Universita' Roma Tre -%% -%% ================================================================================================ -%% This was file `titto-lncs-02.bst' produced on Wed Apr 1, 2009 -%% Edited by hand by titto based on `titto-lncs-01.bst' (see below) -%% -%% CHANGES (with respect to titto-lncs-01.bst): -%% - Removed the call to \urlprefix (thus no "URL" string is added to the output) -%% ================================================================================================ -%% This was file `titto-lncs-01.bst' produced on Fri Aug 22, 2008 -%% Edited by hand by titto based on `titto.bst' (see below) -%% -%% CHANGES (with respect to titto.bst): -%% - Removed the "capitalize" command for editors string "(eds.)" and "(ed.)" -%% - Introduced the functions titto.bbl.pages and titto.bbl.page for journal pages (without "pp.") -%% - Added a new.sentence command to separate with a dot booktitle and series in the inproceedings -%% - Commented all new.block commands before urls and notes (to separate them with a comma) -%% - Introduced the functions titto.bbl.volume for handling journal volumes (without "vol." label) -%% - Used for editors the same name conventions used for authors (see function format.in.ed.booktitle) -%% - Removed a \newblock to avoid long spaces between title and "In: ..." -%% - Added function titto.space.prefix to add a space instead of "~" after the (removed) "vol." label -%% - Added doi -%% ================================================================================================ -%% This was file `titto.bst', -%% generated with the docstrip utility. 
-%% -%% The original source files were: -%% -%% merlin.mbs (with options: `vonx,nm-rvvc,yr-par,jttl-rm,volp-com,jwdpg,jwdvol,numser,ser-vol,jnm-x,btit-rm,bt-rm,edparxc,bkedcap,au-col,in-col,fin-bare,pp,ed,abr,mth-bare,xedn,jabr,and-com,and-com-ed,xand,url,url-blk,em-x,nfss,') -%% ---------------------------------------- -%% *** Tentative .bst file for Springer LNCS *** -%% -%% Copyright 1994-2007 Patrick W Daly - % =============================================================== - % IMPORTANT NOTICE: - % This bibliographic style (bst) file has been generated from one or - % more master bibliographic style (mbs) files, listed above. - % - % This generated file can be redistributed and/or modified under the terms - % of the LaTeX Project Public License Distributed from CTAN - % archives in directory macros/latex/base/lppl.txt; either - % version 1 of the License, or any later version. - % =============================================================== - % Name and version information of the main mbs file: - % \ProvidesFile{merlin.mbs}[2007/04/24 4.20 (PWD, AO, DPC)] - % For use with BibTeX version 0.99a or later - %------------------------------------------------------------------- - % This bibliography style file is intended for texts in ENGLISH - % This is a numerical citation style, and as such is standard LaTeX. - % It requires no extra package to interface to the main text. - % The form of the \bibitem entries is - % \bibitem{key}... - % Usage of \cite is as follows: - % \cite{key} ==>> [#] - % \cite[chap. 2]{key} ==>> [#, chap. 2] - % where # is a number determined by the ordering in the reference list. - % The order in the reference list is alphabetical by authors. 
- %--------------------------------------------------------------------- - -ENTRY - { address - author - booktitle - chapter - doi - edition - editor - eid - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - url - volume - year - } - {} - { label } -INTEGERS { output.state before.all mid.sentence after.sentence after.block } -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} -STRINGS { s t} -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ -% newline$ -% "\newblock " write$ % removed for titto-lncs-01 - " " write$ % to avoid long spaces between title and "In: ..." - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} -FUNCTION {fin.entry} -{ duplicate$ empty$ - 'pop$ - 'write$ - if$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} -FUNCTION {add.blank} -{ " " * before.all 'output.state := -} - - -FUNCTION {add.colon} -{ duplicate$ empty$ - 'skip$ - { ":" * add.blank } - if$ -} - -FUNCTION {date.block} -{ - new.block -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} -STRINGS {z} -FUNCTION {remove.dots} -{ 'z := - "" - { z empty$ not } - { z #1 #1 substring$ - z #2 global.max$ substring$ 'z := - duplicate$ "." 
= 'pop$ - { * } - if$ - } - while$ -} -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} -FUNCTION {field.or.null} -{ duplicate$ empty$ - { pop$ "" } - 'skip$ - if$ -} -FUNCTION {emphasize} -{ skip$ } - -FUNCTION {embolden} -{ duplicate$ empty$ -{ pop$ "" } -{ "\textbf{" swap$ * "}" * } -if$ -} -FUNCTION {tie.or.space.prefix} -{ duplicate$ text.length$ #5 < - { "~" } - { " " } - if$ - swap$ -} -FUNCTION {titto.space.prefix} % always introduce a space -{ duplicate$ text.length$ #3 < - { " " } - { " " } - if$ - swap$ -} - - -FUNCTION {capitalize} -{ "u" change.case$ "t" change.case$ } - -FUNCTION {space.word} -{ " " swap$ * " " * } - % Here are the language-specific definitions for explicit words. - % Each function has a name bbl.xxx where xxx is the English word. - % The language selected here is ENGLISH -FUNCTION {bbl.and} -{ "and"} - -FUNCTION {bbl.etal} -{ "et~al." } - -FUNCTION {bbl.editors} -{ "eds." } - -FUNCTION {bbl.editor} -{ "ed." } - -FUNCTION {bbl.edby} -{ "edited by" } - -FUNCTION {bbl.edition} -{ "edn." } - -FUNCTION {bbl.volume} -{ "vol." } - -FUNCTION {titto.bbl.volume} % for handling journals -{ "" } - -FUNCTION {bbl.of} -{ "of" } - -FUNCTION {bbl.number} -{ "no." } - -FUNCTION {bbl.nr} -{ "no." } - -FUNCTION {bbl.in} -{ "in" } - -FUNCTION {bbl.pages} -{ "pp." } - -FUNCTION {bbl.page} -{ "p." } - -FUNCTION {titto.bbl.pages} % for journals -{ "" } - -FUNCTION {titto.bbl.page} % for journals -{ "" } - -FUNCTION {bbl.chapter} -{ "chap." } - -FUNCTION {bbl.techrep} -{ "Tech. Rep." } - -FUNCTION {bbl.mthesis} -{ "Master's thesis" } - -FUNCTION {bbl.phdthesis} -{ "Ph.D. 
thesis" } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"Jun."} - -MACRO {jul} {"Jul."} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sep."} - -MACRO {oct} {"Oct."} - -MACRO {nov} {"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Software Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput. Aid. Des."} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Program."} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Graphic."} - -MACRO {toms} {"ACM Trans. Math. Software"} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Progr. Lang. Syst."} - -MACRO {tcs} {"Theor. Comput. 
Sci."} - -FUNCTION {bibinfo.check} -{ swap$ - duplicate$ missing$ - { - pop$ pop$ - "" - } - { duplicate$ empty$ - { - swap$ pop$ - } - { swap$ - pop$ - } - if$ - } - if$ -} -FUNCTION {bibinfo.warn} -{ swap$ - duplicate$ missing$ - { - swap$ "missing " swap$ * " in " * cite$ * warning$ pop$ - "" - } - { duplicate$ empty$ - { - swap$ "empty " swap$ * " in " * cite$ * warning$ - } - { swap$ - pop$ - } - if$ - } - if$ -} -FUNCTION {format.url} -{ url empty$ - { "" } -% { "\urlprefix\url{" url * "}" * } - { "\url{" url * "}" * } % changed in titto-lncs-02.bst - if$ -} - -FUNCTION {format.doi} % added in splncs04.bst -{ doi empty$ - { "" } - { after.block 'output.state := - "\doi{" doi * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - - -STRINGS { bibinfo} - -FUNCTION {format.names} -{ 'bibinfo := - duplicate$ empty$ 'skip$ { - 's := - "" 't := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{vv~}{ll}{, jj}{, f{.}.}" - format.name$ - bibinfo bibinfo.check - 't := - nameptr #1 > - { - namesleft #1 > - { ", " * t * } - { - s nameptr "{ll}" format.name$ duplicate$ "others" = - { 't := } - { pop$ } - if$ - "," * - t "others" = - { - " " * bbl.etal * - } - { " " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ - } if$ -} -FUNCTION {format.names.ed} -{ - 'bibinfo := - duplicate$ empty$ 'skip$ { - 's := - "" 't := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{f{.}.~}{vv~}{ll}{ jj}" - format.name$ - bibinfo bibinfo.check - 't := - nameptr #1 > - { - namesleft #1 > - { ", " * t * } - { - s nameptr "{ll}" format.name$ duplicate$ "others" = - { 't := } - { pop$ } - if$ - "," * - t "others" = - { - - " " * bbl.etal * - } - { " " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ - } if$ -} -FUNCTION {format.authors} -{ author 
"author" format.names -} -FUNCTION {get.bbl.editor} -{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ } - -FUNCTION {format.editors} -{ editor "editor" format.names duplicate$ empty$ 'skip$ - { - " " * - get.bbl.editor -% capitalize - "(" swap$ * ")" * - * - } - if$ -} -FUNCTION {format.note} -{ - note empty$ - { "" } - { note #1 #1 substring$ - duplicate$ "{" = - 'skip$ - { output.state mid.sentence = - { "l" } - { "u" } - if$ - change.case$ - } - if$ - note #2 global.max$ substring$ * "note" bibinfo.check - } - if$ -} - -FUNCTION {format.title} -{ title - duplicate$ empty$ 'skip$ - { "t" change.case$ } - if$ - "title" bibinfo.check -} -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {n.dashify} -{ - 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {word.in} -{ bbl.in capitalize - ":" * - " " * } - -FUNCTION {format.date} -{ - month "month" bibinfo.check - duplicate$ empty$ - year "year" bibinfo.check duplicate$ empty$ - { swap$ 'skip$ - { "there's a month but no year in " cite$ * warning$ } - if$ - * - } - { swap$ 'skip$ - { - swap$ - " " * swap$ - } - if$ - * - remove.dots - } - if$ - duplicate$ empty$ - 'skip$ - { - before.all 'output.state := - " (" swap$ * ")" * - } - if$ -} -FUNCTION {format.btitle} -{ title "title" bibinfo.check - duplicate$ empty$ 'skip$ - { - } - if$ -} -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { bbl.volume volume tie.or.space.prefix - "volume" bibinfo.check * * - series "series" bibinfo.check - duplicate$ 
empty$ 'pop$ - { emphasize ", " * swap$ * } - if$ - "volume and number" number either.or.check - } - if$ -} -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { bbl.number } - { bbl.number capitalize } - if$ - number tie.or.space.prefix "number" bibinfo.check * * - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { bbl.in space.word * - series "series" bibinfo.check * - } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition duplicate$ empty$ 'skip$ - { - output.state mid.sentence = - { "l" } - { "t" } - if$ change.case$ - "edition" bibinfo.check - " " * bbl.edition * - } - if$ -} -INTEGERS { multiresult } -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} -FUNCTION {format.pages} -{ pages duplicate$ empty$ 'skip$ - { duplicate$ multi.page.check - { - bbl.pages swap$ - n.dashify - } - { - bbl.page swap$ - } - if$ - tie.or.space.prefix - "pages" bibinfo.check - * * - } - if$ -} -FUNCTION {format.journal.pages} -{ pages duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ - { pop$ pop$ format.pages } - { - ", " * - swap$ - n.dashify - pages multi.page.check - 'titto.bbl.pages - 'titto.bbl.page - if$ - swap$ tie.or.space.prefix - "pages" bibinfo.check - * * - * - } - if$ - } - if$ -} -FUNCTION {format.journal.eid} -{ eid "eid" bibinfo.check - duplicate$ empty$ 'pop$ - { swap$ duplicate$ empty$ 'skip$ - { - ", " * - } - if$ - swap$ * - } - if$ -} -FUNCTION {format.vol.num.pages} % this function is used only for journal entries -{ volume field.or.null embolden - duplicate$ empty$ 'skip$ - { -% bbl.volume swap$ tie.or.space.prefix - titto.bbl.volume swap$ titto.space.prefix -% rationale for the change 
above: for journals you don't want "vol." label -% hence it does not make sense to attach the journal number to the label when -% it is short - "volume" bibinfo.check - * * - } - if$ - number "number" bibinfo.check duplicate$ empty$ 'skip$ - { - swap$ duplicate$ empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - swap$ - "(" swap$ * ")" * - } - if$ * - eid empty$ - { format.journal.pages } - { format.journal.eid } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { bbl.chapter } - { type "l" change.case$ - "type" bibinfo.check - } - if$ - chapter tie.or.space.prefix - "chapter" bibinfo.check - * * - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.booktitle} -{ - booktitle "booktitle" bibinfo.check -} -FUNCTION {format.in.ed.booktitle} -{ format.booktitle duplicate$ empty$ 'skip$ - { -% editor "editor" format.names.ed duplicate$ empty$ 'pop$ % changed by titto - editor "editor" format.names duplicate$ empty$ 'pop$ - { - " " * - get.bbl.editor -% capitalize - "(" swap$ * ") " * - * swap$ - * } - if$ - word.in swap$ * - } - if$ -} -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { "all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} -FUNCTION {format.thesis.type} -{ type duplicate$ empty$ - 'pop$ - { swap$ pop$ - "t" change.case$ "type" bibinfo.check - } - if$ -} -FUNCTION {format.tr.number} -{ number "number" bibinfo.check - type duplicate$ empty$ - { pop$ bbl.techrep } - 'skip$ - if$ - "type" bibinfo.check - swap$ duplicate$ empty$ - { pop$ "t" change.case$ } - { tie.or.space.prefix * * } - if$ -} -FUNCTION {format.article.crossref} -{ - key duplicate$ empty$ - { pop$ - journal duplicate$ empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * warning$ } - { "journal" bibinfo.check emphasize word.in 
swap$ * } - if$ - } - { word.in swap$ * " " *} - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - editor num.names$ duplicate$ - #2 > - { pop$ - "editor" bibinfo.check - " " * bbl.etal - * - } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { - "editor" bibinfo.check - " " * bbl.etal - * - } - { - bbl.and space.word - * editor #2 "{vv~}{ll}" format.name$ - "editor" bibinfo.check - * - } - if$ - } - if$ - } - if$ -} -FUNCTION {format.book.crossref} -{ volume duplicate$ empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - pop$ word.in - } - { bbl.volume - capitalize - swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { series emphasize * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.incoll.inproc.crossref} -{ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { format.booktitle duplicate$ empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - } - { word.in swap$ * } - if$ - } - { word.in key * " " *} - if$ - } - { word.in format.crossref.editor * " " *} - if$ - " \cite{" * crossref * "}" * -} -FUNCTION {format.org.or.pub} -{ 't := - "" - address empty$ t empty$ and - 'skip$ - { - t empty$ - { address "address" bibinfo.check * - } - { t * - address empty$ - 'skip$ - { ", " * address "address" bibinfo.check * } - if$ - } - if$ - } - if$ -} -FUNCTION {format.publisher.address} -{ publisher "publisher" bibinfo.warn format.org.or.pub -} - -FUNCTION {format.organization.address} -{ organization "organization" bibinfo.check format.org.or.pub -} - 
-FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.title "title" output.check - new.block - crossref missing$ - { - journal - "journal" bibinfo.check - "journal" output.check - add.blank - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - new.sentence - format.number.series output - format.publisher.address output - } - { - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} -FUNCTION {booklet} -{ output.bibitem - format.authors output - add.colon - new.block - format.title "title" output.check - new.block - howpublished "howpublished" bibinfo.check output - address "address" bibinfo.check output - format.date output -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - add.colon - } - { format.authors output.nonnull - add.colon - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { - format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - new.sentence - format.number.series output - 
format.publisher.address output - } - { - format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.chapter.pages output - new.sentence - format.number.series output - format.publisher.address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - new.sentence % added by titto - format.bvolume output - format.pages output - new.sentence - format.number.series output - publisher empty$ - { format.organization.address output } - { organization "organization" bibinfo.check output - format.publisher.address output - } - if$ - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} -FUNCTION {conference} { inproceedings } -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization "organization" bibinfo.check - duplicate$ empty$ 'pop$ - { output - address "address" bibinfo.check output - } - if$ - } - { format.authors output.nonnull } - if$ - add.colon - new.block - format.btitle "title" output.check - 
author empty$ - { organization empty$ - { - address new.block.checka - address "address" bibinfo.check output - } - 'skip$ - if$ - } - { - organization address new.block.checkb - organization "organization" bibinfo.check output - address "address" bibinfo.check output - } - if$ - format.edition output - format.date output -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.btitle - "title" output.check - new.block - bbl.mthesis format.thesis.type output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {misc} -{ output.bibitem - format.authors output - add.colon - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished "howpublished" bibinfo.check output - format.date output -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry - empty.misc.check -} -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.btitle - "title" output.check - new.block - bbl.phdthesis format.thesis.type output.nonnull - school "school" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization "organization" bibinfo.check output - } - { format.editors output.nonnull } - if$ - add.colon - new.block - format.btitle "title" output.check - format.bvolume output - editor empty$ - { publisher empty$ - { format.number.series output } - { - new.sentence - format.number.series output - 
format.publisher.address output - } - if$ - } - { publisher empty$ - { - new.sentence - format.number.series output - format.organization.address output } - { - new.sentence - format.number.series output - organization "organization" bibinfo.check output - format.publisher.address output - } - if$ - } - if$ - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.title - "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" bibinfo.warn output - address "address" bibinfo.check output - format.date "year" output.check -% new.block - format.doi output - format.url output -% new.block - format.note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - add.colon - new.block - format.title "title" output.check - format.date output -% new.block - format.url output -% new.block - format.note "note" output.check - fin.entry -} - -FUNCTION {default.type} { misc } -READ -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} -INTEGERS { len } -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{ll{ }}{ ff{ }}{ jj{ }}" - format.name$ 't := - nameptr #1 > - { - " " * - namesleft #1 = t "others" = and - { "zzzzz" * } - { t sortify * } - if$ - } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " 
cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} -ITERATE {presort} -SORT -STRINGS { longest.label } -INTEGERS { number.label longest.label.width } -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} -EXECUTE {initialize.longest.label} -ITERATE {longest.label.pass} -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * "}" * - write$ newline$ - 
"\providecommand{\url}[1]{\texttt{#1}}" - write$ newline$ - "\providecommand{\urlprefix}{URL }" - write$ newline$ - "\providecommand{\doi}[1]{https://doi.org/#1}" - write$ newline$ -} -EXECUTE {begin.bib} -EXECUTE {init.state.consts} -ITERATE {call.type$} -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} -EXECUTE {end.bib} -%% End of customized bst file -%% -%% End of file `titto.bst'. diff --git a/supplementary.brf b/supplementary.brf new file mode 100644 index 0000000..6533d50 --- /dev/null +++ b/supplementary.brf @@ -0,0 +1,5 @@ +\backcite {Nauen2023}{{1}{A}{table.caption.1}} +\backcite {Touvron2022}{{1}{A}{table.caption.1}} +\backcite {Nauen2023}{{1}{A}{table.caption.1}} +\backcite {You2020}{{1}{\caption@xref {??}{ on input line 21}}{table.caption.1}} +\backcite {Touvron2022}{{1}{\caption@xref {??}{ on input line 22}}{table.caption.1}} diff --git a/supplementary.pdf b/supplementary.pdf index c7146ab..9a35151 100644 Binary files a/supplementary.pdf and b/supplementary.pdf differ diff --git a/supplementary.tex b/supplementary.tex index 9a7f494..ff827a5 100644 --- a/supplementary.tex +++ b/supplementary.tex @@ -1,112 +1,74 @@ -\documentclass[runningheads]{llncs} +% ICCV 2025 Paper Template -% --------------------------------------------------------------- -% Include basic ECCV package - -% TODO REVIEW: Insert your submission number below by replacing '*****' -% TODO FINAL: Comment out the following line for the camera-ready version -\usepackage[review,year=2026,ID=1741 -- Supplementary]{eccv} -% % TODO FINAL: Un-comment the following line for the camera-ready version -% \usepackage{eccv} +\documentclass[10pt,onecolumn,letterpaper]{article} -% OPTIONAL: Un-comment the following line for a version which is easier to read -% on small portrait-orientation screens (e.g., mobile phones, or beside other windows) -%\usepackage[mobile]{eccv} +%%%%%%%%% PAPER TYPE - PLEASE UPDATE FOR FINAL VERSION +% \usepackage{iccv} % To produce the CAMERA-READY 
version +\usepackage[review]{iccv} % To produce the REVIEW version +% \usepackage[pagenumbers]{iccv} % To force page numbers, e.g. for an arXiv version - -% --------------------------------------------------------------- -% Other packages - -% Commonly used abbreviations (\eg, \ie, \etc, \cf, \etal, etc.) -\usepackage{eccvabbrv} - -% Include other packages here, before hyperref. -\usepackage{graphicx} -\usepackage{booktabs} - -% The "axessiblity" package can be found at: https://ctan.org/pkg/axessibility?lang=en -\usepackage[accsupp]{axessibility} % Improves PDF readability for those with disabilities. - -% --------------------------------------------------------------- -% Hyperref package +% Import additional packages in the preamble file, before hyperref +\input{packages} % It is strongly recommended to use hyperref, especially for the review version. -% Please disable hyperref *only* if you encounter grave issues. -% hyperref with option pagebackref eases the reviewers' job, but should be disabled for the final version. +% hyperref with option pagebackref eases the reviewers' job. +% Please disable hyperref *only* if you encounter grave issues, +% e.g. with the file validation for the camera-ready version. % -% If you comment hyperref and then uncomment it, you should delete -% main.aux before re-running LaTeX. -% (Or just hit 'q' on the first LaTeX run, let it finish, and you -% should be clear). +% If you comment hyperref and then uncomment it, you should delete *.aux before re-running LaTeX. +% (Or just hit 'q' on the first LaTeX run, let it finish, and you should be clear). 
+\definecolor{iccvblue}{rgb}{0.21,0.49,0.74} +\usepackage[pagebackref,breaklinks,colorlinks,allcolors=iccvblue]{hyperref} +\usepackage[capitalize,noabbrev]{cleveref} -% TODO FINAL: Comment out the following line for the camera-ready version -%\usepackage[pagebackref,breaklinks,colorlinks,citecolor=eccvblue]{hyperref} -% TODO FINAL: Un-comment the following line for the camera-ready version -\usepackage{hyperref} -\input{packages.tex} - -% Support for ORCID icon -\usepackage{orcidlink} +%%%%%%%%% PAPER ID - PLEASE UPDATE +\def\paperID{6426 - Supplementary} % *** Enter the Paper ID here +\def\confName{ICCV} +\def\confYear{2025} +\newcommand{\name}{\textit{ForNet}\xspace} \newcommand{\schemename}{\textit{ForAug}\xspace} +% Names: RecombiNet, RecombNet, ReMix, ReMixNet, FoReMix/ForeMix + +%%%%%%%%% TITLE - PLEASE UPDATE +\title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation \\ - Supplementary Material -} + +%%%%%%%%% AUTHORS - PLEASE UPDATE +\author{Tobias Christian Nauen${}^{1,2}$ Brian Moser${}^2$ Federico Raue${}^2$ Stanislav Frolov${}^2$ Andreas Dengel${}^{1,2}$\\ +${}^1$RPTU Kaiserslautern-Landau, Kaiserslautern, Germany \\ +${}^2$German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany \\ +{\tt\small first\_second.last@dfki.de / first.last@dfki.de} +% For a paper whose authors are all at the same institution, +% omit the following lines up until the closing ``}''. +% Additional authors and addresses can be added with ``\and'', +% just like the second author. 
+% To save space, use either the email address or home page, not both +} \begin{document} - -% --------------------------------------------------------------- -% \title{\schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation} -\title{\schemename: Mitigating Biases in Image Classification via Controlled Image Compositions\\-- Supplementary Material --} - -% TODO REVIEW: If the paper title is too long for the running head, you can set -% an abbreviated paper title here. If not, comment out. -\titlerunning{\schemename -- Supplementary Material} - -% TODO FINAL: Replace with your author list. -% Include the authors' OCRID for the camera-ready version, if at all possible. -\author{ - Tobias Christian Nauen\inst{1,2}\orcidlink{0000-1111-2222-3333} \and - Brian Moser\inst{2}\orcidlink{1111-2222-3333-4444} \and - Federico Raue\inst{2}\orcidlink{2222--3333-4444-5555} \and \\ - Stanislav Frolov\inst{2} \and - Andreas Dengel\inst{1,2} -} - -% TODO FINAL: Replace with an abbreviated list of authors. -\authorrunning{T.~C.~Nauen et al.} -% First names are abbreviated in the running head. -% If there are more than two authors, 'et al.' is used. - -% TODO FINAL: Replace with your institution list. -\institute{RPTU University Kaiserslautern-Landau, Kaiserslautern, Germany \and - German Research Center for Artificial Intelligence (DFKI), Kaiserslautern, Germany\\ - \email{first\_second.last@dfki.de} / \email{first.last@dfki.de} -} - \maketitle - +% \input{sec/abstract} +% \input{sec/intro} +% \input{sec/related_work} +% \input{sec/method} +% \input{sec/experiments} +% % \input{sec/future_work} +% \input{sec/conclusion} +% \input{sec/acks} \begin{abstract} - This is the supplementary material for the paper:\\ - \schemename: Mitigating Biases in Image Classification via Controlled Image Compositions. 
- - \keywords{Data Augmentation \and Vision Transformer \and Robustness} + This is the supplementary material for the paper: \schemename: Recombining Foregrounds and Backgrounds to Improve Vision Transformer Training with Bias Mitigation \end{abstract} - -% \input{sec/abstract.tex} -% \input{sec/intro.tex} -% % \input{sec/intro_old.tex} -% \input{sec/related_work.tex} -% \input{sec/method.tex} -% \input{sec/experiments.tex} -% \input{sec/conclusion.tex} -% \input{sec/acks.tex} -\appendix -\input{sec/appendix.tex} - -\bibliographystyle{splncs04} -\bibliography{../JabRef/main_bib} - % \newpage -% \appendix + +\appendix +\input{sec/appendix} + +{ + \small + \bibliographystyle{ieeenat_fullname} + \bibliography{../JabRef/main_bib} +} \end{document}