% \iffalse
%
%% File l3sort.dtx
%
% Copyright (C) 2012-2024 The LaTeX Project
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "l3kernel bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/latex3
%
% for those people who are interested.
%
%<*driver>
\documentclass[full,kernel]{l3doc}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
%
% \title{^^A
%   The \pkg{l3sort} module\\ Sorting functions^^A
% }
%
% \author{^^A
%  The \LaTeX{} Project\thanks
%    {^^A
%      E-mail:
%        \href{mailto:latex-team@latex-project.org}
%          {latex-team@latex-project.org}^^A
%    }^^A
% }
%
% \date{Released 2024-12-25}
%
% \maketitle
%
% \begin{documentation}
%
% \section{Controlling sorting}
%
% \label{sec:l3sort:mech}
%
% \LaTeX3 comes with a facility to sort list variables (sequences,
% token lists, or comma-lists) according to some user-defined
% comparison. For instance,
% \begin{verbatim}
%   \clist_set:Nn \l_foo_clist { 3 , 01 , -2 , 5 , +1 }
%   \clist_sort:Nn \l_foo_clist
%     {
%       \int_compare:nNnTF { #1 } > { #2 }
%         { \sort_return_swapped: }
%         { \sort_return_same: }
%     }
% \end{verbatim}
% results in \cs[no-index]{l_foo_clist} holding the values
% |{ -2 , 01 , +1 , 3 , 5 }| sorted in non-decreasing order.
%
% The code defining the comparison should call
% \cs{sort_return_swapped:} if the two items given as |#1|
% and |#2| are not in the correct order, and otherwise it
% should call \cs{sort_return_same:} to indicate that
% the order of this pair of items should not be changed.
%
% For instance, a \meta{comparison code} consisting only
% of \cs{sort_return_same:} with no test yields a trivial
% sort: the final order is identical to the original order.
% Conversely, using a \meta{comparison code} consisting only
% of \cs{sort_return_swapped:} reverses the list (in a fairly
% inefficient way).
%
% \begin{texnote}
%   The current implementation is limited to sorting approximately
%   $20000$ items ($40000$ in \LuaTeX{}), depending on what other
%   packages are loaded.
%
%   Internally, the code from \pkg{l3sort} stores items in \tn{toks}
%   registers allocated locally.  Thus, the \meta{comparison code}
%   should not call \tn{newtoks} or other commands that allocate new
%   \tn{toks} registers.  On the other hand, altering the value of a
%   previously allocated \tn{toks} register is not a problem.
% \end{texnote}
%
% \begin{function}[added = 2017-02-06]{\sort_return_same:, \sort_return_swapped:}
%   \begin{syntax}
%     \cs{seq_sort:Nn} \meta{seq~var}
%     ~~|{| \ldots{} \cs{sort_return_same:} or \cs{sort_return_swapped:} \ldots{} |}|
%   \end{syntax}
%   Indicates whether to keep the order or swap the order of two items
%   that are compared in the sorting code.  Only one of the
%   \cs[no-index]{sort_return_\ldots{}} functions should be used by the
%   code, according to the results of some tests on the items |#1| and
%   |#2| to be compared.
% \end{function}
%
% \end{documentation}
%
% \begin{implementation}
%
% \section{\pkg{l3sort} implementation}
%
%    \begin{macrocode}
%<*package>
%    \end{macrocode}
%
%    \begin{macrocode}
%<@@=sort>
%    \end{macrocode}
%
% \subsection{Variables}
%
% \begin{variable}{\g_@@_internal_seq, \g_@@_internal_tl}
%   Sorting happens in a group; the result is stored in those global
%   variables before being copied outside the group to the proper
%   places.  For seq and tl this is more efficient than using \cs{use:e}
%   (or some \cs{exp_args:NNNe}) to smuggle the definition outside the
%   group since \TeX{} does not need to re-read tokens.  For clist we
%   don't gain anything since the result is converted from seq to clist
%   anyways.
%    \begin{macrocode}
\seq_new:N \g_@@_internal_seq
\tl_new:N \g_@@_internal_tl
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}
%   {
%     \l_@@_length_int, \l_@@_min_int, \l_@@_top_int, \l_@@_max_int,
%     \l_@@_true_max_int
%   }
%   The sequence has \cs{l_@@_length_int} items and is stored from
%   \cs{l_@@_min_int} to $\cs{l_@@_top_int}-1$.  While reading the
%   sequence in memory, we check that \cs{l_@@_top_int} remains at most
%   \cs{l_@@_max_int}, precomputed by \cs{@@_compute_range:}.  That
%   bound is such that the merge sort only uses \tn{toks} registers
%   less than \cs{l_@@_true_max_int}, namely those that have not been
%   allocated for use in other code: the user's comparison code could
%   alter these.
%    \begin{macrocode}
\int_new:N \l_@@_length_int
\int_new:N \l_@@_min_int
\int_new:N \l_@@_top_int
\int_new:N \l_@@_max_int
\int_new:N \l_@@_true_max_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_block_int}
%   Merge sort is done in several passes. In each pass, blocks of size
%   \cs{l_@@_block_int} are merged in pairs. The block size starts
%   at $1$, and, for a length in the range $[2^k+1,2^{k+1}]$, reaches
%   $2^{k}$ in the last pass.
%    \begin{macrocode}
\int_new:N \l_@@_block_int
%    \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_begin_int}
% \begin{variable}{\l_@@_end_int}
%   When merging two blocks, \cs{l_@@_begin_int} marks the lowest
%   index in the two blocks, and \cs{l_@@_end_int} marks the
%   highest index, plus $1$.
%    \begin{macrocode}
\int_new:N \l_@@_begin_int
\int_new:N \l_@@_end_int
%    \end{macrocode}
% \end{variable}
% \end{variable}
%
% \begin{variable}{\l_@@_A_int}
% \begin{variable}{\l_@@_B_int}
% \begin{variable}{\l_@@_C_int}
%   When merging two blocks (whose end-points are \texttt{beg}
%   and \texttt{end}), $A$ starts from the high end of the low
%   block, and decreases until reaching \texttt{beg}. The index
%   $B$ starts from the top of the range and marks the register
%   in which a sorted item should be put. Finally, $C$ points
%   to the copy of the high block in the interval of registers
%   starting at \cs{l_@@_length_int}, upwards. $C$ starts
%   from the upper limit of that range.
%    \begin{macrocode}
\int_new:N \l_@@_A_int
\int_new:N \l_@@_B_int
\int_new:N \l_@@_C_int
%    \end{macrocode}
% \end{variable}
% \end{variable}
% \end{variable}
%
% \begin{variable}{\s_@@_mark,\s_@@_stop}
%   Internal scan marks.
%    \begin{macrocode}
\scan_new:N \s_@@_mark
\scan_new:N \s_@@_stop
%    \end{macrocode}
% \end{variable}
%
% \subsection{Finding available \tn{toks} registers}
%
% \begin{macro}{\@@_shrink_range:}
% \begin{macro}{\@@_shrink_range_loop:}
%   After \cs{@@_compute_range:} (defined below) determines that
%   \tn{toks} registers between \cs{l_@@_min_int} (included) and
%   \cs{l_@@_true_max_int} (excluded) have not yet been assigned,
%   \cs{@@_shrink_range:} computes \cs{l_@@_max_int} to reflect the need
%   for a buffer when merging blocks in the merge sort.  Given
%   $2^{n}\leq A\leq 2^{n}+2^{n-1}$ registers we can sort $\lfloor
%   A/2\rfloor+2^{n-2}$ items while if we have $2^{n}+2^{n-1}\leq A\leq
%   2^{n+1}$ registers we can sort $A-2^{n-1}$ items.  We first find out
%   a power $2^{n}$ such that $2^{n}\leq A\leq 2^{n+1}$ by repeatedly
%   halving \cs{l_@@_block_int}, starting at $2^{15}$ or $2^{14}$ namely
%   half the total number of registers, then we use the formulas and set
%   \cs{l_@@_max_int}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_shrink_range:
  {
    \int_set:Nn \l_@@_A_int
      { \l_@@_true_max_int - \l_@@_min_int + 1 }
    \int_set:Nn \l_@@_block_int { \c_max_register_int / 2 }
    \@@_shrink_range_loop:
    \int_set:Nn \l_@@_max_int
      {
        \int_compare:nNnTF
          { \l_@@_block_int * 3 / 2 } > \l_@@_A_int
          {
            \l_@@_min_int
            + ( \l_@@_A_int - 1 ) / 2
            + \l_@@_block_int / 4
            - 1
          }
          { \l_@@_true_max_int - \l_@@_block_int / 2 }
      }
  }
\cs_new_protected:Npn \@@_shrink_range_loop:
  {
    \if_int_compare:w \l_@@_A_int < \l_@@_block_int
      \tex_divide:D \l_@@_block_int 2 \exp_stop_f:
      \exp_after:wN \@@_shrink_range_loop:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_compute_range:, \@@_redefine_compute_range:}
% \begin{variable}{\c_@@_max_length_int}
%   First find out what \tn{toks} have not yet been assigned.  There are
%   many cases.  In \LaTeXe{} with no package, available \tn{toks} range
%   from $\tn{count}15+1$ to \cs{c_max_register_int} included (this was
%   not altered despite the 2015 changes).  When \tn{loctoks} is
%   defined, namely in plain (e)\TeX{}, or when the package \pkg{etex}
%   is loaded in \LaTeXe{}, redefine \cs{@@_compute_range:} to use the
%   range $\tn{count}265$ to $\tn{count}275-1$.  The \pkg{elocalloc}
%   package also defines \tn{loctoks} but uses yet another number for
%   the upper bound, namely \cs{e@alloc@top} (minus one).  We must check
%   for \tn{loctoks} every time a sorting function is called, as
%   \pkg{etex} or \pkg{elocalloc} could be loaded.
%
%   In \ConTeXt{} MkIV the range is from
%   $|\c_syst_last_allocated_toks|+1$ to \cs{c_max_register_int}, and in
%   MkII it is from $|\lastallocatedtoks|+1$ to \cs{c_max_register_int}.
%   In all these cases, call \cs{@@_shrink_range:}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_compute_range:
  {
    \int_set:Nn \l_@@_min_int { \tex_count:D 15 + 1 }
    \int_set:Nn \l_@@_true_max_int { \c_max_register_int + 1 }
    \@@_shrink_range:
    \if_meaning:w \loctoks \tex_undefined:D \else:
      \if_meaning:w \loctoks \scan_stop: \else:
        \@@_redefine_compute_range:
        \@@_compute_range:
      \fi:
    \fi:
  }
\cs_new_protected:Npn \@@_redefine_compute_range:
  {
    \cs_if_exist:cTF { ver@elocalloc.sty }
      {
        \cs_gset_protected:Npn \@@_compute_range:
          {
            \int_set:Nn \l_@@_min_int { \tex_count:D 265 }
            \int_set_eq:NN \l_@@_true_max_int \e@alloc@top
            \@@_shrink_range:
          }
      }
      {
        \cs_gset_protected:Npn \@@_compute_range:
          {
            \int_set:Nn \l_@@_min_int { \tex_count:D 265 }
            \int_set:Nn \l_@@_true_max_int { \tex_count:D 275 }
            \@@_shrink_range:
          }
      }
  }
\cs_if_exist:NT \loctoks { \@@_redefine_compute_range: }
\tl_map_inline:nn { \lastallocatedtoks \c_syst_last_allocated_toks }
  {
    \cs_if_exist:NT #1
      {
        \cs_gset_protected:Npn \@@_compute_range:
          {
            \int_set:Nn \l_@@_min_int { #1 + 1 }
            \int_set:Nn \l_@@_true_max_int { \c_max_register_int + 1 }
            \@@_shrink_range:
          }
      }
  }
%    \end{macrocode}
% \end{variable}
% \end{macro}
%
% \subsection{Protected user commands}
%
% \begin{macro}{\@@_main:NNNn}
%   Sorting happens in three steps. First store items in \tn{toks}
%   registers ranging from \cs{l_@@_min_int} to $\cs{l_@@_top_int}-1$,
%   while checking that the list is not too long. If we reach the
%   maximum length, that's an error; exit the group.  Secondly, sort the
%   array of \tn{toks} registers, using the user-defined sorting
%   function: \cs{@@_level:} calls \cs{@@_compare:nn} as needed.
%   Finally, unpack the \tn{toks} registers (now sorted) into the target
%   tl, or into \cs{g_@@_internal_seq} for seq and clist.  This is done
%   by \cs{@@_seq:NNNNn} and \cs{@@_tl:NNn}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_main:NNNn #1#2#3#4
  {
    \@@_disable_toksdef:
    \@@_compute_range:
    \int_set_eq:NN \l_@@_top_int \l_@@_min_int
    #1 #3
      {
        \if_int_compare:w \l_@@_top_int = \l_@@_max_int
          \@@_too_long_error:NNw #2 #3
        \fi:
        \tex_toks:D \l_@@_top_int {##1}
        \int_incr:N \l_@@_top_int
      }
    \int_set:Nn \l_@@_length_int
      { \l_@@_top_int - \l_@@_min_int }
    \cs_set:Npn \@@_compare:nn ##1 ##2 {#4}
    \int_set:Nn \l_@@_block_int { 1 }
    \@@_level:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tl_sort:Nn, \tl_sort:cn, \tl_gsort:Nn, \tl_gsort:cn}
% \begin{macro}{\@@_tl:NNn}
% \begin{macro}[EXP]{\@@_tl_toks:w}
%   Call the main sorting function then unpack \tn{toks} registers
%   outside the group into the target token list.  The unpacking is done
%   by \cs{@@_tl_toks:w}; registers are numbered from \cs{l_@@_min_int}
%   to $\cs{l_@@_top_int}-1$.  For expansion behaviour we need a couple
%   of primitives.  The \cs{tl_gclear:N} reduces memory usage.  The
%   \cs{prg_break_point:} is used by \cs{@@_main:NNNn} when the list is
%   too long.
%    \begin{macrocode}
\cs_new_protected:Npn \tl_sort:Nn { \@@_tl:NNn \tl_set_eq:NN }
\cs_generate_variant:Nn \tl_sort:Nn { c }
\cs_new_protected:Npn \tl_gsort:Nn { \@@_tl:NNn \tl_gset_eq:NN }
\cs_generate_variant:Nn \tl_gsort:Nn { c }
\cs_new_protected:Npn \@@_tl:NNn #1#2#3
  {
    \group_begin:
      \@@_main:NNNn \tl_map_inline:Nn \tl_map_break:n #2 {#3}
      \__kernel_tl_gset:Nx \g_@@_internal_tl
        { \@@_tl_toks:w \l_@@_min_int ; }
    \group_end:
    #1 #2 \g_@@_internal_tl
    \tl_gclear:N \g_@@_internal_tl
    \prg_break_point:
  }
\cs_new:Npn \@@_tl_toks:w #1 ;
  {
    \if_int_compare:w #1 < \l_@@_top_int
      { \tex_the:D \tex_toks:D #1 }
      \exp_after:wN \@@_tl_toks:w
      \int_value:w \int_eval:n { #1 + 1 } \exp_after:wN ;
    \fi:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\seq_sort:Nn, \seq_sort:cn, \seq_gsort:Nn, \seq_gsort:cn}
% \begin{macro}{\clist_sort:Nn, \clist_sort:cn, \clist_gsort:Nn, \clist_gsort:cn}
% \begin{macro}{\@@_seq:NNNNn}
%   Use the same general framework for seq and clist.  Apply the general
%   sorting code, then unpack \tn{toks} into \cs{g_@@_internal_seq}.
%   Outside the group copy or convert (for clist) the data to the target
%   variable.  The \cs{seq_gclear:N} reduces memory usage.  The
%   \cs{prg_break_point:} is used by \cs{@@_main:NNNn} when the list is
%   too long.
%    \begin{macrocode}
\cs_new_protected:Npn \seq_sort:Nn
  { \@@_seq:NNNNn \seq_map_inline:Nn \seq_map_break:n \seq_set_eq:NN }
\cs_generate_variant:Nn \seq_sort:Nn { c }
\cs_new_protected:Npn \seq_gsort:Nn
  { \@@_seq:NNNNn \seq_map_inline:Nn \seq_map_break:n \seq_gset_eq:NN }
\cs_generate_variant:Nn \seq_gsort:Nn { c }
\cs_new_protected:Npn \clist_sort:Nn
  {
    \@@_seq:NNNNn \clist_map_inline:Nn \clist_map_break:n
      \clist_set_from_seq:NN
  }
\cs_generate_variant:Nn \clist_sort:Nn { c }
\cs_new_protected:Npn \clist_gsort:Nn
  {
    \@@_seq:NNNNn \clist_map_inline:Nn \clist_map_break:n
      \clist_gset_from_seq:NN
  }
\cs_generate_variant:Nn \clist_gsort:Nn { c }
\cs_new_protected:Npn \@@_seq:NNNNn #1#2#3#4#5
  {
    \group_begin:
      \@@_main:NNNn #1 #2 #4 {#5}
      \seq_gclear:N \g_@@_internal_seq
      \int_step_inline:nnn
        \l_@@_min_int { \l_@@_top_int - 1 }
        {
          \seq_gput_right:Ne \g_@@_internal_seq
            { \tex_the:D \tex_toks:D ##1 }
        }
    \group_end:
    #3 #4 \g_@@_internal_seq
    \seq_gclear:N \g_@@_internal_seq
    \prg_break_point:
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \subsection{Merge sort}
%
% \begin{macro}{\@@_level:}
%   This function is called once blocks of size \cs{l_@@_block_int}
%   (initially $1$) are each sorted. If the whole list fits in one
%   block, then we are done (this also takes care of the case of an
%   empty list or a list with one item). Otherwise, go through pairs
%   of blocks starting from $0$, then double the block size, and repeat.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_level:
  {
    \if_int_compare:w \l_@@_block_int < \l_@@_length_int
      \l_@@_end_int \l_@@_min_int
      \@@_merge_blocks:
      \tex_advance:D \l_@@_block_int \l_@@_block_int
      \exp_after:wN \@@_level:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_merge_blocks:}
%   This function is called to merge a pair of blocks, starting at
%   the last value of \cs{l_@@_end_int} (end-point of the previous
%   pair of blocks). If shifting by one block to the right we reach
%   the end of the list, then this pass has ended: the end of the
%   list is sorted already. Otherwise, store the result of that shift in $A$,
%   which indexes the first block starting from the top end.
%   Then locate the end-point (maximum) of the second block: shift
%   \texttt{end} upwards by one more block, but keeping it
%   $\leq\texttt{top}$. Copy this upper block of \tn{toks}
%   registers in registers above \texttt{length}, indexed by $C$:
%   this is covered by \cs{@@_copy_block:}. Once this is done we
%   are ready to do the actual merger using \cs{@@_merge_blocks_aux:},
%   after shifting $A$, $B$ and $C$ so that they point to the largest
%   index in their respective ranges rather than pointing just beyond
%   those ranges. Of course, once that pair of blocks is merged,
%   move on to the next pair.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_merge_blocks:
  {
    \l_@@_begin_int \l_@@_end_int
    \tex_advance:D \l_@@_end_int \l_@@_block_int
    \if_int_compare:w \l_@@_end_int < \l_@@_top_int
      \l_@@_A_int \l_@@_end_int
      \tex_advance:D \l_@@_end_int \l_@@_block_int
      \if_int_compare:w \l_@@_end_int > \l_@@_top_int
        \l_@@_end_int \l_@@_top_int
      \fi:
      \l_@@_B_int \l_@@_A_int
      \l_@@_C_int \l_@@_top_int
      \@@_copy_block:
      \int_decr:N \l_@@_A_int
      \int_decr:N \l_@@_B_int
      \int_decr:N \l_@@_C_int
      \exp_after:wN \@@_merge_blocks_aux:
      \exp_after:wN \@@_merge_blocks:
    \fi:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_copy_block:}
%   We wish to store a copy of the \enquote{upper} block of
%   \tn{toks} registers, ranging between the initial value of
%   \cs{l_@@_B_int} (included) and \cs{l_@@_end_int}
%   (excluded) into a new range starting at the initial value
%   of \cs{l_@@_C_int}, namely \cs{l_@@_top_int}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_copy_block:
  {
    \tex_toks:D \l_@@_C_int \tex_toks:D \l_@@_B_int
    \int_incr:N \l_@@_C_int
    \int_incr:N \l_@@_B_int
    \if_int_compare:w \l_@@_B_int = \l_@@_end_int
      \use_i:nn
    \fi:
    \@@_copy_block:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_merge_blocks_aux:}
%   At this stage, the first block starts at \cs{l_@@_begin_int},
%   and ends at \cs{l_@@_A_int}, and the second block starts at
%   \cs{l_@@_top_int} and ends at \cs{l_@@_C_int}. The result
%   of the merger is stored at positions indexed by \cs{l_@@_B_int},
%   which starts at $\cs{l_@@_end_int}-1$ and decreases down to
%   \cs{l_@@_begin_int}, covering the full range of the two blocks.
%   In other words, we are building the merger starting with the
%   largest values.
%   The comparison function is defined to return either
%   \texttt{swapped} or \texttt{same}. Of course, this
%   means the arguments need to be given in the order they
%   appear originally in the list.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_merge_blocks_aux:
  {
    \exp_after:wN \@@_compare:nn \exp_after:wN
      { \tex_the:D \tex_toks:D \exp_after:wN \l_@@_A_int \exp_after:wN }
      \exp_after:wN { \tex_the:D \tex_toks:D \l_@@_C_int }
    \prg_do_nothing:
    \@@_return_mark:w
    \@@_return_mark:w
    \s_@@_mark
    \@@_return_none_error:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\sort_return_same:, \sort_return_swapped:}
% \begin{macro}{\@@_return_mark:w}
% \begin{macro}{\@@_return_none_error:, \@@_return_two_error:}
%   Each comparison should call \cs{sort_return_same:} or
%   \cs{sort_return_swapped:} exactly once.  If neither is called,
%   \cs{@@_return_none_error:} is called, since the \texttt{return_mark}
%   removes tokens until \cs{s_@@_mark}.  If one is called, the
%   \texttt{return_mark} auxiliary removes everything except
%   \cs{@@_return_same:w} (or its \texttt{swapped} analogue) followed by
%   \cs{@@_return_none_error:}.  Finally if two or more are called,
%   \cs{@@_return_two_error:} ends up before any \cs{@@_return_mark:w},
%   so that it produces an error.
%    \begin{macrocode}
\cs_new_protected:Npn \sort_return_same:
    #1 \@@_return_mark:w #2 \s_@@_mark
  {
    #1
    #2
    \@@_return_two_error:
    \@@_return_mark:w
    \s_@@_mark
    \@@_return_same:w
  }
\cs_new_protected:Npn \sort_return_swapped:
    #1 \@@_return_mark:w #2 \s_@@_mark
  {
    #1
    #2
    \@@_return_two_error:
    \@@_return_mark:w
    \s_@@_mark
    \@@_return_swapped:w
  }
\cs_new_protected:Npn \@@_return_mark:w #1 \s_@@_mark { }
\cs_new_protected:Npn \@@_return_none_error:
  {
    \msg_error:nnee { sort } { return-none }
      { \tex_the:D \tex_toks:D \l_@@_A_int }
      { \tex_the:D \tex_toks:D \l_@@_C_int }
    \@@_return_same:w \@@_return_none_error:
  }
\cs_new_protected:Npn \@@_return_two_error:
  {
    \msg_error:nnee { sort } { return-two }
      { \tex_the:D \tex_toks:D \l_@@_A_int }
      { \tex_the:D \tex_toks:D \l_@@_C_int }
  }
%    \end{macrocode}
% \end{macro}
% \end{macro}
% \end{macro}
%
% \begin{macro}{\@@_return_same:w}
%   If the comparison function returns \texttt{same},
%   then the second argument fed to \cs{@@_compare:nn}
%   should remain to the right of the other one. Since
%   we build the merger starting from the right, we copy
%   that \tn{toks} register into the allotted range, then
%   shift the pointers $B$ and $C$, and go on to do one
%   more step in the merger, unless the second block has
%   been exhausted: then the remainder of the first block
%   is already in the correct registers and we are done
%   with merging those two blocks.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_return_same:w #1 \@@_return_none_error:
  {
    \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_C_int
    \int_decr:N \l_@@_B_int
    \int_decr:N \l_@@_C_int
    \if_int_compare:w \l_@@_C_int < \l_@@_top_int
      \use_i:nn
    \fi:
    \@@_merge_blocks_aux:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_return_swapped:w}
%   If the comparison function returns \texttt{swapped},
%   then the next item to add to the merger is the first
%   argument, contents of the \tn{toks} register $A$.
%   Then shift the pointers $A$ and $B$ to the left, and
%   go for one more step for the merger, unless the left
%   block was exhausted ($A$ goes below the threshold).
%   In that case, all remaining \tn{toks} registers in
%   the second block, indexed by $C$, are copied
%   to the merger by \cs{@@_merge_blocks_end:}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_return_swapped:w #1 \@@_return_none_error:
  {
    \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_A_int
    \int_decr:N \l_@@_B_int
    \int_decr:N \l_@@_A_int
    \if_int_compare:w \l_@@_A_int < \l_@@_begin_int
      \@@_merge_blocks_end: \use_i:nn
    \fi:
    \@@_merge_blocks_aux:
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_merge_blocks_end:}
%   This function's task is to copy the \tn{toks} registers
%   in the block indexed by $C$ to the merger indexed by $B$.
%   The end can equally be detected by checking when $B$ reaches
%   the threshold \texttt{begin}, or when $C$ reaches
%   \texttt{top}.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_merge_blocks_end:
  {
    \tex_toks:D \l_@@_B_int \tex_toks:D \l_@@_C_int
    \int_decr:N \l_@@_B_int
    \int_decr:N \l_@@_C_int
    \if_int_compare:w \l_@@_B_int < \l_@@_begin_int
      \use_i:nn
    \fi:
    \@@_merge_blocks_end:
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Expandable sorting}
%
% Sorting expandably is very different from sorting and assigning to a
% variable.  Since tokens cannot be stored, they must remain in the
% input stream, and be read through at every step.  It is thus
% necessarily much slower (at best $O(n^2\ln n)$) than non-expandable
% sorting functions ($O(n\ln n)$).
%
% A prototypical version of expandable quicksort is as follows.  If the
% argument has no item, return nothing, otherwise partition, using the
% first item as a pivot (argument |#4| of \cs{@@:nnNnn}).  The
% arguments of \cs{@@:nnNnn} are 1.~items less than |#4|, 2.~items
% greater or equal to |#4|, 3.~comparison, 4.~pivot, 5.~next item to
% test.  If |#5| is the tail of the list, call \cs{tl_sort:nN} on |#1|
% and on |#2|, placing |#4| in between; |\use:ff| expands the parts to
% make \cs{tl_sort:nN} \texttt{f}-expandable.  Otherwise, compare |#4|
% and |#5| using |#3|.  If they are ordered, place |#5| amongst the
% \enquote{greater} items, otherwise amongst the \enquote{lesser} items,
% and continue partitioning.
% \begin{verbatim}
% \cs_new:Npn \tl_sort:nN #1#2
%   {
%     \tl_if_blank:nF {#1}
%       {
%         \__sort:nnNnn { } { } #2
%           #1 \q__sort_recursion_tail \q__sort_recursion_stop
%       }
%   }
% \cs_new:Npn \__sort:nnNnn #1#2#3#4#5
%   {
%     \quark_if_recursion_tail_stop_do:nn {#5}
%       { \use:ff { \tl_sort:nN {#1} #3 {#4} } { \tl_sort:nN {#2} #3 } }
%     #3 {#4} {#5}
%       { \__sort:nnNnn {#1} { #2 {#5} } #3 {#4} }
%       { \__sort:nnNnn { #1 {#5} } {#2} #3 {#4} }
%   }
% \cs_generate_variant:Nn \use:nn { ff }
% \end{verbatim}
% There are quite a few optimizations available here: the code below is
% less legible, but more than twice as fast.
%
% In the simple version of the code, \cs{@@:nnNnn} is called
% \(O(n\ln n)\) times on average (the number of comparisons required by
% the quicksort algorithm).  Hence most of our focus is on
% optimizing that function.
%
% The first speed up is to avoid testing for the end of the list at
% every call to \cs{@@:nnNnn}.  For this, the list is prepared by
% changing each \meta{item} of the original token list into
% \meta{command} \Arg{item}, just like sequences are stored.  We arrange
% things such that the \meta{command} is the \meta{conditional} provided
% by the user: the loop over the \meta{prepared tokens} then looks like
% \begin{quote}
%   \ttfamily
%   \cs{cs_new:Npn}~\cs{@@_loop:wNn}~\ldots{}~|#6#7|\\
%   ~~|{|\\
%   ~~~~|#6|~\Arg{pivot}~|{#7}|~\meta{loop big}~\meta{loop small}\\
%   ~~~~~~\meta{extra arguments}\\
%   ~~|}|\\
%   \cs{@@_loop:wNn}~\ldots{}~\meta{prepared tokens}\\
%   ~~\meta{end-loop}~|{}|~\cs{s_@@_stop}
% \end{quote}
% In this example, which matches the structure of
% \cs{@@_quick_split_i:NnnnnNn} and a few other functions below, the
% \cs{@@_loop:wNn} auxiliary normally receives the user's
% \meta{conditional} as~|#6| and an \meta{item} as~|#7|.  This is
% compared to the \meta{pivot} (the argument~|#5|, not shown here), and
% the \meta{conditional} leaves the \meta{loop big} or \meta{loop small}
% auxiliary, which both have the same form as \cs{@@_loop:wNn},
% receiving the next pair \meta{conditional} \Arg{item} as |#6|
% and~|#7|.  At the end, |#6| is the \meta{end-loop} function, which
% terminates the loop.
%
% The second speed up is to minimize the duplicated tokens between the
% \texttt{true} and \texttt{false} branches of the conditional.  For
% this, we introduce two versions of \cs{@@:nnNnn}, which receive
% the new item as~|#1| and place it either into the list~|#2| of items
% less than the pivot~|#4| or into the list~|#3| of items greater or
% equal to the pivot.
% \begin{verbatim}
% \cs_new:Npn \__sort_i:nnnnNn #1#2#3#4#5#6
%   {
%     #5 {#4} {#6} \__sort_ii:nnnnNn \__sort_i:nnnnNn
%       {#6} { #2 {#1} } {#3} {#4}
%   }
% \cs_new:Npn \__sort_ii:nnnnNn #1#2#3#4#5#6
%   {
%     #5 {#4} {#6} \__sort_ii:nnnnNn \__sort_i:nnnnNn
%       {#6} {#2} { #3 {#1} } {#4}
%   }
% \end{verbatim}
% Note that the two functions have the form of \cs{@@_loop:wNn} above,
% receiving as~|#5| the conditional or a function to end the loop.  In
% fact, the lists~|#2| and~|#3| must be made of pairs \meta{conditional}
% \Arg{item}, so we have to replace~|{#6}| above by |{|~|#5|~|{#6}|~|}|,
% and |{#1}|~by~|#1|.  The actual functions have one more argument, so
% all argument numbers are shifted compared to this code.
%
% The third speed up is to avoid |\use:ff| using a continuation-passing
% style: \cs{@@_quick_split:NnNn} expects a list followed by
% \cs{s_@@_mark} \Arg{code}, and expands to \meta{code} \meta{sorted list}.
% Sorting the two parts of the list around the pivot is done with
% \begin{quote}
%   \ttfamily
%   \cs{@@_quick_split:NnNn} |#2| \ldots{} \cs{s_@@_mark}\\
%   ~~|{|\\
%   ~~~~\cs{@@_quick_split:NnNn} |#1| \ldots{} \cs{s_@@_mark} \Arg{code}\\
%   ~~~~\Arg{pivot}\\
%   ~~|}|
% \end{quote}
% Items which are larger than the \meta{pivot} are sorted, then placed
% after code that sorts the smaller items, and after the (braced)
% \meta{pivot}.
%
% The fourth speed up is avoid the recursive call to \cs{tl_sort:nN}
% with an empty first argument.  For this, we introduce functions
% similar to the \cs{@@_i:nnnnNn} of the last example, but aware of
% whether the list of \meta{conditional} \Arg{item} read so far that are
% less than the pivot, and the list of those greater or equal, are empty
% or not: see \cs{@@_quick_split:NnNn} and functions defined below.
% Knowing whether the lists are empty or not is useless if we do not use
% distinct ending codes as appropriate.  The splitting auxiliaries
% communicate to the \meta{end-loop} function (that is initially placed
% after the ``prepared'' list) by placing a specific ending function,
% ignored when looping, but useful at the end.  In fact, the
% \meta{end-loop} function does nothing but place the appropriate ending
% function in front of all its arguments.  The ending functions take
% care of sorting non-empty sublists, placing the pivot in between, and
% the continuation before.
%
% The final change in fact slows down the code a little, but is required
% to avoid memory issues: schematically, when \TeX{} encounters
% \begin{verbatim}
%   \use:n { \use:n { \use:n { ... } ... } ... }
% \end{verbatim}
% the argument of the first \cs{use:n} is not completely read by the
% second \cs{use:n}, hence must remain in memory; then the argument of
% the second \cs{use:n} is not completely read when grabbing the
% argument of the third \cs{use:n}, hence must remain in memory, and so
% on.  The memory consumption grows quadratically with the number of
% nested \cs{use:n}.  In practice, this means that we must read
% everything until a trailing \cs{s_@@_stop} once in a while, otherwise
% sorting lists of more than a few thousand items would exhaust a
% typical \TeX{}'s memory.
%
% \begin{macro}[EXP]{\tl_sort:nN}
% \begin{macro}[EXP]
%   {
%     \@@_quick_prepare:Nnnn,
%     \@@_quick_prepare_end:NNNnw,
%     \@@_quick_cleanup:w
%   }
%   The code within the \cs{exp_not:f} sorts the list, leaving in most
%   cases a leading \cs{exp_not:f}, which stops the expansion, letting
%   the result be return within \cs{exp_not:n}.  We filter out the case
%   of a list with no item, which would otherwise cause problems.  Then
%   prepare the token list~|#1| by inserting the conditional~|#2| before
%   each item.  The \texttt{prepare} auxiliary receives the conditional
%   as~|#1|, the prepared token list so far as~|#2|, the next prepared
%   item as~|#3|, and the item after that as~|#4|.  The loop ends
%   when~|#4| contains \cs{prg_break_point:}, then the
%   \texttt{prepare_end} auxiliary finds the prepared token list
%   as~|#4|.  The scene is then set up for \cs{@@_quick_split:NnNn},
%   which sorts the prepared list and perform the post action placed
%   after \cs{s_@@_mark}, namely removing the trailing \cs{s_@@_stop} and
%   \cs{s_@@_stop} and leaving \cs{exp_stop_f:} to stop
%   \texttt{f}-expansion.
%    \begin{macrocode}
\cs_new:Npn \tl_sort:nN #1#2
  {
    \exp_not:f
      {
        \tl_if_blank:nF {#1}
          {
            \@@_quick_prepare:Nnnn #2 { } { }
              #1
              { \prg_break_point: \@@_quick_prepare_end:NNNnw }
            \s_@@_stop
          }
      }
  }
\cs_new:Npn \@@_quick_prepare:Nnnn #1#2#3#4
  {
    \prg_break: #4 \prg_break_point:
    \@@_quick_prepare:Nnnn #1 { #2 #3 } { #1 {#4} }
  }
\cs_new:Npn \@@_quick_prepare_end:NNNnw #1#2#3#4#5 \s_@@_stop
  {
    \@@_quick_split:NnNn #4 \@@_quick_end:nnTFNn { }
    \s_@@_mark { \@@_quick_cleanup:w \exp_stop_f: }
    \s_@@_mark \s_@@_stop
  }
\cs_new:Npn \@@_quick_cleanup:w #1 \s_@@_mark \s_@@_stop {#1}
%    \end{macrocode}
% \end{macro}
% \end{macro}
%
% \begin{macro}[EXP]
%   {
%     \@@_quick_split:NnNn,
%     \@@_quick_only_i:NnnnnNn,
%     \@@_quick_only_ii:NnnnnNn,
%     \@@_quick_split_i:NnnnnNn,
%     \@@_quick_split_ii:NnnnnNn
%   }
%   The \texttt{only_i}, \texttt{only_ii}, \texttt{split_i} and
%   \texttt{split_ii} auxiliaries receive a useless first argument, the
%   new item~|#2| (that they append to either one of the next two
%   arguments), the list~|#3| of items less than the pivot, bigger
%   items~|#4|, the pivot~|#5|, a \meta{function}~|#6|, and an
%   item~|#7|.  The \meta{function} is the user's \meta{conditional}
%   except at the end of the list where it is
%   \cs{@@_quick_end:nnTFNn}.  The comparison is applied to the
%   \meta{pivot} and the \meta{item}, and calls the \texttt{only_i} or
%   \texttt{split_i} auxiliaries if the \meta{item} is smaller, and the
%   \texttt{only_ii} or \texttt{split_ii} auxiliaries otherwise.  In
%   both cases, the next auxiliary goes to work right away, with no
%   intermediate expansion that would slow down operations.  Note that
%   the argument~|#2| left for the next call has the form
%   \meta{conditional} \Arg{item}, so that the lists~|#3| and~|#4| keep
%   the right form to be fed to the next sorting function.
%   The \texttt{split} auxiliary differs from these in that it is
%   missing three of the arguments, which would be empty, and its first
%   argument is always the user's \meta{conditional} rather than an
%   ending function.
%    \begin{macrocode}
\cs_new:Npn \@@_quick_split:NnNn #1#2#3#4
  {
    #3 {#2} {#4} \@@_quick_only_ii:NnnnnNn
      \@@_quick_only_i:NnnnnNn
      \@@_quick_single_end:nnnwnw
      { #3 {#4} } { } { } {#2}
  }
\cs_new:Npn \@@_quick_only_i:NnnnnNn #1#2#3#4#5#6#7
  {
    #6 {#5} {#7} \@@_quick_split_ii:NnnnnNn
      \@@_quick_only_i:NnnnnNn
      \@@_quick_only_i_end:nnnwnw
      { #6 {#7} } { #3 #2 } { } {#5}
  }
\cs_new:Npn \@@_quick_only_ii:NnnnnNn #1#2#3#4#5#6#7
  {
    #6 {#5} {#7} \@@_quick_only_ii:NnnnnNn
      \@@_quick_split_i:NnnnnNn
      \@@_quick_only_ii_end:nnnwnw
      { #6 {#7} } { } { #4 #2 } {#5}
  }
\cs_new:Npn \@@_quick_split_i:NnnnnNn #1#2#3#4#5#6#7
  {
    #6 {#5} {#7} \@@_quick_split_ii:NnnnnNn
      \@@_quick_split_i:NnnnnNn
      \@@_quick_split_end:nnnwnw
      { #6 {#7} } { #3 #2 } {#4} {#5}
  }
\cs_new:Npn \@@_quick_split_ii:NnnnnNn #1#2#3#4#5#6#7
  {
    #6 {#5} {#7} \@@_quick_split_ii:NnnnnNn
      \@@_quick_split_i:NnnnnNn
      \@@_quick_split_end:nnnwnw
      { #6 {#7} } {#3} { #4 #2 } {#5}
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}[EXP]
%   {
%     \@@_quick_end:nnTFNn,
%     \@@_quick_single_end:nnnwnw,
%     \@@_quick_only_i_end:nnnwnw,
%     \@@_quick_only_ii_end:nnnwnw,
%     \@@_quick_split_end:nnnwnw,
%   }
%   The \cs{@@_quick_end:nnTFNn} appears instead of the user's
%   conditional, and receives as its arguments the pivot~|#1|, a fake
%   item~|#2|, a \texttt{true} and a \texttt{false} branches |#3|
%   and~|#4|, followed by an ending function~|#5| (one of the four
%   auxiliaries here) and another copy~|#6| of the fake item.  All those
%   are discarded except the function~|#5|.  This function receives
%   lists~|#1| and~|#2| of items less than or greater than the
%   pivot~|#3|, then a continuation code~|#5| just after \cs{s_@@_mark}.
%   To avoid a memory problem described earlier, all of the ending
%   functions read~|#6| until \cs{s_@@_stop} and place~|#6| back into the
%   input stream.  When the lists |#1| and~|#2| are empty, the
%   \texttt{single} auxiliary simply places the continuation~|#5| before
%   the pivot~|{#3}|.  When |#2|~is empty, |#1|~is sorted and placed
%   before the pivot~|{#3}|, taking care to feed the continuation~|#5|
%   as a continuation for the function sorting~|#1|.  When |#1|~is
%   empty, |#2|~is sorted, and the continuation argument is used to
%   place the continuation~|#5| and the pivot~|{#3}| before the sorted
%   result.  Finally, when both lists are non-empty, items larger than
%   the pivot are sorted, then items less than the pivot, and the
%   continuations are done in such a way to place the pivot in between.
%    \begin{macrocode}
\cs_new:Npn \@@_quick_end:nnTFNn #1#2#3#4#5#6 {#5}
\cs_new:Npn \@@_quick_single_end:nnnwnw #1#2#3#4 \s_@@_mark #5#6 \s_@@_stop
  { #5 {#3} #6 \s_@@_stop }
\cs_new:Npn \@@_quick_only_i_end:nnnwnw #1#2#3#4 \s_@@_mark #5#6 \s_@@_stop
  {
    \@@_quick_split:NnNn #1
      \@@_quick_end:nnTFNn { } \s_@@_mark {#5}
    {#3}
    #6 \s_@@_stop
  }
\cs_new:Npn \@@_quick_only_ii_end:nnnwnw #1#2#3#4 \s_@@_mark #5#6 \s_@@_stop
  {
    \@@_quick_split:NnNn #2
      \@@_quick_end:nnTFNn { } \s_@@_mark { #5 {#3} }
    #6 \s_@@_stop
  }
\cs_new:Npn \@@_quick_split_end:nnnwnw #1#2#3#4 \s_@@_mark #5#6 \s_@@_stop
  {
    \@@_quick_split:NnNn #2 \@@_quick_end:nnTFNn { } \s_@@_mark
      {
        \@@_quick_split:NnNn #1
          \@@_quick_end:nnTFNn { } \s_@@_mark {#5}
        {#3}
      }
    #6 \s_@@_stop
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Messages}
%
% \begin{macro}{\@@_error:}
%   Bailing out of the sorting code is a bit tricky.  It may not be safe
%   to use a delimited argument, so instead we redefine many
%   \pkg{l3sort} commands to be trivial, with \cs{@@_level:} jumping to
%   the break point.  This error recovery won't work in a group.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_error:
  {
    \cs_set_eq:NN \@@_merge_blocks_aux: \prg_do_nothing:
    \cs_set_eq:NN \@@_merge_blocks: \prg_do_nothing:
    \cs_set_protected:Npn \@@_level: { \group_end: \prg_break: }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_disable_toksdef:, \@@_disabled_toksdef:n}
%   While sorting, \tn{toksdef} is locally disabled to prevent users
%   from using \tn{newtoks} or similar commands in their comparison
%   code: the \tn{toks} registers that would be assigned are in use by
%   \pkg{l3sort}.  In format mode, none of this is needed since there is
%   no \tn{toks} allocator.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_disable_toksdef:
  { \cs_set_eq:NN \toksdef \@@_disabled_toksdef:n }
\cs_new_protected:Npn \@@_disabled_toksdef:n #1
  {
    \msg_error:nne { sort } { toksdef }
      { \token_to_str:N #1 }
    \@@_error:
    \tex_toksdef:D #1
  }
\msg_new:nnnn { sort } { toksdef }
  { Allocation~of~\iow_char:N\\toks~registers~impossible~while~sorting. }
  {
    The~comparison~code~used~for~sorting~a~list~has~attempted~to~
    define~#1~as~a~new~\iow_char:N\\toks~register~using~
    \iow_char:N\\newtoks~
    or~a~similar~command.~The~list~will~not~be~sorted.
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_too_long_error:NNw}
%   When there are too many items in a sequence, this is an error, and
%   we clean up properly the mapping over items in the list: break using
%   the type-specific breaking function |#1|.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_too_long_error:NNw #1#2 \fi:
  {
    \fi:
    \msg_error:nneee { sort } { too-large }
      { \token_to_str:N #2 }
      { \int_eval:n { \l_@@_true_max_int - \l_@@_min_int } }
      { \int_eval:n { \l_@@_top_int - \l_@@_min_int } }
    #1 \@@_error:
  }
\msg_new:nnnn { sort } { too-large }
  { The~list~#1~is~too~long~to~be~sorted~by~TeX. }
  {
    TeX~has~#2~toks~registers~still~available:~
    this~only~allows~to~sort~with~up~to~#3~
    items.~The~list~will~not~be~sorted.
  }
%    \end{macrocode}
% \end{macro}
%
%    \begin{macrocode}
\msg_new:nnnn { sort } { return-none }
  { The~comparison~code~did~not~return. }
  {
    When~sorting~a~list,~the~code~to~compare~items~#1~and~#2~
    did~not~call~
    \iow_char:N\\sort_return_same: ~nor~
    \iow_char:N\\sort_return_swapped: .~
    Exactly~one~of~these~should~be~called.
  }
\msg_new:nnnn { sort } { return-two }
  { The~comparison~code~returned~multiple~times. }
  {
    When~sorting~a~list,~the~code~to~compare~items~#1~and~#2~called~
    \iow_char:N\\sort_return_same: ~or~
    \iow_char:N\\sort_return_swapped: ~multiple~times.~
    Exactly~one~of~these~should~be~called.
  }
\prop_gput:Nnn \g_msg_module_name_prop { sort } { LaTeX }
\prop_gput:Nnn \g_msg_module_type_prop { sort } { }
%    \end{macrocode}
%
%    \begin{macrocode}
%</package>
%    \end{macrocode}
%
% \end{implementation}
%
% \PrintIndex