% \iffalse meta-comment
%
%% File: tagpdf-backend.dtx
%
% Copyright (C) 2019-2024 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-luatex.def} \\ Driver for luatex  ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%  Ulrike Fischer\thanks
%    {^^A
%      E-mail:
%        \href{mailto:fischer@troubleshooting-tex.de}
%          {fischer@troubleshooting-tex.de}^^A
%    }^^A
% }
%
% \date{Version 0.99k, released 2024-12-04}
% \maketitle
% \begin{implementation}
%    \begin{macrocode}
%<@@=tag>
%<*luatex>
\ProvidesExplFile {tagpdf-luatex.def} {2024-12-04} {0.99k}
  {tagpdf~driver~for~luatex}
%    \end{macrocode}
% \section{Loading the lua}
% The space code requires that the fall back font has been loaded and initialized,
% so we force that first. But perhaps this could be done in the kernel.
%
%    \begin{macrocode}
% Select TU/lmr/m/n at 10pt inside a group: the font gets loaded, but the
% selection itself stays local to the group.
{
  \fontencoding{TU}\fontfamily{lmr}\fontseries{m}\fontshape{n}\fontsize{10pt}{10pt}\selectfont
}
% Load the lua module; the value returned by require is kept in the
% global lua variable tagpdf.
\lua_now:e { tagpdf=require('tagpdf.lua') }
%    \end{macrocode}
%
% The following defines wrappers around prop and seq commands which store the
% data also in lua tables.
% I probably also want pure lua tables.
% The tables are put in the ltx.@@.tables namespace.
% They are named like the variables but without the backslash.
% To access such a table with a dynamic name, create a string and then use
% ltx.@@.tables[string].
% This is old code, and I'm not quite sure if it was a good idea. Now there is a mix of
% tables in ltx.@@.tables and ltx.@@.mc/struct, and a lot is probably not needed.
% TODO: this should be cleaned up, but at least roles are currently using
% the table!
%
% \begin{macro}
%   {
%     \@@_prop_new:N,
%     \@@_seq_new:N,
%     \@@_prop_gput:Nnn,
%     \@@_seq_gput_right:Nn,
%     \@@_seq_gput_left:Nn,
%     \@@_seq_item:cn,
%     \@@_prop_item:cn,
%     \@@_seq_show:N,
%     \@@_prop_show:N
%   }
%    \begin{macrocode}
% Each of the three wrappers creates the expl3 variable #1 and, in addition,
% an empty lua table stored in ltx.@@.tables under the name of #1
% (\cs_to_str:N removes the backslash).
\cs_set_protected:Npn \@@_prop_new:N #1
  {
    \prop_new:N #1
    \lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
  }

% Same as above, but the prop is created with \prop_new_linked:N.
\cs_set_protected:Npn \@@_prop_new_linked:N #1
  {
    \prop_new_linked:N #1
    \lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
  }


% Same as above for a seq.
\cs_set_protected:Npn \@@_seq_new:N #1
  {
    \seq_new:N #1
    \lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
  }


% Stores key #2 with value #3 globally in the prop #1 and mirrors the pair
% in the lua table of #1. The value is passed through \lua_escape:n; the
% key is written into the lua string as it is.
% NOTE(review): a key containing a double quote or a backslash would break
% the lua string -- confirm that keys are always plain names.
\cs_set_protected:Npn \@@_prop_gput:Nnn #1 #2 #3
  {
    \prop_gput:Nnn #1 { #2 } { #3 }
    \lua_now:e { ltx.@@.tables.\cs_to_str:N#1 ["#2"] = "\lua_escape:n{#3}" }
  }


% Appends #2 globally to the seq #1 and to the end of the lua table of #1.
% NOTE(review): unlike the prop value above, the item is not passed
% through \lua_escape:n here.
\cs_set_protected:Npn \@@_seq_gput_right:Nn #1 #2
  {
    \seq_gput_right:Nn #1 { #2 }
    \lua_now:e { table.insert(ltx.@@.tables.\cs_to_str:N#1, "#2") }
  }
%    \end{macrocode}
%
% Note: on the lua side this appends the item at the right end of the table,
% although the seq item is added on the left. As the lua table is not used
% for kids, this mismatch is ignored for now.
%    \begin{macrocode}
% Puts #2 globally on the left of the seq #1. On the lua side table.insert
% is called without a position, so the item is appended at the end of the
% lua table: the order of the lua table differs from the order of the seq.
\cs_set_protected:Npn \@@_seq_gput_left:Nn #1 #2
  {
    \seq_gput_left:Nn #1 { #2 }
    \lua_now:e { table.insert(ltx.@@.tables.\cs_to_str:N#1, "#2") }
  }
  
%Hm not quite sure about the naming
% Prints (with tex.print) entry #2 of the lua table named #1.
% #1 is the table name without backslash, #2 is used unquoted as index.
\cs_set:Npn \@@_seq_item:cn #1 #2
  {
    \lua_now:e { tex.print(ltx.@@.tables.#1[#2]) }
  }

% Prints (with tex.print) the value stored under the string key #2 in the
% lua table named #1.
\cs_set:Npn \@@_prop_item:cn #1 #2
  {
    \lua_now:e { tex.print(ltx.@@.tables.#1["#2"]) }
  }

%for debugging commands that show both the seq/prop and the lua tables
% Shows the seq #1 with \seq_show:N, then writes a header line and the
% content of the corresponding lua table to the log (log level 1).
\cs_set_protected:Npn \@@_seq_show:N #1
  {
    \seq_show:N #1
    \lua_now:e { ltx.@@.trace.log ("lua~sequence~array~\cs_to_str:N#1",1) }
    \lua_now:e { ltx.@@.trace.show_seq (ltx.@@.tables.\cs_to_str:N#1) }
  }

% Shows the prop #1 with \prop_show:N, then writes a header line and the
% content of the corresponding lua table to the log (log level 1).
\cs_set_protected:Npn \@@_prop_show:N #1
  {
    \prop_show:N #1
    \lua_now:e {ltx.@@.trace.log  ("lua~property~table~\cs_to_str:N#1",1) }
    \lua_now:e {ltx.@@.trace.show_prop (ltx.@@.tables.\cs_to_str:N#1) }
 }
%    \end{macrocode}
% \end{macro}
% 
%    \begin{macrocode}
%</luatex>
%    \end{macrocode}
% The module declaration
%    \begin{macrocode}
%<*lua>
-- tagpdf.lua
-- Ulrike Fischer

-- Module description; it is handed to luatexbase.provides_module below.
local ProvidesLuaModule = {
    name          = "tagpdf",
    version       = "0.99k",       --TAGVERSION
    date          = "2024-12-04", --TAGDATE
    description   = "tagpdf lua code",
    license       = "The LATEX Project Public License 1.3c"
}

-- Register the module only if luatexbase offers provides_module.
if luatexbase and luatexbase.provides_module then
  luatexbase.provides_module (ProvidesLuaModule)
end

--[[
The code has quite probably a number of problems
 - more variables should be local instead of global
 - the naming is not always consistent due to the development of the code
 - the traversing of the shipout box must be tested with more complicated setups
 - it should probably handle more node types
 -
--]]

%    \end{macrocode}
% Some comments about the lua structure.
%    \begin{macrocode}
--[[
the main table is named ltx.@@. It contains the functions and also the data
collected during the compilation.

ltx.@@.mc     will contain mc connected data.
ltx.@@.struct will contain structure related data.
ltx.@@.page   will contain page data
ltx.@@.tables contains also data from mc and struct (from older code). This needs cleaning up.
             There are certainly duplicates, but I don't dare to remove them yet ...
ltx.@@.func   will contain (public) functions.
ltx.@@.trace  will contain tracing/logging functions.
local functions starts with __
functions meant for users will be in ltx.tag

functions
 ltx.@@.func.get_num_from (tag):    takes a tag (string) and returns the id number
 ltx.@@.func.output_num_from (tag): takes a tag (string) and prints (to tex) the id number
 ltx.@@.func.get_tag_from (num):    takes a num and returns the tag
 ltx.@@.func.output_tag_from (num): takes a num and prints (to tex) the tag
 ltx.@@.func.store_mc_data (num,key,data): stores key=data in ltx.@@.mc[num]
 ltx.@@.func.store_mc_label (label,num): stores label=num in ltx.@@.mc.labels
 ltx.@@.func.store_mc_kid (mcnum,kid,page): stores the mc-kids of mcnum on page page
 ltx.@@.func.store_mc_in_page(mcnum,mcpagecnt,page): stores in the page table the number of mcnum on this page
 ltx.@@.func.store_struct_mcabs (structnum,mcnum): stores relations structnum<->mcnum (abs)
 ltx.@@.func.mc_insert_kids (mcnum): inserts the /K entries for mcnum by wandering through the [kids] table
 ltx.@@.func.mark_page_elements(box,mcpagecnt,mccntprev,mcopen,name,mctypeprev) : the main function
 ltx.@@.func.mark_shipout (): a wrapper around the core function which inserts the last EMC
 ltx.@@.func.fill_parent_tree_line (page): outputs the entries of the parenttree for this page
 ltx.@@.func.output_parenttree(): outputs the content of the parenttree
 ltx.@@.func.pdf_object_ref(name,index): outputs the object reference for the object name
 ltx.@@.func.markspaceon(), ltx.@@.func.markspaceoff(): (de)activates the marking of positions for space chars
 ltx.@@.trace.show_mc_data (num,loglevel): shows ltx.@@.mc[num] if the current log level is >= loglevel
 ltx.@@.trace.show_all_mc_data (min,max,loglevel): shows the data of the mc's from min to max if the current log level is >= loglevel
 ltx.@@.trace.show_seq: shows a sequence (array)
 ltx.@@.trace.show_struct_data (num): shows data of structure num
 ltx.@@.trace.show_prop: shows a prop
 ltx.@@.trace.log
 ltx.@@.trace.showspaces : boolean
 
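 ltx.tag.get_struct_num: returns the current structure number (the content of g__tag_struct_stack_current_tl)
 ltx.tag.get_struct_counter: returns the value of the absolute structure counter
 ltx.tag.get_struct_num_next: returns the value of the absolute structure counter plus one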
--]]

%    \end{macrocode}
% This sets up the main attribute registers.
% The mc_type attribute stores the type (P, Span etc) encoded as a num,
% The mc_cnt attribute stores the absolute number and allows so to see
% if a node belongs to the same mc-chunk.
%
% The interwordspace attr is set by the function |@@_mark_spaces|, and marks
% the place where spaces should be inserted.
% The interwordfont attr is set by the function |@@_mark_spaces| too and
% stores the font, so that we can decide which font
% to use for the real space char.
% The interwordspaceOff attr allows to locally suppress the insertion of
% real space chars, e.g. when they are inserted by other means (e.g. with |\char|).
%    \begin{macrocode}
-- Attribute registers, allocated by name with luatexbase.new_attribute.
-- The returned values are used below as attribute ids.
local mctypeattributeid  = luatexbase.new_attribute ("g_@@_mc_type_attr")
local mccntattributeid   = luatexbase.new_attribute ("g_@@_mc_cnt_attr")
local iwspaceOffattributeid = luatexbase.new_attribute ("g__tag_interwordspaceOff_attr")
local iwspaceattributeid = luatexbase.new_attribute ("g_@@_interwordspace_attr")
local iwfontattributeid  = luatexbase.new_attribute ("g_@@_interwordfont_attr")
%    \end{macrocode}
% with this token we can query the state of the boolean
% and so detect if unmarked nodes should be marked as attributes
%    \begin{macrocode}
-- Tokens for the tagunmarked boolean and for the expl3 "true" constant.
local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
local truebool       = token.create("c_true_bool")
%    \end{macrocode}
% with this token we can query the state of the softhyphen boolean
% and so detect if hyphens from hyphenation should be replaced by soft-hyphens.
%    \begin{macrocode}
-- Token for the softhyphen boolean.
local softhyphenbool = token.create("g_@@_softhyphen_bool")
%    \end{macrocode}

% Now a number of local versions from global tables.
% Not all is perhaps needed, most node variants were copied from lua-debug.
%    \begin{macrocode}
-- number of the LaTeX catcode table, used with tex.sprint
local catlatex       = luatexbase.registernumber("catcodetable@latex")
-- local shortcuts for library functions
local tableinsert    = table.insert
local nodeid           = node.id
local nodecopy         = node.copy
local nodegetattribute = node.get_attribute
local nodesetattribute = node.set_attribute
local nodehasattribute = node.has_attribute
local nodenew          = node.new
local nodetail         = node.tail
local nodeslide        = node.slide
local noderemove       = node.remove
local nodetraverseid   = node.traverse_id
local nodetraverse     = node.traverse
local nodeinsertafter  = node.insert_after
local nodeinsertbefore = node.insert_before
local pdfpageref       = pdf.pageref

-- font related shortcuts
local fonthashes      = fonts.hashes
local identifiers     = fonthashes.identifiers
local fontid          = font.id

-- numeric ids of the node types
local HLIST          = node.id("hlist")
local VLIST          = node.id("vlist")
local RULE           = node.id("rule")
local DISC           = node.id("disc")
local GLUE           = node.id("glue")
local GLYPH          = node.id("glyph")
local KERN           = node.id("kern")
local PENALTY        = node.id("penalty")
local LOCAL_PAR      = node.id("local_par")
local MATH           = node.id("math")

-- presumably subtype numbers of disc nodes -- confirm against the LuaTeX manual
local explicit_disc = 1
local regular_disc = 3
%    \end{macrocode}
% Now we setup the main table structure. ltx is used by other latex code too!
%    \begin{macrocode}
-- Create the (global) table hierarchy; tables that exist already are kept.
ltx             = ltx        or { }
ltx.tag         = ltx.tag       or { } -- user commands
ltx.@@          = ltx.@@        or { }
ltx.@@.mc       = ltx.@@.mc     or  { } -- mc data
ltx.@@.struct   = ltx.@@.struct or  { } -- struct data
ltx.@@.tables   = ltx.@@.tables or  { } -- tables created with new prop and new seq.
                                        -- wasn't a so great idea ...
                                        -- g_@@_role_tags_seq used by tag<-> is in this tables!
                                        -- used for pure lua tables too now!
ltx.@@.page     = ltx.@@.page   or  { } -- page data, currently only i->{0->mcnum,1->mcnum,...}
ltx.@@.trace    = ltx.@@.trace  or  { } -- show commands
ltx.@@.func     = ltx.@@.func   or  { } -- functions
ltx.@@.conf     = ltx.@@.conf   or  { } -- configuration variables
%    \end{macrocode}
%
% \section{User commands to access data}
% Code like the one in luamml will have to access the current state in some places.

% \begin{macro}{ltx.tag.get_struct_num,ltx.tag.get_struct_counter,ltx.tag.get_struct_num_next}
%    \begin{macrocode}
-- Returns the content of the macro g__tag_struct_stack_current_tl.
local function @@_get_struct_num ()
  local current = token.get_macro("g__tag_struct_stack_current_tl")
  return current
end

-- Returns the value of the count register c@g_@@_struct_abs_int.
local function @@_get_struct_counter ()
  local counter = tex.getcount("c@g_@@_struct_abs_int")
  return counter
end

-- Returns the value of the count register c@g_@@_struct_abs_int plus one.
local function @@_get_struct_num_next ()
  local following = 1 + tex.getcount("c@g_@@_struct_abs_int")
  return following
end

-- public names in the user table
ltx.tag.get_struct_num      = @@_get_struct_num
ltx.tag.get_struct_counter  = @@_get_struct_counter
ltx.tag.get_struct_num_next = @@_get_struct_num_next
%    \end{macrocode}
% \end{macro}
% \section{Logging functions}
%
% \begin{macro}{@@_log,ltx.@@.trace.log}
% This rather simple log function takes as argument a
% message (string) and a number and
% will output the message to the log/terminal if the current loglevel
% is greater or equal than num.
%    \begin{macrocode}
-- Writes message (prefixed with "tagpdf: ") on a new line of the log/terminal
-- if the value of l_@@_loglevel_int is at least loglevel.
-- loglevel defaults to 3.
local function @@_log (message,loglevel)
  local threshold = loglevel or 3
  if tex.count["l_@@_loglevel_int"] >= threshold then
    texio.write_nl("tagpdf: ".. message)
  end
end

ltx.@@.trace.log = @@_log
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_seq}
% This shows the content of a seq as stored in the tables table.
% It is used by the |\@@_seq_show:N| function. It is not used
% in user commands, only for debugging, and so requires log level >0.
%    \begin{macrocode}
-- Logs (level 1) the entries of the array seq as "[index] => value".
-- If seq is not a table, a "not found" message is logged instead.
function ltx.@@.trace.show_seq (seq)
  if type(seq) ~= "table" then
    @@_log ("sequence " .. tostring(seq) .. " not found",1)
    return
  end
  for index,item in ipairs(seq) do
    @@_log ("[" .. index .. "] => " .. tostring(item),1)
  end
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ @@_pairs_prop,ltx.@@.trace.show_prop}
% This shows the content of a prop as stored in the tables table.
% It is used by the |\@@_prop_show:N| function.
%    \begin{macrocode}
-- Returns an iterator over prop which yields key, value with the keys
-- in the order produced by table.sort.
local function @@_pairs_prop (prop)
  -- collect and sort the keys first
  local keys = {}
  for key in pairs(prop) do
    keys[#keys + 1] = key
  end
  table.sort(keys)
  local pos = 0
  return function ()
    pos = pos + 1
    local key = keys[pos]
    if key ~= nil then
      return key, prop[key]
    end
    return nil
  end
end


-- Logs (level 1) the entries of the table prop as "[key] => value",
-- with the keys sorted. If prop is not a table, a message is logged instead.
function ltx.@@.trace.show_prop (prop)
  if type(prop) ~= "table" then
    @@_log ("prop " .. tostring(prop) .. " not found or not a table",1)
    return
  end
  for key,value in @@_pairs_prop (prop) do
    @@_log ("[" .. key .. "] => " .. tostring(value),1)
  end
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_mc_data}
% This shows some data for a mc given by |num|.
% If something is shown depends on the log level.
% The function is used by the following function and then in
% |\ShowTagging|
%    \begin{macrocode}
-- Logs with level loglevel all key/value pairs stored for the mc num and,
-- if present, the entries of its kids array (kid number and page).
-- Logs a "not found" message if there is no data for num.
function ltx.@@.trace.show_mc_data (num,loglevel)
  local mcdata = ltx.@@ and ltx.@@.mc and ltx.@@.mc[num]
  if not mcdata then
    @@_log  ("mc"..num.." not found",loglevel)
    return
  end
  for key,value in pairs(mcdata) do
    @@_log  ("mc"..num..": "..tostring(key).."=>"..tostring(value),loglevel)
  end
  local kids = mcdata["kids"]
  if kids then
    @@_log ("mc" .. num .. " has " .. #kids .. " kids",loglevel)
    for index,entry in ipairs(kids) do
      @@_log ("mc ".. num .. " kid "..index.." =>" .. entry.kid.." on page " ..entry.page,loglevel)
    end
  end
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_all_mc_data}
% This shows data for the mc's between |min| and |max| (numbers).
% It is used by the |\ShowTagging| function.
%    \begin{macrocode}
-- Calls show_mc_data for every mc number from min to max (inclusive)
-- and finishes with an empty line in the log.
function ltx.@@.trace.show_all_mc_data (min,max,loglevel)
  for mcnum = min, max do
    ltx.@@.trace.show_mc_data (mcnum,loglevel)
  end
  texio.write_nl("")
end
%    \end{macrocode}
% \end{macro}

% \begin{macro}
%   {
%    ltx.@@.trace.show_struct_data
%   }
% This function shows some struct data.
% Unused but kept for debugging.
%    \begin{macrocode}
-- Logs (level 1) all key/value pairs stored for the structure num,
-- or a "not found" message if there is no data for num.
-- pairs is used (as in show_mc_data) so that entries stored under
-- non-numeric keys are shown too; ipairs would silently skip them.
function ltx.@@.trace.show_struct_data (num)
  local structdata = ltx.@@ and ltx.@@.struct and ltx.@@.struct[num]
  if structdata then
    for k,v in pairs(structdata) do
      @@_log  ("struct "..num..": "..tostring(k).."=>"..tostring(v),1)
    end
  else
    @@_log   ("struct "..num.." not found ",1)
  end
end
%    \end{macrocode}
% \end{macro}

%
% \section{Helper functions}
% \subsection{Retrieve data functions}
% \begin{macro}{@@_get_mc_cnt_type_tag}
% This takes a node as argument and returns the mc-cnt, the mc-type
% and the tag (calculated from the mc-type).
%    \begin{macrocode}
-- Returns for the node n the values of the mc-cnt and the mc-type attribute
-- (-1 if the attribute is not set) and the tag looked up from the mc-type.
local function @@_get_mc_cnt_type_tag (n)
  local cnt  = nodegetattribute(n,mccntattributeid)
  local kind = nodegetattribute(n,mctypeattributeid)
  if not cnt then
    cnt = -1
  end
  if not kind then
    kind = -1
  end
  local tagname = ltx.@@.func.get_tag_from(kind)
  return cnt,kind,tagname
end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{@@_get_mathsubtype}
% This function allows to detect if we are at the begin or the end of math.
% It takes as argument a mathnode.
%    \begin{macrocode}
-- Returns "beginmath" if the subtype of mathnode is 0 and "endmath" otherwise.
-- The result is kept in a local variable; the former code assigned to an
-- undeclared name and so created a global variable `subtype`.
local function @@_get_mathsubtype  (mathnode)
 local subtype
 if mathnode.subtype == 0 then
  subtype = "beginmath"
 else
  subtype = "endmath"
 end
 return subtype
end
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{ltx.@@.tables.role_tag_attribute,ltx.@@.tables.role_attribute_tag}
% The first is a table with key a tag and value a number (the attribute)
% The second is an array with the attribute value as key.
%    \begin{macrocode}
-- tag (string) -> attribute value (number)
ltx.@@.tables.role_tag_attribute = {}
-- attribute value (number) -> tag (string)
ltx.@@.tables.role_attribute_tag = {}
%    \end{macrocode}
% \end{variable}
% \begin{macro}{ltx.@@.func.alloctag}
%    \begin{macrocode}
-- Registers tag if it is not known yet: the tag is appended to the array
-- role_attribute_tag and its position is stored in role_tag_attribute[tag].
local function @@_alloctag (tag)
  local tag_to_attr = ltx.@@.tables.role_tag_attribute
  if tag_to_attr[tag] then
    return
  end
  local attr_to_tag = ltx.@@.tables.role_attribute_tag
  attr_to_tag[#attr_to_tag + 1] = tag
  tag_to_attr[tag] = #attr_to_tag
  @@_log  ("Add "..tag.." "..tag_to_attr[tag],3)
end
ltx.@@.func.alloctag = @@_alloctag
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%   {
%    @@_get_num_from,
%    ltx.@@.func.get_num_from,
%    ltx.@@.func.output_num_from
%   }
% These functions take as argument a string |tag|, and return the number
% under which is it recorded (and so the attribute value).
% The first function outputs the number for lua, while the |output| function
% outputs to tex.
%    \begin{macrocode}
-- Returns the attribute value recorded for tag in role_tag_attribute,
-- or -1 if the tag is unknown.
-- No helper variable is used; the former code assigned the result to an
-- undeclared name and so created a global variable `a`.
local @@_get_num_from =
 function (tag)
  return ltx.@@.tables.role_tag_attribute[tag] or -1
 end

ltx.@@.func.get_num_from = @@_get_num_from

-- Prints the attribute value of tag to TeX (-1 for an unknown tag).
-- For an unknown tag a message is logged as well (default log level).
function ltx.@@.func.output_num_from (tag)
  local attr = @@_get_num_from (tag)
  tex.sprint(catlatex,attr)
  if attr ~= -1 then
    return
  end
  @@_log ("Unknown tag "..tag.." used")
end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%    @@_get_tag_from ,
%    ltx.@@.func.get_tag_from,
%    ltx.@@.func.output_tag_from
%   }
% These functions are the opposites to the previous function:
% they take as argument a number (the attribute value) and return the string |tag|.
% The first function outputs the string for lua, while the |output| function
% outputs to tex.
%    \begin{macrocode}
-- Returns the tag recorded for the attribute value num in
-- role_attribute_tag, or "UNKNOWN" if there is none.
-- No helper variable is used; the former code assigned the result to an
-- undeclared name and so created a global variable `a`.
local @@_get_tag_from =
 function  (num)
  return ltx.@@.tables.role_attribute_tag[num] or "UNKNOWN"
 end

ltx.@@.func.get_tag_from = @@_get_tag_from

-- Prints the tag recorded for the attribute value num to TeX.
function ltx.@@.func.output_tag_from (num)
  local tagname = @@_get_tag_from (num)
  tex.sprint(catlatex,tagname)
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%   {
%    ltx.@@.func.store_mc_data
%   }
% This function stores |key|=|data| for the mc-chunk |num|. It is used in the
% tagpdf-mc code to store, for example, the tag string and the raw options.
%    \begin{macrocode}
-- Stores data under key in the table of the mc num; the table is created
-- if it does not exist yet. The stored triple is logged at level 3.
function ltx.@@.func.store_mc_data (num,key,data)
  local mcdata = ltx.@@.mc[num]
  if not mcdata then
    mcdata = { }
    ltx.@@.mc[num] = mcdata
  end
  mcdata[key] = data
  @@_log  ("INFO TEX-STORE-MC-DATA: "..num.." => "..tostring(key).." => "..tostring(data),3)
end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%    ltx.@@.func.store_mc_label
%   }
% This function stores the |label|=|num| relationship in the |labels| subtable.
% TODO: this is probably unused and can go.
%    \begin{macrocode}
-- Stores num under label in the subtable ltx.@@.mc.labels; the subtable
-- is created if it does not exist yet.
function ltx.@@.func.store_mc_label (label,num)
  local labels = ltx.@@.mc["labels"]
  if not labels then
    labels = { }
    ltx.@@.mc["labels"] = labels
  end
  labels[label] = num
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%   {
%    ltx.@@.func.store_mc_kid
%   }
% This function is used in the traversing code. It stores
% a sub-chunk of a mc |mcnum| into the |kids| table.
%    \begin{macrocode}
-- Appends the entry {kid=kid,page=page} to the kids array of the mc mcnum.
-- The table of the mc is created if it does not exist yet (as
-- store_mc_data does), so that a kid found for an mc without stored data
-- does not raise a lua error.
function ltx.@@.func.store_mc_kid (mcnum,kid,page)
 ltx.@@.trace.log("INFO TAG-STORE-MC-KID: "..mcnum.." => " .. kid.." on page " .. page,3)
 local mcdata = ltx.@@.mc[mcnum]
 if not mcdata then
  mcdata = { }
  ltx.@@.mc[mcnum] = mcdata
 end
 mcdata["kids"] = mcdata["kids"] or { }
 tableinsert(mcdata["kids"], {kid=kid,page=page})
end
%    \end{macrocode}
% \end{macro}

% \begin{macro}
%   {
%    ltx.@@.func.mc_num_of_kids
%   }
% This function returns the number of kids a mc |mcnum| has. We need to account for
% the case that a mc can have no kids.
%    \begin{macrocode}
-- Returns the number of entries in the kids array of the mc mcnum,
-- and 0 if the mc or its kids array does not exist.
-- The result is logged at level 4; the message now has spaces around the
-- numbers (it used to read like "5has 2KIDS").
function ltx.@@.func.mc_num_of_kids (mcnum)
 local num = 0
 local mcdata = ltx.@@.mc[mcnum]
 if mcdata and mcdata["kids"] then
   num = #mcdata["kids"]
 end
 ltx.@@.trace.log ("INFO MC-KID-NUMBERS: " .. mcnum .. " has " .. num .. " KIDS",4)
 return num
end
%    \end{macrocode}
% \end{macro}
% \subsection{Functions to insert the pdf literals}
% \begin{macro}{@@_backend_create_emc_node,@@_insert_emc_node}
% This inserts the emc node. We also support the dvips and dvipdfmx backends.
%    \begin{macrocode}
-- Creates the node which ends a marked content sequence (EMC).
-- The variant is chosen once at load time: a pdf_literal in pdf mode,
-- otherwise a special for dvipdfmx or (as fallback) for dvips.
local @@_backend_create_emc_node
if tex.outputmode ~= 0 then -- pdf mode
  @@_backend_create_emc_node = function ()
    local literal = nodenew("whatsit","pdf_literal")
    literal.data = "EMC"
    literal.mode = 1
    return literal
  end
elseif token.get_macro("c_sys_backend_str") == "dvipdfmx" then
  @@_backend_create_emc_node = function ()
    local special = nodenew("whatsit","special")
    special.data = "pdf:code EMC"
    return special
  end
else -- assume a dvips variant
  @@_backend_create_emc_node = function ()
    local special = nodenew("whatsit","special")
    special.data = "ps:SDict begin mark /EMC pdfmark end"
    return special
  end
end
   
-- Inserts an EMC node before current and returns the (new) head of the list.
local function @@_insert_emc_node (head,current)
  local newhead = nodeinsertbefore(head,current,@@_backend_create_emc_node())
  return newhead
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{@@_backend_create_bmc_node,@@_insert_bmc_node}
% This inserts a simple bmc node
%    \begin{macrocode}
-- Creates the node which starts a marked content sequence with tag (BMC).
-- The variant is chosen once at load time: a pdf_literal in pdf mode,
-- otherwise a special for dvipdfmx or (as fallback) for dvips.
local @@_backend_create_bmc_node
if tex.outputmode ~= 0 then -- pdf mode
  @@_backend_create_bmc_node = function (tag)
    local literal = nodenew("whatsit","pdf_literal")
    literal.data = "/"..tag.." BMC"
    literal.mode = 1
    return literal
  end
elseif token.get_macro("c_sys_backend_str") == "dvipdfmx" then
  @@_backend_create_bmc_node = function (tag)
    local special = nodenew("whatsit","special")
    special.data = "pdf:code /"..tag.." BMC"
    return special
  end
else -- assume a dvips variant
  @@_backend_create_bmc_node = function (tag)
    local special = nodenew("whatsit","special")
    special.data = "ps:SDict begin mark/"..tag.." /BMC pdfmark end"
    return special
  end
end

-- Inserts a BMC node for tag before current and returns the (new) head
-- of the list.
local function @@_insert_bmc_node (head,current,tag)
  local newhead = nodeinsertbefore(head,current,@@_backend_create_bmc_node (tag))
  return newhead
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{@@_backend_create_bdc_node,@@_insert_bdc_node}
%  This inserts a bdc node with a fixed dict.
%  TODO: check if this is still used, now that we create properties.
%    \begin{macrocode}
-- Creates the node which starts a marked content sequence with tag and
-- the property dictionary content dict (BDC).
-- The variant is chosen once at load time: a pdf_literal in pdf mode,
-- otherwise a special for dvipdfmx or (as fallback) for dvips.
local @@_backend_create_bdc_node

if tex.outputmode ~= 0 then -- pdf mode
  @@_backend_create_bdc_node = function (tag,dict)
    local literal = nodenew("whatsit","pdf_literal")
    literal.data = "/"..tag.."<<"..dict..">> BDC"
    literal.mode = 1
    return literal
  end
elseif token.get_macro("c_sys_backend_str") == "dvipdfmx" then
  @@_backend_create_bdc_node = function (tag,dict)
    local special = nodenew("whatsit","special")
    special.data = "pdf:code /"..tag.."<<"..dict..">> BDC"
    return special
  end
else -- assume a dvips variant
  @@_backend_create_bdc_node = function (tag,dict)
    local special = nodenew("whatsit","special")
    special.data = "ps:SDict begin mark/"..tag.."<<"..dict..">> /BDC pdfmark end"
    return special
  end
end

-- Inserts a BDC node for tag with the dictionary content dict before
-- current and returns the (new) head of the list.
-- The node is kept in a local variable; the former code assigned it to an
-- undeclared name and so created a global variable `bdcnode`.
local function @@_insert_bdc_node (head,current,tag,dict)
 local bdcnode = @@_backend_create_bdc_node (tag,dict)
 head = node.insert_before(head,current,bdcnode)
 return head
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{@@_pdf_object_ref}
% This allows to reference a pdf object reserved with the l3pdf command by name.
% The return value is |n 0 R|, if the object doesn't exist, n is 0.
%    \begin{macrocode}
-- Returns the reference "n 0 R" for the pdf object name/index.
-- The object number is taken from ltx.pdf.object_id if that function is
-- available; otherwise it is read from the token
-- c__pdf_object_<name>/<index>_int.
-- ltx.pdf itself is tested too, so that the fallback is also used when the
-- table ltx.pdf does not exist at all (instead of a lua error).
local function @@_pdf_object_ref (name,index)
   local objnum
   if ltx.pdf and ltx.pdf.object_id then
     objnum = ltx.pdf.object_id (name,index)
   else
     local tokenname = 'c__pdf_object_'..name..'/'..index..'_int'
     objnum = token.create(tokenname).mode
   end
   return objnum ..' 0 R'
end
ltx.@@.func.pdf_object_ref = @@_pdf_object_ref
%    \end{macrocode}
% \end{macro}
%
% \section{Function for the real space chars}
% \begin{macro}{@@_show_spacemark}
% A debugging function. It is used to
% insert red color markers in the places where space chars can go. It can have
% side effects, so it is not always reliable, but ok.
%    \begin{macrocode}
-- Debugging helper: inserts after current a pdf literal which draws a
-- vertical line from -3 to height (default 10) in color (default "1 0 0").
-- Returns the head of the list. In dvi mode nothing is inserted and head is
-- returned unchanged (formerly nothing was returned in this case).
-- The color is passed as argument to string.format, so a "%" in it can no
-- longer be taken as a format directive.
local function @@_show_spacemark (head,current,color,height)
 if tex.outputmode == 0 then
  -- ignore dvi mode for now
  return head
 end
 local markcolor = color or "1 0 0"
 local markheight = height or 10
 local pdfstring = node.new("whatsit","pdf_literal")
 pdfstring.data =
  string.format("q %s RG %s rg 0.4 w 0 %g m 0 %g l S Q",markcolor,markcolor,-3,markheight)
 head = node.insert_after(head,current,pdfstring)
 return head
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{@@_fakespace,ltx.@@.func.fakespace}
% This is used to define a lua version of |\pdffakespace|
%    \begin{macrocode}
-- Sets the interword space attribute to 1 and the interword font attribute
-- to the id of the current font.
local function @@_fakespace()
   local currentfont = font.current()
   tex.setattribute(iwspaceattributeid,1)
   tex.setattribute(iwfontattributeid,currentfont)
end
ltx.@@.func.fakespace = @@_fakespace
%    \end{macrocode}
% \end{macro}
% \begin{macro}{@@_mark_spaces}
% a function to mark up places where real space chars should be inserted.
% It only sets attributes; these are then used in a later traversal
% which inserts the actual spaces.
% When space handling is activated this function is inserted in some callbacks.
%    \begin{macrocode}
--[[ a function to mark up places where real space chars should be inserted
     it only sets attributes on glue nodes: the interword space marker and
     the font which should be used for the space char.
--]]

-- Font of the glyph seen most recently. It is kept across calls so that a
-- glue after a penalty can reuse it (issue #72). The former code stored this
-- value in an undeclared (and so global) variable which was nil until the
-- first glyph had been seen.
local @@_last_glyph_font

-- Marks in the list head every glue of positive width which follows a glyph
-- (directly or after one kern), precedes a glyph, or follows a penalty of
-- subtype 0. Nothing is marked inside math. Returns head.
local function @@_mark_spaces (head)
  local inside_math = false
  for n in nodetraverse(head) do
    local id = n.id
    if id == GLYPH then
      local glyph = n
      @@_last_glyph_font = glyph.font
      if glyph.next and (glyph.next.id == GLUE)
        and not inside_math  and (glyph.next.width >0)
      then
        nodesetattribute(glyph.next,iwspaceattributeid,1)
        nodesetattribute(glyph.next,iwfontattributeid,glyph.font)
      -- for debugging
       if ltx.@@.trace.showspaces then
        @@_show_spacemark (head,glyph)
       end
      elseif glyph.next and (glyph.next.id==KERN) and not inside_math then
       local kern = glyph.next
       if kern.next and (kern.next.id== GLUE)  and (kern.next.width >0)
       then
        nodesetattribute(kern.next,iwspaceattributeid,1)
        nodesetattribute(kern.next,iwfontattributeid,glyph.font)
       end
      end
     --  look also back
     if glyph.prev and (glyph.prev.id == GLUE)
        and not inside_math
        and (glyph.prev.width >0)
        and not nodehasattribute(glyph.prev,iwspaceattributeid)
      then
        nodesetattribute(glyph.prev,iwspaceattributeid,1)
        nodesetattribute(glyph.prev,iwfontattributeid,glyph.font)
      -- for debugging
       if ltx.@@.trace.showspaces then
        @@_show_spacemark (head,glyph)
       end
      end
    elseif id == PENALTY then
      local penalty = n
      -- ltx.@@.trace.log ("PENALTY ".. n.subtype.."VALUE"..n.penalty,3)
      if penalty.next and (penalty.next.id == GLUE)
        and not inside_math  and (penalty.next.width >0) and n.subtype==0
      then
        nodesetattribute(penalty.next,iwspaceattributeid,1)
        --  changed 2024-01-18, issue #72
        -- use the font of the last glyph, or the fallback font if no
        -- glyph has been seen yet
        nodesetattribute(penalty.next,iwfontattributeid,
                         @@_last_glyph_font or fontid("TU/lmr/m/n/10"))
      -- for debugging
       if ltx.@@.trace.showspaces then
        @@_show_spacemark (head,penalty)
       end
      end
    elseif id == MATH then
      -- subtype 0 starts math, every other subtype ends it
      inside_math = (n.subtype == 0)
    end
  end
  return head
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}
%   {
%    @@_activate_mark_space,
%    ltx.@@.func.markspaceon,
%    @@_deactivate_mark_space,
%    ltx.@@.func.markspaceoff
%    }
% These functions add/remove the function which marks the spaces to the callbacks
% |pre_linebreak_filter| and |hpack_filter|
%    \begin{macrocode}
-- Adds the space marking function under the name "markspaces" to the
-- callbacks pre_linebreak_filter and hpack_filter, unless it is already
-- registered in pre_linebreak_filter.
local function @@_activate_mark_space ()
  if luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
    return
  end
  for _,callback in ipairs({"pre_linebreak_filter","hpack_filter"}) do
    luatexbase.add_to_callback(callback,@@_mark_spaces,"markspaces")
  end
end

ltx.@@.func.markspaceon=@@_activate_mark_space

-- Removes the function registered as "markspaces" from the callbacks
-- pre_linebreak_filter and hpack_filter, if it is registered in
-- pre_linebreak_filter.
local function @@_deactivate_mark_space ()
  if not luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
    return
  end
  for _,callback in ipairs({"pre_linebreak_filter","hpack_filter"}) do
    luatexbase.remove_from_callback(callback,"markspaces")
  end
end

ltx.@@.func.markspaceoff=@@_deactivate_mark_space
%    \end{macrocode}
% \end{macro}
% We need a few local variables to set up a default space char.
%    \begin{macrocode}
-- Template for the space glyph: char 32 in the font TU/lmr/m/n/10.
local default_space_char = nodenew(GLYPH)
local default_fontid     = fontid("TU/lmr/m/n/10")
-- NOTE(review): this local is declared after @@_mark_spaces, so it is not
-- visible inside that function.
local default_currfontid = fontid("TU/lmr/m/n/10")
default_space_char.char  = 32
default_space_char.font  = default_fontid
%    \end{macrocode}
% And a function to check as best as possible if a font has a space:
%    \begin{macrocode}
-- Returns true if the font with the id fontid seems to have a space char:
-- either luaotfload finds a slot for the glyph name "space", or the font
-- table has a character 32 whose unicode value is 32. Returns false otherwise.
-- The font table is kept in a local variable (formerly a global `t` was
-- created) and is tested before it is indexed, so that a font id without
-- font table gives false instead of a lua error.
local function @@_font_has_space (fontid)
 if luaotfload.aux.slot_of_name(fontid,"space") then
    return true
 end
 local fontdata = fonts.hashes.identifiers[fontid]
 local space = fontdata and fontdata.characters and fontdata.characters[32]
 return (space and space["unicode"]==32) or false
end
%    \end{macrocode}
% \begin{macro}
%   {
%    @@_space_chars_shipout,
%    ltx.@@.func.space_chars_shipout,
%    }
% This is the main function to insert real space chars. It inserts a
% glyph before every glue which has been marked previously. The attributes
% are copied from the glue, so if the tagging is done later,
% it will be tagged like it.
%    \begin{macrocode}
local function @@_space_chars_shipout (box)
 local head = box.head
  if head then
    for n in node.traverse(head) do
      local spaceattr = -1  
      if not nodehasattribute(n,iwspaceOffattributeid) then
        spaceattr = nodegetattribute(n,iwspaceattributeid)  or -1
      end  
      if n.id == HLIST  then -- enter the hlist
         @@_space_chars_shipout (n)
      elseif n.id == VLIST then -- enter the vlist
         @@_space_chars_shipout (n)
      elseif n.id == GLUE then
        if ltx.@@.trace.showspaces and spaceattr==1  then
          @@_show_spacemark (head,n,"0 1 0")
        end
        if spaceattr==1  then
          local space
          local space_char = node.copy(default_space_char)
          local curfont    = nodegetattribute(n,iwfontattributeid)
          ltx.@@.trace.log ("INFO SPACE-FUNCTION-FONT: ".. tostring(curfont),3)
          if curfont and 
            -- luaotfload.aux.slot_of_name(curfont,"space") 
            @@_font_has_space (curfont)
          then
            space_char.font=curfont
          end
          head, space = node.insert_before(head, n, space_char) --
          n.width     = n.width - space.width
          space.attr  = n.attr
        end
      end
    end
    box.head = head
  end
end

-- Public entry point: forwards the box to the local space insertion function.
ltx.@@.func.space_chars_shipout = function (box)
  @@_space_chars_shipout (box)
end
%    \end{macrocode}
% \end{macro}
%
% \section{Function for the tagging}
% \begin{macro}
%   {
%    ltx.@@.func.mc_insert_kids
%   }
% This is the main function to insert the
% K entry into a StructElem object. It is used in tagpdf-mc-luacode module.
% The |single| attribute allows to handle the case that a single
% mc on the tex side can have more than one kid after the processing here,
% and so we get the correct array/non array setup.
%    \begin{macrocode}
-- Prints the kids (MCR dictionaries) recorded for the mc chunk `mcnum` back
-- to TeX. With single==1 the output is wrapped in [ ] if there is more than
-- one kid, and "null" is printed if there is no kid at all.
function ltx.@@.func.mc_insert_kids (mcnum,single)
  local mcdata = ltx.@@.mc[mcnum]
  if not mcdata then
   ltx.@@.trace.log("WARN TEX-MC-INSERT-MISSING: "..mcnum.." doesn't exist",0)
   return
  end
  ltx.@@.trace.log("INFO TEX-MC-INSERT-KID-TEST: " .. mcnum,4)
  local kids = mcdata["kids"]
  if not kids then
   -- this is typically not a problem, e.g. empty hbox in footer/header can
   -- trigger this warning.
   ltx.@@.trace.log("WARN TEX-MC-INSERT-NO-KIDS: "..mcnum.." has no kids",2)
   if single==1 then
     tex.sprint("null")
   end
   return
  end
  -- array delimiters are only needed for a single mc with several kids
  local needsarray = #kids > 1 and single==1
  if needsarray then
   tex.sprint("[")
  end
  for i,kidstable in ipairs(kids) do
   local kidnum  = kidstable["kid"]
   local kidpage = kidstable["page"]
   local kidpageobjnum = pdfpageref(kidpage)
   ltx.@@.trace.log("INFO TEX-MC-INSERT-KID: " .. mcnum ..
                    " insert KID " ..i..
                    " with num " .. kidnum ..
                    " on page " .. kidpage.."/"..kidpageobjnum,3)
   tex.sprint(catlatex,"<</Type /MCR /Pg "..kidpageobjnum .. " 0 R /MCID "..kidnum.. ">> " )
  end
  if needsarray then
   tex.sprint("]")
  end
end
%    \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.func.store_struct_mcabs}
% This function is used in the tagpdf-mc-luacode. It stores the absolute count
% of the mc into the current structure. This must be done ordered.
%    \begin{macrocode}
-- Appends the absolute mc number `mcnum` to the mc list of structure
-- `structnum` and records `structnum` as the parent of the mc chunk.
-- Missing table entries are created on the fly.
function ltx.@@.func.store_struct_mcabs (structnum,mcnum)
 local structs = ltx.@@.struct
 local structdata = structs[structnum]
 if not structdata then
  structdata = { }
  structs[structnum] = structdata
 end
 local mclist = structdata["mc"]
 if not mclist then
  mclist = { }
  structdata["mc"] = mclist
 end
 -- one structure can hold several mc chunks; appending keeps their order
 tableinsert(mclist,mcnum)
 ltx.@@.trace.log("INFO TEX-MC-INTO-STRUCT: "..
                   mcnum.." inserted in struct "..structnum,3)
 -- the reverse relation is unique: an mc belongs to exactly one structure
 local mcs = ltx.@@.mc
 local mcdata = mcs[mcnum]
 if not mcdata then
  mcdata = { }
  mcs[mcnum] = mcdata
 end
 mcdata["parent"] = structnum
end

%    \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.func.store_mc_in_page}
% This is used in the traversing code and stores the relation between
% abs count and page count.
%    \begin{macrocode}
-- pay attention: lua counts arrays from 1, tex pages from one
-- mcid and arrays in pdf count from 0.
-- Records for the page `page` that the MCID `mcpagecnt` (the per page
-- counter) belongs to the absolute mc number `mcnum`.
function ltx.@@.func.store_mc_in_page (mcnum,mcpagecnt,page)
 local pages = ltx.@@.page
 local pagedata = pages[page]
 if not pagedata then
  -- first mc on this page: create the page table
  pagedata = {}
  pages[page] = pagedata
 end
 pagedata[mcpagecnt] = mcnum
 ltx.@@.trace.log("INFO TAG-MC-INTO-PAGE: page " .. page ..
                   ": inserting MCID " .. mcpagecnt .. " => " .. mcnum,3)
end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{ltx.@@.func.update_mc_attributes}
% This updates the mc-attributes of a box. It should only be used on boxes
% which don't contain structure elements.
% The arguments are a box, the mc-num and the type (as a number)
%    \begin{macrocode}
-- Sets the mc counter attribute to `mcnum` and the mc type attribute to
-- `type` on every node of the list starting at `head`, including the
-- content of nested hlists and vlists. Returns `head` unchanged.
local function @@_update_mc_attributes (head,mcnum,type)
 local setattribute = node.set_attribute
 for n in node.traverse(head) do
   setattribute(n,mccntattributeid,mcnum)
   setattribute(n,mctypeattributeid,type)
   local id = n.id
   if id == HLIST or id == VLIST then
     -- box node: handle its content too
     @@_update_mc_attributes (n.list,mcnum,type)
   end
 end
 return head
end
-- make the function available in the public function table
ltx.@@.func.update_mc_attributes = @@_update_mc_attributes
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{ltx.@@.func.mark_page_elements}
% This is the main traversing function. See the lua comment for more details.
%    \begin{macrocode}
--[[
    Now follows the core function
    It wades through the shipout box and checks the attributes
    ARGUMENTS
    box: is a box,
    mcpagecnt: num, the current page cnt of mc (should start at -1 in shipout box), needed for recursion
    mccntprev: num, the attribute cnt of the previous node/whatever - if different we have a chunk border
    mcopen: num, records if some bdc/emc is open
    These arguments are only needed for log messages; if not present they are replaced by fixed strings:
    name: string to describe the box
    mctypeprev: num, the type attribute of the previous node/whatever

    there are lots of logging messages currently. Should be cleaned up in due course.
    One should also find ways to make the function shorter.
--]]

-- Traverses the node list of `box`, recursing into nested hlists and vlists,
-- and inserts the begin/end marked-content nodes whenever the mc counter
-- attribute of a node differs from the previous one.
-- Non-leader glue, local_par, penalty and kern nodes never start a new chunk.
-- Returns mcopen, mcpagecnt, mccntprev, mctypeprev so that the caller (or the
-- enclosing recursion level) can continue with the updated state.
-- Note: mctypeprev is only passed through, it is not updated in this function.
function ltx.@@.func.mark_page_elements (box,mcpagecnt,mccntprev,mcopen,name,mctypeprev)
  local name = name or ("SOMEBOX")
  local mctypeprev = mctypeprev or -1
  local abspage = status.total_pages + 1  -- the real counter is increased
                                          -- inside the box so one off
                                          -- if the callback is not used. (???)
  ltx.@@.trace.log ("INFO TAG-ABSPAGE: " .. abspage,3)
  ltx.@@.trace.log ("INFO TAG-ARGS: pagecnt".. mcpagecnt..
                    " prev "..mccntprev ..
                    " type prev "..mctypeprev,4)
  ltx.@@.trace.log ("INFO TAG-TRAVERSING-BOX: ".. tostring(name)..
                    " TYPE ".. node.type(node.getid(box)),3)
  local head = box.head -- ShipoutBox is a vlist?
  if head then
    -- only needed for the log message, so keep the values local
    local mccnthead, mctypehead, taghead = @@_get_mc_cnt_type_tag (head)
    ltx.@@.trace.log ("INFO TAG-HEAD: " ..
                      node.type(node.getid(head))..
                      " MC"..tostring(mccnthead)..
                      " => TAG " .. tostring(mctypehead)..
                      " => ".. tostring(taghead),3)
  else
    ltx.@@.trace.log ("INFO TAG-NO-HEAD: head is "..
                       tostring(head),3)
  end
  for n in node.traverse(head) do
    local mccnt, mctype, tag = @@_get_mc_cnt_type_tag (n)
    ltx.@@.trace.log ("INFO TAG-NODE: "..
                       node.type(node.getid(n))..
                      " MC".. tostring(mccnt)..
                      " => TAG ".. tostring(mctype)..
                      " => " ..  tostring(tag),3)
    if n.id == HLIST
    then -- enter the hlist
     mcopen,mcpagecnt,mccntprev,mctypeprev=
      ltx.@@.func.mark_page_elements (n,mcpagecnt,mccntprev,mcopen,"INTERNAL HLIST",mctypeprev)
    elseif n.id == VLIST then -- enter the vlist
     mcopen,mcpagecnt,mccntprev,mctypeprev=
      ltx.@@.func.mark_page_elements (n,mcpagecnt,mccntprev,mcopen,"INTERNAL VLIST",mctypeprev)
    elseif n.id == GLUE and not n.leader then -- at glue real space chars are inserted, but this has
                                   -- been done in the previous shipout traversal, so here it is ignored
    elseif n.id == LOCAL_PAR then  -- local_par is ignored
    elseif n.id == PENALTY then    -- penalty is ignored
    elseif n.id == KERN then       -- kern is ignored
     ltx.@@.trace.log ("INFO TAG-KERN-SUBTYPE: "..
       node.type(node.getid(n)).." "..n.subtype,4)
    else
     -- math is currently only logged.
     -- we could mark the whole as math
     -- for inner processing the mlist_to_hlist callback is probably needed.
     if n.id == MATH then
      ltx.@@.trace.log("INFO TAG-MATH-SUBTYPE: "..
        node.type(node.getid(n)).." "..@@_get_mathsubtype(n),4)
     end
     -- endmath
     ltx.@@.trace.log("INFO TAG-MC-COMPARE: current "..
               mccnt.." prev "..mccntprev,4)
     if mccnt~=mccntprev then -- a new mc chunk
      ltx.@@.trace.log ("INFO TAG-NEW-MC-NODE: "..
                         node.type(node.getid(n))..
                        " MC"..tostring(mccnt)..
                        " <=> PREVIOUS "..tostring(mccntprev),4)
      if mcopen~=0 then -- there is a chunk open, close it (hope there is only one ...
       box.list=@@_insert_emc_node (box.list,n)
       mcopen = mcopen - 1
       ltx.@@.trace.log ("INFO TAG-INSERT-EMC: " ..
         mcpagecnt .. " MCOPEN = " .. mcopen,3)
       if mcopen ~=0 then
        ltx.@@.trace.log ("WARN TAG-OPEN-MC: " .. mcopen,1)
       end
      end
      if ltx.@@.mc[mccnt] then
       if ltx.@@.mc[mccnt]["artifact"] then
        -- artifacts get no MCID and are not counted on the page
        ltx.@@.trace.log("INFO TAG-INSERT-ARTIFACT: "..
                          tostring(ltx.@@.mc[mccnt]["artifact"]),3)
        if ltx.@@.mc[mccnt]["artifact"] == "" then
         box.list = @@_insert_bmc_node (box.list,n,"Artifact")
        else
         box.list = @@_insert_bdc_node (box.list,n,"Artifact", "/Type /"..ltx.@@.mc[mccnt]["artifact"])
        end
       else
        ltx.@@.trace.log("INFO TAG-INSERT-TAG: "..
                          tostring(tag),3)
        mcpagecnt = mcpagecnt +1
        ltx.@@.trace.log ("INFO TAG-INSERT-BDC: "..mcpagecnt,3)
        -- build the property dictionary: MCID plus the optional entries
        local dict= "/MCID "..mcpagecnt
        if ltx.@@.mc[mccnt]["raw"] then
         ltx.@@.trace.log("INFO TAG-USE-RAW: "..
           tostring(ltx.@@.mc[mccnt]["raw"]),3)
         dict= dict .. " " .. ltx.@@.mc[mccnt]["raw"]
        end
        if ltx.@@.mc[mccnt]["alt"] then
         ltx.@@.trace.log("INFO TAG-USE-ALT: "..
            tostring(ltx.@@.mc[mccnt]["alt"]),3)
         dict= dict .. " " .. ltx.@@.mc[mccnt]["alt"]
        end
        if ltx.@@.mc[mccnt]["actualtext"] then
         ltx.@@.trace.log("INFO TAG-USE-ACTUALTEXT: "..
           tostring(ltx.@@.mc[mccnt]["actualtext"]),3)
         dict= dict .. " " .. ltx.@@.mc[mccnt]["actualtext"]
        end
        box.list = @@_insert_bdc_node (box.list,n,tag, dict)
        ltx.@@.func.store_mc_kid (mccnt,mcpagecnt,abspage)
        ltx.@@.func.store_mc_in_page(mccnt,mcpagecnt,abspage)
        ltx.@@.trace.show_mc_data (mccnt,3)
       end
       mcopen = mcopen + 1
      else
       -- no mc data for this counter: the content has not been tagged
       if tagunmarkedbool.mode == truebool.mode then
        ltx.@@.trace.log("INFO TAG-NOT-TAGGED: this has not been tagged, using artifact",2)
        box.list = @@_insert_bmc_node (box.list,n,"Artifact")
        mcopen = mcopen + 1
       else
        ltx.@@.trace.log("WARN TAG-NOT-TAGGED: this has not been tagged",1)
       end
      end
      mccntprev = mccnt
     end
    end -- end if
  end -- end for
  if head then
    -- only needed for the log message, so keep the values local
    local mccnthead, mctypehead, taghead = @@_get_mc_cnt_type_tag (head)
    ltx.@@.trace.log ("INFO TAG-ENDHEAD: " ..
                       node.type(node.getid(head))..
                      " MC"..tostring(mccnthead)..
                      " => TAG "..tostring(mctypehead)..
                      " => "..tostring(taghead),4)
  else
    ltx.@@.trace.log ("INFO TAG-ENDHEAD: ".. tostring(head),4)
  end
  ltx.@@.trace.log ("INFO TAG-QUITTING-BOX "..
                     tostring(name)..
                    " TYPE ".. node.type(node.getid(box)),4)
 return mcopen,mcpagecnt,mccntprev,mctypeprev
end

%    \end{macrocode}
% \end{macro}
%  \begin{macro}{ltx.@@.func.mark_shipout}
%  This is the function used in the callback. Besides calling the traversing
%  function it also checks if there is an open MC-chunk from a page
%  break and inserts the needed EMC literal.
%    \begin{macrocode}
-- Callback function for the shipout box: runs the traversing function over
-- `box` and, if a marked-content chunk is still open afterwards (e.g. because
-- of a page break), appends the closing EMC node at the end of the box list.
function ltx.@@.func.mark_shipout (box)
 -- only the first return value (number of open chunks) is needed here;
 -- it is kept local so that no global `mcopen` is created
 local mcopen = ltx.@@.func.mark_page_elements (box,-1,-100,0,"Shipout",-1)
 if mcopen~=0 then -- there is a chunk open, close it (hope there is only one ...
  local emcnode = @@_backend_create_emc_node ()
  local list = box.list
  if list then
     list = node.insert_after (list,node.tail(list),emcnode)
     mcopen = mcopen - 1
     ltx.@@.trace.log ("INFO SHIPOUT-INSERT-LAST-EMC: MCOPEN " .. mcopen,3)
  else
     ltx.@@.trace.log ("WARN SHIPOUT-UPS: this shouldn't happen",0)
  end
  if mcopen ~=0 then
     ltx.@@.trace.log ("WARN SHIPOUT-MC-OPEN: " .. mcopen,1)
  end
 end
end
%    \end{macrocode}
% \end{macro}

% \section{Parenttree}
% \begin{macro}
%   {
%    ltx.@@.func.fill_parent_tree_line,
%    ltx.@@.func.output_parenttree
%   }
% These functions create the parent tree. The second, main function
% is used in the tagpdf-tree code.
% TODO check if the tree code can move into the backend code.
%    \begin{macrocode}
-- Builds the parent tree entry for the page `page` (counted from 1) and
-- returns it as a string of the form "<page-1> [<objref> ...]". For every
-- MCID of the page (counted from 0) the object reference of the parent
-- structure of the mc chunk is looked up. A page without mc data gives an
-- empty array.
function ltx.@@.func.fill_parent_tree_line (page)
     -- we need to get page-> i=kid -> mcnum -> structnum
     -- pay attention: the kid numbers and the page number in the parent tree start with 0!
    local numsentry =""
    local pdfpage = page-1
    if ltx.@@.page[page] and ltx.@@.page[page][0] then
     -- the entries start at index 0, which # does not count, so mcchunks is
     -- the highest index and the page has mcchunks+1 entries
     local mcchunks=#ltx.@@.page[page]
     ltx.@@.trace.log("INFO PARENTTREE-NUM:  page "..
                   page.." has "..mcchunks.."+1 Elements ",4)
     for i=0,mcchunks do
      -- logs the absolute mc number stored for every MCID of the page
      ltx.@@.trace.log("INFO PARENTTREE-CHUNKS:  "..
        ltx.@@.page[page][i],4)
     end
     if mcchunks == 0 then
      -- only one chunk on the page
      local mcnum  = ltx.@@.page[page][0]
      -- same fallback as in the multi chunk branch below if the mc has no
      -- recorded parent -- NOTE(review): confirm that struct 0 is the wanted
      -- fallback also here
      local structnum = ltx.@@.mc[mcnum]["parent"] or 0
      local objref = @@_pdf_object_ref('@@/struct',structnum)
      ltx.@@.trace.log("INFO PARENTTREE-STRUCT-OBJREF:  =====>"..
        tostring(objref),5)
      numsentry = pdfpage .. " [".. objref .. "]"
      ltx.@@.trace.log("INFO PARENTTREE-NUMENTRY: page " ..
        page.. " num entry = ".. numsentry,3)
     else
      numsentry = pdfpage .. " ["
       for i=0,mcchunks do
        local mcnum  = ltx.@@.page[page][i]
        local structnum = ltx.@@.mc[mcnum]["parent"] or 0
        local objref = @@_pdf_object_ref('@@/struct',structnum)
        numsentry = numsentry .. " ".. objref
       end
      numsentry = numsentry .. "] "
      ltx.@@.trace.log("INFO PARENTTREE-NUMENTRY: page " ..
        page.. " num entry = ".. numsentry,3)
     end
    else
      ltx.@@.trace.log ("INFO PARENTTREE-NO-DATA: page "..page,3)
      numsentry = pdfpage.." []"
    end
    return numsentry
end

-- Prints the parent tree entries of the pages 1 to `abspage` back to TeX,
-- one entry per page, each followed by ^^J.
function ltx.@@.func.output_parenttree (abspage)
 for i=1,abspage do
  -- local, so that no global `line` is created or overwritten
  local line = ltx.@@.func.fill_parent_tree_line (i) .. "^^J"
  tex.sprint(catlatex,line)
 end
end
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%   {
%    process_softhyphen_pre
%    process_softhyphen_post
%   }
%  First some local definitions. Since these are only needed locally everything gets wrapped into a block.
%    \begin{macrocode}
do
  -- table returned by node.get_properties_table(); indexed by node below
  local properties = node.get_properties_table()
  -- key in a node's properties that marks a glyph as hyphenation hyphen
  local is_soft_hyphen_prop = 'tagpdf.rewrite-softhyphen.is_soft_hyphen'
  -- U+002D HYPHEN-MINUS: the char that gets replaced
  local hyphen_char = 0x2D
  -- U+00AD SOFT HYPHEN: the replacement char
  local soft_hyphen_char = 0xAD
%    \end{macrocode}
%
%  A lookup table to test if the font supports the soft hyphen glyph.
%    \begin{macrocode}
  -- Cache indexed by font id. The value is true if the font has the format
  -- 'opentype' or 'truetype' and its character table has an entry for the
  -- soft hyphen, false otherwise. Missing entries are computed on first
  -- access and stored in the table.
  local softhyphen_fonts = setmetatable({}, {__index = function(t, fid)
    local fdir = identifiers[fid]
    local format, characters
    if fdir then
      format, characters = fdir.format, fdir.characters
    end
    local supported = false
    if format == 'opentype' or format == 'truetype' then
      -- the glyph lookup is only done for fonts with a suitable format
      supported = (characters and characters[soft_hyphen_char]) ~= nil
    end
    t[fid] = supported
    return supported
  end})
%    \end{macrocode}
%
%  A pre shaping callback to mark hyphens as being hyphenation hyphens.
%  This runs before shaping to avoid affecting hyphens moved into
%  discretionaries during shaping.
%    \begin{macrocode}
  -- Pre shaping filter: marks, through the node properties, every char in
  -- the pre list of explicit and regular discretionaries as hyphenation
  -- hyphen. Does nothing if the soft hyphen handling is switched off.
  -- Always returns true.
  local function process_softhyphen_pre(head, _context, _dir)
    if softhyphenbool.mode == truebool.mode then
      for disc, sub in node.traverse_id(DISC, head) do
        if sub == explicit_disc or sub == regular_disc then
          for n in node.traverse_char(disc.pre) do
            local props = properties[n]
            if props then
              props[is_soft_hyphen_prop] = true
            else
              -- no properties yet: create them with the mark already set
              properties[n] = { [is_soft_hyphen_prop] = true }
            end
          end
        end
      end
    end
    return true
  end

%    \end{macrocode}
%
%  Finally do the actual replacement after shaping. No checking for double processing here
%  since the operation is idempotent.
%    \begin{macrocode}
  -- Post shaping filter: replaces in the pre list of discretionaries every
  -- hyphen glyph that has been marked as hyphenation hyphen by the soft
  -- hyphen char, if the font supports it. Does nothing if the soft hyphen
  -- handling is switched off. Always returns true.
  local function process_softhyphen_post(head, _context, _dir)
    if softhyphenbool.mode == truebool.mode then
      for disc in node.traverse_id(DISC, head) do
        for n, ch, fid in node.traverse_glyph(disc.pre) do
          if softhyphen_fonts[fid] and ch == hyphen_char then
            local props = properties[n]
            if props and props[is_soft_hyphen_prop] then
              n.char = soft_hyphen_char
              -- the stored glyph_info belongs to the old char, so drop it
              props.glyph_info = nil
            end
          end
        end
      end
    end
    return true
  end

  -- register both functions under the same description: the marking runs in
  -- the pre shaping filter, the replacement in the post shaping filter
  luatexbase.add_to_callback('pre_shaping_filter', process_softhyphen_pre, 'tagpdf.rewrite-softhyphen')
  luatexbase.add_to_callback('post_shaping_filter', process_softhyphen_post, 'tagpdf.rewrite-softhyphen')
end
%    \end{macrocode}
% \end{macro}
%
%    \begin{macrocode}
%</lua>
%    \end{macrocode}
% \end{implementation}
% \PrintIndex