# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010, 2011, 2012 Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.sisudoc.org/sisu/en/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: document abstraction

=end
module SiSU_DAL_DocumentStructureExtract
  # Resets this class's own class-level parsing state each time an instance is
  # created. The class variables set here belong to Instantiate and are
  # separate from the identically named ones declared in Build.
  class Instantiate < SiSU_Param::Parameters::Instructions
    @@flag=Hash.new #Beware!!
    def initialize
      @@flag['table_to']=false
      @@columns=0
      @@column=0
      @@counter=0
      @@line_mode=''
    end
  end
  class Build
    @@flag={} #Beware!!
    # md::   document parameters object
    # data:: the markup chunks to process
    # Also instantiates Instantiate (which resets its class-level state) and
    # prepares the two page-break layout objects reused when building output.
    def initialize(md,data)
      @md=md
      @data=data
      SiSU_DAL_DocumentStructureExtract::Instantiate.new
      layout=SiSU_DAL_DocumentStructure::ObjectLayout
      @pb=layout.new.break(Hx[:br_page])
      @pbn=layout.new.break(Hx[:br_page_new])
    end
    # Maps a heading level marker (A-C, 1-6) to its numeric depth 1-9.
    # Patterns are tried in order and match anywhere in lv; returns nil when
    # none of them matches.
    def ln_get(lv)
      [
        [/A/,1],[/B/,2],[/C/,3],
        [/1/,4],[/2/,5],[/3/,6],
        [/4/,7],[/5/,8],[/6/,9]
      ].each do |marker,depth|
        return depth if marker === lv
      end
      nil
    end
    # true when str contains image markup of the form {name.png ...}http(s)://...
    def image_test(str)
      return true if str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/
      false
    end
    # true when str contains an asterisk (the bullet marker)
    def bullet_test(str)
      return true if str=~/\*/
      false
    end
    # Reads the hang and indent values from a paragraph prefix:
    #   _N    -> [N,N]
    #   __N   -> [0,N]
    #   _H_I  -> [H,I]
    # Matched values are returned as the captured strings; when nothing
    # matches the result is the integer pair [0,0].
    def hang_and_indent_test(str)
      return [$1,$1] if str=~/^_([1-9])[^_]/
      return [0,$1] if str=~/^__([1-9])/
      return [$1,$2] if str=~/^_([0-9])_([0-9])/
      [0,0]
    end
    # Definition-style paragraph: reads hang/indent from the prefix str1 (same
    # rules as hang_and_indent_test) and wraps the leading term of each line of
    # str2 in bold markers. The term ends at a " \\ " separator when str2
    # contains one, otherwise at the end of the line.
    # Returns [hang,indent,obj].
    def hang_and_indent_def_test(str1,str2)
      hang,indent=0,0
      if str1=~/^_([1-9])[^_]/
        hang,indent=$1,$1
      elsif str1=~/^__([1-9])/
        indent=$1
      elsif str1=~/^_([0-9])_([0-9])/
        hang,indent=$1,$2
      end
      pattern,replacement=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/
        [/^(.+?)(\s+\\\\(?:\s+|\n))/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2"]
      else
        [/^(.+?)\n/,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n"]
      end
      [hang,indent,str2.gsub(pattern,replacement)]
    end
    # true when str contains endnote markup, either ~{ ... }~ or ~[ ... ]~
    def endnote_test?(str)
      return true if str=~/~\{.+?\}~|~\[.+?\]~/
      false
    end
    # Pulls "*~tagname" markers out of str.
    # Returns [str,tags]: str with the markers removed and tags as an array of
    # the names found, with nametag appended when one is given.
    # A nil str is returned untouched with an empty tag list (nametag is
    # ignored in that case).
    def extract_tags(str,nametag=nil)
      return [str,[]] if str.nil?
      tags=[]
      if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/
        # wrap each marker in tag delimiters and collapse repeated spaces
        marked=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i,
            "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}").
          gsub(/ [ ]+/i,' ')
        tags=marked.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten
        # each delimited tag, with its adjacent spaces, becomes one space
        str=marked.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks?
      end
      tags << nametag if nametag
      [str,tags]
    end
    # Walks @data (one markup chunk per element) and turns each chunk into a
    # document-structure object, collecting the results in tuned_file.
    #
    # Chunks outside any block are classified by their leading markup: header
    # metadata, comment, heading, indented/bulleted or hanging-indent
    # paragraph, layout break, or plain paragraph. Block markup (code, poem,
    # group, block, alt, table) is tracked through the class-level @@flag hash:
    # an opening marker sets the flag, the following chunks are accumulated or
    # converted, and the closing marker clears the flag and emits an "... end"
    # comment object.
    #
    # After the loop, heading objects for Endnotes (when @md.flag_endnotes),
    # Index (when @md.book_idx) and Metadata are appended.
    #
    # Returns [tuned_file,meta], where meta is built from the header values
    # gathered in @metadata.
    #
    # NOTE(review): @@flag and @@counter are class variables, so this state is
    # shared by every Build instance, including the nested ones created below.
    def identify_parts
      data=@data
      tuned_file=[]
      @tuned_block,@tuned_code=[],[]
      @@counter,@verse_count=0,0
      @metadata={}
      @data.each do |t_o|
        # outside code blocks, runs of blank lines are collapsed to a single newline
        t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") unless @@flag['code']
        # ordinary chunk: not a block open/close marker and no block currently open
        if t_o !~/^(?:code|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \
        and not @@flag['code'] \
        and not @@flag['poem'] \
        and not @@flag['group'] \
        and not @@flag['block'] \
        and not @@flag['alt'] \
        and not @@flag['table']
          # a trailing ={...} line is cut from the chunk and kept as idx
          unless t_o =~/^(?:@\S+?:|%+)\s/                  # extract book index for paragraph if any
            idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1
              t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'')
              m
            else nil
            end
          end
          t_o=case t_o
          when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/                                 #metadata, header
            if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m
              tag,obj=$1,$2
              @metadata[tag]=obj
            end
            t_o=nil
          when /^%+\s/                                     #comment
            t_o=if t_o=~/^%+\s+(.+)/
              h={obj: $1}
              SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h)
            else nil
            end
          when /^:?([A-C1-6])\~/                           #heading / lv
            lv=$1
            ln=ln_get(lv)
            # three heading forms: no name, "name-" (autonum_ false), and plain name
            # (the name is also added to tags); note is computed in each form but is
            # not included in h
            t_o=if t_o=~/^:?[A-C1-6]\~\s+(.+)/m
              obj=$1
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              h={ lv: lv, ln: ln, obj: obj, idx: idx, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            elsif t_o=~/^:?[A-C1-6]\~(\S+?)-\s+(.+)/m
              name,obj=$1,$2
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              h={ lv: lv, name: name, obj: obj, idx: idx, autonum_: false, tags: tags}
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            elsif t_o=~/^:?[A-C1-6]\~(\S+)\s+(.+)/m
              name,obj=$1,$2
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj,name)
              h={ lv: lv, name: name, obj: obj, idx: idx, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h)
            else nil
            end
          when /^_(?:[1-9]!?|[1-9]?\*)\s+/                  #indented and/or bullet paragraph
            t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m
              tst,obj=$1,$2
              # a "!" after the indent digit selects the definition form (bold leading term)
              if t_o=~/^_[1-9]!\s+.+/m
                hang,indent,obj=hang_and_indent_def_test(tst,obj)
              else
                hang,indent=hang_and_indent_test(tst)
              end
              bullet=bullet_test(tst)
              image=image_test(obj)
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              # whitespace-only content produces no paragraph object (t_o becomes nil)
              unless obj=~/\A\s*\Z/m
                h={ bullet_: bullet, hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
                SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
              end
            else nil
            end
          when /^_[0-9]?_[0-9]!?\s+/                  #hanging indent paragraph
            t_o=if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m
              tst,obj=$1,$2
              if t_o=~/^_[0-9]?_[0-9]!\s+.+/m
                hang,indent,obj=hang_and_indent_def_test(tst,obj)
              else
                hang,indent=hang_and_indent_test(tst)
              end
              image=image_test(obj)
              note=endnote_test?(obj)
              obj,tags=extract_tags(obj)
              unless obj=~/\A\s*\Z/m
                h={ hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
                SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
              end
            else nil
            end
          when /^[<\[](?:br)?:(?:pa?r|o(?:bj|---)?)[>\]]\s*$/                  #[br:par] #[br:obj]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj])
          when /^(?:[<\[](?:br)?:pg[>\]]|<?:pb>?)\s*$/                         #[br:pg]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page])
          when /^[<\[](?:br)?:pg?n[>\]]\s*$/                                   #[br:pgn]
            SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new])
          else                                             #paragraph
            image=image_test(t_o)
            note=endnote_test?(t_o)
            obj,tags=extract_tags(t_o)
            unless obj=~/\A\s*\Z/m
              h={ bullet_: false, indent: 0, hang: 0, obj: obj, idx: idx, note_: note, image_: image, tags: tags }
              SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h)
            end
          end
        # not inside a code block: look for a block-opening marker
        elsif not @@flag['code']
          if t_o =~/^code\{/
            @@flag['code']=true
            @@counter=1
            # "code{#" requests a numbered code block
            @codeblock_numbered=(t_o =~/^code\{#/) ? true : false
            h={ obj: 'code block start' }                   #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          elsif t_o =~/^poem\{/
            @@flag['poem']=true
            h={ obj: 'poem start' }                         #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            tuned_file << t_o
          elsif t_o =~/^group\{/
            @@flag['group']=true
            h={ obj: 'group text start' }                   #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            tuned_file << t_o
          elsif t_o =~/^block\{/
            @@flag['block']=true
            h={ obj: 'block text start' }                   #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            tuned_file << t_o
          elsif t_o =~/^alt\{/
            @@flag['alt']=true
            h={ obj: 'alt text start' }                     #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            tuned_file << t_o
          elsif t_o =~/^(?:table\{|\{table)[ ~]/
            h={ obj: 'table start' }                        #introduce a counter
            ins=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #ins=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            tuned_file << ins
            # form 1: "table{ cN; widths" opens a table whose rows arrive in later
            # chunks; only the settings are stored here, in @h
            if t_o=~/^table\{(?:~h)?\s+/
              @@flag['table']=true
              @rows=''
              case t_o
              when /table\{~h\s+c(\d+);\s+(.+)/
                cols=$1
                col=$2.scan(/\d+/)
                heading=true
              when /table\{\s+c(\d+);\s+(.+)/
                cols=$1
                col=$2.scan(/\d+/)
                heading=false
              end
              @h={ head_: heading, cols: cols, widths: col, idx: idx }
            # form 2: "{table}" or "{table N}" followed by "|"-separated rows in the same chunk
            elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m
              m1,m2,hd=nil,nil,nil
              tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1]            #two table representations should be consolidated as one
              hd=((t_o =~/^\{table~h/) ? true : false)
              tbl,tags=extract_tags(tbl)
              rws=tbl.split(/\n/)
              rows=''
              cols=nil
              # the column count is taken from the number of "|" in the first row
              rws.each do |r|
                cols=(cols ? cols : (r.scan('|').length) +1)
                r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}")        #r.gsub!(/\|/m,"#{Mx[:tc_p]}")
                rows += r + Mx[:tc_c]
              end
              col=[]
              # NOTE(review): (cols - 1) is used as a divisor below; a single-column
              # table with an explicit width would divide by zero - confirm
              if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/       #width of col 1 given as %, usually when wider than rest that are even
                c1=$1.to_i
                width=(100 - c1)/(cols - 1)
                col=[ c1 ]
                (cols - 1).times { col << width }
              else                                         #all columns of equal width
                width=100.00/cols
                cols.times { col << width }
              end
              h={ head_: hd, cols: cols, widths: col, obj: rows, idx: idx, tags: tags }
              t_o=SiSU_DAL_DocumentStructure::ObjectTable.new.table(h) unless h.nil?
              tuned_file << t_o
              h={ obj: 'table end' }                        #introduce a counter
              t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h)
              t_o
            # form 3: "{table w1; w2; ...}" with an explicit width per column
            elsif t_o=~/^\{table(?:~h)?\s+/
              m1,m2,hd=nil,nil,nil
              # NOTE(review): when neither pattern below matches, m1 and tbl stay nil
              # and the calls that follow would fail - confirm such input cannot occur
              h=case t_o
              when /\{table~h\s+(.+?)\}\n(.+)\Z/m          #two table representations should be consolidated as one
                m1,tbl,hd=$1,$2,true
              when /\{table\s+(.+?)\}\n(.+)\Z/m            #two table representations should be consolidated as one
                m1,tbl,hd=$1,$2,false
              else nil
              end
              tbl,tags=extract_tags(tbl)
              col=m1.scan(/\d+/)
              rws=tbl.split(/\n/)
              rows=''
              rws.each do |r|
                r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}")        #r.gsub!(/\|/m,"#{Mx[:tc_p]}")
                rows += r + Mx[:tc_c]
              end
              h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags }
              t_o=SiSU_DAL_DocumentStructure::ObjectTable.new.table(h) unless h.nil?
              tuned_file << t_o
              h={ obj: 'table end' }                        #introduce a counter
              t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h)
              t_o
            end
          end
          t_o
        end
        # an open table{ ... }table: gather rows until the closing marker arrives
        if @@flag['table']
          if @@flag['table'] \
          and t_o =~/^\}table/                             #two table representations should be consolidated as one
            @@flag['table']=false
            headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx]
            @h={ head_: headings, cols: columns, widths: widths, idx: idx, obj: @rows }
            t_o=SiSU_DAL_DocumentStructure::ObjectTable.new.table(@h)
            tuned_file << t_o
            @h,@rows=nil,''
            t_o
            h={ obj: 'table end' }                          #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
            t_o
          else
            # each chunk is one row: its newlines become cell separators and the
            # row is terminated with Mx[:tc_c]
            if t_o !~/^table\{/ \
            and not t_o.nil?
              t_o=t_o.gsub(/^\n+/m,''). #check added for ruby 1.9.2 not needed in 1.8 series (tested in v2)
                gsub(/\n+/m,"#{Mx[:tc_p]}")
              @rows += t_o + Mx[:tc_c]
            end
            t_o=nil
          end
        end
        # an open code block: collect processed lines until }code, then emit the block
        if @@flag['code']
          if t_o =~/^\}code/
            @@flag['code']=false
            obj=@tuned_code.join("\n")
            tags=[]
            h={ obj: obj, tags: tags, number_: @codeblock_numbered }
            t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.code(h)
            @tuned_code=[]
            tuned_file << t_o
            h={ obj: 'code block end' }                     #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          end
          if @@flag['code'] \
          and t_o.class==String \
          and not t_o.nil? #you may need to introduce t_o.class==String test more widely
            sub_array=t_o.dup + "#{Mx[:br_nl]}"
            # the first assignment to @line_mode is overwritten straight away; the
            # scan below keeps only lines that contain non-whitespace
            @line_mode=sub_array.scan(/.+/)
            @line_mode=[]
            sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/}
            t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines('code').join
            @tuned_code << t_o
            t_o=nil
          end
        elsif @@flag['poem'] \
        or @@flag['group'] \
        or @@flag['block'] \
        or @@flag['alt']
          # closing markers: group/block/alt emit the text accumulated in
          # @tuned_block as one object; a poem only emits its end comment, since
          # its verses are pushed chunk by chunk further down
          if @@flag['poem'] \
          and t_o =~/^\}poem/
            @@flag['poem']=false
            h={ obj: 'poem end' }                           #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          elsif ( @@flag['group'] \
          and t_o =~/^\}group/ )
            @@flag['group']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            @tuned_block=[]
            t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.group(h)
            tuned_file << t_o
            h={ obj: 'group text end' }                     #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          elsif ( @@flag['block'] \
          and t_o =~/^\}block/ )
            @@flag['block']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            @tuned_block=[]
            t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.block(h)
            tuned_file << t_o
            h={ obj: 'block text end' }                     #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          elsif ( @@flag['alt'] \
          and t_o =~/^\}alt/ )
            @@flag['alt']=false
            obj,tags=extract_tags(@tuned_block.join("\n"))
            h={ obj: obj, tags: tags }
            t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.alt(h)
            @tuned_block=[]
            tuned_file << t_o
            h={ obj: 'alt text end' }                       #introduce a counter
            t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h)
          end
          # `or` and `and` share one precedence level and associate left to right,
          # so this condition reads ((poem or group or alt) and t_o =~/\S/ and t_o !~ ...)
          # NOTE(review): @@flag['block'] is not part of this test - confirm intended
          if @@flag['poem'] \
          or @@flag['group'] \
          or @@flag['alt'] \
          and t_o =~/\S/ \
          and t_o !~/^(?:\}(?:verse|code|alt|group|block)|(?:verse|code|alt|group|block)\{)/ # fix logic
            sub_array=t_o.dup
            @line_mode=sub_array.scan(/.+/)
            # NOTE(review): the poem handling sits inside the `type=if` expression, so
            # type is still nil when it is passed to build_lines, and in the poem case
            # type ends up holding the value of poem.each; otherwise type is 'group'
            # and nothing else happens here
            type=if @@flag['poem']; 'poem'
            t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join
            poem=t_o.split(/\n\n/)
            # one verse object per blank-line separated stanza, pushed directly to tuned_file
            poem.each do |v|
              v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n")
              obj,tags=extract_tags(v)
              h={ obj: obj, tags: tags }
              t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.verse(h)
              tuned_file << t_o
            end
            else 'group'
            end
            @verse_count+=1 if @@flag['poem']
          end
        end
        # final routing of whatever t_o now holds
        if not @@flag['code']
          # NOTE(review): @@flag['block'] is not tested here either, so chunks inside
          # block{ ... }block take the else branch and go to tuned_file as they are,
          # rather than into @tuned_block - confirm intended
          if @@flag['poem'] \
          or @@flag['group'] \
          or @@flag['alt']
            # keep the text layout: newlines become Mx[:br_nl] and paired spaces become Mx[:nbsp]
            if t_o.class==String
              t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}").
                gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}").
                gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
              t_o=t_o + Mx[:br_nl] if t_o =~/\S+/
            elsif t_o.is==:group \
            || t_o.is==:block \
            || t_o.is==:alt \
            || t_o.is==:verse
              t_o.obj=t_o.obj.gsub(/\n/m,"#{Mx[:br_nl]}").
                gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}").
                gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
            end
            @tuned_block << t_o if t_o =~/\S+/
          else tuned_file << t_o
          end
        else tuned_file << t_o
        end
      end
      # trailing sections: each is a page break followed by two inserted headings
      # (ln 2 and ln 4); the bare h={ obj: ... } assignments that follow are not used
      if @md.flag_endnotes
        tuned_file << @pb
        h={ ln: 2, obj: 'Endnotes', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ ln: 4, obj: 'Endnotes', name: 'endnotes', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ obj: 'Endnotes' }
      end
      if @md.book_idx
        tuned_file << @pb
        h={ ln: 2, obj: 'Index', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ ln: 4, obj: 'Index', name: 'book_index', autonum_: false }
        tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
        h={ obj: 'Index' }
      end
      tuned_file << @pb
      h={ ln: 2, obj: 'Metadata', autonum_: false, ocn_: false }
      tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      h={ ln: 4, obj: 'SiSU Metadata, document information', name: 'metadata', autonum_: false, ocn_: false }
      tuned_file << SiSU_DAL_DocumentStructure::ObjectHeading.new.heading_insert(h)
      tuned_file
      h={ obj: 'eof' }
      # header values collected during the loop are wrapped in a metadata object
      meta=SiSU_DAL_DocumentStructure::ObjectMetadata.new.metadata(@metadata)
      [tuned_file,meta]
    end
    # Splits a serialised table string into rows (separated by Mx[:tc_c]) and
    # each row into its cells (separated by Mx[:tc_p]).
    # Returns an array of rows, each an array of cell strings.
    def table_rows_and_columns_array(table_str)
      table_str.split(/#{Mx[:tc_c]}/).map do |row|
        row.split(/#{Mx[:tc_p]}/)
      end
    end
    # Builds a heading object from the :lv, :ln, :name and :obj entries of h,
    # with ocn fixed to the string '0'.
    def meta_heading(h)
      attrs=[:lv,:ln,:name,:obj].each_with_object({}) do |key,acc|
        acc[key]=h[key]
      end
      attrs[:ocn]='0'
      SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(attrs)
    end
    # Builds a paragraph object for str with ocn_ set to false.
    def meta_para(str)
      SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph({ obj: str, ocn_: false })
    end
    # Prepares each line of @data for block output and returns the new lines.
    #   * lines with content (other than code{ / }code markers): paired
    #     whitespace becomes Mx[:nbsp] pairs, a codeline marker is prefixed when
    #     type is 'code', and trailing whitespace is replaced by Mx[:br_nl]
    #     (preceded by a space when the line ends in a URL)
    #   * whitespace-only lines: replaced by Mx[:br_nl]
    #   * anything else is passed through unchanged
    # @@counter is incremented for every content line while a code block is open.
    def build_lines(type='')
      @data.map do |line|
        if line =~/\S/ && line !~/^code\{|^\}code/ && line.class != Hash
          @@counter+=1 if @@flag['code']
          txt=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}").
            gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
          txt=txt.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # REMOVE try sort for texpdf special case
          if txt =~/(?:https?|file|ftp):\/\/\S+$/
            txt.gsub(/\s*$/," #{Mx[:br_nl]}")
          else
            txt.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code'
          end
        elsif line =~/^\s*$/
          line.gsub(/\s*$/,"#{Mx[:br_nl]}")
        else
          line
        end
      end
    end
  end
  # Normalises the structural markup of a single document object before later
  # processing; #structure is the entry point.
  class Structure                                          # this must happen early
    # md::  document parameters; markup_version and lv1..lv6 are read from it
    # dob:: the document object to normalise
    def initialize(md,dob)
      @md=md
      @dob=dob
    end
    # Applies the legacy-markup normalisation, then the header-driven heading
    # detection, and returns the resulting object.
    def structure
      structure_markup_normalize
      structure_markup
      @dob
    end
    # Promotes a paragraph to a heading when its text starts with one of the
    # level patterns @md.lv1..@md.lv6 (tried in that order). Only non-bullet
    # paragraphs qualify, and only when hang and indent either both lack a
    # digit 1-9 or differ from each other. Returns @dob (possibly replaced).
    def structure_markup                                   #build structure where structure provided only in meta header
      candidate=(@dob.is ==:para) &&
        (((@dob.hang !~/[1-9]/) && (@dob.indent !~/[1-9]/)) ||
          (@dob.hang != @dob.indent))
      return @dob unless candidate && !@dob.bullet_
      text=@dob.obj
      lv,ln=if /^#{@md.lv1}/ === text then ['A',1]
      elsif /^#{@md.lv2}/ === text then ['B',2]
      elsif /^#{@md.lv3}/ === text then ['C',3]
      elsif /^#{@md.lv4}/ === text then ['1',4]
      elsif /^#{@md.lv5}/ === text then ['2',5]
      elsif /^#{@md.lv6}/ === text then ['3',6]
      end
      if lv
        @dob=SiSU_DAL_DocumentStructure::ObjectHeading.new.heading({ lv: lv, ln: ln },@dob)
      end
      @dob
    end
    # For markup versions below 0.38, rewrites leading heading markers into the
    # internal Mx[:lv_o]N:name Mx[:lv_c] form (A-C become 1-3, 1-3 become 4-6,
    # 4-6 become '!_') and renumbers the levels named on @level:/@markup:
    # header lines. This path calls gsub and =~ on @dob, so it expects @dob to
    # behave like a String. Returns @dob.
    def structure_markup_normalize                         #needs a bit of thinking
      return @dob unless @md.markup_version.determined < 0.38                #%convert internal representation, consider making 0.38 structure default ([A-C1-6] instead of [1-9]), requires downstream changes
      rules=[[/^[456]~/,'!_']]
      [['3',6],['2',5],['1',4],[':?C',3],[':?B',2],[':?A',1]].each do |mark,level|
        rules << [/^#{mark}~(\S+)/,"#{Mx[:lv_o]}#{level}:\\1#{Mx[:lv_c]}"]
        rules << [/^#{mark}~\s+/,"#{Mx[:lv_o]}#{level}:#{Mx[:lv_c]}"]
      end
      @dob=rules.inject(@dob) {|txt,(pattern,sub)| txt.gsub(pattern,sub) }
      if @dob =~/^@(?:level|markup):\s/
        # digits are shifted first so the letters can then take over 1-3
        renumber=[[/3/,'6'],[/2/,'5'],[/1/,'4'],[/:?C/,'3'],[/:?B/,'2'],[/:?A/,'1']]
        @dob=renumber.inject(@dob) {|txt,(pattern,sub)| txt.gsub(pattern,sub) }
      end
      @dob
    end
    # For markup versions below 0.38, rewrites leading N~ markers (N in 1-6) in
    # @t_o into the internal Mx[:lv_o]N:name Mx[:lv_c] form, and 7~-9~ into '!_'.
    # Returns @t_o.
    # NOTE(review): @t_o is not assigned anywhere in this class - confirm this
    # method is still in use.
    def structure_marks
      return @t_o unless @md.markup_version.determined < 0.38
      marked=(1..6).inject(@t_o) do |txt,level|
        txt.gsub(/^#{level}~(\S+)/,"#{Mx[:lv_o]}#{level}:\\1#{Mx[:lv_c]}").
          gsub(/^#{level}~\s+/,"#{Mx[:lv_o]}#{level}:#{Mx[:lv_c]}")
      end
      @t_o=marked.gsub(/^[789]~/,'!_')
    end
  end
  # Assigns object citation numbers (ocn), per-type sequence labels and
  # node/parent references to the document objects held in @data.
  class OCN
    # md::   document parameters; lv1..lv6 are read from it in #ocn
    # data:: the document objects to number
    def initialize(md,data)
      @md,@data=md,data
    end
    # Walks @data in order and returns a new array (@o_array) of the same
    # objects, with numbering attributes set on those that qualify.
    #
    # Numbered objects get a running ocn plus a type label in osp: h<n>
    # headings, t<n> tables, c<n> code, g<n> group/block/alt/verse, i<n>
    # images, p<n> paragraphs. Headings additionally get node
    # ("level:count;ocn") and parent (the latest node one level up).
    # Objects whose text carries one of the "non object" markers are rebuilt
    # with ocn_ false and no ocn.
    def ocn                                                                      #and auto segment numbering increment
      data=@data
      @o_array=[]
      # all counters start at 0
      # NOTE(review): ocnm, ocnk, nm and node_count_flag are not referenced again
      # in this method, and ocno is incremented but never read
      node=ocn=ocn_dv=ocn_sp=ocnh=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnm=ocnu=ocnk=nm=0 # h heading, o other, t table, g group, i image
      node_count_flag=false
      regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|<hr width|#{Mx[:br_endnotes]}|\A\s*\Z/mi #ocn here #&nbsp; added with Tune.code #¡
      parent=node1=node2=node3=node4=node5=node6=nil
      data.each do |dob|
        h={}
        # candidates for numbering: text not excluded by the regex above (code
        # objects are always candidates), not a comment/layout/meta object, no
        # "non object" marker in the text, and dob.ocn_ set
        if (dob.obj !~ regex_exclude_ocn_and_node || dob.is ==:code) \
        && (dob.of !=:comment \
        && dob.of !=:layout \
        && dob.of !=:meta) \
        && dob.obj !~/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/ \
        && dob.ocn_
          #dob.ln now is determined, and set earlier, check how best to remove this -->
          # numeric depth 1-9 derived from the heading's level marker
          if dob.is==:heading
             ln=case dob.lv
             when 'A'; 1
             when 'B'; 2
             when 'C'; 3
             when '1'; 4
             when '2'; 5
             when '3'; 6
             when '4'; 7
             when '5'; 8
             when '6'; 9
             end
          end
          # `not A or not B`: the object is numbered unless its text contains one of
          # <:#>, ~#, -# and dob.toc_ is also set
          if not dob.obj =~/<:#>|~#|-#/ \
          or not dob.toc_                                  # fix this no longer in dob.obj
            ocn+=1
            if dob.is==:heading \
            and (ln.to_s =~/^[1-9]/ \
            or ln.to_s =~@md.lv1 \
            or ln.to_s =~@md.lv2 \
            or ln.to_s =~@md.lv3 \
            or ln.to_s =~@md.lv4 \
            or ln.to_s =~@md.lv5 \
            or ln.to_s =~@md.lv6)
              ocnh+=1
              # per-level heading counters; each node string is "level:count;ocn" and
              # the parent is the most recent node of the level above (0 for level 1)
              # NOTE(review): ln is an Integer here, so `ln=~@md.lvN` depends on
              # Object#=~, which newer Ruby versions no longer provide - confirm
              # against the targeted Ruby version
              if ln==1 \
              or ln=~@md.lv1; ocnh1+=1                     #heading
                node1="1:#{ocnh1};#{ocn}"
                node,ocn_sp,parent=node1,"h#{ocnh}",0 #FIX
              elsif ln==2 \
              or ln=~@md.lv2; ocnh2+=1
                node2="2:#{ocnh2};#{ocn}"
                node,ocn_sp,parent=node2,"h#{ocnh}",node1
              elsif ln==3 \
              or ln=~@md.lv3; ocnh3+=1
                node3="3:#{ocnh3};#{ocn}"
                node,ocn_sp,parent=node3,"h#{ocnh}",node2
              elsif ln==4 \
              or ln=~@md.lv4; ocnh4+=1
                node4="4:#{ocnh4};#{ocn}"
                node,ocn_sp,parent=node4,"h#{ocnh}",node3
              elsif ln==5 \
              or ln=~@md.lv5; ocnh5+=1
                node5="5:#{ocnh5};#{ocn}"
                node,ocn_sp,parent=node5,"h#{ocnh}",node4
              elsif ln==6 \
              or ln=~@md.lv6; ocnh6+=1
                node6="6:#{ocnh6};#{ocn}"
                node,ocn_sp,parent=node6,"h#{ocnh}",node5
              end
            else
              # non-heading objects: per-type counter, parent is the current heading node
              ocno+=1
              if dob.is==:table
                ocnt+=1
                ocn_sp,parent="t#{ocnt}",node
              elsif dob.is==:code
                ocnc+=1
                ocn_sp,parent="c#{ocnc}",node
              elsif dob.is==:group \
              || dob.is==:block \
              || dob.is==:alt \
              || dob.is==:verse
                ocng+=1 #group, poem
                ocn_sp,parent="g#{ocng}",node
              elsif dob.is==:image #check
                ocni+=1
                ocn_sp,parent="i#{ocni}",node
              else ocnp+=1                                 #paragraph
                ocn_sp,parent="p#{ocnp}",node
              end
            end
            # write the computed numbering back onto the object
            if dob.is==:heading
              dob.ln,dob.node,dob.ocn,dob.odv,dob.osp,dob.parent=ln,node,ocn,ocn_dv,ocn_sp,parent
            else
              if dob.of !=:meta \
              && dob.of !=:comment \
              && dob.of !=:layout
                dob.ocn,dob.odv,dob.osp,dob.parent=ocn,ocn_dv,ocn_sp,parent
              end
            end
          # marked as unnumbered: ocn is not advanced, the object is labelled u<n>
          # and receives the current ocn value
          else ocnu+=1
            dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}~##{Mx[:fa_c]}/,'') if dob.obj
            ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
            dob.ocn,dob.odv,dob.osp=ocn,ocn_dv,ocn_sp
          end
          h
        # "no heading" marker: strip it and rebuild the object without an ocn
        # (headings are rebuilt with toc_ true)
        elsif dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
          dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'')
          if dob.is==:para
            h={ obj: dob.obj, ocn_: false, ocn: nil }
            dob=SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h,dob)
          elsif dob.is==:heading
            h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: true }
            dob=SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,dob)
          end
        # "dummy heading" marker: same treatment, but headings are rebuilt with toc_ false
        elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
          dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'')
          if dob.is==:para
            h={ obj: dob.obj, ocn_: false, ocn: nil }
            dob=SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h,dob)
          elsif dob.is==:heading
            h={ obj: dob.obj, ocn_: false, ocn: nil, toc_: false }
            dob=SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h,dob)
          end
        else dob
        end
        # block-type objects have doubled newlines reduced to single ones
        if dob.is==:code \
        || dob.is==:verse \
        || dob.is==:alt \
        || dob.is==:group \
        || dob.is==:block
          dob.obj=dob.obj.gsub(/\n\n/,"\n") #newlines taken out
        end
        @o_array << dob
      end
      @o_array
    end
  end
  class XML
    # md::   document parameters object
    # data:: the document objects to arrange into an XML structure
    def initialize(md,data)
      @data=data
      @md=md
    end
    # Sets up the level tag names (@s) and the indent unit (@sp), then returns
    # the result of structure_build.
    def dom
      @s=%w[0 A B C 1 2 3]
      @sp='  '
      structure_build
    end
    def structure_build
      data=@data
      tuned_file=[]
      hs=[0,false,false,false]
      t={ lv: @s[0], status: 'open' }
      tuned_file << tags(t)
      if @md.opt.cmd =~/V/
        puts "\nXML sisu structure outline --->\n"
        puts "<#{@s[0]}>"
      end
      data.each_with_index do |o,i|
        if o.is ==:heading \
        || o.is ==:heading_insert
          case o.ln
          when 1
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs=[1,true,false,false]
          when 2
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs=[2,true,true,false]
          when 3
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs=[3,true,true,true]
          when 4
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs[0]=4
          when 5
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs[0]=5
          when 6
            tuned_file << tag_close(o.ln,hs)
            tuned_file << tag_open(o,@s)
            if @md.opt.cmd =~/V/
              puts_tag_close(o.ln,hs)
              puts_tag_open(o,@s)
            end
            hs[0]=6
          end
        end
        tuned_file << o
      end
      puts_tag_close(0,hs) if @md.opt.cmd =~/V/
      tuned_file << tag_close(0,hs)
      tuned_file=tuned_file.flatten
    end
    def tags(o)
      tag=if o[:status]=='open'
        %{<#{o[:lv]} id="#{o[:node]}">}
      else "</#{o[:lv]}>"
      end
      ln=case o[:lv]
      when 'A'; 1
      when 'B'; 2
      when 'C'; 3
      when '1'; 4
      when '2'; 5
      when '3'; 6
      when '4'; 7
      when '5'; 8
      when '6'; 9
      end
      h={ tag: tag, node: o[:node], lv: o[:lv], ln: ln, status: o[:status] }
      SiSU_DAL_DocumentStructure::ObjectStructure.new.xml_dom(h) #downstream code utilise else ignore like comments
    end
    def tag_open(o,tag)
      t={ lv: tag[o.ln], node: o.node, status: 'open' }
      t_o=tags(t)
      t_o
    end
    def tag_close(lev,hs)
      ary=[]
      case hs[0]
      when 1
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      when 2
        if (lev <= 2) and hs[2]
          t={ lv: @s[2], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      when 3
        if (lev <= 3) and hs[3]
          t={ lv: @s[3], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 2) and hs[2]
          t={ lv: @s[2], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      when 4
        if (lev <= 4)
          t={ lv: @s[4], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 3) and hs[3]
          t={ lv: @s[3], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 2) and hs[2]
          t={ lv: @s[2], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      when 5
        if (lev <= 5)
          t={ lv: @s[5], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 4)
          t={ lv: @s[4], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 3) and hs[3]
          t={ lv: @s[3], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 2) and hs[2]
          t={ lv: @s[2], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      when 6
        if (lev <= 6)
          t={ lv: @s[6], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 5)
          t={ lv: @s[5], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 4)
          t={ lv: @s[4], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 3) and hs[3]
          t={ lv: @s[3], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 2) and hs[2]
          t={ lv: @s[2], status: 'close' }
          ary << tags(t)
        end
        if (lev <= 1) and hs[1]
          t={ lv: @s[1], status: 'close' }
          ary << tags(t)
        end
        if (lev==0)
          t={ lv: @s[0], status: 'close' }
          ary << tags(t)
        end
      end
      ary
    end
    def puts_tag_open(o,tag)
      puts %{#{@sp*o.ln}<#{tag[o.ln]} id="#{o.node}">}
    end
    def puts_tag_close(lev,hs)
      case hs[0]
      when 1
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      when 2
        puts "#{@sp*2}</#{@s[2]}>" if (lev <= 2) and hs[2]
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      when 3
        puts "#{@sp*3}</#{@s[3]}>" if (lev <= 3) and hs[3]
        puts "#{@sp*2}</#{@s[2]}>" if (lev <= 2) and hs[2]
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      when 4
        puts "#{@sp*4}</#{@s[4]}>" if (lev <= 4)
        puts "#{@sp*3}</#{@s[3]}>" if (lev <= 3) and hs[3]
        puts "#{@sp*2}</#{@s[2]}>" if (lev <= 2) and hs[2]
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      when 5
        puts "#{@sp*5}</#{@s[5]}>" if (lev <= 5)
        puts "#{@sp*4}</#{@s[4]}>" if (lev <= 4)
        puts "#{@sp*3}</#{@s[3]}>" if (lev <= 3) and hs[3]
        puts "#{@sp*2}</#{@s[2]}>" if (lev <= 2) and hs[2]
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      when 6
        puts "#{@sp*6}</#{@s[6]}>" if (lev <= 6)
        puts "#{@sp*5}</#{@s[5]}>" if (lev <= 5)
        puts "#{@sp*4}</#{@s[4]}>" if (lev <= 4)
        puts "#{@sp*3}</#{@s[3]}>" if (lev <= 3) and hs[3]
        puts "#{@sp*2}</#{@s[2]}>" if (lev <= 2) and hs[2]
        puts "#{@sp*1}</#{@s[1]}>" if (lev <= 1) and hs[1]
        puts "</#{@s[0]}>"         if (lev==0)
      end
    end
  end
end
__END__
