package xm::cread;

use strict;
use xm::o;

# DESC - return the free-form description text of this module.
# The text is handed to xm::o::args_stdin by ARGS; it is returned
# verbatim, including its leading and trailing newline.
sub DESC
{
    my $about = "
  take the plane C source code and convert it into xml'ish, which would be 
  almost like calling xm::sub.off on the complete source text. 
  (xm::sub.off replaces  the special-chars [\&<>\"] with their entity-refs). 
 
 Yet this would loose just  too much information, instead this subroutine 
  marks the C tokens as
    CSTR = c-string (\"...\")
    CCHR = c-char   ('...')
    CDOC = c-comment (/*...*/ and //...)
  and the preprocessor lines are special too, so there is
    CPRE = c-preprocessor line (#...)
  while still making sure to mark CSTR/CCHR/CDOCs inside CPRE which is the 
  way newer cpp(1) is supposed to work.

  much of the later scanner-modules may call xm::sub.on on the enclosed text
  to scan in a more intelligible way (understood by C programmers).
";
    return $about;
}

# ARGS - forward the caller's arguments, followed by the module
# description from DESC, to xm::o::args_stdin and return its result.
# @_ is passed through un-copied so the callee sees the same aliases.
sub ARGS
{
    return xm::o::args_stdin(@_, DESC());
}

# DO($in, $cdoc, $cchr, $cstr, $cpre, $cerr)
#
# Convert C source text into xml'ish text:
#   - '&', '<' and '>' are replaced by their entity-refs first,
#   - comments, character constants and string literals are wrapped in
#     <$cdoc>, <$cchr> and <$cstr> elements,
#   - preprocessor lines (including backslash-continued lines) are wrapped
#     in <$cpre>, and the tokens inside them are marked as well,
#   - finally every '"' is replaced by &quot;.
#
# Parameters:
#   $in    the C source text
#   $cdoc  element name for comments             (default "CDOC")
#   $cchr  element name for character constants  (default "CCHR")
#   $cstr  element name for string literals      (default "CSTR")
#   $cpre  element name for preprocessor lines   (default "CPRE")
#   $cerr  element name for the fallback case    (default "CERR")
# An undefined or empty element name selects the default.
#
# Returns the converted text.
sub DO 
{
    my ($in,$cdoc,$cchr,$cstr,$cpre,$cerr) = @_;
    $cdoc = "CDOC" if not defined $cdoc or not length $cdoc;
    $cchr = "CCHR" if not defined $cchr or not length $cchr;
    $cstr = "CSTR" if not defined $cstr or not length $cstr;
    $cpre = "CPRE" if not defined $cpre or not length $cpre;
    $cerr = "CERR" if not defined $cerr or not length $cerr;

    # '&' has to be escaped first, otherwise the '&' of the entity-refs
    # produced for '<' and '>' would be escaped a second time.
    $in =~ s{\&} {\&amp;}gs;
    $in =~ s{\<} {\&lt;}gs;
    $in =~ s{\>} {\&gt;}gs;

    # One pattern for comments, character constants and strings, shared by
    # the main pass and the pass over the inside of preprocessor lines.
    # An escape is a backslash plus ANY character including a newline, so
    # that a literal spliced with backslash-newline is still recognized.
    my $token = qr{ / (
                        / .*                      # C++ style
                        |
                        \* [\s\S]*? \*/           # C style
                      )                           # (1) comment without first slash
                  | ( ' (?: [^\\\'] | \\ [\s\S] )+ ' )   # (2) character constants
                  | ( " (?: [^\\\"] | \\ [\s\S] )* " )   # (3) strings
                  }x;

    # tokenize the inside of a preprocessor section
    my $mark_tokens = sub
    {
        my $text = shift;

        $text =~ s{$token}{
                defined $1 ? "<$cdoc>"."/".$1."</$cdoc>" :
                defined $2 ? "<$cchr>".$2."</$cchr>" :
                defined $3 ? "<$cstr>".$3."</$cstr>" :
                             "<$cerr></$cerr>"
            }ge;

        return $text;
    };

    # C and C++ comments, strings and characters, ... and the c-preprocessor.
    # Continued lines are matched as "anything, backslash, newline" BEFORE
    # the last line: a greedy .* followed by an optional continuation group
    # would swallow the backslash and never take the following lines.
    $in =~ s{ $token
            | ( ^ \s* \#                  # (4) preprocessor line
                (?: .* \\ \r? \n )*       #     lines ending in a backslash
                .* )                      #     last line of the directive
            }{
                defined $1 ? "<$cdoc>"."/".$1."</$cdoc>" :
                defined $4 ? "<$cpre>".$mark_tokens->($4)."</$cpre>" :
                defined $2 ? "<$cchr>".$2."</$cchr>" :
                defined $3 ? "<$cstr>".$3."</$cstr>" :
                             "<$cerr></$cerr>"
            }xgem;

    # quotes are escaped last, the token patterns above need the real ones
    $in =~ s{\"} {\&quot;}gs;

    # move CPRE to the correct position - behind the whitespace (and blank
    # lines) that the preprocessor pattern picked up in front of the '#'.
    $in =~ s{(<\Q$cpre\E>)(\s+)}{$2$1}gs;

    return $in;
}

1;

