;*=====================================================================*/
;*    serrano/prgm/project/bigloo/api/web/src/Llib/xml.scm             */
;*    -------------------------------------------------------------    */
;*    Author      :  Manuel Serrano                                    */
;*    Creation    :  Fri Mar 11 16:23:53 2005                          */
;*    Last change :  Fri Sep  8 07:21:27 2006 (serrano)                */
;*    Copyright   :  2005-06 Manuel Serrano                            */
;*    -------------------------------------------------------------    */
;*    XML parsing                                                      */
;*=====================================================================*/

;*---------------------------------------------------------------------*/
;*    The module                                                       */
;*---------------------------------------------------------------------*/
(module __web_xml

   (export (xml-parse::pair-nil ::input-port
				#!optional
				(clength::int 0)
				(proc::procedure list)
				(specials::pair-nil '())
				(strict::bool #t))
	   (xml-string-decode::bstring ::bstring)
	   (xml-string-decode!::bstring ::bstring)
	   (xml-string-encode::bstring ::bstring)
	   (xml-attribute-encode::obj ::obj)))
 
;*---------------------------------------------------------------------*/
;*    xml-parse ...                                                    */
;*---------------------------------------------------------------------*/
(define (xml-parse port #!optional
		   (clength::int 0)
		   (proc::procedure list)
		   (specials::pair-nil '())
		   (strict::bool #t))
   (when (>fx clength 0)
      (input-port-fill-barrier-set! port clength))
   (let loop ()
      (let ((obj (read/rp xml-grammar port proc specials strict)))
	 (cond
	    ((xmlkont? obj)
	     (cons (xmlkont-val obj) (loop)))
	    ((eof-object? obj)
	     '())
	    ((and (>fx clength 0) (>=fx (input-port-position port) clength))
	     (list obj))
	    (else
	     (cons obj (loop)))))))

;*---------------------------------------------------------------------*/
;*    xmlkont ...                                                      */
;*---------------------------------------------------------------------*/
(define-struct xmlkont val rest)

;*---------------------------------------------------------------------*/
;*    xml-parse-error ...                                              */
;*---------------------------------------------------------------------*/
(define (xml-parse-error msg obj name pos)
   (raise
    (instantiate::&io-parse-error
       (proc 'xml-parse)
       (msg msg)
       (obj obj)
       (fname name)
       (location pos))))

;*---------------------------------------------------------------------*/
;*    collect-up-to ...                                                */
;*---------------------------------------------------------------------*/
(define-inline (collect-up-to tag specials strict ignore port)
   (define (regular)
      (let ((name (input-port-name port))
	    (po (input-port-position port)))
	 (let loop ((acc '())
		    (item (ignore)))
	    (cond
	       ((symbol? item)
		(cond
		   ((eq? item tag)
		    (reverse! acc))
		   (strict
		    (xml-parse-error "Illegal closing tag"
				     (format "`~a' expected, `~a' provided"
					     tag item)
				     name po))
		   (else
		    (reverse! acc))))
	       ((xmlkont? item)
		(loop (cons (xmlkont-val item) acc) (xmlkont-rest item)))
	       ((eof-object? item)
		(if strict
		    (xml-parse-error
		     (format "Premature end of line, expecting tag `~a'"
			     tag)
		     item name po)
		    (reverse! acc)))
	       (else
		(let ((po (input-port-last-token-position port)))
		   (loop (econs item acc (list 'at name po)) (ignore))))))))
   (let ((spec (assq tag specials)))
      (if (pair? spec)
	  (if (null? (cdr spec))
	      '()
	      ((cdr spec) port))
	  (regular))))

;*---------------------------------------------------------------------*/
;*    attribute-grammar ...                                            */
;*---------------------------------------------------------------------*/
(define attribute-grammar
   (regular-grammar ((id (: (in ("azAZ") "_") (* (in ("azAZ09") ":_-"))))
		     tag
		     strict)
      ((+ (in " \t\n\r"))
       (ignore))
      ((: #\" (* (or (out #\\ #\") (: #\\ all))) #\")
       (the-substring 1 (-fx (the-length) 1)))
      ((: #\' (* (or (out #\\ #\') (: #\\ all))) #\')
       (the-substring 1 (-fx (the-length) 1)))
      ((+ digit)
       (if strict
	   (xml-parse-error "Illegal attribute value"
			    (the-string)
			    (input-port-name (the-port))
			    (input-port-position (the-port)))
	   (the-string)))
      ((: id "=")
       (let* ((key (the-substring 0 (-fx (the-length) 1)))
	      (val (ignore)))
	  (cons (string->symbol key) val)))
      ((: id (+ blank) "=")
       (let* ((key (the-substring 0 (-fx (the-length) 2)))
	      (val (ignore)))
	  (let loop ((i (-fx (string-length key) 1)))
	     (case (string-ref key i)
		((#\space #\tab #\Newline)
		 (loop (-fx i 1)))
		(else
		 (set! key (substring key 0 i)))))
	  (cons (string->symbol key) val)))
      ((: id)
       (let* ((key (the-substring 0 (-fx (the-length) 1))))
	  (cons (string->symbol key) key)))
      ((or "/>" ">")
       (the-symbol))
      (else
       (let ((c (the-failure)))
	  (if (not (eof-object? c))
	      (xml-parse-error "Illegal attribute character"
			       (string-append "{" (string c) "}"
					      (read-line (the-port)))
			       (input-port-name (the-port))
			       (input-port-position (the-port)))
	      (xml-parse-error
	       (format "Premature end of line, expecting tag `~a'" tag)
	       c
	       (input-port-name (the-port))
	       (-fx (input-port-position (the-port)) 1)))))))

;*---------------------------------------------------------------------*/
;*    cdata-grammar ...                                                */
;*---------------------------------------------------------------------*/
(define cdata-grammar
   (regular-grammar ()
      ((* (or (out "]") (: "]" (out "]")) (: "]]" (out ">"))))
       (let* ((res (the-string))
	      (rest (ignore)))
	  (if (eq? rest 'end)
	      res
	      (xml-parse-error "Illegal <![CDATA["
			       res
			       (input-port-name (the-port))
			       (input-port-position (the-port))))))
      ((: "]]>" (? "\n"))
       'end)
      (else
       (let* ((c (the-failure))
	      (msg (if (not (eof-object? c))
		       "Illegal <![CDATA[ character"
		       "Premature end of line, expecting tag `]]>'")))
	  (xml-parse-error msg
			   c
			   (input-port-name (the-port))
			   (input-port-position (the-port)))))))
       
;*---------------------------------------------------------------------*/
;*    xml-grammar ...                                                  */
;*---------------------------------------------------------------------*/
(define xml-grammar
   (regular-grammar ((id (: (in ("azAZ") "!?") (* (in ("azAZ09") ":_-"))))
		     make
		     specials
		     strict)
		     
      ((: "<!--"
	  (* (or (out "-") (: "-" (out "-")) (: "--" (out ">"))))
	  "-->")
       (ignore))
      ((: "<!" (: (or (out "[-") (: "-" (out "-"))) (* (out ">"))) ">")
       (cons 'declaration (the-string)))
      ((: "<!" (: (or (out "[-") (: "-" (out "-"))) (* (out ">"))) ">\n")
       (cons 'declaration (the-substring 0 (-fx (the-length) 1))))
      ("<![CDATA["
       (read/rp cdata-grammar (the-port)))
      ((: "<?" (* (out ">")) ">")
       (cons 'instruction (the-string)))
      ((: "<?" (* (out ">")) ">\n")
       (cons 'instruction (the-substring 0 (-fx (the-length) 1))))
      ((: "<" id ">")
       (let* ((t (the-substring 1 (-fx (the-length) 1)))
	      (ts (string->symbol (string-downcase! t)))
	      (p (the-port))
	      (r (collect-up-to ts specials strict ignore p)))
	  (if (xmlkont? r)
	      (begin
		 (xmlkont-val-set! r (make ts '() (xmlkont-val r)))
		 r)
	      (make ts '() r))))
      ((: "<" id ">\n")
       (let* ((t (the-substring 1 (-fx (the-length) 2)))
	      (ts (string->symbol (string-downcase! t)))
	      (p (the-port))
	      (r (collect-up-to ts specials strict ignore p)))
	  (if (xmlkont? r)
	      (begin
		 (xmlkont-val-set! r (make ts '() (xmlkont-val r)))
		 r)
	      (make ts '() r))))
      ((: "<" id "/>")
       (let ((t (the-substring 1 (-fx (the-length) 2))))
	  (make (string->symbol (string-downcase! t)) '() '())))
      ((: "<" id "/>\n")
       (let ((t (the-substring 1 (-fx (the-length) 3))))
	  (make (string->symbol (string-downcase! t)) '() '())))
      ((: "<" id (in " \n\t\r"))
       (let* ((t (the-substring 1 (-fx (the-length) 1)))
	      (ts (string->symbol (string-downcase! t)))
	      (p (the-port)))
	  (let loop ((attr '()))
	     (let ((obj (read/rp attribute-grammar p t strict)))
		(cond
		   ((pair? obj)
		    (loop (cons obj attr)))
		   ((eq? obj '>)
		    (let ((r (collect-up-to ts specials strict ignore p)))
		       (if (xmlkont? r)
			   (let ((v (make ts (reverse! attr) (xmlkont-val r))))
			      (xmlkont-val-set! r v)
			      r)
			   (make ts (reverse! attr) r))))
		   ((eq? obj '/>)
		    (make ts (reverse! attr) '())))))))
      ((: "</" id ">")
       (string->symbol
	(string-downcase! (the-substring 2 (-fx (the-length) 1)))))
      ((: "</" id ">\n")
       (string->symbol
	(string-downcase! (the-substring 2 (-fx (the-length) 2)))))
      ((+ (out "<"))
       (the-string))
      (else
       (let ((c (the-failure)))
	  (cond
	     ((not (eof-object? c))
	      (xml-parse-error "Illegal character"
			       (string-append "{" (string c) "}")
			       (input-port-name (the-port))
			       (input-port-position (the-port))))
	     (else
	      c))))))

;*---------------------------------------------------------------------*/
;*    char-hexnumeric? ...                                             */
;*---------------------------------------------------------------------*/
(define (char-hexnumeric? c)
   (or (char-numeric? c)
       (and (char>=? c #\A) (char<=? c #\F))
       (and (char>=? c #\a) (char<=? c #\f))))

;*---------------------------------------------------------------------*/
;*    xml-string-decode-inner! ...                                     */
;*---------------------------------------------------------------------*/
(define (xml-string-decode-inner! str ol nl res)
   (define (char-value c)
      (cond
	 ((char-numeric? c)
	  (-fx (char->integer c) (char->integer #\0)))
	 ((char<=? c #\F)
	  (+fx 10 (-fx (char->integer c) (char->integer #\A))))
	 (else
	  (+fx 10 (-fx (char->integer c) (char->integer #\a))))))
   (let ((ol-2 (-fx ol 2)))
      (let loop ((i 0)
		 (j 0))
	 (if (=fx j nl)
	     res
	     (let ((c (string-ref str i)))
		(if (and (char=? c #\%) (<fx i ol-2))
		    (let ((c1 (string-ref str (+fx i 1)))
			  (c2 (string-ref str (+fx i 2))))
		       (if (and (char-hexnumeric? c1) (char-hexnumeric? c2))
			   (let* ((v1 (char-value c1))
				  (v2 (char-value c2))
				  (d (integer->char (+fx (*fx v1 16) v2))))
			      (string-set! res j d)
			      (loop (+fx i 3) (+fx j 1)))
			   (begin
			      (string-set! res j c)
			      (loop (+fx i 1) (+fx j 1)))))
		    (begin
		       (string-set! res j c)
		       (loop (+fx i 1) (+fx j 1)))))))))

;*---------------------------------------------------------------------*/
;*    xml-decode-count ...                                             */
;*---------------------------------------------------------------------*/
(define (xml-decode-count str ol)
   (let loop ((i (-fx ol 3))
	      (c 0))
      (cond
	 ((=fx i -1)
	  c)
	 ((char=? (string-ref str i) #\%)
	  (if (and (char-hexnumeric? (string-ref str (+fx i 1)))
		   (char-hexnumeric? (string-ref str (+fx i 2))))
	      (loop (-fx i 1) (+fx c 1))
	      (loop (-fx i 1) c)))
	 (else
	  (loop (-fx i 1) c)))))

;*---------------------------------------------------------------------*/
;*    xml-string-decode ...                                            */
;*---------------------------------------------------------------------*/
(define (xml-string-decode str)
   (let ((ol (string-length str)))
      (if (>=fx ol 3)
	  (let ((count (xml-decode-count str ol)))
	     (if (=fx count 0)
		 (string-copy str)
		 (let* ((nl (-fx ol (*fx count 2)))
			(res (make-string nl)))
		    (utf8->iso-latin!
		     (xml-string-decode-inner! str ol nl res)))))
	  (string-copy str))))

;*---------------------------------------------------------------------*/
;*    xml-string-decode! ...                                           */
;*---------------------------------------------------------------------*/
(define (xml-string-decode! str)
   (let ((ol (string-length str)))
      (if (>=fx ol 3)
	  (let ((count (xml-decode-count str ol)))
	     (if (=fx count 0)
		 str
		 (let ((nl (-fx ol (*fx count 2))))
		    (utf8->iso-latin!
		     (string-shrink!
		      (xml-string-decode-inner! str ol nl str)
		      nl)))))
	  str)))

;*---------------------------------------------------------------------*/
;*    xml-string-encode ...                                            */
;*---------------------------------------------------------------------*/
(define (xml-string-encode str)
   (define (count str ol)
      (let loop ((i 0)
		 (n 0))
	 (if (=fx i ol)
	     n
	     (let ((c (string-ref str i)))
		(case c
		   ((#\")
		    (loop (+fx i 1) (+fx n 6)))
		   ((#\&)
		    (loop (+fx i 1) (+fx n 5)))
		   ((#\< #\>)
		    (loop (+fx i 1) (+fx n 4)))
		   (else
		    (loop (+fx i 1) (+fx n 1))))))))
   (define (encode str ol nl)
      (if (=fx nl ol)
	  str
	  (let ((res (make-string nl)))
	     (let loop ((i 0)
			(j 0))
		(if (=fx j nl)
		    res
		    (let ((c (string-ref str i)))
		       (case c
			  ((#\<)
			   (blit-string! "&lt;" 0 res j 4)
			   (loop (+fx i 1) (+fx j 4)))
			  ((#\>)
			   (blit-string! "&gt;" 0 res j 4)
			   (loop (+fx i 1) (+fx j 4)))
			  ((#\&)
			   (blit-string! "&amp;" 0 res j 5)
			   (loop (+fx i 1) (+fx j 5)))
			  ((#\")
			   (blit-string! "&quot;" 0 res j 6)
			   (loop (+fx i 1) (+fx j 6)))
			  (else
			   (string-set! res j c)
			   (loop (+fx i 1) (+fx j 1))))))))))
   (let ((ol (string-length str)))
      (encode str ol (count str ol))))
	 
;*---------------------------------------------------------------------*/
;*    xml-attribute-encode ...                                         */
;*---------------------------------------------------------------------*/
(define (xml-attribute-encode obj)
   (if (not (string? obj))
       obj
       (let ((ol (string-length obj)))
	  (define (count str ol)
	     (let loop ((i 0)
			(j 0))
		(if (=fx i ol)
		    j
		    (if (or (char=? (string-ref str i) #\')
			    (char=? (string-ref str i) #\&))
			(loop (+fx i 1) (+fx j 5))
			(loop (+fx i 1) (+fx j 1))))))
	  (define (encode str ol nl)
	     (if (=fx nl ol)
		 obj
		 (let ((nstr (make-string nl)))
		    (let loop ((i 0)
			       (j 0))
		       (if (=fx j nl)
			   nstr
			   (let ((c (string-ref str i)))
			      (case c
				 ((#\')
				  (string-set! nstr j #\&)
				  (string-set! nstr (+fx j 1) #\#)
				  (string-set! nstr (+fx j 2) #\3)
				  (string-set! nstr (+fx j 3) #\9)
				  (string-set! nstr (+fx j 4) #\;)
				  (loop (+fx i 1) (+fx j 5)))
				 ((#\&)
				  (string-set! nstr j #\&)
				  (string-set! nstr (+fx j 1) #\#)
				  (string-set! nstr (+fx j 2) #\3)
				  (string-set! nstr (+fx j 3) #\8)
				  (string-set! nstr (+fx j 4) #\;)
				  (loop (+fx i 1) (+fx j 5)))
				 (else
				  (string-set! nstr j c)
				  (loop (+fx i 1) (+fx j 1))))))))))
	  (encode obj ol (count obj ol)))))
