(library (sph web html) (export html-fold-multipart-form-data html-multipart-form-data-ref html-multipart-form-data? html-parse-urlencoded-form-data html-read-multipart-form-data html-uri-decode html-uri-encode) (import (ice-9 pretty-print) (rnrs bytevectors) (rnrs exceptions) (sph) (sph conditional) (only (guile) identity string-null? string-downcase assoc-ref string-suffix? eof-object? char-alphabetic? char-numeric? string-fold string-join append! string-index string-split list->char-set open-input-string reverse! string-contains string-drop-right string-prefix? string-trim-right unread-string) (only (ice-9 regex) match:substring regexp-substitute/global) (only (sph alist) alist-ref alist-keys-map) (only (sph string) string-equal?) (only (sph two) read-line-crlf read-line-crlf-trim) (only (sph web http) http-read-header http-read-header-value) (only (srfi srfi-1) find)) (define (html-uri-decode str) "string -> string" (regexp-substitute/global #f "\\+|%([0-9A-Fa-f][0-9A-Fa-f])" str (q pre) (l (m) (if (string=? "+" (match:substring m 0)) " " (integer->char (string->number (match:substring m 1) 16)))) (q post))) (define html-uri-encode (let ((safe-char? (l (char) (or (char-alphabetic? char) (char-numeric? char))))) (lambda (r) "string -> string" (list->string (reverse (string-fold (l (e r) (if (safe-char? e) (pair e r) (append! (reverse (string->list (number->string (char->integer e) 16))) (pair #\% r)))) (list) r)))))) (define (html-parse-urlencoded-form-data request-body) "string -> alist parse an application/x-www-form-urlencoded string and result in an alist" (if (string-null? request-body) (list) (map (l (e) (let (split-index (string-index e #\=)) (pair (substring e 0 split-index) (substring e (+ 1 split-index))))) (string-split (html-uri-decode request-body) #\&)))) (define* (html-multipart-form-data? headers #:optional (header-key "content_type")) "(string ...) [string] -> boolean" (if-pass (assoc-ref headers header-key) (l (content-type) (string-prefix? "multipart/form-data" content-type)))) (define (get-boundary-from-headers headers) "(string ...) -> string/boolean-false" (let (content-type (assoc-ref headers "content_type")) (if content-type (assoc-ref (http-read-header-value content-type) "boundary") content-type))) (define (read-boundary port) "port -> string/eof-object" (let (boundary (read-line-crlf-trim port)) (if (eof-object? boundary) boundary (if (string-null? boundary) (raise (pair (q boundary-not-found) boundary)) boundary)))) (define-syntax-rule (header-multipart-mixed? a) (if-pass (or (alist-ref a "content-type") (alist-ref a "Content-Type")) (l (content-type) (alist-ref content-type "multipart/mixed")))) (define* (html-fold-multipart-form-data proc-part proc-multipart result port #:optional boundary) ":: procedure:{alist:header procedure:fold-lines:{string:line any:result procedure:next:{any:result ->} ->} result -> any} procedure:{header port result ->} any port [string] -> any ---- proc-multipart is only called for multipart/mixed. a functional parser for multipart-form-data that allows to stop after any cr-lf-terminated line or part and reads content stream-like via a reader procedure and supports nested multipart/mixed data. see also html-read-multipart-form-data" ;see the code of "html-read-multipart-form-data" for a usage example (let* ((boundary (if boundary boundary (read-boundary port))) (header (http-read-header port))) (if ;todo: also match "multipart/alternative", "multipart/digest" and "multipart/parallel" here, ; it is othewrise the same as "multipart/mixed" (header-multipart-mixed? header) (proc-multipart header port result) (letrec ( (fold-lines (l (proc-line result-fold-lines after-fold-lines) (let (line (read-line-crlf port)) (if (eof-object? line) (after-fold-lines result-fold-lines result identity) (if (string-contains line boundary) (if (string-contains line (string-append boundary "--")) (after-fold-lines result-fold-lines result identity) (after-fold-lines result-fold-lines result (l (result) (html-fold-multipart-form-data proc-part proc-multipart result port boundary)))) (proc-line line result-fold-lines (l (result) (fold-lines proc-line result after-fold-lines))))))))) (proc-part header fold-lines result))))) (define* (html-read-multipart-form-data port #:optional normalise-header-keys?) "port [procedure:{string -> string/any}] -> list parses all multipart form data available on port into a list. for stream-like and conditional parsing see html-fold-multipart-form-data" (reverse (html-fold-multipart-form-data (l (header fold-lines result) (fold-lines (l (line result next) (next (pair line result))) (list) (l (result-fold-lines result next-part) (next-part (pair (pair (if normalise-header-keys? (alist-keys-map string-downcase header) header) (if (null? result-fold-lines) result-fold-lines (string-join (reverse (pair (string-drop-right (first result-fold-lines) 2) (tail result-fold-lines))) ""))) result))))) (l (header port result) (pair (pair (if normalise-header-keys? (alist-keys-map string-downcase header) header) (html-read-multipart-form-data port)) result)) (list) port))) (define (html-multipart-form-data-ref a name) "list string -> pair for parsed multipart form data like html-read-multipart-form-data creates. retrieves (alist:header . string:body) pairs by content-disposition name" (find (l (e) (string-equal? name (assoc-ref (or (assoc-ref (first e) "content-disposition") (assoc-ref (first e) "Content-Disposition")) "name"))) a)))