(library (sph storage dg import) (export dg-import-files dg-import-files-get-data dg-import-formats sph-storage-dg-import-description) (import (rnrs bytevectors) (rnrs io simple) (sph common) (sph lang itpn) (sph lang scheme) (sph lang parser key-values-table) (sph storage dg) (sph storage dg file) (sph storage dg tag) (sph two)) (define sph-storage-dg-import-description "read data from various formats into a sph-dg database. supported formats file-dot imports files dot separated strings in file name become tags uses (sph storage dg file) and (sph storage dg tag) imported files can get an unique tag that begins with \"import-\" to identify all content from a specific import file-dot similar to file-dot individual itpn packets become files tags from file names are merged with the tags from itpn packets scm-1 a text format created by (sph storage dg export) configuration file reads a configuration file $HOME/.config/sph/dg/import/replacements if it exists it can contain tag replacements with each line being in the format \"tag-name replacement-name ...\" one tag can be replaced with multiple tags, possibly including itself data structures import-data: ((tag-strings (symbol:datum/filesystem any:data/path) ...) ...)") (define config-paths (map ensure-trailing-slash (list (string-append (getenv "HOME") "/.config/sph/dg/import")))) (define-as default-tag-strings list "other") (define-as dg-import-formats list-q file-itpn file-dot scm-1) (define (create-import-tag-string) "-> string" (string-append "import-" (number->string (getpid) 32) (number->string (current-time) 32))) (define* (replace-tags strings table #:optional exclude) "(string ...) hashtable:{string -> (string ...)} [(string ...)] -> (string ...)" (fold-right (l (a r) (let (b (ht-ref table a)) (if b (let ((b (any->list b)) (exclude (pair a exclude))) (append b (replace-tags (complement b exclude) table exclude))) (pair a r)))) (list) strings)) (define (prepare-tag-strings a options) "(string ...) -> ensure tags are lowercase and replace special characters" (ht-bind options (replacements add-tag-strings first-is-id) (if (null? a) default-tag-strings (let* ( (a (if first-is-id (tail a) a)) (a (if add-tag-strings (append a add-tag-strings) a)) (a (if replacements (replace-tags a replacements) a)) (a (map (l (a) (regexp-replace (string-downcase a) "[^a-z0-9-]" "_")) (remove string-null? a)))) (if (null? a) default-tag-strings (delete-duplicates a)))))) (define (file-name->tag-strings name options) (ht-bind options (extensions) (let (name (if extensions (remove-filename-extension name extensions) name)) (prepare-tag-strings (string-split name #\.) options)))) (define (itpn-prefix->tag-strings a options) "string -> (integer ...)" (prepare-tag-strings (string-split a #\space) options)) (define (get-import-data-file-dot path options) "dg-txn string boolean procedure procedure -> import-data data to import one file with dot separated tag strings as the file name" (list (list (file-name->tag-strings (basename path) options) (pair (q filesystem) path)))) (define (get-import-data-file-dot-itpn path options) "string hashtable -> import-data data to import itpn packets as separate dg-files from a file. the file name can consist of tag strings to be added to every itpn packet" (pair (file-name->tag-strings (basename path) options) (filter-map (l (packet) (and (list? packet) (list (itpn-prefix->tag-strings (first packet) options) (pair (q datum) (itpn-string (tail packet)))))) (itpn-from-file path)))) (define files-collect-data (let* ( (get-replacements (l (config-path) "-> hashtable/false load a table for replacements on the list of tag strings per element" (if config-path (let (a (string-append (ensure-trailing-slash config-path) "replacements")) (and (file-exists? a) (iq-file-hashtable a))) (any (l (a) (let (a (string-append a "replacements")) (and (file-exists? a) (iq-file-hashtable a)))) config-paths)))) (path-is-entry? (l (prefix path) "string string -> boolean check if path is an entry of directory prefix or of a sub-directory instead" (zero? (string-count (string-drop-prefix prefix path) #\/)))) (prepare-paths (l (a) "list -> list remove duplicate paths, considering files that are included in given directory paths" (let* ( (a (map realpath* a)) (a ; remove from "a" all that have any of "a" as parent (remove (l (b) (any (l (c) (and (not (string-equal? c b)) (string-prefix? c b) (path-is-entry? c b) (not (directory? b)))) a)) a))) (delete-duplicates a)))) (get-dir-paths (l (a) "string -> (string ...)" (if (directory? a) (remove directory? (directory-list-full a)) (list a))))) (l (format paths add-tag-strings first-is-id ignore-mime-extensions config-path) "symbol (string ...) (string ...) boolean boolean -> list:import-data collect tag strings and data source information for all paths to be imported. path interpretation: * directory: import directory contents non-recursively * other: import file" (let ( (replacements (get-replacements config-path)) (extensions (and ignore-mime-extensions (map (l (a) (string-append "." a)) (get-mime-extensions)))) (get-data (case format ((file-itpn) get-import-data-file-dot-itpn) ((file-dot) get-import-data-file-dot)))) (and-let* ( (options (ht-create-binding add-tag-strings first-is-id replacements extensions)) (get-data* (l (a) (get-data a options))) (data (every-map (l (a) (let (a (every-map get-data* (get-dir-paths a))) (and a (apply append a)))) (prepare-paths paths)))) (apply append data)))))) (define dg-import-files (let (create-dg-elements (l (txn import-tag-string tag-strings data) (let (files (and-let* ( (tags (dg-intern-ensure txn (if import-tag-string (pair import-tag-string tag-strings) tag-strings))) (files (every-map (l (a) (let ((type (first a)) (data (tail a))) (case type ((filesystem) (dg-file-from-filesystem txn data)) ((datum) (dg-file-from-data txn data))))) data))) (dg-tag-add txn tags files) files)) (or files (begin (dg-txn-abort txn) #f))))) (l* (txn format paths run #:key add-tag-strings has-no-id import-tag ignore-mime-extensions dry-run-port config) ":: symbol:file-dot/file-itpn (string ...) boolean #:add-tag-strings (string ...) #:has-no-id boolean #:import-tag boolean #:ignore-mime-extensions boolean #:dry-run-port port #:config hashtable -> unspecified dg-tag must have been initialised" (if (not (pair? dg-tag-intern-tag)) (raise (q dg-tag-not-initialised))) (if (not (pair? dg-file-intern-file)) (raise (q dg-file-not-initialised))) (if (contains? dg-import-formats format) (let (import-tag-string (and import-tag (create-import-tag-string))) (and-let* ( (import-data (files-collect-data format paths add-tag-strings (not has-no-id) ignore-mime-extensions config)) (files (every-map (l (a) (let ((tag-strings (first a)) (data (tail a))) (if run (create-dg-elements txn import-tag-string tag-strings data) (let (tags (string-join tag-strings " ")) (each (l (a) (case (first a) ( (filesystem) (let* ((path (tail a)) (stat-info (stat path))) (write (list (q tags) tags (q size) (stat:size stat-info) (q source) path) dry-run-port) (newline dry-run-port))) ((datum) (write (list (q tags) tags) dry-run-port)))) data) (list))))) import-data))) (apply append files))) (raise (q dg-import-invalid-format)))))) (define (dg-import-scm txn source run) "dg-txn string:path boolean -> unspecified" (call-with-input-file source (l (port) (port-lines-each (l (line) (debug-log line) #;(let* ((line (string-split line #\space)) (type (first line)) (data (tail line))) (case type ((relation) (apply (l (left right label ordinal) #t) data)) ((id) (first data)) (else (u8-list->bytevector (tail data)))))) port)))))