diff options
author | rmckenzie <rmckenzie92@gmail.com> | 2013-03-13 01:17:33 -0500 |
---|---|---|
committer | rmckenzie <rmckenzie92@gmail.com> | 2013-03-13 01:17:33 -0500 |
commit | 1761132b6a3a16102cfe5d2fa940f78827adde96 (patch) | |
tree | e5476c126208a943480538d194933ce6ca3f68e5 /src |
initial state
Diffstat (limited to 'src')
-rw-r--r-- | src/me/arrdem/decomp/core.clj | 20 | ||||
-rw-r--r-- | src/me/arrdem/decomp/lexer.clj | 26 | ||||
-rw-r--r-- | src/me/arrdem/decomp/parser.clj | 96 |
3 files changed, 142 insertions, 0 deletions
diff --git a/src/me/arrdem/decomp/core.clj b/src/me/arrdem/decomp/core.clj
new file mode 100644
index 0000000..1e0f289
--- /dev/null
+++ b/src/me/arrdem/decomp/core.clj
@@ -0,0 +1,20 @@
+(ns me.arrdem.decomp.core
+  (:require [clojure.pprint :refer [pprint]]
+            [me.arrdem.decomp.parser :refer [build-ast]]
+            [me.arrdem.decomp.lexer :refer [html]]
+            [clojure.tools.cli :refer [cli]])
+  (:gen-class :main true))
+
+(defn process-string [s]
+  (pprint (build-ast (html s))))
+
+
+(defn -main
+  "The only valid arguments are targeted files. If there are no targeted files
+then decomp will target stdin as its token source."
+  [& args]
+  (if-not (empty? args)
+    (doseq [f args]
+      (pprint (build-ast (html (slurp f)))))
+
+    (pprint (build-ast (html (slurp (java.io.BufferedReader. *in*)))))))
diff --git a/src/me/arrdem/decomp/lexer.clj b/src/me/arrdem/decomp/lexer.clj
new file mode 100644
index 0000000..15ae7d9
--- /dev/null
+++ b/src/me/arrdem/decomp/lexer.clj
@@ -0,0 +1,26 @@
+(ns me.arrdem.decomp.lexer
+  (:require [lexington.lexer :refer :all]
+            [lexington.utils.lexer :refer :all]))
+
+(deflexer html-base
+  :cclose "</"
+  :open "<"
+  :close ">"
+  :assign "="
+  :string #"\"[^\"]+\""
+  :word #"[^ \t\r\n=\"\\<>]+"
+  :ws #" |\t|\r|\n"
+  :chr #".")
+
+(def wordfn (fn [v] (apply str (:lexington.tokens/data v))))
+(def strfn (fn [v] (apply str (drop 1 (butlast (:lexington.tokens/data v))))))
+
+(def html
+  (-> html-base
+      (discard :ws)
+      ;; (with-string :str :only [:integer])
+      (generate-for :word :val wordfn)
+      (generate-for :string :val strfn)
+      (generate-for :chr :val wordfn)
+      ;; (with-string :str :only [:integer])
+      ))
diff --git a/src/me/arrdem/decomp/parser.clj b/src/me/arrdem/decomp/parser.clj
new file mode 100644
index 0000000..e503e36
--- /dev/null
+++ b/src/me/arrdem/decomp/parser.clj
@@ -0,0 +1,96 @@
+(ns me.arrdem.decomp.parser
+  (:require [name.choi.joshua.fnparse :as fnp]))
+
+(defmacro deftoken [symbol val]
+  `(def ~symbol
+     (fnp/term
+      #(= (:lexington.tokens/type %1) ~val))))
+
+(deftoken strtok :string)
+(deftoken wordtok :word)
+(deftoken assignop :assign)
+(deftoken opentok :open)
+(deftoken closetok :close)
+(deftoken cclosetok :cclose)
+(deftoken chrtok :chr)
+
+(def chr
+  (fnp/semantics
+   chrtok
+   (fn [c] (:val c))))
+
+(def word
+  (fnp/semantics
+   wordtok
+   (fn [c] (:val c))))
+
+(def html-kv-pair
+  (fnp/semantics
+   (fnp/conc
+    wordtok
+    assignop
+    strtok)
+   ;; prn))
+   (fn [[w _ s]] {(keyword (:val w)) (:val s)})))
+
+(def html-opentag
+  (fnp/semantics
+   (fnp/conc
+    opentok
+    wordtok
+    (fnp/rep* html-kv-pair)
+    closetok)
+   ;; prn))
+   (fn [[_ name vals __]]
+     [(keyword (:val name)) (or (reduce merge vals) {})])))
+
+(def html-closetag
+  (fnp/conc
+   cclosetok
+   wordtok
+   closetok))
+
+(declare html-ast)
+
+(defn reduce-strs [s]
+  (reduce (fn [acc n]
+            (cond (and (string? (last acc))
+                       (string? n))
+                  (concat (butlast acc)
+                          [(str (last acc) " " n)])
+
+                  (or (and (string? (last acc))
+                           (char? n))
+                      (and (char? (last acc))
+                           (string? n))
+                      (and (char? (last acc))
+                           (char? n)))
+                  (concat (butlast acc)
+                          [(str (last acc) n)])
+
+                  (vector? n) (concat acc n)
+
+                  :else
+                  (concat acc [n])))
+          [] s))
+
+(def html-ast
+  (fnp/rep+
+   (fnp/semantics
+    (fnp/conc
+     html-opentag
+     (fnp/rep*
+      (fnp/alt
+       word
+       chr
+       html-ast))
+     html-closetag)
+    (fn [[o v _]]
+      (apply vector (concat o (reduce-strs v)))))))
+
+(defn build-ast [toks]
+  (fnp/rule-match
+   html-ast
+   #(println "FAILED: " %)
+   #(println "LEFTOVER: " %2)
+   {:remainder toks}))
|