aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author rmckenzie <rmckenzie92@gmail.com> 2013-03-13 01:17:33 -0500
committer rmckenzie <rmckenzie92@gmail.com> 2013-03-13 01:17:33 -0500
commit 1761132b6a3a16102cfe5d2fa940f78827adde96 (patch)
tree e5476c126208a943480538d194933ce6ca3f68e5 /src
initial state
Diffstat (limited to 'src')
-rw-r--r-- src/me/arrdem/decomp/core.clj 20
-rw-r--r-- src/me/arrdem/decomp/lexer.clj 26
-rw-r--r-- src/me/arrdem/decomp/parser.clj 96
3 files changed, 142 insertions, 0 deletions
diff --git a/src/me/arrdem/decomp/core.clj b/src/me/arrdem/decomp/core.clj
new file mode 100644
index 0000000..1e0f289
--- /dev/null
+++ b/src/me/arrdem/decomp/core.clj
@@ -0,0 +1,20 @@
+(ns me.arrdem.decomp.core
+ (:require [clojure.pprint :refer [pprint]]
+ [me.arrdem.decomp.parser :refer [build-ast]]
+ [me.arrdem.decomp.lexer :refer [html]]
+ [clojure.tools.cli :refer [cli]])
+ (:gen-class :main true))
+
+(defn process-string
+  "Lexes the string `s` with `html`, feeds the resulting tokens to
+  `build-ast`, and pretty-prints whatever `build-ast` returns."
+  [s]
+  (-> s html build-ast pprint))
+
+
+(defn -main
+  "The only valid arguments are targeted files. If there are no targeted
+  files then decomp will target stdin as its token source."
+  [& args]
+  ;; `seq` is the idiomatic non-empty test; it is nil for an empty arg list.
+  (if (seq args)
+    ;; Each named file is read in full, then lexed, parsed and printed by
+    ;; `process-string`, in the order given on the command line.
+    (doseq [f args]
+      (process-string (slurp f)))
+    ;; No files named: read all of stdin and treat it as one document.
+    (process-string (slurp (java.io.BufferedReader. *in*)))))
diff --git a/src/me/arrdem/decomp/lexer.clj b/src/me/arrdem/decomp/lexer.clj
new file mode 100644
index 0000000..15ae7d9
--- /dev/null
+++ b/src/me/arrdem/decomp/lexer.clj
@@ -0,0 +1,26 @@
+(ns me.arrdem.decomp.lexer
+ (:require [lexington.lexer :refer :all]
+ [lexington.utils.lexer :refer :all]))
+
+;; Base token table for the HTML lexer: each keyword names a token type and
+;; is paired with the literal string or regex that recognises it.
+;; NOTE(review): :cclose ("</") is listed ahead of :open ("<"); presumably
+;; lexington either tries rules in order or prefers the longest match --
+;; confirm against the lexington documentation before reordering.
+(deflexer html-base
+  :cclose "</"
+  :open "<"
+  :close ">"
+  :assign "="
+  ;; `*` (not `+`) between the quotes so that an empty quoted value such as
+  ;; alt="" still lexes as a single :string token.
+  :string #"\"[^\"]*\""
+  ;; A word is a run of characters containing no whitespace, `=`, `"`,
+  ;; backslash, `<` or `>`.
+  :word #"[^ \t\r\n=\"\\<>]+"
+  :ws #" |\t|\r|\n"
+  ;; Catch-all: any single character the rules above did not claim.
+  :chr #".")
+
+(defn wordfn
+  "Concatenates everything under the token's `:lexington.tokens/data` key
+  into one string (presumably the matched characters -- confirm against
+  lexington)."
+  [v]
+  (apply str (:lexington.tokens/data v)))
+
+(defn strfn
+  "Like `wordfn`, but discards the first and last elements of the token data
+  before joining; used below for :string tokens, whose pattern begins and
+  ends with a double quote."
+  [v]
+  (->> (:lexington.tokens/data v)
+       butlast
+       (drop 1)
+       (apply str)))
+
+;; The lexer used by the rest of the program: `html-base` threaded through
+;; lexington's `discard` and `generate-for` combinators.
+;; NOTE(review): presumably `discard` drops :ws tokens from the output and
+;; each `generate-for` attaches a :val key computed by the given function to
+;; tokens of the named type -- confirm against the lexington documentation.
+;; The parser namespace reads :val from :word, :string and :chr tokens.
+(def html
+ (-> html-base
+ (discard :ws)
+ ;; (with-string :str :only [:integer])
+ (generate-for :word :val wordfn)
+ (generate-for :string :val strfn)
+ (generate-for :chr :val wordfn)
+ ;; (with-string :str :only [:integer])
+ ))
diff --git a/src/me/arrdem/decomp/parser.clj b/src/me/arrdem/decomp/parser.clj
new file mode 100644
index 0000000..e503e36
--- /dev/null
+++ b/src/me/arrdem/decomp/parser.clj
@@ -0,0 +1,96 @@
+(ns me.arrdem.decomp.parser
+ (:require [name.choi.joshua.fnparse :as fnp]))
+
+(defmacro deftoken
+  "Defines `symbol` as the result of `fnp/term` applied to a predicate that
+  accepts a token when its `:lexington.tokens/type` equals `val`."
+  [symbol val]
+  `(def ~symbol
+     (fnp/term
+      (fn [tok#] (= ~val (:lexington.tokens/type tok#))))))
+
+;; One terminal rule per token type produced by the lexer; each is defined by
+;; `deftoken` and matches on the token's :lexington.tokens/type.
+(deftoken strtok :string)
+(deftoken wordtok :word)
+(deftoken assignop :assign)
+(deftoken opentok :open)
+(deftoken closetok :close)
+(deftoken cclosetok :cclose)
+(deftoken chrtok :chr)
+
+;; Rule: a single :chr token, with the token's :val as the rule's product.
+(def chr
+  (fnp/semantics chrtok :val))
+
+;; Rule: a single :word token, with the token's :val as the rule's product.
+(def word
+  (fnp/semantics wordtok :val))
+
+;; Rule: one attribute -- a :word token, an :assign token and a :string token
+;; in sequence. The product is a single-entry map from the keywordized :val
+;; of the word token to the :val of the string token.
+(def html-kv-pair
+  (fnp/semantics
+   (fnp/conc wordtok assignop strtok)
+   (fn [[attr-name _ attr-value]]
+     (hash-map (keyword (:val attr-name)) (:val attr-value)))))
+
+;; Rule: an opening tag -- :open, a :word tag name, zero or more attributes
+;; (`html-kv-pair`), then :close. The product is a two-element vector
+;; [tag-keyword attribute-map]; the attribute map is {} when no attributes
+;; were matched, and a later duplicate key overrides an earlier one.
+(def html-opentag
+  (fnp/semantics
+   (fnp/conc opentok wordtok (fnp/rep* html-kv-pair) closetok)
+   (fn [[_ tag attrs _]]
+     (let [tag-key  (keyword (:val tag))
+           attr-map (into {} attrs)]
+       [tag-key attr-map]))))
+
+;; Rule: a closing tag -- :cclose, a :word token, then :close.
+;; Any :word token is accepted here; the rule does not compare the name with
+;; the one in the corresponding opening tag.
+(def html-closetag
+ (fnp/conc
+ cclosetok
+ wordtok
+ closetok))
+
+(declare html-ast)
+
+(defn reduce-strs
+  "Folds the sequence `s` into a vector, collapsing adjacent text items:
+
+    * a string followed by a string is joined with a single space;
+    * any other adjacency of strings/characters is joined with no separator;
+    * a vector has its elements spliced into the result (not nested);
+    * anything else is appended unchanged.
+
+  Returns a vector ([] for empty or nil input). The accumulator is kept as a
+  vector so that reading and replacing its last element is constant time."
+  [s]
+  (let [text? (fn [x] (or (string? x) (char? x)))]
+    (reduce (fn [acc n]
+              ;; `peek` of an empty vector is nil, so the first item always
+              ;; falls through to the vector or :else branch.
+              (let [prev (peek acc)]
+                (cond
+                  (and (string? prev) (string? n))
+                  (conj (pop acc) (str prev " " n))
+
+                  ;; The string+string case was handled above, so this covers
+                  ;; string+char, char+string and char+char.
+                  (and (text? prev) (text? n))
+                  (conj (pop acc) (str prev n))
+
+                  (vector? n)
+                  (into acc n)
+
+                  :else
+                  (conj acc n))))
+            [] s)))
+
+;; Rule: one or more elements. Each element is an opening tag, then any mix
+;; of :word text, :chr text and nested elements, then a closing tag. The
+;; product for an element is a vector made of the opening tag's product
+;; followed by the children after `reduce-strs` has collapsed them.
+(def html-ast
+  (fnp/rep+
+   (fnp/semantics
+    (fnp/conc
+     html-opentag
+     (fnp/rep*
+      (fnp/alt
+       word
+       chr
+       ;; Pass the var, not its current value: while this `def` is being
+       ;; evaluated `html-ast` is still unbound, so the bare symbol would
+       ;; capture an unbound placeholder. A var is itself callable and looks
+       ;; up its value on each call, by which time the rule is bound.
+       #'html-ast))
+     html-closetag)
+    (fn [[o v _]]
+      (apply vector (concat o (reduce-strs v)))))))
+
+(defn build-ast
+  "Applies the `html-ast` rule to the token sequence `toks` through
+  `fnp/rule-match`, starting from the state map {:remainder toks}.
+
+  Two callbacks are supplied: the first prints \"FAILED: \" followed by its
+  single argument; the second prints \"LEFTOVER: \" followed by its second
+  argument. NOTE(review): presumably fnparse calls the first when the rule
+  does not match and the second when tokens remain afterwards -- confirm
+  against the fnparse `rule-match` documentation."
+  [toks]
+  (let [on-failure    (fn [state] (println "FAILED: " state))
+        on-incomplete (fn [_ leftover] (println "LEFTOVER: " leftover))]
+    (fnp/rule-match html-ast on-failure on-incomplete {:remainder toks})))