aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Stuart Sierra <mail@stuartsierra.com> 2010-09-17 13:15:27 -0400
committer Stuart Sierra <mail@stuartsierra.com> 2010-09-17 13:15:27 -0400
commit 5263e4bd9ca8634b50755c5150ebef26e0d191f1 (patch)
tree 158685b734779a44911d5d3c51e70612a4a3c495
parent 5a928e263ab88cb8d224de8585932f936aa30c8f (diff)
Base64 decoding from Teemu Antti-Poika; refs #84
-rw-r--r-- modules/base64/src/main/clojure/clojure/contrib/base64.clj 75
-rw-r--r-- modules/base64/src/main/clojure/clojure/contrib/test_base64.clj 56
2 files changed, 117 insertions, 14 deletions
diff --git a/modules/base64/src/main/clojure/clojure/contrib/base64.clj b/modules/base64/src/main/clojure/clojure/contrib/base64.clj
index 2556487c..cef22c50 100644
--- a/modules/base64/src/main/clojure/clojure/contrib/base64.clj
+++ b/modules/base64/src/main/clojure/clojure/contrib/base64.clj
@@ -1,7 +1,9 @@
-;;; base64.clj: Experimental Base-64 encoding and (later) decoding
+;;; base64.clj: Experimental Base-64 encoding and decoding
-;; by Stuart Sierra, http://stuartsierra.com/
+;; by Stuart Sierra, http://stuartsierra.com/ - encode
;; August 19, 2009
+;; by Teemu Antti-Poika (anttipoi@gmail.com) - decode
+;; May 12, 2010
;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use
;; and distribution terms for this software are covered by the Eclipse
@@ -12,18 +14,25 @@
;; remove this notice, or any other, from this software.
-(ns ^{:doc "Base-64 encoding and (maybe later) decoding.
+(ns ^{:doc "Base-64 encoding and decoding.
This is mainly here as an example. It is much slower than the
Apache Commons Codec implementation or sun.misc.BASE64Encoder."
:author "Stuart Sierra"}
clojure.contrib.base64
- (:import (java.io InputStream Writer ByteArrayInputStream
- StringWriter)))
+ (:import (java.io InputStream Writer ByteArrayInputStream
+ ByteArrayOutputStream StringReader StringWriter)))
;; The 64 encoding characters in index order, followed by the padding
;; character "=" at index 64.
(def *base64-alphabet*
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")
+(defn- char-to-index-map
+  "Returns a map from the integer code of each character in
+  *base64-alphabet* to that character's zero-based position in the
+  alphabet string."
+  []
+  (into {}
+        (map-indexed (fn [position ch] [(int ch) position])
+                     *base64-alphabet*)))
+
(defn encode
"Encodes bytes of input, writing Base 64 text on output. alphabet
is a 65-character String containing the 64 characters to use in the
@@ -86,14 +95,52 @@
output *base64-alphabet* line-length)
(.toString output))))
+(defn- get-next-char
+  "Reads from reader until a character other than line feed (10) or
+  carriage return (13) is found and returns it as an integer code.
+  Returns -1 when the reader is exhausted."
+  [reader]
+  (loop [code (.read reader)]
+    (if-not (or (= code 10) (= code 13))
+      code
+      (recur (.read reader)))))
-;;; tests
+(defn decode
+  "Decodes base64-encoded content from str-reader, writing the resulting
+  bytes to out. Line-feed and carriage-return characters in the input are
+  skipped. Input is consumed in groups of four characters; decoding stops
+  at end of input or after the first group that contains padding (anything
+  following that group is left unread).
+  Throws IllegalArgumentException if a group contains a character outside
+  the alphabet, is cut short by end of input, or has a padding character
+  anywhere other than the end of the group."
+  [^StringReader str-reader ^ByteArrayOutputStream out]
+  ;; The lookup table is built once per call instead of once per group.
+  (let [c-to-int (char-to-index-map)
+        content-char? (fn [i] (not (= i 64)))] ; 64 is index for the pad character =
+    (loop [next-char (get-next-char str-reader)]
+      (when (not (= next-char -1))
+        (let [sb0 (c-to-int next-char)
+              sb1 (c-to-int (get-next-char str-reader))
+              sb2 (c-to-int (get-next-char str-reader))
+              sb3 (c-to-int (get-next-char str-reader))]
+          ;; A nil lookup means an unknown character or end of input (-1)
+          ;; in the middle of a group.
+          (when (not (and sb0 sb1 sb2 sb3))
+            (throw (IllegalArgumentException. "Illegal Base64-encoded input: illegal characters or missing padding")))
+          ;; Padding may only form a suffix of the group: never in the first
+          ;; two slots, and never in the third slot when the fourth holds data.
+          (when (or (not (content-char? sb0))
+                    (not (content-char? sb1))
+                    (and (not (content-char? sb2)) (content-char? sb3)))
+            (throw (IllegalArgumentException. "Illegal Base64-encoded input: padding char at illegal position")))
+          ;; First byte: all six bits of sb0 plus the top two bits of sb1.
+          (.write out (bit-or
+                       (bit-shift-left sb0 2)
+                       (bit-shift-right (bit-and 0x30 sb1) 4)))
+          (when (content-char? sb2)
+            ;; Second byte: low four bits of sb1 plus the top four bits of sb2.
+            (.write out (bit-or
+                         (bit-shift-left (bit-and 0xF sb1) 4)
+                         (bit-shift-right (bit-and 0x3C sb2) 2)))
+            (when (content-char? sb3)
+              ;; Third byte: low two bits of sb2 plus all six bits of sb3.
+              (.write out (bit-or
+                           (bit-shift-left (bit-and 0x3 sb2) 6)
+                           sb3))
+              ;; Only a group without padding can be followed by another one.
+              (recur (get-next-char str-reader)))))))))
+
-;; (deftest t-encode-str
-;; (is (= (encode-str "") ""))
-;; (is (= (encode-str "f") "Zg=="))
-;; (is (= (encode-str "fo") "Zm8="))
-;; (is (= (encode-str "foo") "Zm9v"))
-;; (is (= (encode-str "foob") "Zm9vYg=="))
-;; (is (= (encode-str "fooba") "Zm9vYmE="))
-;; (is (= (encode-str "foobar") "Zm9vYmFy")))
+(defn decode-str
+  "Decodes the base64-encoded String s and returns the decoded bytes as a
+  String built with the given character encoding (UTF-8 when omitted).
+  Returns nil when s is nil."
+  ([s] (decode-str s "UTF-8"))
+  ([^String s ^String encoding]
+     (when s
+       (let [sink (ByteArrayOutputStream.)]
+         (decode (StringReader. s) sink)
+         (-> sink .toByteArray (String. encoding)))))) \ No newline at end of file
diff --git a/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj b/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj
new file mode 100644
index 00000000..8ae63792
--- /dev/null
+++ b/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj
@@ -0,0 +1,56 @@
+; Copyright (c) Teemu Antti-Poika, May 2010. All rights reserved.
+; The use and distribution terms for this software are covered by the
+; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
+; which can be found in the file epl-v10.html at the root of this
+; distribution.
+; By using this software in any fashion, you are agreeing to be bound by
+; the terms of this license.
+; You must not remove this notice, or any other, from this software.
+
+;; test namespace for clojure.contrib.base64
+
+
+(ns clojure.contrib.test-base64
+ (:use [clojure.test]
+ [clojure.contrib.base64]))
+
+
+(deftest t-encode-str
+  ;; Each row is: plain input, expected Base64 text.
+  (are [plain encoded] (= (encode-str plain) encoded)
+       ""       ""
+       "f"      "Zg=="
+       "fo"     "Zm8="
+       "foo"    "Zm9v"
+       "foob"   "Zm9vYg=="
+       "fooba"  "Zm9vYmE="
+       "foobar" "Zm9vYmFy"))
+
+(deftest t-encode-multiline-str
+  ;; Each row is: plain input, line-length argument, expected Base64 text.
+  (are [plain line-length encoded] (= (encode-str plain "UTF-8" line-length) encoded)
+       "This fits on one line" 72
+       "VGhpcyBmaXRzIG9uIG9uZSBsaW5l"
+
+       "This is written on multiple lines" 10
+       "VGhpcyBpcyB3\ncml0dGVuIG9u\nIG11bHRpcGxl\nIGxpbmVz"))
+
+(deftest t-decode-str
+  ;; nil input yields nil rather than a String.
+  (is (nil? (decode-str nil)))
+  ;; Each row is: Base64 input, expected decoded text.
+  (are [encoded plain] (= (decode-str encoded) plain)
+       ""         ""
+       "Zg=="     "f"
+       "Zm8="     "fo"
+       "Zm9v"     "foo"
+       "Zm9vYg==" "foob"
+       "Zm9vYmE=" "fooba"
+       "Zm9vYmFy" "foobar"))
+
+(deftest t-decode-multiline-str
+  ;; The same payload wrapped with LF and with CRLF line endings.
+  (doseq [wrapped ["VGhpcyBpcyB3\ncml0dGVuIG9u\nIG11bHRpcGxl\nIGxpbmVz"
+                   "VGhpcyBpcyB3\r\ncml0dGVuIG9u\r\nIG11bHRpcGxl\r\nIGxpbmVz"]]
+    (is (= (decode-str wrapped) "This is written on multiple lines"))))
+
+(deftest t-decode-str-fails-on-incorrect-charcters-in-input
+  ;; A character outside the alphabet (";") in each of the four positions.
+  (doseq [bad-input [";AB=" "A;B=" "AB;=" "ABC;"]]
+    (is (thrown? IllegalArgumentException (decode-str bad-input)))))
+
+(deftest t-decode-str-fails-when-padding-character-occurs-at-illegal-positions
+  ;; Padding in the first or second position of a group.
+  (doseq [bad-input ["=ABC" "A=BC"]]
+    (is (thrown? IllegalArgumentException (decode-str bad-input)))))
+
+;; Five characters: one complete group followed by a single leftover
+;; character; decode-str is expected to throw.
+(deftest t-decode-str-fails-when-input-length-is-not-divisble-with-four
+ (is (thrown? IllegalArgumentException (decode-str "ABCDE")))) \ No newline at end of file