diff options
author | Stuart Sierra <mail@stuartsierra.com> | 2010-09-17 13:15:27 -0400 |
---|---|---|
committer | Stuart Sierra <mail@stuartsierra.com> | 2010-09-17 13:15:27 -0400 |
commit | 5263e4bd9ca8634b50755c5150ebef26e0d191f1 (patch) | |
tree | 158685b734779a44911d5d3c51e70612a4a3c495 | |
parent | 5a928e263ab88cb8d224de8585932f936aa30c8f (diff) |
Base64 decoding from Teemu Antti-Poika; refs #84
-rw-r--r-- | modules/base64/src/main/clojure/clojure/contrib/base64.clj | 75 | ||||
-rw-r--r-- | modules/base64/src/main/clojure/clojure/contrib/test_base64.clj | 56 |
2 files changed, 117 insertions, 14 deletions
diff --git a/modules/base64/src/main/clojure/clojure/contrib/base64.clj b/modules/base64/src/main/clojure/clojure/contrib/base64.clj index 2556487c..cef22c50 100644 --- a/modules/base64/src/main/clojure/clojure/contrib/base64.clj +++ b/modules/base64/src/main/clojure/clojure/contrib/base64.clj @@ -1,7 +1,9 @@ -;;; base64.clj: Experimental Base-64 encoding and (later) decoding +;;; base64.clj: Experimental Base-64 encoding and decoding -;; by Stuart Sierra, http://stuartsierra.com/ +;; by Stuart Sierra, http://stuartsierra.com/ - encode ;; August 19, 2009 +;; by Teemu Antti-Poika (anttipoi@gmail.com) - decode +;; May 12, 2010 ;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use ;; and distribution terms for this software are covered by the Eclipse @@ -12,18 +14,25 @@ ;; remove this notice, or any other, from this software. -(ns ^{:doc "Base-64 encoding and (maybe later) decoding. +(ns ^{:doc "Base-64 encoding and decoding. This is mainly here as an example. It is much slower than the Apache Commons Codec implementation or sun.misc.BASE64Encoder." :author "Stuart Sierra"} clojure.contrib.base64 - (:import (java.io InputStream Writer ByteArrayInputStream - StringWriter))) + (:import (java.io InputStream Writer ByteArrayInputStream + ByteArrayOutputStream StringReader StringWriter))) (def *base64-alphabet* "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") +(defn- char-to-index-map + [] + (into {} + (map #(vec [(int %1) %2]) + *base64-alphabet* + (iterate inc 0)))) + (defn encode "Encodes bytes of input, writing Base 64 text on output. alphabet is a 65-character String containing the 64 characters to use in the @@ -86,14 +95,52 @@ output *base64-alphabet* line-length) (.toString output)))) +(defn- get-next-char + "Consume and return next character from reader. Ignore and eat end-of-lines characters. Return -1 on end." + [reader] + (let [c (.read reader)] + (if (or (= c 10) (= c 13)) + (recur reader) + c))) -;;; tests +(defn decode + "Decodes base64-encoded content from str-reader. Writes resulting bytes to out." + [^StringReader str-reader ^ByteArrayOutputStream out] + (let [next-char (get-next-char str-reader)] + (when (not (= next-char -1)) + (let [c-to-int (char-to-index-map) + content-char? (fn [i] (not (= i 64))) ; 64 is index for the pad character = + sb0 (c-to-int next-char) + sb1 (c-to-int (get-next-char str-reader)) + sb2 (c-to-int (get-next-char str-reader)) + sb3 (c-to-int (get-next-char str-reader)) + _ (when (not (and sb0 sb1 sb2 sb3)) + (throw (IllegalArgumentException. "Illegal Base64-encoded input: illegal characters or missing padding"))) + _ (when (not (and (content-char? sb0) (content-char? sb1))) + (throw (IllegalArgumentException. "Illegal Base64-encoded input: padding char at illegl position"))) + b0 (bit-or + (bit-shift-left sb0 2) + (bit-shift-right (bit-and 0x30 sb1) 4))] + (.write out b0) + (when (content-char? sb2) + (let [b1 (bit-or + (bit-shift-left (bit-and 0xF sb1) 4) + (bit-shift-right (bit-and 0x3C sb2) 2))] + (.write out b1) + (when (content-char? sb3) + (let [b2 (bit-or + (bit-shift-left (bit-and 0x3 sb2) 6) + sb3)] + (.write out b2) + (recur str-reader out))))))))) + -;; (deftest t-encode-str -;; (is (= (encode-str "") "")) -;; (is (= (encode-str "f") "Zg==")) -;; (is (= (encode-str "fo") "Zm8=")) -;; (is (= (encode-str "foo") "Zm9v")) -;; (is (= (encode-str "foob") "Zm9vYg==")) -;; (is (= (encode-str "fooba") "Zm9vYmE=")) -;; (is (= (encode-str "foobar") "Zm9vYmFy"))) +(defn decode-str + "Decodes base64-encoded String using encoding. Encoding defaults to UTF-8." + ([s] (decode-str s "UTF-8")) + ([^String s ^String encoding] + (when s + (let [baos (ByteArrayOutputStream.) + str-reader (StringReader. s)] + (decode str-reader baos) + (String. (.toByteArray baos) encoding)))))
\ No newline at end of file diff --git a/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj b/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj new file mode 100644 index 00000000..8ae63792 --- /dev/null +++ b/modules/base64/src/main/clojure/clojure/contrib/test_base64.clj @@ -0,0 +1,56 @@ +; Copyright (c) Teemu Antti-Poika, May 2010. All rights reserved. +; The use and distribution terms for this software are covered by the +; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +; which can be found in the file epl-v10.html at the root of this +; distribution. +; By using this software in any fashion, you are agreeing to be bound by +; the terms of this license. +; You must not remove this notice, or any other, from this software. + +;; test namespace for clojure.contrib.base64 + + +(ns clojure.contrib.test-base64 + (:use [clojure.test] + [clojure.contrib.base64])) + + +(deftest t-encode-str + (is (= (encode-str "") "")) + (is (= (encode-str "f") "Zg==")) + (is (= (encode-str "fo") "Zm8=")) + (is (= (encode-str "foo") "Zm9v")) + (is (= (encode-str "foob") "Zm9vYg==")) + (is (= (encode-str "fooba") "Zm9vYmE=")) + (is (= (encode-str "foobar") "Zm9vYmFy"))) + +(deftest t-encode-multiline-str + (is (= (encode-str "This fits on one line" "UTF-8" 72) "VGhpcyBmaXRzIG9uIG9uZSBsaW5l")) + (is (= (encode-str "This is written on multiple lines" "UTF-8" 10) "VGhpcyBpcyB3\ncml0dGVuIG9u\nIG11bHRpcGxl\nIGxpbmVz"))) + +(deftest t-decode-str + (is (nil? (decode-str nil))) + (is (= (decode-str "") "")) + (is (= (decode-str "Zg==") "f")) + (is (= (decode-str "Zm8=") "fo")) + (is (= (decode-str "Zm9v") "foo")) + (is (= (decode-str "Zm9vYg==") "foob")) + (is (= (decode-str "Zm9vYmE=") "fooba")) + (is (= (decode-str "Zm9vYmFy") "foobar"))) + +(deftest t-decode-multiline-str + (is (= (decode-str "VGhpcyBpcyB3\ncml0dGVuIG9u\nIG11bHRpcGxl\nIGxpbmVz") "This is written on multiple lines")) + (is (= (decode-str "VGhpcyBpcyB3\r\ncml0dGVuIG9u\r\nIG11bHRpcGxl\r\nIGxpbmVz") "This is written on multiple lines"))) + +(deftest t-decode-str-fails-on-incorrect-charcters-in-input + (is (thrown? IllegalArgumentException (decode-str ";AB="))) + (is (thrown? IllegalArgumentException (decode-str "A;B="))) + (is (thrown? IllegalArgumentException (decode-str "AB;="))) + (is (thrown? IllegalArgumentException (decode-str "ABC;")))) + +(deftest t-decode-str-fails-when-padding-character-occurs-at-illegal-positions + (is (thrown? IllegalArgumentException (decode-str "=ABC"))) + (is (thrown? IllegalArgumentException (decode-str "A=BC")))) + +(deftest t-decode-str-fails-when-input-length-is-not-divisble-with-four + (is (thrown? IllegalArgumentException (decode-str "ABCDE"))))
\ No newline at end of file |