diff options
-rw-r--r-- | src/clojure/contrib/str_utils2.clj | 225 | ||||
-rw-r--r-- | src/clojure/contrib/test_contrib/str_utils2.clj | 39 |
2 files changed, 264 insertions, 0 deletions
diff --git a/src/clojure/contrib/str_utils2.clj b/src/clojure/contrib/str_utils2.clj new file mode 100644 index 00000000..2bbc2649 --- /dev/null +++ b/src/clojure/contrib/str_utils2.clj @@ -0,0 +1,225 @@ +;;; str_utils2.clj -- experimental new string utilities for Clojure + +;; by Stuart Sierra, http://stuartsierra.com/ +;; June 4, 2009 + +;; Copyright (c) Stuart Sierra, 2009. All rights reserved. The use +;; and distribution terms for this software are covered by the Eclipse +;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) +;; which can be found in the file epl-v10.html at the root of this +;; distribution. By using this software in any fashion, you are +;; agreeing to be bound by the terms of this license. You must not +;; remove this notice, or any other, from this software. + + +(ns #^{:author "Stuart Sierra" + :doc "This is a library of string manipulation functions. It + is intented as a replacement for clojure.contrib.str-utils. + + You cannot (use 'clojure.contrib.str-utils2) because it defines + functions with the same names as functions in clojure.core. + Instead, do (require '[clojure.contrib.str-utils2 :as s]) + or something similar. + + Goals: + 1. Be functional + 2. String argument first, to work with -> + 3. Performance linear in string length + + Some ideas are borrowed from + http://github.com/francoisdevlin/devlinsf-clojure-utils/"} + clojure.contrib.str-utils2 + (:refer-clojure :exclude (take replace drop butlast partition contains? get)) + (:require [clojure.contrib.java-utils :as j]) + (:import (java.util.regex Pattern))) + +(defmacro dochars + "bindings => [name string] + Repeatedly executes body, with name bound to each character in + string." + [bindings & body] + (assert (vector bindings)) + (assert (= 2 (count bindings))) + `(let [#^String s# ~(second bindings)] + (dotimes [i# (.length ~(second bindings))] + (let [~(first bindings) (.charAt s# i#)] + ~@body)))) + +(defn escape + "Escapes characters in string according to a cmap, a function or map + from characters to their replacements." + [#^String s cmap] + (let [buffer (StringBuilder. (.length s))] + (dochars [c s] + (if-let [r (cmap s)] + (.append buffer r) + (.append buffer c))) + (.toString buffer))) + +(defn escape-pattern [#^String s] + (escape s (fn [c] (when (#{\\ \[ \] \. \^ \$ \? \* \+ \( \)} c) + (str \\ c))))) + +(defn as-pattern [re] + (if (instance? Pattern re) + re + (Pattern/compile (escape-pattern (j/as-str re))))) + +(defn blank? + "True if s is nil, empty, or contains only whitespace." + [#^String s] + (every? (fn [#^Character c] (Character/isWhitespace c)) s)) + +(defn take + "Take first n characters from s, up to the length of s." + [#^String s n] + (if (< (count s) n) + s + (.substring s 0 n))) + +(defn drop [#^String s n] + "Drops first n characters from s. Returns an empty string if n is + greater than the length of s." + (if (< (count s) n) + "" + (.substring s n))) + +(defn butlast + "Returns s without the last n characters. Returns an empty string + if n is greater than the length of s." + [#^String s n] + (if (< (count s) n) + "" + (.substring s 0 (- (count s) n)))) + +(defn tail + "Returns the last n characters of s." + [#^String s n] + (if (< (count s) n) + s + (.substring s (- (count s) n)))) + +(defmulti + #^{:doc "Replaces all instances of a in s with b. a and b may be + Characters, Strings, Pattern/String, or Pattern/Fn."} + replace + (fn [#^String s a b] + [(class a) (class b)])) + +(defmethod replace [String String] [#^String s #^String a #^String b] + (.replace s a b)) + +(defmethod replace [Character Character] [#^String s #^Character a #^Character b] + (.replace s a b)) + +(defmethod replace [Pattern String] [#^String s re replacement] + (.replaceAll (re-matcher re s) replacement)) + +(defmethod replace [Pattern clojure.lang.IFn] [#^String s re replacement] + (let [m (re-matcher re s)] + (let [buffer (StringBuffer. (.length s))] + (loop [] + (if (.find m) + (do (.appendReplacement m buffer (replacement (re-groups m))) + (recur)) + (do (.appendTail m buffer) + (.toString buffer))))))) + +(defmulti + #^{:doc "Replaces the first instance of a in s with b. a must be + Pattern, b may be String or Fn."} + replace-first + (fn [s a b] + [(class a) (class b)])) + +(defmethod replace-first [Pattern String] [#^String s #^Pattern re replacement] + (.replaceFirst (re-matcher re s) replacement)) + +(defmethod replace-first [Pattern IFn] [#^String s #^Pattern re f] + (let [m (re-matcher re s)] + (let [buffer (StringBuffer.)] + (if (.find m) + (let [rep (f (re-groups m))] + (.appendReplacement m buffer rep) + (.appendTail m buffer) + (str buffer)))))) + +(defn partition + "Splits the string into a lazy sequence of substrings, alternating + between substrings that match the patthern and the substrings + between the matches. The sequence always starts with the substring + before the first match, or an empty string if the beginning of the + string matches. + + For example: (partition \"abc123def\" #\"[a-z]+\") + returns: (\"\" \"abc\" \"123\" \"def\")" + [#^String s #^Pattern re] + (let [m (re-matcher re s)] + ((fn step [prevend] + (lazy-seq + (if (.find m) + (cons (.subSequence s prevend (.start m)) + (cons (re-groups m) + (step (+ (.start m) (count (.group m)))))) + (when (< prevend (.length s)) + (list (.subSequence s prevend (.length s))))))) + 0))) + +(defn join + "Returns a string of all elements in coll, separated by + separator. Like Perl's join." + [#^String separator coll] + (apply str (interpose separator coll))) + +(defn chop + "Removes the last character of string." + [#^String s] + (subs s 0 (dec (count s)))) + +(defn chomp + "Removes all trailing newline \\n or return \\r characters from + string. Note: String.trim() is similar and faster." + [#^String s] + (replace s #"[\r\n]+$" "")) + +(defn title-case [#^String s] + (throw (IllegalStateException. "title-case not implemented yet."))) + +(defn swap-case [#^String s] + (throw (IllegalStateException. "swap-case not implemented yet."))) + +(defn ltrim [#^String s] + (replace s #"^\s+" "")) + +(defn rtrim [#^String s] + (replace s #"\s+$" "")) + +(defn split-lines [#^String s] + (seq (.split #"\r?\n" s))) + + +;;; WRAPPERS + +;; The following functions are simple wrappers around java.lang.String +;; functions. They are included here for completeness, and for use +;; when mapping over a collection of strings. + +(defn upper-case [#^String s] + (.toUpperCase s)) + +(defn lower-case [#^String s] + (.toLowerCase s)) + +(defn split + ([#^String s #^Pattern re] (seq (.split re s))) + ([#^String s #^Pattern re limit] (seq (.split re s limit)))) + +(defn trim [#^String s] + (.trim s)) + +(defn contains? [#^String s substring] + (.contains s substring)) + +(defn get [#^String s i] + (.charAt s i)) + diff --git a/src/clojure/contrib/test_contrib/str_utils2.clj b/src/clojure/contrib/test_contrib/str_utils2.clj new file mode 100644 index 00000000..981b5d31 --- /dev/null +++ b/src/clojure/contrib/test_contrib/str_utils2.clj @@ -0,0 +1,39 @@ +(ns clojure.contrib.test-contrib.str-utils2 + (:require [clojure.contrib.str-utils2 :as s]) + (:use clojure.contrib.test-is)) + +(deftest t-blank + (is (s/blank? nil)) + (is (s/blank? "")) + (is (s/blank? " ")) + (is (s/blank? " \t \n \r ")) + (is (not (s/blank? " foo ")))) + +(deftest t-take + (is (= "foo" (s/take "foobar" 3))) + (is (= "foobar" (s/take "foobar" 7))) + (is (= "" (s/take "foo" 0)))) + +(deftest t-drop + (is (= "bar" (s/drop "foobar" 3))) + (is (= "" (s/drop "foobar" 9))) + (is (= "foobar" (s/drop "foobar" 0)))) + +(deftest t-butlast + (is (= "foob" (s/butlast "foobar" 2))) + (is (= "" (s/butlast "foobar" 9))) + (is (= "foobar" (s/butlast "foobar" 0)))) + +(deftest t-tail + (is (= "ar" (s/tail "foobar" 2))) + (is (= "foobar" (s/tail "foobar" 9))) + (is (= "" (s/tail "foobar" 0)))) + +(deftest t-replace + (is (= "faabar" (s/replace "foobar" \o \a))) + (is (= "barbarbar" (s/replace "foobarfoo" "foo" "bar"))) + (is (= "FOObarFOO" (s/replace "foobarfoo" #"foo" s/upper-case)))) + +(deftest t-replace-first + (is (= "barbarfoo" (s/replace-first "foobarfoo" #"foo" "bar"))) + (is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case)))) |