aboutsummaryrefslogtreecommitdiff
path: root/src/clojure/contrib/http/agent.clj
blob: 6a3e082f022a4a6362b88ac092df54f210e3700e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
;;; http/agent.clj: agent-based asynchronous HTTP client

;; by Stuart Sierra, http://stuartsierra.com/
;; August 17, 2009

;; Copyright (c) Stuart Sierra, 2009. All rights reserved.  The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution.  By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license.  You must not
;; remove this notice, or any other, from this software.


(ns #^{:doc "Agent-based asynchronous HTTP client.

  This is an HTTP client library based on Java's HttpURLConnection
  class and Clojure's Agent system.  It allows you to make multiple
  HTTP requests in parallel.

  Start an HTTP request with the 'http-agent' function, which
  immediately returns a Clojure Agent.  You will never deref this
  agent; that is handled by the accessor functions.  The agent will
  execute the HTTP request on a separate thread.

  If you pass a :handler function to http-agent, that function will be
  called as soon as the HTTP response body is ready.  The handler
  function is called with one argument, the HTTP agent itself.  The
  handler can read the response body by calling the 'stream' function
  on the agent.

  The value returned by the handler function becomes part of the state
  of the agent, and you can retrieve it with the 'result' function.
  If you call 'result' before the HTTP request has finished, it will
  block until the handler function returns.

  If you don't provide a handler function, the default handler will
  buffer the entire response body in memory, which you can retrieve
  with the 'bytes', 'string', or 'stream' functions.  Like 'result',
  these functions will block until the HTTP request is completed.

  If you want to check if an HTTP request is finished without
  blocking, use the 'done?' function.

  A single GET request could be as simple as:

    (string (http-agent \"http://www.stuartsierra.com/\"))

  A simple POST might look like:

    (http-agent \"http...\" :method \"POST\" :body \"foo=1\")

  And you could write the response directly to a file like this:

    (require '[clojure.contrib.duck-streams :as d])

    (http-agent \"http...\"
                :handler (fn [agnt] 
                           (with-open [w (d/writer \"/tmp/out\")] 
                             (d/copy (stream agnt) w))))
"
       :author "Stuart Sierra"
       }

  clojure.contrib.http.agent
  (:refer-clojure :exclude [bytes])
  (:require [clojure.contrib.http.connection :as c]
            [clojure.contrib.duck-streams :as duck])
  (:import (java.io InputStream ByteArrayOutputStream
                    ByteArrayInputStream)
           (java.net HttpURLConnection)))


;;; PRIVATE

;; Forward declarations: 'result' and 'stream' are defined further down
;; in this file, but get-byte-buffer and buffer-bytes call them before
;; that point.
(declare result stream)

(defn- setup-http-connection
  "Configures the HttpURLConnection before the request is sent.

  Applies the :connect-timeout and :read-timeout options (in
  milliseconds) when they are present, then sets the request method,
  the redirect behavior, and every request header found in the
  :headers option.  Returns nil."
  [#^HttpURLConnection conn options]
  ;; Timeouts are applied first; when-let skips an option that is
  ;; absent, leaving the connection's own default in place.
  (when-let [timeout (:connect-timeout options)]
    (.setConnectTimeout conn (int timeout)))
  (when-let [timeout (:read-timeout options)]
    (.setReadTimeout conn (int timeout)))
  (.setRequestMethod conn (:method options))
  (.setInstanceFollowRedirects conn (:follow-redirects options))
  ;; :headers may be a map or a sequence of [name value] pairs.
  (doseq [[header-name header-value] (:headers options)]
    (.setRequestProperty conn header-name header-value)))

(defn- start-request
  "Agent action: configures the connection held under ::connection,
  begins sending the request (with the :body option as the entity
  body), and returns the agent state marked ::started."
  [state options]
  (let [connection   (::connection state)
        request-body (:body options)]
    (setup-http-connection connection options)
    (c/start-http-connection connection request-body)
    (assoc state ::state ::started)))

(defn- connection-success?
  "Returns true if the HttpURLConnection response code is in the 2xx
  range."
  [#^HttpURLConnection conn]
  ;; Integer division by 100 leaves only the leading digit of the
  ;; three-digit status code.
  (= 2 (quot (.getResponseCode conn) 100)))

(defn- open-response
  "Agent action: obtains the response body stream from the connection
  and stores it under ::response-stream, marking the state
  ::receiving.  The input stream is used for a 2xx response and the
  error stream otherwise.  Blocks until the response is available."
  [state options]
  (let [#^HttpURLConnection conn (::connection state)
        body (if (connection-success? conn)
               (.getInputStream conn)
               (.getErrorStream conn))]
    (assoc state
      ::response-stream body
      ::state ::receiving)))

(defn- handle-response
  "Agent action that calls the provided handler function, with no
  arguments, and sets the ::result key of the agent to the handler's
  return value.  The state is marked ::finished.

  The options argument is accepted for symmetry with the other agent
  actions and is not used."
  [state handler options]
  (assoc state
    ::result (handler)
    ::state ::finished))

(defn- disconnect
  "Agent action: closes the response body stream if one is present,
  disconnects the HttpURLConnection, and returns the state with
  ::response-stream cleared and ::state set to ::disconnected."
  [state options]
  (let [#^InputStream body (::response-stream state)
        #^HttpURLConnection conn (::connection state)]
    (when body
      (.close body))
    (.disconnect conn)
    (assoc state
      ::response-stream nil
      ::state ::disconnected)))

(defn- status-in-range?
  "Returns true if the leading digit of the HTTP agent's response
  status code equals digit, an Integer (e.g. 4 matches 400-499)."
  [digit http-agnt]
  (let [#^HttpURLConnection conn (::connection @http-agnt)
        leading (quot (.getResponseCode conn) 100)]
    (= digit leading)))

(defn- #^ByteArrayOutputStream get-byte-buffer
  "Returns the handler result of the HTTP agent when it is a
  ByteArrayOutputStream; throws an Exception otherwise.  Waits for the
  result via 'result'."
  [http-agnt]
  (let [handler-result (result http-agnt)]
    (when-not (instance? ByteArrayOutputStream handler-result)
      (throw (Exception. "Handler result was not a ByteArrayOutputStream")))
    handler-result))


(defn buffer-bytes
  "Default result handler for HTTP agents.  Copies the whole response
  body into a fresh java.io.ByteArrayOutputStream and returns that
  buffer; the 'stream', 'string', and 'bytes' functions read from it
  afterwards."
  [http-agnt]
  (let [sink   (ByteArrayOutputStream.)
        ;; With no response stream, an empty string is copied instead,
        ;; leaving the buffer empty.
        source (or (stream http-agnt) "")]
    (duck/copy source sink)
    sink))


;;; CONSTRUCTOR

;; Default option values for http-agent.  Options supplied by the
;; caller are merged over this map, so any key given there replaces
;; the value listed here.
(def *http-agent-defaults*
     {:method "GET"
      :headers {}
      :body nil
      :connect-timeout 0   ; documented by http-agent as "no timeout"
      :read-timeout 0      ; documented by http-agent as "no timeout"
      :follow-redirects true
      :handler buffer-bytes})

(defn http-agent
  "Creates (and immediately returns) an Agent representing an HTTP
  request running in a new thread.

  options are key/value pairs:

  :method string

  The HTTP method name.  Default is \"GET\".

  :headers h

  HTTP headers, as a Map or a sequence of pairs like 
  ([key1,value1], [key2,value2])  Default is an empty map (no extra
  headers).

  :body b
  
  HTTP request entity body, one of nil, String, byte[], InputStream,
  Reader, or File.  Default is nil.

  :connect-timeout int

  Timeout value, in milliseconds, when opening a connection to the
  URL.  Default is zero, meaning no timeout.

  :read-timeout int

  Timeout value, in milliseconds, when reading data from the
  connection.  Default is zero, meaning no timeout.

  :follow-redirects boolean

  If true, HTTP 3xx redirects will be followed automatically.  Default
  is true.

  :handler f

  Function to be called when the HTTP response body is ready.  If you
  do not provide a handler function, the default is to buffer the
  entire response body in memory.

  The handler function will be called with the HTTP agent as its
  argument, and can use the 'stream' function to read the response
  body.  The return value of this function will be stored in the state
  of the agent and can be retrieved with the 'result' function.  Any
  exceptions thrown by this function will be added to the agent's
  error queue (see agent-errors).  The default function collects the
  response stream in a memory buffer.
  "
  ([uri & options]
     (let [opts (merge *http-agent-defaults* (apply array-map options))
           a (agent {::connection (c/http-connection uri)
                     ::state ::created
                     ::uri uri
                     ::options opts})]
       ;; The four actions are queued in order on the same agent:
       ;; send, open the response, run the handler, clean up.
       (send-off a start-request opts)
       (send-off a open-response opts)
       ;; The handler is pre-bound to the agent so handle-response can
       ;; call it with no arguments.
       (send-off a handle-response (partial (:handler opts) a) opts)
       ;; send-off returns the agent, which is the return value here.
       (send-off a disconnect opts))))


;;; RESPONSE BODY ACCESSORS

(defn result
  "Blocks until the actions queued on the HTTP agent have run, then
  returns the value the :handler function produced (stored under
  ::result).  With the default handler this is a
  ByteArrayOutputStream."
  [http-agnt]
  (await http-agnt)
  (let [final-state (deref http-agnt)]
    (get final-state ::result)))

(defn stream
  "Returns an InputStream over the HTTP response body.

  While the agent is in the ::receiving state (i.e. when called from
  inside the handler function passed to http-agent) this is the raw
  HttpURLConnection stream.

  In any other state it is a new ByteArrayInputStream over the bytes
  buffered by the default handler function."
  [http-agnt]
  (let [snapshot   @http-agnt
        receiving? (= (::state snapshot) ::receiving)]
    (if receiving?
      (::response-stream snapshot)
      (let [buffer (get-byte-buffer http-agnt)]
        (ByteArrayInputStream. (.toByteArray buffer))))))

(defn bytes
  "Returns a Java byte array of the content returned by the server.
  Blocks until the HTTP request is completed.  Throws an Exception if
  the handler result is not a ByteArrayOutputStream (i.e. a custom
  :handler returned something else)."
  [http-agnt]
  (.toByteArray (get-byte-buffer http-agnt)))

(defn- supported-charset?
  "Returns true if charset-name names a character set this JVM can
  decode; false for nil, malformed, or unknown names."
  [#^String charset-name]
  (try
   (if (and charset-name
            (java.nio.charset.Charset/isSupported charset-name))
     true
     false)
   ;; Charset/isSupported throws IllegalCharsetNameException (a
   ;; subclass of IllegalArgumentException) for malformed names.
   (catch IllegalArgumentException e
     false)))

(defn- response-charset
  "Returns the name of the character set declared by the response, or
  nil if none is usable.  The charset parameter of the Content-Type
  header is preferred.  The Content-Encoding header is consulted only
  as a fallback, and only when its value is a supported charset name,
  so values such as gzip are ignored."
  [#^HttpURLConnection conn]
  (let [content-type (.getContentType conn)
        declared (when content-type
                   (second (re-find #"(?i)charset\s*=\s*\"?([^\s;\"]+)"
                                    content-type)))
        legacy (.getContentEncoding conn)]
    (cond
     (supported-charset? declared) declared
     (supported-charset? legacy) legacy
     :else nil)))

(defn string
  "Returns the HTTP response body as a string, using the given
  encoding.

  If no encoding is given, uses the character set specified in the
  server headers (the charset parameter of Content-Type), or
  clojure.contrib.duck-streams/*default-encoding* if it is not
  specified.  Blocks until the HTTP request is completed."
  ([http-agnt]
     (await http-agnt) ;; have to wait for the response headers
     (string http-agnt (or (response-charset (::connection @http-agnt))
                           duck/*default-encoding*)))
  ([http-agnt #^String encoding]
     (.toString (get-byte-buffer http-agnt) encoding)))


;;; REQUEST ACCESSORS

(defn request-uri
  "Returns the URI/URL that was passed to http-agent for this HTTP
  agent."
  [http-agnt]
  (-> http-agnt deref ::uri))

(defn request-headers
  "Returns the :headers option stored with this HTTP agent, i.e. the
  request headers it was created with."
  [http-agnt]
  (-> http-agnt deref ::options :headers))

(defn method
  "Returns the :method option stored with this HTTP agent, i.e. the
  HTTP method name of the request."
  [http-agnt]
  (-> http-agnt deref ::options :method))

(defn request-body
  "Returns the :body option stored with this HTTP agent, i.e. the
  request entity body it was created with.

  Note: if the request body was an InputStream or a Reader, it will no
  longer be usable."
  [http-agnt]
  (-> http-agnt deref ::options :body))


;;; RESPONSE ACCESSORS

(defn done?
  "Returns true when the agent's state is ::finished or
  ::disconnected, false otherwise.  Only reads the current agent
  state; does not wait for pending actions."
  [http-agnt]
  (let [current (::state @http-agnt)]
    (contains? #{::finished ::disconnected} current)))

(defn status
  "Returns the HTTP response status code (e.g. 200, 404) of this
  request as an Integer once the request is done (see 'done?');
  returns nil before that."
  [http-agnt]
  (if (done? http-agnt)
    (let [#^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseCode conn))
    nil))

(defn message
  "Returns the HTTP response message (e.g. 'Not Found') of this
  request once the request is done (see 'done?'); returns nil before
  that."
  [http-agnt]
  (if (done? http-agnt)
    (let [#^HttpURLConnection conn (::connection @http-agnt)]
      (.getResponseMessage conn))
    nil))

(defn headers
  "Returns a map of HTTP response headers.  Header names are converted
  to keywords in all lower-case.  Header values are strings.  If a
  header appears more than once, only the last value is returned.  A
  header field with no name is stored under the key nil."
  [http-agnt]
  (let [#^HttpURLConnection conn (::connection @http-agnt)
        fields (.getHeaderFields conn)]
    (into {}
          (for [[#^String field-name values] fields]
            [(when field-name
               (keyword (.toLowerCase field-name)))
             (last values)]))))

(defn headers-seq
  "Returns the HTTP response headers in order as a lazy sequence of
  [String,String] pairs.  The first 'header' name may be null for the
  HTTP status line.  The sequence ends at the first index for which
  the connection reports no header value."
  [http-agnt]
  (let [#^HttpURLConnection conn (::connection @http-agnt)
        step (fn step [i]
               ;; lazy-seq sits inside the recursion so each pair is
               ;; produced on demand and the recursion never nests.
               (lazy-seq
                 ;; Get value first because first key may be nil.
                 (when-let [value (.getHeaderField conn (int i))]
                   (cons [(.getHeaderFieldKey conn (int i)) value]
                         (step (inc i))))))]
    (step 0)))


;;; RESPONSE STATUS CODE ACCESSORS

(defn success?
  "True when the HTTP response code of this agent lies in 200-299."
  [http-agnt]
  (let [success-class 2]
    (status-in-range? success-class http-agnt)))

(defn redirect?
  "True when the HTTP response code of this agent lies in 300-399.

  Note: if the :follow-redirects option was true (the default),
  redirects will be followed automatically and the agent will never
  return a 3xx response code."
  [http-agnt]
  (let [redirect-class 3]
    (status-in-range? redirect-class http-agnt)))

(defn client-error?
  "True when the HTTP response code of this agent lies in 400-499."
  [http-agnt]
  (let [client-error-class 4]
    (status-in-range? client-error-class http-agnt)))

(defn server-error?
  "True when the HTTP response code of this agent lies in 500-599."
  [http-agnt]
  (let [server-error-class 5]
    (status-in-range? server-error-class http-agnt)))

(defn error?
  "True when the HTTP response code of this agent is a client error
  (400-499) or a server error (500-599)."
  [http-agnt]
  ;; The server-error check only runs when the client-error check
  ;; fails.
  (if (client-error? http-agnt)
    true
    (server-error? http-agnt)))