e.g. Calendar Search Help
You must enter a value before pressing Search
batik

Class: org.apache.batik.util.ParsedURLData   ©

 OK to copy?
001 /*
002 
003  ============================================================================
004                    The Apache Software License, Version 1.1
005  ============================================================================
006 
007  Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
008 
009  Redistribution and use in source and binary forms, with or without modifica-
010  tion, are permitted provided that the following conditions are met:
011 
012  1. Redistributions of  source code must  retain the above copyright  notice,
013     this list of conditions and the following disclaimer.
014 
015  2. Redistributions in binary form must reproduce the above copyright notice,
016     this list of conditions and the following disclaimer in the documentation
017     and/or other materials provided with the distribution.
018 
019  3. The end-user documentation included with the redistribution, if any, must
020     include  the following  acknowledgment:  "This product includes  software
021     developed  by the  Apache Software Foundation  (http://www.apache.org/)."
022     Alternately, this  acknowledgment may  appear in the software itself,  if
023     and wherever such third-party acknowledgments normally appear.
024 
025  4. The names "Batik" and  "Apache Software Foundation" must  not  be
026     used to  endorse or promote  products derived from  this software without
027     prior written permission. For written permission, please contact
028     apache@apache.org.
029 
030  5. Products  derived from this software may not  be called "Apache", nor may
031     "Apache" appear  in their name,  without prior written permission  of the
032     Apache Software Foundation.
033 
034  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
035  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
036  FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
037  APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
038  INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
039  DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
040  OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
041  ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
042  (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
043  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
044 
045  This software  consists of voluntary contributions made  by many individuals
046  on  behalf of the Apache Software  Foundation. For more  information on the
047  Apache Software Foundation, please see <http://www.apache.org/>.
048 
049 */
050 
051 package org.apache.batik.util;
052 
053 import java.io.BufferedInputStream;
054 import java.io.IOException;
055 import java.io.InputStream;
056 import java.net.HttpURLConnection;
057 import java.net.MalformedURLException;
058 import java.net.URL;
059 import java.net.URLConnection;
060 import java.util.Iterator;
061 import java.util.LinkedList;
062 import java.util.List;
063 import java.util.zip.GZIPInputStream;
064 import java.util.zip.InflaterInputStream;
065 import java.util.zip.ZipException;
066 
067 /**
068  * Holds the data for more URL's
069  *
070  * @author <a href="mailto:deweese@apache.org">Thomas DeWeese</a>
071  * @version $Id: ParsedURLData.java,v 1.14 2003/12/18 01:36:39 deweese Exp $ 
072  */
073 public class ParsedURLData {
074     
075     String HTTP_USER_AGENT_HEADER      = "User-Agent";
076 
077     String HTTP_ACCEPT_HEADER          = "Accept";
078     String HTTP_ACCEPT_LANGUAGE_HEADER = "Accept-Language";
079     String HTTP_ACCEPT_ENCODING_HEADER = "Accept-Encoding";
080 
081     protected static List acceptedEncodings = new LinkedList();
082     static {
083         acceptedEncodings.add("gzip");
084     }
085 
086     /**
087      * GZIP header magic number bytes, like found in a gzipped
088      * files, which are encoded in Intel format (ie. little indian).
089      */
090     public final static byte GZIP_MAGIC[] = {(byte)0x1f, (byte)0x8b};
091 
092     /**
093      * This is a utility function others can call that checks if
094      * is is a GZIP stream if so it returns a GZIPInputStream that
095      * will decode the contents, otherwise it returns (or a
096      * buffered version of is) untouched.
097      * @param is Stream that may potentially be a GZIP stream.
098      */
099     public static InputStream checkGZIP(InputStream is) 
100         throws IOException {
101 
102             if (!is.markSupported())
103                 is = new BufferedInputStream(is);
104             byte data[] = new byte[2];
105             try {
106                 is.mark(2);
107                 is.read(data);
108                 is.reset();
109             } catch (Exception ex) {
110                 is.reset();
111                 return is;
112             }
113         if ((data[0] == GZIP_MAGIC[0]) &&
114             (data[1] == GZIP_MAGIC[1]))
115             return new GZIPInputStream(is);
116 
117         if (((data[0]&0x0F)  == 8) &&
118             ((data[0]>>>4)   <= 7)) {
119             // Check for a zlib (deflate) stream 
120             int chk = ((((int)data[0])&0xFF)*256+
121                        (((int)data[1])&0xFF));
122             if ((chk %31)  == 0) {
123                 try {
124                     // I'm not really as certain of this check
125                     // as I would like so I want to force it
126                     // to decode part of the stream.
127                     is.mark(100);
128                     InputStream ret = new InflaterInputStream(is);
129                     if (!ret.markSupported())
130                         ret = new BufferedInputStream(ret);
131                     ret.mark(2);
132                     ret.read(data);
133                     is.reset();
134                     ret = new InflaterInputStream(is);
135                     return ret;
136                 } catch (ZipException ze) {
137                     is.reset();
138                     return is;
139                 }
140             }
141         }
142         
143         return is;
144     }
145 
146     /**
147      * Since the Data instance is 'hidden' in the ParsedURL
148      * instance we make all our methods public.  This makes it
149      * easy for the various Protocol Handlers to update an
150      * instance as parsing proceeds.
151      */
152     public String protocol        = null;
153     public String host            = null;
154     public int    port            = -1;
155     public String path            = null;
156     public String ref             = null;
157     public String contentType     = null;
158     public String contentEncoding = null;
159 
160     public InputStream stream     = null;
161     public boolean     hasBeenOpened  = false;
162 
163     /**
164      * Void constructor
165      */
166     public ParsedURLData() {
167     }
168 
169     /**
170      * Build from an existing URL.
171      */
172     public ParsedURLData(URL url) {
173         protocol = url.getProtocol();
174         if ((protocol != null) && (protocol.length() == 0)) 
175             protocol = null;
176 
177         host = url.getHost();
178         if ((host != null) && (host.length() == 0)) 
179             host = null;
180 
181         port     = url.getPort();
182 
183         path     = url.getFile();
184         if ((path != null) && (path.length() == 0)) 
185             path = null;
186 
Rate187         ref      = url.getRef();
188         if ((ref != null) && (ref.length() == 0))  
189             ref = null;
190     }
191 
192     /**
193      * Attempts to build a normal java.net.URL instance from this
194      * URL.
195      */
196     protected URL buildURL() throws MalformedURLException {
197 
198         // System.out.println("File: " + file);
199         // if (ref != null)
200         //     file += "#" + ref;
201         // System.err.println("Building: " + protocol + " - " + 
202         //                     host + " - " + path);
203 
204         if ((protocol != null) && (host != null)) {
205             String file = "";
206             if (path != null) 
207                 file = path;
208             if (port == -1)
209                 return new URL(protocol, host, file);
210 
211             return new URL(protocol, host, port, file);
212         }
213 
214         // System.err.println("toString: " + toString());
215         return new URL(toString());
216     }
217 
218     /**
219      * Implement Object.hashCode.
220      */
221     public int hashCode() {
222         int hc = port;
223         if (protocol != null) 
224             hc ^= protocol.hashCode();
225         if (host != null)
226             hc ^= host.hashCode();
227 
228         // For some URLS path and ref can get fairly long
229         // and the most unique part is towards the end
230         // so we grab that part for HC purposes
231         if (path != null) {
232             int len = path.length();
233             if (len > 20)
234                 hc ^= path.substring(len-20).hashCode();
235             else
236                 hc ^= path.hashCode();
237         }
238         if (ref != null) {
239             int len = ref.length();
240             if (len > 20)
241                 hc ^= ref.substring(len-20).hashCode();
242             else
243                 hc ^= ref.hashCode();
244         }
245 
246         return hc;
247     }
248 
249     /**
250      * Implement Object.equals for ParsedURLData.
251      */
252     public boolean equals(Object obj) {
253         if (obj == null) return false;
254         if (! (obj instanceof ParsedURLData)) 
255             return false;
256 
257         ParsedURLData ud = (ParsedURLData)obj;
258         if (ud.port != port)
259             return false;
260             
261         if (ud.protocol==null) {
262             if (protocol != null)
263                 return false;
264         } else if (protocol == null)
265             return false;
266         else if (!ud.protocol.equals(protocol))
267             return false;
268 
269         if (ud.host==null) {
270             if (host   !=null)
271                 return false;
272         } else if (host == null)
273             return false;
274         else if (!ud.host.equals(host))
275             return false;
276 
277         if (ud.ref==null) {
278             if (ref   !=null)
279                 return false;
280         } else if (ref == null)
281             return false;
282         else if (!ud.ref.equals(ref))
283             return false;
284 
285         if (ud.path==null) {
286             if (path   !=null)
287                 return false;
288         } else if (path == null)
289             return false;
290         else if (!ud.path.equals(path))
291             return false;
292 
293         return true;
294     }
295 
296     /**
297      * Returns the content type if available.  This is only available
298      * for some protocols.
299      */
300     public String getContentType(String userAgent) {
301         if (contentType != null)
302             return contentType;
303 
304         if (!hasBeenOpened) {
305             try {
306                 openStreamInternal(userAgent, null,  null);
307             } catch (IOException ioe) { /* nothing */ }
308         }
309 
310         return contentType;
311     }
312 
313     /**
314      * Returns the content encoding if available.  This is only available
315      * for some protocols.
316      */
317     public String getContentEncoding(String userAgent) {
318         if (contentEncoding != null)
319             return contentEncoding;
320 
321         if (!hasBeenOpened) {
322             try {
323                 openStreamInternal(userAgent, null,  null);
324             } catch (IOException ioe) { /* nothing */ }
325         }
326 
327         return contentEncoding;
328     }
329 
330     /**
331      * Returns true if the URL looks well formed and complete.
332      * This does not garuntee that the stream can be opened but
333      * is a good indication that things aren't totally messed up.
334      */
335     public boolean complete() {
336         try {
337             buildURL();
338         } catch (MalformedURLException mue) {
339             return false;
340         }
341         return true;
342     }
343 
344     /**
345      * Open the stream and check for common compression types.  If
346      * the stream is found to be compressed with a standard
347      * compression type it is automatically decompressed.
348      * @param userAgent The user agent opening the stream (may be null).
349      * @param mimeTypes The expected mime types of the content 
350      *        in the returned InputStream (mapped to Http accept
351      *        header among other possability).  The elements of
352      *        the iterator must be strings (may be null)
353      */
354     public InputStream openStream(String userAgent, Iterator mimeTypes) 
355         throws IOException {
356         InputStream raw = openStreamInternal(userAgent, mimeTypes, 
357                                              acceptedEncodings.iterator());
358         if (raw == null)
359             return null;
360         stream = null;
361                 
362         return checkGZIP(raw);
363     }
364 
365     /**
366      * Open the stream and returns it.  No checks are made to see
367      * if the stream is compressed or encoded in any way.
368      * @param userAgent The user agent opening the stream (may be null).
369      * @param mimeTypes The expected mime types of the content 
370      *        in the returned InputStream (mapped to Http accept
371      *        header among other possability).  The elements of
372      *        the iterator must be strings (may be null)
373      */
374     public InputStream openStreamRaw(String userAgent, Iterator mimeTypes) 
375         throws IOException {
376         
377         InputStream ret = openStreamInternal(userAgent, mimeTypes, null);
378         stream = null;
379         return ret;
380     }
381 
382     protected InputStream openStreamInternal(String userAgent,
383                                              Iterator mimeTypes,
384                                              Iterator encodingTypes) 
385         throws IOException {
386         if (stream != null)
387             return stream;
388         
389         hasBeenOpened = true;
390 
391         URL url = null;
392         try {
393             url = buildURL();
394         } catch (MalformedURLException mue) {
395             throw new IOException
396                 ("Unable to make sense of URL for connection");
397         }
398 
399         if (url == null)
400             return null;
401 
402         URLConnection urlC = url.openConnection();
403         if (urlC instanceof HttpURLConnection) {
404             if (userAgent != null)
405                 urlC.setRequestProperty(HTTP_USER_AGENT_HEADER, userAgent);
406 
407             if (mimeTypes != null) {
408                 String acceptHeader = "";
409                 while (mimeTypes.hasNext()) {
410                     acceptHeader += mimeTypes.next();
411                     if (mimeTypes.hasNext())
412                         acceptHeader += ",";
413                 }
414                 urlC.setRequestProperty(HTTP_ACCEPT_HEADER, acceptHeader);
415             }
416 
417             if (encodingTypes != null) {
418                 String encodingHeader = "";
419                 while (encodingTypes.hasNext()) {
420                     encodingHeader += encodingTypes.next();
421                     if (encodingTypes.hasNext())
422                         encodingHeader += ",";
423                 }
424                 urlC.setRequestProperty(HTTP_ACCEPT_ENCODING_HEADER, 
425                                         encodingHeader);
426             }
427 
428             contentType     = urlC.getContentType();
429             contentEncoding = urlC.getContentEncoding();
430         }
431 
432         return (stream = urlC.getInputStream());
433     }
434 
435     /**
436      * Returns the URL up to and include the port number on
437      * the host.  Does not include the path or fragment pieces.
438      */
439     public String getPortStr() {
440         String portStr ="";
441         if (protocol != null)
442             portStr += protocol + ":";
443 
444         if ((host != null) || (port != -1)) {
445             portStr += "//";
446             if (host != null) portStr += host;
447             if (port != -1)   portStr += ":" + port;
448         }
449 
450         return portStr;
451     }
452 
453     protected boolean sameFile(ParsedURLData other) {
454         if (this == other) return true;
455 
456         // Check if the rest of the two PURLs matche other than
457         // the 'ref'
458         if ((port      == other.port) &&
459             ((path     == other.path) 
460              || ((path!=null) && path.equals(other.path))) &&
461             ((host     == other.host) 
462              || ((host!=null) && host.equals(other.host))) &&
463             ((protocol == other.protocol) 
464              || ((protocol!=null) && protocol.equals(other.protocol))))
465             return true;
466 
467         return false;
468     }
469 
470 
471     /**
472      * Return a string representation of the data.
473      */
474     public String toString() {
475         String ret = getPortStr();
476         if (path != null)
477             ret += path;
478 
479         if (ref != null) 
480             ret += "#" + ref;
481 
482         return ret;
483     }
484 }


            
All Examples in File:
Example
Line
Rating (found
useful by...)
187 0% of 0