View Javadoc
1   /*
2    * Copyright 2010 FatWire Corporation. All Rights Reserved.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *    http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package com.fatwire.gst.foundation.url;
17  
18  import com.fatwire.cs.core.uri.Assembler;
19  import com.fatwire.cs.core.uri.Util;
20  import org.apache.commons.logging.Log;
21  import org.apache.commons.logging.LogFactory;
22  
23  import java.io.UnsupportedEncodingException;
24  import java.net.URI;
25  import java.net.URISyntaxException;
26  import java.util.*;
27  
28  /**
29   * Lightweight abstract assembler that handles property management, provides a
30   * logger, handles encoding and decoding and query string processing. Much
31   * lighter in weight than <code>com.fatwire.cs.core.uri.AbstractAssembler</code>
32   * .
33   * 
34   * @author Tony Field
35   * @since Sep 27, 2008
36   */
37  public abstract class LightweightAbstractAssembler implements Assembler {
38      /**
39       * Logger for use by sub-classes.
40       */
41      protected static final Log LOG = LogFactory.getLog(LightweightAbstractAssembler.class.getName());
42  
43      private static final String CHARSET_lower = "_charset_";
44      private static final String CHARSET_upper = "_CHARSET_";
45  
46      private final String encoding;
47  
48      private final Map<String, String> properties = new HashMap<String, String>();
49  
50      /**
51       * Constructor. Upon object construction, support for UTF-8 encoding is
52       * tested, and the result is cached for future use in the encode() and
53       * decode() methods.
54       * <p/>
55       * UTF-8 is the recommended URLEncoding:
56       * <ul>
57       * <li><a
58       * href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"
59       * >http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars</a></li>
60       * <li><a
61       * href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html"
62       * >http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html</a></li>;
63       * <li><a
64       * href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/
65       * rfc2396.txt</a></li>
66       * </ul>
67       */
68      protected LightweightAbstractAssembler() {
69          String enc = "UTF-8";
70          try {
71              Util.encode("fake string", enc);
72          } catch (UnsupportedEncodingException e) {
73              LOG.warn("UTF-8 encoding not supported by this platform. Using the platform's default encoding as the URL encoding.");
74              enc = null;
75          }
76          this.encoding = enc;
77      }
78  
79      public void setProperties(Properties props) {
80          Enumeration<?> en = props.propertyNames();
81          while (en.hasMoreElements()) {
82              String pName = (String) en.nextElement();
83              String pValue = props.getProperty(pName);
84              this.properties.put(pName, pValue);
85          }
86      }
87  
88      /**
89       * Convenience method to get a property value set into the assembler from
90       * the configuration files.
91       * 
92       * @param name name of property to import
93       * @param dephault default value of property - returned if the property
94       *            value is not specified
95       * @return property value or dephault value
96       */
97      protected String getProperty(String name, String dephault) {
98          String result = properties.get(name);
99          if (result == null) {
100             result = dephault;
101         }
102         return result;
103     }
104 
105     /**
106      * URLEncodes a string using the encoding specified by this class.
107      * 
108      * @param string the string to encode
109      * @return encoded string
110      * @throws IllegalStateException if UTF-8 encoding is not supported and the
111      *             platform's default encoding is not supported.
112      */
113     protected final String encode(String string) {
114         String result;
115         try {
116             if (string == null) {
117                 result = null;
118             } else {
119                 result = Util.encode(string, encoding);
120             }
121         } catch (UnsupportedEncodingException ex) {
122             String msg = "Unexpected failure encoding string '" + string + "'using an encoding (" + encoding
123                     + ").  Exception: " + ex;
124             throw new IllegalStateException(msg);
125         }
126         return result;
127     }
128 
129     /**
130      * URLDecodes a string using the encoding specified by this class.
131      * 
132      * @param string encoded string
133      * @return decoded string
134      * @throws IllegalStateException if UTF-8 encoding is not supported and the
135      *             platform's default encoding is not supported.
136      * @throws IllegalArgumentException if the string is not well-formed for
137      *             decoding.
138      */
139     protected final String decode(String string) {
140         return decode(string, null);
141     }
142 
143     /**
144      * URLDecodes a string using the encoding specified.
145      * 
146      * @param string encoded string
147      * @param encoding the encoding to use to decode the string. If null is
148      *            specified, the decoding specified by this class shall be used.
149      * @return decoded string
150      * @throws IllegalStateException if the encoding specified is not supported,
151      *             or if UTF-8 encoding is not supported and the platform's
152      *             default encoding is not supported.
153      * @throws IllegalArgumentException if the string is not well-formed for
154      *             decoding.
155      */
156     protected final String decode(String string, String encoding) {
157         String result;
158         if (string == null) {
159             result = null;
160         } else {
161             if (encoding == null) {
162                 encoding = this.encoding;
163             }
164             try {
165                 result = Util.decode(string, encoding);
166             } catch (IllegalArgumentException iae) {
167                 throw new IllegalArgumentException("Failure decoding string '" + string + "' using encoding '"
168                         + encoding + "'.  (" + iae.getMessage() + ")");
169             } catch (UnsupportedEncodingException ex) {
170                 // This is not expected to ever occur.
171                 throw new IllegalStateException("Unexpected failure decoding string '" + string + "'using encoding '"
172                         + encoding + "'.  (" + ex + ")");
173             }
174         }
175         return result;
176     }
177 
178     /**
179      * The multi-arg <code>java.net.URI</code> constructors quote illegal
180      * characters. However, this class requires that the query string already be
181      * properly URLEncoded. As a result, we can't use the multi-arg URI
182      * constructor because all of our % symbols and the + symbol will end up
183      * getting double-encoded. So, we need to construct a full URL ourselves so
184      * we can use the single-arg URI constructor, because it does not quote
185      * anything.
186      * <p/>
187      * There are multiple variants of combinations of these parameters to create
188      * a valid URL. Consult the URI specificaiton for what is allowed and what
189      * is not. The URI constructor will throw a URISyntaxException if required
190      * components are missing for a given combination.
191      * 
192      * @param scheme the URI scheme (protocol)
193      * @param authority the URI authority (host:port)
194      * @param path the path for the URI (servlet context path, servlet name,
195      *            pathinfo)
196      * @param quotedQueryString the query string, with illegal characters
197      *            already quoted.
198      * @param fragment the fragment (anchor)
199      * @return the valid URI with proper encoding
200      * @throws URISyntaxException if there is a problem with what is passed in
201      */
202     protected static final URI constructURI(final String scheme, final String authority, final String path,
203             final String quotedQueryString, final String fragment) throws URISyntaxException {
204         // Update, Feb 25, 2005 by Tony Field
205         StringBuilder bf = new StringBuilder();
206         if (scheme != null) {
207             bf.append(scheme).append(':'); // nothing legal can be quoted
208         }
209         if (authority != null) {
210             bf.append("//").append(authority); // nothing legal to quote until
211                                                // I18N URLs work
212         }
213         // Path needs quoting though, so let the URI object do it for us.
214         // Use the toASCIIString() method because we need the quoted values.
215         // (toString() is really just for readability and debugging, not
216         // programmatic use)
217         if (path != null) {
218             bf.append(new URI(null, null, path, null, null).getRawPath());
219         }
220         if (quotedQueryString != null) {
221             bf.append('?').append(quotedQueryString); // already quoted
222         }
223         // needs quoting
224         if (fragment != null) {
225             bf.append(new URI(null, null, null, null, fragment).toASCIIString());
226         }
227         URI uri = new URI(bf.toString());
228 
229         if (LOG.isDebugEnabled()) {
230             LOG.trace("Constructing new URI using the following components: \n" + "scheme=" + scheme + " \n"
231                     + "authority=" + authority + " \n" + "path=" + path + " \n" + "query=" + quotedQueryString + " \n"
232                     + "fragment=" + fragment);
233 
234             LOG.debug("Assembled URI: " + uri.toASCIIString());
235         }
236         return uri;
237     }
238 
239     /**
240      * Parse a query string and put the parameters into a map. Input parameters
241      * will be URLDecoded prior to their addition into the resultant map.
242      * <p/>
243      * Note that the map returned contains a <em><code>String[]</code> as the
244      * value, not a single <code>String</code> value</em> This provides support
245      * for query strings with multiple values for a given parameter name.
246      * <p/>
247      * This decoding method is smart enough to be able to interpret the
248      * <code>_charset_</code> URL parameter that is often used by IE.
249      * 
250      * @param qry
251      * @return map containing <code>String</code>/<code>String[]</code> pairs.
252      * @throws IllegalArgumentException if there are mistakes in the string that
253      *             make it impossible to parse.
254      */
255     protected final Map<String, String[]> parseQueryString(String qry) {
256         Map<String, String[]> rawPairs = new HashMap<String, String[]>();
257         if (qry == null) {
258             return rawPairs;
259         }
260         int inlen = qry.length();
261         if (inlen == 0) {
262             return rawPairs;
263         }
264 
265         if (LOG.isTraceEnabled()) {
266             LOG.trace("Parsing query string: " + qry);
267         }
268 
269         int iequal;
270         int iamper;
271         int startAt = 0;
272         boolean bDone = false;
273 
274         while (!bDone) {
275             String n;
276             String v;
277             if ((iequal = qry.indexOf("=", startAt)) != -1) {
278                 // End of current name=value is '&' or EOL
279                 iamper = qry.indexOf("&", iequal);
280                 n = qry.substring(startAt, iequal);
281                 n = n.trim(); // deal with accidental odd chars in the URL
282                 iequal++;
283                 if (iequal >= inlen) {
284                     break;
285                 }
286 
287                 if (iamper == -1) {
288                     v = qry.substring(iequal);
289                 } else {
290                     v = qry.substring(iequal, iamper);
291                 }
292 
293                 if (iamper != -1) {
294                     startAt = iamper + 1;
295                 } else {
296                     bDone = true;
297                 }
298 
299                 v = v.trim(); // deal with stupid value
300 
301                 // add the value to the result.
302                 String[] av = rawPairs.get(n);
303                 if (av == null) {
304                     av = new String[1];
305                     av[0] = v;
306                     rawPairs.put(n, av);
307                 } else {
308                     // param specified twice in the url.
309                     String[] newVal = new String[av.length + 1];
310                     System.arraycopy(av, 0, newVal, 0, av.length);
311                     newVal[av.length] = v;
312                     rawPairs.put(n, newVal);
313                 }
314             } else {
315                 break; // no more pairs
316             }
317         }
318 
319         // Figure out which encoding to use to decode the params
320         String[] _charset_ = rawPairs.get(CHARSET_lower) == null ? rawPairs.get(CHARSET_upper) : rawPairs
321                 .get(CHARSET_lower);
322         final String encoding;
323         if (_charset_ == null) {
324             encoding = null; // try to follow the spec
325         } else {
326             switch (_charset_.length) {
327                 case 0:
328                     throw new IllegalStateException(
329                             "Somehow an empty _charst_ param made it into our map. Impossible...");
330                 case 1:
331                     encoding = _charset_[0]; // url contains an override for the
332                                              // spec
333                     break;
334                 default:
335                     throw new IllegalStateException("Too many values of _charset_ found in the URL");
336             }
337         }
338 
339         // Decode the raw pairs using the proper encoding and set them into the
340         // result map
341         Map<String, String[]> res = new HashMap<String, String[]>(rawPairs.size());
342         for (String rawKey : rawPairs.keySet()) {
343             String key = decode(rawKey, encoding);
344             String[] val = rawPairs.get(rawKey);
345             for (int i = 0; i < val.length; i++) {
346                 String rawVal = val[i];
347                 val[i] = decode(rawVal, encoding);
348 
349                 if (LOG.isTraceEnabled()) {
350                     StringBuilder bf = new StringBuilder("Parsing query string.  Found raw pair [name]=[value]: ");
351                     bf.append('[').append(rawKey).append(']').append('=').append('[').append(rawVal).append(']');
352                     bf.append(" decoded to: ");
353                     bf.append('[').append(key).append(']').append('=').append('[').append(val[i]).append(']');
354                     LOG.trace(bf);
355                 }
356             }
357             res.put(key, val);
358         }
359 
360         return res;
361     }
362 
363     /**
364      * Given an input map of name-value pairs, construct a query string. This
365      * supports multiple values for any given parameter. Names and values are
366      * properly encoded.
367      * 
368      * @param parameters parameters to encode and place in the query string
369      * @return the query string, or null if no values needed to be added.
370      * @see #encode(String)
371      */
372     protected final String constructQueryString(Map<String, String[]> parameters) {
373         StringBuilder qryStr = new StringBuilder();
374         for (String key : parameters.keySet()) {
375             String[] vals = parameters.get(key);
376             if (vals != null) {
377                 // Loop through the values for the parameter
378                 for (String val : vals) {
379                     if (val != null && val.length() > 0) {
380                         // Append the correct separator
381                         if (qryStr.length() > 0) {
382                             qryStr.append('&');
383                         }
384 
385                         // Append the name and value to the URL
386                         if (LOG.isTraceEnabled()) {
387                             StringBuilder bf = new StringBuilder("About to add [key]=[value] to url [" + key + "]=["
388                                     + val + "]");
389                             bf.append(" after encoding: [").append(encode(key)).append("]=[").append(encode(val))
390                                     .append("]");
391                             LOG.trace(bf);
392 
393                         }
394                         qryStr.append(encode(key)).append('=').append(encode(val));
395                     }
396                 }
397             }
398         }
399 
400         // prepare result
401         if (qryStr.length() > 0) {
402             return qryStr.toString();
403         } else {
404             return null;
405         }
406     }
407 
408     /**
409      * Given an array of query-string-like packed arguments, eliminate the
410      * specified parameters and return the packedargs parameter with the values
411      * stripped.
412      * 
413      * @param origPackedargsStrings array of query string-like packed args.
414      * @param toExclude list of args to remove from the packed args.
415      * @return array the same length as the original array, containing the same
416      *         values, except the <code>toExclude</code> parameters are removed.
417      *         If all params end up getting removed, the packedargs string ends
418      *         up being null. The array returned is never null though.
419      * @throws IllegalArgumentException if the input args or the input list are
420      *             null.
421      */
422     protected final String[] excludeFromPackedargs(String[] origPackedargsStrings, Collection<String> toExclude) {
423         if (origPackedargsStrings == null) {
424             throw new IllegalArgumentException("OrigPackedArgsStrings must not be null");
425         }
426         if (toExclude == null) {
427             throw new IllegalArgumentException("ToExclude list may not be null");
428         }
429 
430         String[] newPackedargsStrings = new String[origPackedargsStrings.length];
431 
432         for (int i = 0; i < origPackedargsStrings.length; i++) {
433             Map<String, String[]> oldPacked = parseQueryString(origPackedargsStrings[i]);
434             Map<String, String[]> newPacked = new HashMap<String, String[]>();
435             for (String opK : oldPacked.keySet()) {
436                 if (LOG.isTraceEnabled()) {
437                     LOG.trace("checking to see if a param should be excluded from packedargs: " + opK);
438                 }
439                 if (!toExclude.contains(opK)) {
440                     newPacked.put(opK, oldPacked.get(opK));
441                 }
442             }
443 
444             newPackedargsStrings[i] = constructQueryString(newPacked);
445 
446         }
447 
448         return newPackedargsStrings;
449     }
450 }