View Javadoc
1   /*
2    * Copyright 2010 FatWire Corporation. All Rights Reserved.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *    http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package tools.gsf.url;
17  
18  import com.fatwire.cs.core.uri.Assembler;
19  import com.fatwire.cs.core.uri.Util;
20  import org.slf4j.Logger;
21  import org.slf4j.LoggerFactory;
22  
23  import java.io.UnsupportedEncodingException;
24  import java.net.URI;
25  import java.net.URISyntaxException;
26  import java.util.Collection;
27  import java.util.Enumeration;
28  import java.util.HashMap;
29  import java.util.Map;
30  import java.util.Properties;
31  
32  /**
33   * Lightweight abstract assembler that handles property management, provides a
34   * logger, handles encoding and decoding and query string processing. Much
35   * lighter in weight than <code>com.fatwire.cs.core.uri.AbstractAssembler</code>
36   * .
37   *
38   * @author Tony Field
39   * @since Sep 27, 2008
40   */
41  public abstract class LightweightAbstractAssembler implements Assembler {
42      /**
43       * Logger for use by sub-classes.
44       */
45      protected static final Logger LOG = LoggerFactory.getLogger("tools.gsf.url.LightweightAbstractAssembler");
46  
47      private static final String CHARSET_lower = "_charset_";
48      private static final String CHARSET_upper = "_CHARSET_";
49  
50      private final String encoding;
51  
52      private final Map<String, String> properties = new HashMap<String, String>();
53  
54      /**
55       * Constructor. Upon object construction, support for UTF-8 encoding is
56       * tested, and the result is cached for future use in the encode() and
57       * decode() methods.
58       * <p>
59       * UTF-8 is the recommended URLEncoding:
60       * <ul>
61       * <li><a
62       * href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"
63       * >http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars</a></li>
64       * <li><a
65       * href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html"
66       * >http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html</a></li>;
67       * <li><a
68       * href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/
69       * rfc2396.txt</a></li>
70       * </ul>
71       */
72      protected LightweightAbstractAssembler() {
73          String enc = "UTF-8";
74          try {
75              Util.encode("fake string", enc);
76          } catch (UnsupportedEncodingException e) {
77              LOG.warn("UTF-8 encoding not supported by this platform. Using the platform's default encoding as the URL encoding.");
78              enc = null;
79          }
80          this.encoding = enc;
81      }
82  
83      public void setProperties(Properties props) {
84          Enumeration<?> en = props.propertyNames();
85          while (en.hasMoreElements()) {
86              String pName = (String) en.nextElement();
87              String pValue = props.getProperty(pName);
88              this.properties.put(pName, pValue);
89          }
90      }
91  
92      /**
93       * Convenience method to get a property value set into the assembler from
94       * the configuration files.
95       *
96       * @param name     name of property to import
97       * @param dephault default value of property - returned if the property
98       *                 value is not specified
99       * @return property value or dephault value
100      */
101     protected String getProperty(String name, String dephault) {
102         String result = properties.get(name);
103         if (result == null) {
104             result = dephault;
105         }
106         return result;
107     }
108 
109     /**
110      * URLEncodes a string using the encoding specified by this class.
111      *
112      * @param string the string to encode
113      * @return encoded string
114      * @throws IllegalStateException if UTF-8 encoding is not supported and the
115      *                               platform's default encoding is not supported.
116      */
117     protected final String encode(String string) {
118         String result;
119         try {
120             if (string == null) {
121                 result = null;
122             } else {
123                 result = Util.encode(string, encoding);
124             }
125         } catch (UnsupportedEncodingException ex) {
126             String msg = "Unexpected failure encoding string '" + string + "'using an encoding (" + encoding
127                     + ").  Exception: " + ex;
128             throw new IllegalStateException(msg);
129         }
130         return result;
131     }
132 
133     /**
134      * URLDecodes a string using the encoding specified by this class.
135      *
136      * @param string encoded string
137      * @return decoded string
138      * @throws IllegalStateException    if UTF-8 encoding is not supported and the
139      *                                  platform's default encoding is not supported.
140      * @throws IllegalArgumentException if the string is not well-formed for
141      *                                  decoding.
142      */
143     protected final String decode(String string) {
144         return decode(string, null);
145     }
146 
147     /**
148      * URLDecodes a string using the encoding specified.
149      *
150      * @param string   encoded string
151      * @param encoding the encoding to use to decode the string. If null is
152      *                 specified, the decoding specified by this class shall be used.
153      * @return decoded string
154      * @throws IllegalStateException    if the encoding specified is not supported,
155      *                                  or if UTF-8 encoding is not supported and the platform's
156      *                                  default encoding is not supported.
157      * @throws IllegalArgumentException if the string is not well-formed for
158      *                                  decoding.
159      */
160     protected final String decode(String string, String encoding) {
161         String result;
162         if (string == null) {
163             result = null;
164         } else {
165             if (encoding == null) {
166                 encoding = this.encoding;
167             }
168             try {
169                 result = Util.decode(string, encoding);
170             } catch (IllegalArgumentException iae) {
171                 throw new IllegalArgumentException("Failure decoding string '" + string + "' using encoding '"
172                         + encoding + "'.  (" + iae.getMessage() + ")");
173             } catch (UnsupportedEncodingException ex) {
174                 // This is not expected to ever occur.
175                 throw new IllegalStateException("Unexpected failure decoding string '" + string + "'using encoding '"
176                         + encoding + "'.  (" + ex + ")");
177             }
178         }
179         return result;
180     }
181 
182     /**
183      * The multi-arg <code>java.net.URI</code> constructors quote illegal
184      * characters. However, this class requires that the query string already be
185      * properly URLEncoded. As a result, we can't use the multi-arg URI
186      * constructor because all of our % symbols and the + symbol will end up
187      * getting double-encoded. So, we need to construct a full URL ourselves so
188      * we can use the single-arg URI constructor, because it does not quote
189      * anything.
190      * <p>
191      * There are multiple variants of combinations of these parameters to create
192      * a valid URL. Consult the URI specificaiton for what is allowed and what
193      * is not. The URI constructor will throw a URISyntaxException if required
194      * components are missing for a given combination.
195      *
196      * @param scheme            the URI scheme (protocol)
197      * @param authority         the URI authority (host:port)
198      * @param path              the path for the URI (servlet context path, servlet name,
199      *                          pathinfo)
200      * @param quotedQueryString the query string, with illegal characters
201      *                          already quoted.
202      * @param fragment          the fragment (anchor)
203      * @return the valid URI with proper encoding
204      * @throws URISyntaxException if there is a problem with what is passed in
205      */
206     protected static final URI constructURI(final String scheme, final String authority, final String path,
207                                             final String quotedQueryString, final String fragment) throws URISyntaxException {
208         // Update, Feb 25, 2005 by Tony Field
209         StringBuilder bf = new StringBuilder();
210         if (scheme != null) {
211             bf.append(scheme).append(':'); // nothing legal can be quoted
212         }
213         if (authority != null) {
214             bf.append("//").append(authority); // nothing legal to quote until
215             // I18N URLs work
216         }
217         // Path needs quoting though, so let the URI object do it for us.
218         // Use the toASCIIString() method because we need the quoted values.
219         // (toString() is really just for readability and debugging, not
220         // programmatic use)
221         if (path != null) {
222             bf.append(new URI(null, null, path, null, null).getRawPath());
223         }
224         if (quotedQueryString != null) {
225             bf.append('?').append(quotedQueryString); // already quoted
226         }
227         // needs quoting
228         if (fragment != null) {
229             bf.append(new URI(null, null, null, null, fragment).toASCIIString());
230         }
231         URI uri = new URI(bf.toString());
232 
233         if (LOG.isDebugEnabled()) {
234             LOG.trace("Constructing new URI using the following components: \n" + "scheme=" + scheme + " \n"
235                     + "authority=" + authority + " \n" + "path=" + path + " \n" + "query=" + quotedQueryString + " \n"
236                     + "fragment=" + fragment);
237 
238             LOG.debug("Assembled URI: " + uri.toASCIIString());
239         }
240         return uri;
241     }
242 
243     /**
244      * Parse a query string and put the parameters into a map. Input parameters
245      * will be URLDecoded prior to their addition into the resultant map.
246      * <p>
247      * Note that the map returned contains a <em><code>String[]</code> as the
248      * value, not a single <code>String</code> value</em> This provides support
249      * for query strings with multiple values for a given parameter name.
250      * <p>
251      * This decoding method is smart enough to be able to interpret the
252      * <code>_charset_</code> URL parameter that is often used by IE.
253      *
254      * @param qry string value for query
255      * @return map containing <code>String</code>/<code>String[]</code> pairs.
256      * @throws IllegalArgumentException if there are mistakes in the string that
257      *                                  make it impossible to parse.
258      */
259     protected final Map<String, String[]> parseQueryString(String qry) {
260         Map<String, String[]> rawPairs = new HashMap<String, String[]>();
261         if (qry == null) {
262             return rawPairs;
263         }
264         int inlen = qry.length();
265         if (inlen == 0) {
266             return rawPairs;
267         }
268 
269         if (LOG.isTraceEnabled()) {
270             LOG.trace("Parsing query string: " + qry);
271         }
272 
273         int iequal;
274         int iamper;
275         int startAt = 0;
276         boolean bDone = false;
277 
278         while (!bDone) {
279             String n;
280             String v;
281             if ((iequal = qry.indexOf("=", startAt)) != -1) {
282                 // End of current name=value is '&' or EOL
283                 iamper = qry.indexOf("&", iequal);
284                 n = qry.substring(startAt, iequal);
285                 n = n.trim(); // deal with accidental odd chars in the URL
286                 iequal++;
287                 if (iequal >= inlen) {
288                     break;
289                 }
290 
291                 if (iamper == -1) {
292                     v = qry.substring(iequal);
293                 } else {
294                     v = qry.substring(iequal, iamper);
295                 }
296 
297                 if (iamper != -1) {
298                     startAt = iamper + 1;
299                 } else {
300                     bDone = true;
301                 }
302 
303                 v = v.trim(); // deal with stupid value
304 
305                 // add the value to the result.
306                 String[] av = rawPairs.get(n);
307                 if (av == null) {
308                     av = new String[1];
309                     av[0] = v;
310                     rawPairs.put(n, av);
311                 } else {
312                     // param specified twice in the url.
313                     String[] newVal = new String[av.length + 1];
314                     System.arraycopy(av, 0, newVal, 0, av.length);
315                     newVal[av.length] = v;
316                     rawPairs.put(n, newVal);
317                 }
318             } else {
319                 break; // no more pairs
320             }
321         }
322 
323         // Figure out which encoding to use to decode the params
324         String[] _charset_ = rawPairs.get(CHARSET_lower) == null ? rawPairs.get(CHARSET_upper) : rawPairs
325                 .get(CHARSET_lower);
326         final String encoding;
327         if (_charset_ == null) {
328             encoding = null; // try to follow the spec
329         } else {
330             switch (_charset_.length) {
331                 case 0:
332                     throw new IllegalStateException(
333                             "Somehow an empty _charst_ param made it into our map. Impossible...");
334                 case 1:
335                     encoding = _charset_[0]; // url contains an override for the
336                     // spec
337                     break;
338                 default:
339                     throw new IllegalStateException("Too many values of _charset_ found in the URL");
340             }
341         }
342 
343         // Decode the raw pairs using the proper encoding and set them into the
344         // result map
345         Map<String, String[]> res = new HashMap<String, String[]>(rawPairs.size());
346         for (String rawKey : rawPairs.keySet()) {
347             String key = decode(rawKey, encoding);
348             String[] val = rawPairs.get(rawKey);
349             for (int i = 0; i < val.length; i++) {
350                 String rawVal = val[i];
351                 val[i] = decode(rawVal, encoding);
352 
353                 if (LOG.isTraceEnabled()) {
354                     StringBuilder bf = new StringBuilder("Parsing query string.  Found raw pair [name]=[value]: ");
355                     bf.append('[').append(rawKey).append(']').append('=').append('[').append(rawVal).append(']');
356                     bf.append(" decoded to: ");
357                     bf.append('[').append(key).append(']').append('=').append('[').append(val[i]).append(']');
358                     LOG.trace(bf.toString());
359                 }
360             }
361             res.put(key, val);
362         }
363 
364         return res;
365     }
366 
367     /**
368      * Given an input map of name-value pairs, construct a query string. This
369      * supports multiple values for any given parameter. Names and values are
370      * properly encoded.
371      *
372      * @param parameters parameters to encode and place in the query string
373      * @return the query string, or null if no values needed to be added.
374      * @see #encode(String)
375      */
376     protected final String constructQueryString(Map<String, String[]> parameters) {
377         StringBuilder qryStr = new StringBuilder();
378         for (String key : parameters.keySet()) {
379             String[] vals = parameters.get(key);
380             if (vals != null) {
381                 // Loop through the values for the parameter
382                 for (String val : vals) {
383                     if (val != null && val.length() > 0) {
384                         // Append the correct separator
385                         if (qryStr.length() > 0) {
386                             qryStr.append('&');
387                         }
388 
389                         // Append the name and value to the URL
390                         if (LOG.isTraceEnabled()) {
391                             StringBuilder bf = new StringBuilder("About to add [key]=[value] to url [" + key + "]=["
392                                     + val + "]");
393                             bf.append(" after encoding: [").append(encode(key)).append("]=[").append(encode(val))
394                                     .append("]");
395                             LOG.trace(bf.toString());
396 
397                         }
398                         qryStr.append(encode(key)).append('=').append(encode(val));
399                     }
400                 }
401             }
402         }
403 
404         // prepare result
405         if (qryStr.length() > 0) {
406             return qryStr.toString();
407         } else {
408             return null;
409         }
410     }
411 
412     /**
413      * Given an array of query-string-like packed arguments, eliminate the
414      * specified parameters and return the packedargs parameter with the values
415      * stripped.
416      *
417      * @param origPackedargsStrings array of query string-like packed args.
418      * @param toExclude             list of args to remove from the packed args.
419      * @return array the same length as the original array, containing the same
420      * values, except the <code>toExclude</code> parameters are removed.
421      * If all params end up getting removed, the packedargs string ends
422      * up being null. The array returned is never null though.
423      * @throws IllegalArgumentException if the input args or the input list are
424      *                                  null.
425      */
426     protected final String[] excludeFromPackedargs(String[] origPackedargsStrings, Collection<String> toExclude) {
427         if (origPackedargsStrings == null) {
428             throw new IllegalArgumentException("OrigPackedArgsStrings must not be null");
429         }
430         if (toExclude == null) {
431             throw new IllegalArgumentException("ToExclude list may not be null");
432         }
433 
434         String[] newPackedargsStrings = new String[origPackedargsStrings.length];
435 
436         for (int i = 0; i < origPackedargsStrings.length; i++) {
437             Map<String, String[]> oldPacked = parseQueryString(origPackedargsStrings[i]);
438             Map<String, String[]> newPacked = new HashMap<String, String[]>();
439             for (String opK : oldPacked.keySet()) {
440                 if (LOG.isTraceEnabled()) {
441                     LOG.trace("checking to see if a param should be excluded from packedargs: " + opK);
442                 }
443                 if (!toExclude.contains(opK)) {
444                     newPacked.put(opK, oldPacked.get(opK));
445                 }
446             }
447 
448             newPackedargsStrings[i] = constructQueryString(newPacked);
449 
450         }
451 
452         return newPackedargsStrings;
453     }
454 }