1 /*
2 * Copyright 2010 FatWire Corporation. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package com.fatwire.gst.foundation.url;
17
18 import com.fatwire.cs.core.uri.Assembler;
19 import com.fatwire.cs.core.uri.Util;
20 import org.apache.commons.logging.Log;
21 import org.apache.commons.logging.LogFactory;
22
23 import java.io.UnsupportedEncodingException;
24 import java.net.URI;
25 import java.net.URISyntaxException;
26 import java.util.*;
27
28 /**
29 * Lightweight abstract assembler that handles property management, provides a
30 * logger, handles encoding and decoding and query string processing. Much
31 * lighter in weight than <code>com.fatwire.cs.core.uri.AbstractAssembler</code>
32 * .
33 *
34 * @author Tony Field
35 * @since Sep 27, 2008
36 */
37 public abstract class LightweightAbstractAssembler implements Assembler {
38 /**
39 * Logger for use by sub-classes.
40 */
41 protected static final Log LOG = LogFactory.getLog(LightweightAbstractAssembler.class.getName());
42
43 private static final String CHARSET_lower = "_charset_";
44 private static final String CHARSET_upper = "_CHARSET_";
45
46 private final String encoding;
47
48 private final Map<String, String> properties = new HashMap<String, String>();
49
50 /**
51 * Constructor. Upon object construction, support for UTF-8 encoding is
52 * tested, and the result is cached for future use in the encode() and
53 * decode() methods.
54 * <p/>
55 * UTF-8 is the recommended URLEncoding:
56 * <ul>
57 * <li><a
58 * href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"
59 * >http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars</a></li>
60 * <li><a
61 * href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html"
62 * >http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html</a></li>
63 * <li><a
64 * href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/
65 * rfc2396.txt</a></li>
66 * </ul>
67 */
68 protected LightweightAbstractAssembler() {
69 String enc = "UTF-8";
70 try {
71 Util.encode("fake string", enc);
72 } catch (UnsupportedEncodingException e) {
73 LOG.warn("UTF-8 encoding not supported by this platform. Using the platform's default encoding as the URL encoding.");
74 enc = null;
75 }
76 this.encoding = enc;
77 }
78
79 public void setProperties(Properties props) {
80 Enumeration<?> en = props.propertyNames();
81 while (en.hasMoreElements()) {
82 String pName = (String) en.nextElement();
83 String pValue = props.getProperty(pName);
84 this.properties.put(pName, pValue);
85 }
86 }
87
88 /**
89 * Convenience method to get a property value set into the assembler from
90 * the configuration files.
91 *
92 * @param name name of property to import
93 * @param dephault default value of property - returned if the property
94 * value is not specified
95 * @return property value or dephault value
96 */
97 protected String getProperty(String name, String dephault) {
98 String result = properties.get(name);
99 if (result == null) {
100 result = dephault;
101 }
102 return result;
103 }
104
105 /**
106 * URLEncodes a string using the encoding specified by this class.
107 *
108 * @param string the string to encode
109 * @return encoded string
110 * @throws IllegalStateException if UTF-8 encoding is not supported and the
111 * platform's default encoding is not supported.
112 */
113 protected final String encode(String string) {
114 String result;
115 try {
116 if (string == null) {
117 result = null;
118 } else {
119 result = Util.encode(string, encoding);
120 }
121 } catch (UnsupportedEncodingException ex) {
122 String msg = "Unexpected failure encoding string '" + string + "'using an encoding (" + encoding
123 + "). Exception: " + ex;
124 throw new IllegalStateException(msg);
125 }
126 return result;
127 }
128
129 /**
130 * URLDecodes a string using the encoding specified by this class.
131 *
132 * @param string encoded string
133 * @return decoded string
134 * @throws IllegalStateException if UTF-8 encoding is not supported and the
135 * platform's default encoding is not supported.
136 * @throws IllegalArgumentException if the string is not well-formed for
137 * decoding.
138 */
139 protected final String decode(String string) {
140 return decode(string, null);
141 }
142
143 /**
144 * URLDecodes a string using the encoding specified.
145 *
146 * @param string encoded string
147 * @param encoding the encoding to use to decode the string. If null is
148 * specified, the decoding specified by this class shall be used.
149 * @return decoded string
150 * @throws IllegalStateException if the encoding specified is not supported,
151 * or if UTF-8 encoding is not supported and the platform's
152 * default encoding is not supported.
153 * @throws IllegalArgumentException if the string is not well-formed for
154 * decoding.
155 */
156 protected final String decode(String string, String encoding) {
157 String result;
158 if (string == null) {
159 result = null;
160 } else {
161 if (encoding == null) {
162 encoding = this.encoding;
163 }
164 try {
165 result = Util.decode(string, encoding);
166 } catch (IllegalArgumentException iae) {
167 throw new IllegalArgumentException("Failure decoding string '" + string + "' using encoding '"
168 + encoding + "'. (" + iae.getMessage() + ")");
169 } catch (UnsupportedEncodingException ex) {
170 // This is not expected to ever occur.
171 throw new IllegalStateException("Unexpected failure decoding string '" + string + "'using encoding '"
172 + encoding + "'. (" + ex + ")");
173 }
174 }
175 return result;
176 }
177
178 /**
179 * The multi-arg <code>java.net.URI</code> constructors quote illegal
180 * characters. However, this class requires that the query string already be
181 * properly URLEncoded. As a result, we can't use the multi-arg URI
182 * constructor because all of our % symbols and the + symbol will end up
183 * getting double-encoded. So, we need to construct a full URL ourselves so
184 * we can use the single-arg URI constructor, because it does not quote
185 * anything.
186 * <p/>
187 * There are multiple variants of combinations of these parameters to create
188 * a valid URL. Consult the URI specificaiton for what is allowed and what
189 * is not. The URI constructor will throw a URISyntaxException if required
190 * components are missing for a given combination.
191 *
192 * @param scheme the URI scheme (protocol)
193 * @param authority the URI authority (host:port)
194 * @param path the path for the URI (servlet context path, servlet name,
195 * pathinfo)
196 * @param quotedQueryString the query string, with illegal characters
197 * already quoted.
198 * @param fragment the fragment (anchor)
199 * @return the valid URI with proper encoding
200 * @throws URISyntaxException if there is a problem with what is passed in
201 */
202 protected static final URI constructURI(final String scheme, final String authority, final String path,
203 final String quotedQueryString, final String fragment) throws URISyntaxException {
204 // Update, Feb 25, 2005 by Tony Field
205 StringBuilder bf = new StringBuilder();
206 if (scheme != null) {
207 bf.append(scheme).append(':'); // nothing legal can be quoted
208 }
209 if (authority != null) {
210 bf.append("//").append(authority); // nothing legal to quote until
211 // I18N URLs work
212 }
213 // Path needs quoting though, so let the URI object do it for us.
214 // Use the toASCIIString() method because we need the quoted values.
215 // (toString() is really just for readability and debugging, not
216 // programmatic use)
217 if (path != null) {
218 bf.append(new URI(null, null, path, null, null).getRawPath());
219 }
220 if (quotedQueryString != null) {
221 bf.append('?').append(quotedQueryString); // already quoted
222 }
223 // needs quoting
224 if (fragment != null) {
225 bf.append(new URI(null, null, null, null, fragment).toASCIIString());
226 }
227 URI uri = new URI(bf.toString());
228
229 if (LOG.isDebugEnabled()) {
230 LOG.trace("Constructing new URI using the following components: \n" + "scheme=" + scheme + " \n"
231 + "authority=" + authority + " \n" + "path=" + path + " \n" + "query=" + quotedQueryString + " \n"
232 + "fragment=" + fragment);
233
234 LOG.debug("Assembled URI: " + uri.toASCIIString());
235 }
236 return uri;
237 }
238
239 /**
240 * Parse a query string and put the parameters into a map. Input parameters
241 * will be URLDecoded prior to their addition into the resultant map.
242 * <p/>
243 * Note that the map returned contains a <em><code>String[]</code> as the
244 * value, not a single <code>String</code> value</em> This provides support
245 * for query strings with multiple values for a given parameter name.
246 * <p/>
247 * This decoding method is smart enough to be able to interpret the
248 * <code>_charset_</code> URL parameter that is often used by IE.
249 *
250 * @param qry
251 * @return map containing <code>String</code>/<code>String[]</code> pairs.
252 * @throws IllegalArgumentException if there are mistakes in the string that
253 * make it impossible to parse.
254 */
255 protected final Map<String, String[]> parseQueryString(String qry) {
256 Map<String, String[]> rawPairs = new HashMap<String, String[]>();
257 if (qry == null) {
258 return rawPairs;
259 }
260 int inlen = qry.length();
261 if (inlen == 0) {
262 return rawPairs;
263 }
264
265 if (LOG.isTraceEnabled()) {
266 LOG.trace("Parsing query string: " + qry);
267 }
268
269 int iequal;
270 int iamper;
271 int startAt = 0;
272 boolean bDone = false;
273
274 while (!bDone) {
275 String n;
276 String v;
277 if ((iequal = qry.indexOf("=", startAt)) != -1) {
278 // End of current name=value is '&' or EOL
279 iamper = qry.indexOf("&", iequal);
280 n = qry.substring(startAt, iequal);
281 n = n.trim(); // deal with accidental odd chars in the URL
282 iequal++;
283 if (iequal >= inlen) {
284 break;
285 }
286
287 if (iamper == -1) {
288 v = qry.substring(iequal);
289 } else {
290 v = qry.substring(iequal, iamper);
291 }
292
293 if (iamper != -1) {
294 startAt = iamper + 1;
295 } else {
296 bDone = true;
297 }
298
299 v = v.trim(); // deal with stupid value
300
301 // add the value to the result.
302 String[] av = rawPairs.get(n);
303 if (av == null) {
304 av = new String[1];
305 av[0] = v;
306 rawPairs.put(n, av);
307 } else {
308 // param specified twice in the url.
309 String[] newVal = new String[av.length + 1];
310 System.arraycopy(av, 0, newVal, 0, av.length);
311 newVal[av.length] = v;
312 rawPairs.put(n, newVal);
313 }
314 } else {
315 break; // no more pairs
316 }
317 }
318
319 // Figure out which encoding to use to decode the params
320 String[] _charset_ = rawPairs.get(CHARSET_lower) == null ? rawPairs.get(CHARSET_upper) : rawPairs
321 .get(CHARSET_lower);
322 final String encoding;
323 if (_charset_ == null) {
324 encoding = null; // try to follow the spec
325 } else {
326 switch (_charset_.length) {
327 case 0:
328 throw new IllegalStateException(
329 "Somehow an empty _charst_ param made it into our map. Impossible...");
330 case 1:
331 encoding = _charset_[0]; // url contains an override for the
332 // spec
333 break;
334 default:
335 throw new IllegalStateException("Too many values of _charset_ found in the URL");
336 }
337 }
338
339 // Decode the raw pairs using the proper encoding and set them into the
340 // result map
341 Map<String, String[]> res = new HashMap<String, String[]>(rawPairs.size());
342 for (String rawKey : rawPairs.keySet()) {
343 String key = decode(rawKey, encoding);
344 String[] val = rawPairs.get(rawKey);
345 for (int i = 0; i < val.length; i++) {
346 String rawVal = val[i];
347 val[i] = decode(rawVal, encoding);
348
349 if (LOG.isTraceEnabled()) {
350 StringBuilder bf = new StringBuilder("Parsing query string. Found raw pair [name]=[value]: ");
351 bf.append('[').append(rawKey).append(']').append('=').append('[').append(rawVal).append(']');
352 bf.append(" decoded to: ");
353 bf.append('[').append(key).append(']').append('=').append('[').append(val[i]).append(']');
354 LOG.trace(bf);
355 }
356 }
357 res.put(key, val);
358 }
359
360 return res;
361 }
362
363 /**
364 * Given an input map of name-value pairs, construct a query string. This
365 * supports multiple values for any given parameter. Names and values are
366 * properly encoded.
367 *
368 * @param parameters parameters to encode and place in the query string
369 * @return the query string, or null if no values needed to be added.
370 * @see #encode(String)
371 */
372 protected final String constructQueryString(Map<String, String[]> parameters) {
373 StringBuilder qryStr = new StringBuilder();
374 for (String key : parameters.keySet()) {
375 String[] vals = parameters.get(key);
376 if (vals != null) {
377 // Loop through the values for the parameter
378 for (String val : vals) {
379 if (val != null && val.length() > 0) {
380 // Append the correct separator
381 if (qryStr.length() > 0) {
382 qryStr.append('&');
383 }
384
385 // Append the name and value to the URL
386 if (LOG.isTraceEnabled()) {
387 StringBuilder bf = new StringBuilder("About to add [key]=[value] to url [" + key + "]=["
388 + val + "]");
389 bf.append(" after encoding: [").append(encode(key)).append("]=[").append(encode(val))
390 .append("]");
391 LOG.trace(bf);
392
393 }
394 qryStr.append(encode(key)).append('=').append(encode(val));
395 }
396 }
397 }
398 }
399
400 // prepare result
401 if (qryStr.length() > 0) {
402 return qryStr.toString();
403 } else {
404 return null;
405 }
406 }
407
408 /**
409 * Given an array of query-string-like packed arguments, eliminate the
410 * specified parameters and return the packedargs parameter with the values
411 * stripped.
412 *
413 * @param origPackedargsStrings array of query string-like packed args.
414 * @param toExclude list of args to remove from the packed args.
415 * @return array the same length as the original array, containing the same
416 * values, except the <code>toExclude</code> parameters are removed.
417 * If all params end up getting removed, the packedargs string ends
418 * up being null. The array returned is never null though.
419 * @throws IllegalArgumentException if the input args or the input list are
420 * null.
421 */
422 protected final String[] excludeFromPackedargs(String[] origPackedargsStrings, Collection<String> toExclude) {
423 if (origPackedargsStrings == null) {
424 throw new IllegalArgumentException("OrigPackedArgsStrings must not be null");
425 }
426 if (toExclude == null) {
427 throw new IllegalArgumentException("ToExclude list may not be null");
428 }
429
430 String[] newPackedargsStrings = new String[origPackedargsStrings.length];
431
432 for (int i = 0; i < origPackedargsStrings.length; i++) {
433 Map<String, String[]> oldPacked = parseQueryString(origPackedargsStrings[i]);
434 Map<String, String[]> newPacked = new HashMap<String, String[]>();
435 for (String opK : oldPacked.keySet()) {
436 if (LOG.isTraceEnabled()) {
437 LOG.trace("checking to see if a param should be excluded from packedargs: " + opK);
438 }
439 if (!toExclude.contains(opK)) {
440 newPacked.put(opK, oldPacked.get(opK));
441 }
442 }
443
444 newPackedargsStrings[i] = constructQueryString(newPacked);
445
446 }
447
448 return newPackedargsStrings;
449 }
450 }