1 /*
2 * Copyright 2010 FatWire Corporation. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package tools.gsf.url;
17
18 import com.fatwire.cs.core.uri.Assembler;
19 import com.fatwire.cs.core.uri.Util;
20 import org.slf4j.Logger;
21 import org.slf4j.LoggerFactory;
22
23 import java.io.UnsupportedEncodingException;
24 import java.net.URI;
25 import java.net.URISyntaxException;
26 import java.util.Collection;
27 import java.util.Enumeration;
28 import java.util.HashMap;
29 import java.util.Map;
30 import java.util.Properties;
31
32 /**
33 * Lightweight abstract assembler that handles property management, provides a
34 * logger, handles encoding and decoding and query string processing. Much
35 * lighter in weight than <code>com.fatwire.cs.core.uri.AbstractAssembler</code>
36 * .
37 *
38 * @author Tony Field
39 * @since Sep 27, 2008
40 */
41 public abstract class LightweightAbstractAssembler implements Assembler {
42 /**
43 * Logger for use by sub-classes.
44 */
45 protected static final Logger LOG = LoggerFactory.getLogger("tools.gsf.url.LightweightAbstractAssembler");
46
47 private static final String CHARSET_lower = "_charset_";
48 private static final String CHARSET_upper = "_CHARSET_";
49
50 private final String encoding;
51
52 private final Map<String, String> properties = new HashMap<String, String>();
53
54 /**
55 * Constructor. Upon object construction, support for UTF-8 encoding is
56 * tested, and the result is cached for future use in the encode() and
57 * decode() methods.
58 * <p>
59 * UTF-8 is the recommended URLEncoding:
60 * <ul>
61 * <li><a
62 * href="http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars"
63 * >http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars</a></li>
64 * <li><a
65 * href="http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html"
66 * >http://java.sun.com/j2ee/1.4/docs/tutorial/doc/WebI18N5.html</a></li>
67 * <li><a
68 * href="http://www.ietf.org/rfc/rfc2396.txt">http://www.ietf.org/rfc/
69 * rfc2396.txt</a></li>
70 * </ul>
71 */
72 protected LightweightAbstractAssembler() {
73 String enc = "UTF-8";
74 try {
75 Util.encode("fake string", enc);
76 } catch (UnsupportedEncodingException e) {
77 LOG.warn("UTF-8 encoding not supported by this platform. Using the platform's default encoding as the URL encoding.");
78 enc = null;
79 }
80 this.encoding = enc;
81 }
82
83 public void setProperties(Properties props) {
84 Enumeration<?> en = props.propertyNames();
85 while (en.hasMoreElements()) {
86 String pName = (String) en.nextElement();
87 String pValue = props.getProperty(pName);
88 this.properties.put(pName, pValue);
89 }
90 }
91
92 /**
93 * Convenience method to get a property value set into the assembler from
94 * the configuration files.
95 *
96 * @param name name of property to import
97 * @param dephault default value of property - returned if the property
98 * value is not specified
99 * @return property value or dephault value
100 */
101 protected String getProperty(String name, String dephault) {
102 String result = properties.get(name);
103 if (result == null) {
104 result = dephault;
105 }
106 return result;
107 }
108
109 /**
110 * URLEncodes a string using the encoding specified by this class.
111 *
112 * @param string the string to encode
113 * @return encoded string
114 * @throws IllegalStateException if UTF-8 encoding is not supported and the
115 * platform's default encoding is not supported.
116 */
117 protected final String encode(String string) {
118 String result;
119 try {
120 if (string == null) {
121 result = null;
122 } else {
123 result = Util.encode(string, encoding);
124 }
125 } catch (UnsupportedEncodingException ex) {
126 String msg = "Unexpected failure encoding string '" + string + "'using an encoding (" + encoding
127 + "). Exception: " + ex;
128 throw new IllegalStateException(msg);
129 }
130 return result;
131 }
132
133 /**
134 * URLDecodes a string using the encoding specified by this class.
135 *
136 * @param string encoded string
137 * @return decoded string
138 * @throws IllegalStateException if UTF-8 encoding is not supported and the
139 * platform's default encoding is not supported.
140 * @throws IllegalArgumentException if the string is not well-formed for
141 * decoding.
142 */
143 protected final String decode(String string) {
144 return decode(string, null);
145 }
146
147 /**
148 * URLDecodes a string using the encoding specified.
149 *
150 * @param string encoded string
151 * @param encoding the encoding to use to decode the string. If null is
152 * specified, the decoding specified by this class shall be used.
153 * @return decoded string
154 * @throws IllegalStateException if the encoding specified is not supported,
155 * or if UTF-8 encoding is not supported and the platform's
156 * default encoding is not supported.
157 * @throws IllegalArgumentException if the string is not well-formed for
158 * decoding.
159 */
160 protected final String decode(String string, String encoding) {
161 String result;
162 if (string == null) {
163 result = null;
164 } else {
165 if (encoding == null) {
166 encoding = this.encoding;
167 }
168 try {
169 result = Util.decode(string, encoding);
170 } catch (IllegalArgumentException iae) {
171 throw new IllegalArgumentException("Failure decoding string '" + string + "' using encoding '"
172 + encoding + "'. (" + iae.getMessage() + ")");
173 } catch (UnsupportedEncodingException ex) {
174 // This is not expected to ever occur.
175 throw new IllegalStateException("Unexpected failure decoding string '" + string + "'using encoding '"
176 + encoding + "'. (" + ex + ")");
177 }
178 }
179 return result;
180 }
181
182 /**
183 * The multi-arg <code>java.net.URI</code> constructors quote illegal
184 * characters. However, this class requires that the query string already be
185 * properly URLEncoded. As a result, we can't use the multi-arg URI
186 * constructor because all of our % symbols and the + symbol will end up
187 * getting double-encoded. So, we need to construct a full URL ourselves so
188 * we can use the single-arg URI constructor, because it does not quote
189 * anything.
190 * <p>
191 * There are multiple variants of combinations of these parameters to create
192 * a valid URL. Consult the URI specificaiton for what is allowed and what
193 * is not. The URI constructor will throw a URISyntaxException if required
194 * components are missing for a given combination.
195 *
196 * @param scheme the URI scheme (protocol)
197 * @param authority the URI authority (host:port)
198 * @param path the path for the URI (servlet context path, servlet name,
199 * pathinfo)
200 * @param quotedQueryString the query string, with illegal characters
201 * already quoted.
202 * @param fragment the fragment (anchor)
203 * @return the valid URI with proper encoding
204 * @throws URISyntaxException if there is a problem with what is passed in
205 */
206 protected static final URI constructURI(final String scheme, final String authority, final String path,
207 final String quotedQueryString, final String fragment) throws URISyntaxException {
208 // Update, Feb 25, 2005 by Tony Field
209 StringBuilder bf = new StringBuilder();
210 if (scheme != null) {
211 bf.append(scheme).append(':'); // nothing legal can be quoted
212 }
213 if (authority != null) {
214 bf.append("//").append(authority); // nothing legal to quote until
215 // I18N URLs work
216 }
217 // Path needs quoting though, so let the URI object do it for us.
218 // Use the toASCIIString() method because we need the quoted values.
219 // (toString() is really just for readability and debugging, not
220 // programmatic use)
221 if (path != null) {
222 bf.append(new URI(null, null, path, null, null).getRawPath());
223 }
224 if (quotedQueryString != null) {
225 bf.append('?').append(quotedQueryString); // already quoted
226 }
227 // needs quoting
228 if (fragment != null) {
229 bf.append(new URI(null, null, null, null, fragment).toASCIIString());
230 }
231 URI uri = new URI(bf.toString());
232
233 if (LOG.isDebugEnabled()) {
234 LOG.trace("Constructing new URI using the following components: \n" + "scheme=" + scheme + " \n"
235 + "authority=" + authority + " \n" + "path=" + path + " \n" + "query=" + quotedQueryString + " \n"
236 + "fragment=" + fragment);
237
238 LOG.debug("Assembled URI: " + uri.toASCIIString());
239 }
240 return uri;
241 }
242
243 /**
244 * Parse a query string and put the parameters into a map. Input parameters
245 * will be URLDecoded prior to their addition into the resultant map.
246 * <p>
247 * Note that the map returned contains a <em><code>String[]</code> as the
248 * value, not a single <code>String</code> value</em> This provides support
249 * for query strings with multiple values for a given parameter name.
250 * <p>
251 * This decoding method is smart enough to be able to interpret the
252 * <code>_charset_</code> URL parameter that is often used by IE.
253 *
254 * @param qry string value for query
255 * @return map containing <code>String</code>/<code>String[]</code> pairs.
256 * @throws IllegalArgumentException if there are mistakes in the string that
257 * make it impossible to parse.
258 */
259 protected final Map<String, String[]> parseQueryString(String qry) {
260 Map<String, String[]> rawPairs = new HashMap<String, String[]>();
261 if (qry == null) {
262 return rawPairs;
263 }
264 int inlen = qry.length();
265 if (inlen == 0) {
266 return rawPairs;
267 }
268
269 if (LOG.isTraceEnabled()) {
270 LOG.trace("Parsing query string: " + qry);
271 }
272
273 int iequal;
274 int iamper;
275 int startAt = 0;
276 boolean bDone = false;
277
278 while (!bDone) {
279 String n;
280 String v;
281 if ((iequal = qry.indexOf("=", startAt)) != -1) {
282 // End of current name=value is '&' or EOL
283 iamper = qry.indexOf("&", iequal);
284 n = qry.substring(startAt, iequal);
285 n = n.trim(); // deal with accidental odd chars in the URL
286 iequal++;
287 if (iequal >= inlen) {
288 break;
289 }
290
291 if (iamper == -1) {
292 v = qry.substring(iequal);
293 } else {
294 v = qry.substring(iequal, iamper);
295 }
296
297 if (iamper != -1) {
298 startAt = iamper + 1;
299 } else {
300 bDone = true;
301 }
302
303 v = v.trim(); // deal with stupid value
304
305 // add the value to the result.
306 String[] av = rawPairs.get(n);
307 if (av == null) {
308 av = new String[1];
309 av[0] = v;
310 rawPairs.put(n, av);
311 } else {
312 // param specified twice in the url.
313 String[] newVal = new String[av.length + 1];
314 System.arraycopy(av, 0, newVal, 0, av.length);
315 newVal[av.length] = v;
316 rawPairs.put(n, newVal);
317 }
318 } else {
319 break; // no more pairs
320 }
321 }
322
323 // Figure out which encoding to use to decode the params
324 String[] _charset_ = rawPairs.get(CHARSET_lower) == null ? rawPairs.get(CHARSET_upper) : rawPairs
325 .get(CHARSET_lower);
326 final String encoding;
327 if (_charset_ == null) {
328 encoding = null; // try to follow the spec
329 } else {
330 switch (_charset_.length) {
331 case 0:
332 throw new IllegalStateException(
333 "Somehow an empty _charst_ param made it into our map. Impossible...");
334 case 1:
335 encoding = _charset_[0]; // url contains an override for the
336 // spec
337 break;
338 default:
339 throw new IllegalStateException("Too many values of _charset_ found in the URL");
340 }
341 }
342
343 // Decode the raw pairs using the proper encoding and set them into the
344 // result map
345 Map<String, String[]> res = new HashMap<String, String[]>(rawPairs.size());
346 for (String rawKey : rawPairs.keySet()) {
347 String key = decode(rawKey, encoding);
348 String[] val = rawPairs.get(rawKey);
349 for (int i = 0; i < val.length; i++) {
350 String rawVal = val[i];
351 val[i] = decode(rawVal, encoding);
352
353 if (LOG.isTraceEnabled()) {
354 StringBuilder bf = new StringBuilder("Parsing query string. Found raw pair [name]=[value]: ");
355 bf.append('[').append(rawKey).append(']').append('=').append('[').append(rawVal).append(']');
356 bf.append(" decoded to: ");
357 bf.append('[').append(key).append(']').append('=').append('[').append(val[i]).append(']');
358 LOG.trace(bf.toString());
359 }
360 }
361 res.put(key, val);
362 }
363
364 return res;
365 }
366
367 /**
368 * Given an input map of name-value pairs, construct a query string. This
369 * supports multiple values for any given parameter. Names and values are
370 * properly encoded.
371 *
372 * @param parameters parameters to encode and place in the query string
373 * @return the query string, or null if no values needed to be added.
374 * @see #encode(String)
375 */
376 protected final String constructQueryString(Map<String, String[]> parameters) {
377 StringBuilder qryStr = new StringBuilder();
378 for (String key : parameters.keySet()) {
379 String[] vals = parameters.get(key);
380 if (vals != null) {
381 // Loop through the values for the parameter
382 for (String val : vals) {
383 if (val != null && val.length() > 0) {
384 // Append the correct separator
385 if (qryStr.length() > 0) {
386 qryStr.append('&');
387 }
388
389 // Append the name and value to the URL
390 if (LOG.isTraceEnabled()) {
391 StringBuilder bf = new StringBuilder("About to add [key]=[value] to url [" + key + "]=["
392 + val + "]");
393 bf.append(" after encoding: [").append(encode(key)).append("]=[").append(encode(val))
394 .append("]");
395 LOG.trace(bf.toString());
396
397 }
398 qryStr.append(encode(key)).append('=').append(encode(val));
399 }
400 }
401 }
402 }
403
404 // prepare result
405 if (qryStr.length() > 0) {
406 return qryStr.toString();
407 } else {
408 return null;
409 }
410 }
411
412 /**
413 * Given an array of query-string-like packed arguments, eliminate the
414 * specified parameters and return the packedargs parameter with the values
415 * stripped.
416 *
417 * @param origPackedargsStrings array of query string-like packed args.
418 * @param toExclude list of args to remove from the packed args.
419 * @return array the same length as the original array, containing the same
420 * values, except the <code>toExclude</code> parameters are removed.
421 * If all params end up getting removed, the packedargs string ends
422 * up being null. The array returned is never null though.
423 * @throws IllegalArgumentException if the input args or the input list are
424 * null.
425 */
426 protected final String[] excludeFromPackedargs(String[] origPackedargsStrings, Collection<String> toExclude) {
427 if (origPackedargsStrings == null) {
428 throw new IllegalArgumentException("OrigPackedArgsStrings must not be null");
429 }
430 if (toExclude == null) {
431 throw new IllegalArgumentException("ToExclude list may not be null");
432 }
433
434 String[] newPackedargsStrings = new String[origPackedargsStrings.length];
435
436 for (int i = 0; i < origPackedargsStrings.length; i++) {
437 Map<String, String[]> oldPacked = parseQueryString(origPackedargsStrings[i]);
438 Map<String, String[]> newPacked = new HashMap<String, String[]>();
439 for (String opK : oldPacked.keySet()) {
440 if (LOG.isTraceEnabled()) {
441 LOG.trace("checking to see if a param should be excluded from packedargs: " + opK);
442 }
443 if (!toExclude.contains(opK)) {
444 newPacked.put(opK, oldPacked.get(opK));
445 }
446 }
447
448 newPackedargsStrings[i] = constructQueryString(newPacked);
449
450 }
451
452 return newPackedargsStrings;
453 }
454 }