001 /* URI.java -- An URI class
002 Copyright (C) 2002, 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
003
004 This file is part of GNU Classpath.
005
006 GNU Classpath is free software; you can redistribute it and/or modify
007 it under the terms of the GNU General Public License as published by
008 the Free Software Foundation; either version 2, or (at your option)
009 any later version.
010
011 GNU Classpath is distributed in the hope that it will be useful, but
012 WITHOUT ANY WARRANTY; without even the implied warranty of
013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 General Public License for more details.
015
016 You should have received a copy of the GNU General Public License
017 along with GNU Classpath; see the file COPYING. If not, write to the
018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019 02110-1301 USA.
020
021 Linking this library statically or dynamically with other modules is
022 making a combined work based on this library. Thus, the terms and
023 conditions of the GNU General Public License cover the whole
024 combination.
025
026 As a special exception, the copyright holders of this library give you
027 permission to link this library with independent modules to produce an
028 executable, regardless of the license terms of these independent
029 modules, and to copy and distribute the resulting executable under
030 terms of your choice, provided that you also meet, for each linked
031 independent module, the terms and conditions of the license of that
032 module. An independent module is a module which is not derived from
033 or based on this library. If you modify this library, you may extend
034 this exception to your version of the library, but you are not
035 obligated to do so. If you do not wish to do so, delete this
036 exception statement from your version. */
037
038
039 package java.net;
040
041 import java.io.IOException;
042 import java.io.ObjectInputStream;
043 import java.io.ObjectOutputStream;
044 import java.io.Serializable;
045 import java.util.regex.Matcher;
046 import java.util.regex.Pattern;
047
048 /**
049 * <p>
050 * A URI instance represents that defined by
051 * <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC3986</a>,
052 * with some deviations.
053 * </p>
 * <p>
 * At its highest level, a URI consists of:
 * </p>
 * <p>
 * <code>[<em>scheme</em><strong>:</strong>]<em>scheme-specific-part</em>
 * [<strong>#</strong><em>fragment</em>]</code>
 * </p>
060 * <p>
061 * where <strong>#</strong> and <strong>:</strong> are literal characters,
062 * and those parts enclosed in square brackets are optional.
063 * </p>
064 * <p>
 * There are two main types of URI.  An <em>opaque</em> URI is one
 * which just consists of the above three parts, and is not further
 * defined.  An example of such a URI would be a <em>mailto:</em> URI.
 * In contrast, <em>hierarchical</em> URIs give further definition
 * to the scheme-specific part, so as to represent some part of a
 * hierarchical structure:
071 * </p>
072 * <p>
073 * <code>[<strong>//</strong><em>authority</em>][<em>path</em>]
074 * [<strong>?</strong><em>query</em>]</code>
075 * </p>
076 * <p>
077 * with <strong>/</strong> and <strong>?</strong> being literal characters.
078 * When server-based, the authority section is further subdivided into:
079 * </p>
080 * <p>
081 * <code>[<em>user-info</em><strong>@</strong>]<em>host</em>
082 * [<strong>:</strong><em>port</em>]</code>
083 * </p>
084 * <p>
085 * with <strong>@</strong> and <strong>:</strong> as literal characters.
086 * Authority sections that are not server-based are said to be registry-based.
087 * </p>
088 * <p>
 * Hierarchical URIs can be either relative or absolute.  An absolute
 * hierarchical URI specifies a scheme, and its scheme-specific part
 * always starts with a `<strong>/</strong>', while relative URIs don't
 * specify a scheme.  Opaque URIs are always absolute.
092 * </p>
093 * <p>
094 * Each part of the URI may have one of three states: undefined, empty
095 * or containing some content. The former two of these are represented
096 * by <code>null</code> and the empty string in Java, respectively.
097 * The scheme-specific part may never be undefined. It also follows from
098 * this that the path sub-part may also not be undefined, so as to ensure
099 * the former.
100 * </p>
101 * <h2>Character Escaping and Quoting</h2>
102 * <p>
103 * The characters that can be used within a valid URI are restricted.
104 * There are two main classes of characters which can't be used as is
105 * within the URI:
106 * </p>
107 * <ol>
108 * <li><strong>Characters outside the US-ASCII character set</strong>.
109 * These have to be <strong>escaped</strong> in order to create
110 * an RFC-compliant URI; this means replacing the character with the
111 * appropriate hexadecimal value, preceded by a `%'.</li>
112 * <li><strong>Illegal characters</strong> (e.g. space characters,
113 * control characters) are quoted, which results in them being encoded
114 * in the same way as non-US-ASCII characters.</li>
115 * </ol>
116 * <p>
117 * The set of valid characters differs depending on the section of the URI:
118 * </p>
119 * <ul>
120 * <li><strong>Scheme</strong>: Must be an alphanumeric, `-', `.' or '+'.</li>
121 * <li><strong>Authority</strong>:Composed of the username, host, port, `@'
122 * and `:'.</li>
123 * <li><strong>Username</strong>: Allows unreserved or percent-encoded
124 * characters, sub-delimiters and `:'.</li>
125 * <li><strong>Host</strong>: Allows unreserved or percent-encoded
126 * characters, sub-delimiters and square brackets (`[' and `]') for IPv6
127 * addresses.</li>
128 * <li><strong>Port</strong>: Digits only.</li>
 * <li><strong>Path</strong>: Allows the path characters and `/'.</li>
 * <li><strong>Query</strong>: Allows the path characters, `?' and '/'.</li>
 * <li><strong>Fragment</strong>: Allows the path characters, `?' and '/'.</li>
132 * </ul>
133 * <p>
134 * These definitions reference the following sets of characters:
135 * </p>
136 * <ul>
137 * <li><strong>Unreserved characters</strong>: The alphanumerics plus
138 * `-', `.', `_', and `~'.</li>
139 * <li><strong>Sub-delimiters</strong>: `!', `$', `&', `(', `)', `*',
140 * `+', `,', `;', `=' and the single-quote itself.</li>
141 * <li><strong>Path characters</strong>: Unreserved and percent-encoded
142 * characters and the sub-delimiters along with `@' and `:'.</li>
143 * </ul>
144 * <p>
145 * The constructors and accessor methods allow the use and retrieval of
146 * URI components which contain non-US-ASCII characters directly.
147 * They are only escaped when the <code>toASCIIString()</code> method
148 * is used. In contrast, illegal characters are always quoted, with the
149 * exception of the return values of the non-raw accessors.
150 * </p>
151 *
152 * @author Ito Kazumitsu (ito.kazumitsu@hitachi-cable.co.jp)
153 * @author Dalibor Topic (robilad@kaffe.org)
154 * @author Michael Koch (konqueror@gmx.de)
155 * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
156 * @since 1.4
157 */
158 public final class URI
159 implements Comparable<URI>, Serializable
160 {
/**
 * For serialization compatibility.  Only the string form of the URI is
 * written to the stream (see <code>writeObject</code>), so this value
 * identifies that serial form.
 */
static final long serialVersionUID = -6052424284110960213L;
165
/**
 * Regular expression for parsing URIs.
 *
 * Taken from RFC 2396, Appendix B.  The capturing groups are, in order:
 * 1 = scheme with its trailing `:', 2 = scheme, 3 = scheme-specific part,
 * 4 = authority with its leading `//', 5 = authority, 6 = path,
 * 7 = query with its leading `?', 8 = query, 9 = fragment with its
 * leading `#', 10 = fragment.  Each odd "delimiter" group lets
 * <code>getURIGroup</code> tell an empty component from an absent one.
 *
 * This expression doesn't parse IPv6 addresses; it only isolates the
 * authority as a whole.
 */
private static final String URI_REGEXP =
  "^(([^:/?#]+):)?((//([^/?#]*))?([^?#]*)(\\?([^#]*))?)?(#(.*))?";

/**
 * Regular expression for parsing the authority segment.
 *
 * The capturing groups are: 1 = user information with its trailing `@',
 * 2 = user information, 3 = host, 4 = port with its leading `:',
 * 5 = port (digits only).  The host group excludes `:', so a bracketed
 * IPv6 literal containing colons does not match this expression.
 */
private static final String AUTHORITY_REGEXP =
  "(([^?#]*)@)?([^?#:]*)(:([0-9]*))?";
180
/**
 * Valid characters (taken from rfc2396/3986).  Each constant is a
 * string used as a set: a character is legal for a component if it
 * occurs in the corresponding string (see <code>quote</code>).
 */
// Basic building blocks.
private static final String RFC2396_DIGIT = "0123456789";
private static final String RFC2396_LOWALPHA = "abcdefghijklmnopqrstuvwxyz";
private static final String RFC2396_UPALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
private static final String RFC2396_ALPHA =
  RFC2396_LOWALPHA + RFC2396_UPALPHA;
private static final String RFC2396_ALPHANUM = RFC2396_DIGIT + RFC2396_ALPHA;
// Unreserved characters and sub-delimiters as named in the class comment.
private static final String RFC3986_UNRESERVED = RFC2396_ALPHANUM + "-._~";
private static final String RFC3986_SUBDELIMS = "!$&'()*+,;=";
// `%' is included so that existing percent-escapes pass through unquoted.
private static final String RFC3986_REG_NAME =
  RFC3986_UNRESERVED + RFC3986_SUBDELIMS + "%";
private static final String RFC3986_PCHAR = RFC3986_UNRESERVED +
  RFC3986_SUBDELIMS + ":@%";
private static final String RFC3986_SEGMENT = RFC3986_PCHAR;
// Path: path characters plus the segment separator.
private static final String RFC3986_PATH_SEGMENTS = RFC3986_SEGMENT + "/";
// Scheme-specific part, query and fragment: path characters, `?' and `/'.
private static final String RFC3986_SSP = RFC3986_PCHAR + "?/";
// Host: registry-name characters plus the brackets of IPv6 literals.
private static final String RFC3986_HOST = RFC3986_REG_NAME + "[]";
// User information: registry-name characters plus `:'.
private static final String RFC3986_USERINFO = RFC3986_REG_NAME + ":";
201
/**
 * Index of scheme component in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int SCHEME_GROUP = 2;

/**
 * Index of scheme-specific-part in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int SCHEME_SPEC_PART_GROUP = 3;

/**
 * Index of authority component in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int AUTHORITY_GROUP = 5;

/**
 * Index of path component in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int PATH_GROUP = 6;

/**
 * Index of query component in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int QUERY_GROUP = 8;

/**
 * Index of fragment component in parsed URI
 * (capturing group of <code>URI_REGEXP</code>).
 */
private static final int FRAGMENT_GROUP = 10;

/**
 * Index of userinfo component in parsed authority section
 * (capturing group of <code>AUTHORITY_REGEXP</code>).
 */
private static final int AUTHORITY_USERINFO_GROUP = 2;

/**
 * Index of host component in parsed authority section
 * (capturing group of <code>AUTHORITY_REGEXP</code>).
 */
private static final int AUTHORITY_HOST_GROUP = 3;

/**
 * Index of port component in parsed authority section
 * (capturing group of <code>AUTHORITY_REGEXP</code>).
 */
private static final int AUTHORITY_PORT_GROUP = 5;
246
/**
 * The compiled version of the URI regular expression.
 * Assigned once in the static initializer.
 */
private static final Pattern URI_PATTERN;

/**
 * The compiled version of the authority regular expression.
 * Assigned once in the static initializer.
 */
private static final Pattern AUTHORITY_PATTERN;

/**
 * The set of valid hexadecimal characters, indexed by digit value;
 * used to emit the two digits of a percent-escape.
 */
private static final String HEX = "0123456789ABCDEF";
261
// Parsed components of the URI.  They are transient because only
// `string' is serialized; readObject rebuilds them by parsing it again.
// For each component the "raw" field holds the text as it appears in the
// URI (escapes intact) and its companion holds the unquoted form.
// A null value means the component is undefined.
private transient String scheme;
private transient String rawSchemeSpecificPart;
private transient String schemeSpecificPart;
private transient String rawAuthority;
private transient String authority;
private transient String rawUserInfo;
private transient String userInfo;
private transient String rawHost;
private transient String host;
// -1 means that no port was given.
private transient int port = -1;
private transient String rawPath;
private transient String path;
private transient String rawQuery;
private transient String query;
private transient String rawFragment;
private transient String fragment;
// The string form of this URI; the only field written by writeObject.
private String string;
279
/**
 * Static initializer to pre-compile the regular expressions, so that
 * every parse reuses the same two <code>Pattern</code> instances.
 */
static
{
  URI_PATTERN = Pattern.compile(URI_REGEXP);
  AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEXP);
}
288
289 private void readObject(ObjectInputStream is)
290 throws ClassNotFoundException, IOException
291 {
292 this.string = (String) is.readObject();
293 try
294 {
295 parseURI(this.string);
296 }
297 catch (URISyntaxException x)
298 {
299 // Should not happen.
300 throw new RuntimeException(x);
301 }
302 }
303
304 private void writeObject(ObjectOutputStream os) throws IOException
305 {
306 if (string == null)
307 string = toString();
308 os.writeObject(string);
309 }
310
311 /**
312 * <p>
313 * Returns the string content of the specified group of the supplied
314 * matcher. The returned value is modified according to the following:
315 * </p>
316 * <ul>
317 * <li>If the resulting string has a length greater than 0, then
318 * that string is returned.</li>
319 * <li>If a string of zero length, is matched, then the content
320 * of the preceding group is considered. If this is also an empty
321 * string, then <code>null</code> is returned to indicate an undefined
322 * value. Otherwise, the value is truly the empty string and this is
323 * the returned value.</li>
324 * </ul>
325 * <p>
326 * This method is used for matching against all parts of the URI
327 * that may be either undefined or empty (i.e. all those but the
328 * scheme-specific part and the path). In each case, the preceding
329 * group is the content of the original group, along with some
330 * additional distinguishing feature. For example, the preceding
331 * group for the query includes the preceding question mark,
332 * while that of the fragment includes the hash symbol. The presence
333 * of these features enables disambiguation between the two cases
334 * of a completely unspecified value and a simple non-existant value.
335 * The scheme differs in that it will never return an empty string;
336 * the delimiter follows the scheme rather than preceding it, so
337 * it becomes part of the following section. The same is true
338 * of the user information.
339 * </p>
340 *
341 * @param match the matcher, which contains the results of the URI
342 * matched against the URI regular expression.
343 * @return either the matched content, <code>null</code> for undefined
344 * values, or an empty string for a URI part with empty content.
345 */
346 private static String getURIGroup(Matcher match, int group)
347 {
348 String matched = match.group(group);
349 if (matched == null || matched.length() == 0)
350 {
351 String prevMatched = match.group(group -1);
352 if (prevMatched == null || prevMatched.length() == 0)
353 return null;
354 else
355 return "";
356 }
357 return matched;
358 }
359
360 /**
361 * Sets fields of this URI by parsing the given string.
362 *
363 * @param str The string to parse
364 *
365 * @exception URISyntaxException If the given string violates RFC 2396
366 */
367 private void parseURI(String str) throws URISyntaxException
368 {
369 Matcher matcher = URI_PATTERN.matcher(str);
370
371 if (matcher.matches())
372 {
373 scheme = getURIGroup(matcher, SCHEME_GROUP);
374 rawSchemeSpecificPart = matcher.group(SCHEME_SPEC_PART_GROUP);
375 schemeSpecificPart = unquote(rawSchemeSpecificPart);
376 if (!isOpaque())
377 {
378 rawAuthority = getURIGroup(matcher, AUTHORITY_GROUP);
379 rawPath = matcher.group(PATH_GROUP);
380 rawQuery = getURIGroup(matcher, QUERY_GROUP);
381 }
382 rawFragment = getURIGroup(matcher, FRAGMENT_GROUP);
383 }
384 else
385 throw new URISyntaxException(str,
386 "doesn't match URI regular expression");
387 parseServerAuthority();
388
389 // We must eagerly unquote the parts, because this is the only time
390 // we may throw an exception.
391 authority = unquote(rawAuthority);
392 userInfo = unquote(rawUserInfo);
393 host = unquote(rawHost);
394 path = unquote(rawPath);
395 query = unquote(rawQuery);
396 fragment = unquote(rawFragment);
397 }
398
399 /**
400 * Unquote "%" + hex quotes characters
401 *
402 * @param str The string to unquote or null.
403 *
404 * @return The unquoted string or null if str was null.
405 *
406 * @exception URISyntaxException If the given string contains invalid
407 * escape sequences.
408 */
409 private static String unquote(String str) throws URISyntaxException
410 {
411 if (str == null)
412 return null;
413 byte[] buf = new byte[str.length()];
414 int pos = 0;
415 for (int i = 0; i < str.length(); i++)
416 {
417 char c = str.charAt(i);
418 if (c == '%')
419 {
420 if (i + 2 >= str.length())
421 throw new URISyntaxException(str, "Invalid quoted character");
422 int hi = Character.digit(str.charAt(++i), 16);
423 int lo = Character.digit(str.charAt(++i), 16);
424 if (lo < 0 || hi < 0)
425 throw new URISyntaxException(str, "Invalid quoted character");
426 buf[pos++] = (byte) (hi * 16 + lo);
427 }
428 else
429 buf[pos++] = (byte) c;
430 }
431 try
432 {
433 return new String(buf, 0, pos, "utf-8");
434 }
435 catch (java.io.UnsupportedEncodingException x2)
436 {
437 throw (Error) new InternalError().initCause(x2);
438 }
439 }
440
/**
 * Quote characters illegal in URIs in given string.
 *
 * The legal set is the one of the scheme-specific part
 * (<code>RFC3986_SSP</code>), which is also what the constructors use
 * for queries and fragments.  The work is delegated to
 * <code>quote(String, String)</code>: US-ASCII characters outside the
 * legal set are replaced by "%" + their two-digit hex code, and all
 * other characters are left unchanged.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quote(String str)
{
  return quote(str, RFC3986_SSP);
}
456
457 /**
458 * Quote characters illegal in URI authorities in given string.
459 *
460 * Replace illegal characters by encoding their UTF-8
461 * representation as "%" + hex code for each resulting
462 * UTF-8 character.
463 *
464 * @param str The string to quote
465 *
466 * @return The quoted string.
467 */
468 private static String quoteAuthority(String str)
469 {
470 // Technically, we should be using RFC2396_AUTHORITY, but
471 // it contains no additional characters.
472 return quote(str, RFC3986_REG_NAME);
473 }
474
475 /**
476 * Quotes the characters in the supplied string that are not part of
477 * the specified set of legal characters.
478 *
479 * @param str the string to quote
480 * @param legalCharacters the set of legal characters
481 *
482 * @return the quoted string.
483 */
484 private static String quote(String str, String legalCharacters)
485 {
486 StringBuffer sb = new StringBuffer(str.length());
487 for (int i = 0; i < str.length(); i++)
488 {
489 char c = str.charAt(i);
490 if ((legalCharacters.indexOf(c) == -1)
491 && (c <= 127))
492 {
493 sb.append('%');
494 sb.append(HEX.charAt(c / 16));
495 sb.append(HEX.charAt(c % 16));
496 }
497 else
498 sb.append(c);
499 }
500 return sb.toString();
501 }
502
/**
 * Quote characters illegal in URI hosts in given string.
 *
 * The legal set is <code>RFC3986_HOST</code>.  The work is delegated
 * to <code>quote(String, String)</code>: US-ASCII characters outside
 * the legal set are replaced by "%" + their two-digit hex code, and
 * all other characters are left unchanged.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteHost(String str)
{
  return quote(str, RFC3986_HOST);
}
518
/**
 * Quote characters illegal in URI paths in given string.
 *
 * The legal set is <code>RFC3986_PATH_SEGMENTS</code>.  The work is
 * delegated to <code>quote(String, String)</code>: US-ASCII characters
 * outside the legal set are replaced by "%" + their two-digit hex
 * code, and all other characters are left unchanged.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quotePath(String str)
{
  // Technically, we should be using RFC2396_PATH, but
  // it contains no additional characters.
  return quote(str, RFC3986_PATH_SEGMENTS);
}
536
/**
 * Quote characters illegal in URI user infos in given string.
 *
 * The legal set is <code>RFC3986_USERINFO</code>.  The work is
 * delegated to <code>quote(String, String)</code>: US-ASCII characters
 * outside the legal set are replaced by "%" + their two-digit hex
 * code, and all other characters are left unchanged.
 *
 * @param str The string to quote
 *
 * @return The quoted string.
 */
private static String quoteUserInfo(String str)
{
  return quote(str, RFC3986_USERINFO);
}
552
/**
 * Creates an URI from the given string
 *
 * @param str The string to create the URI from
 *
 * @exception URISyntaxException If the given string cannot be parsed
 *            as a URI
 * @exception NullPointerException If str is null
 */
public URI(String str) throws URISyntaxException
{
  // The string must be stored before parsing: parseServerAuthority()
  // reports errors in terms of this field.
  this.string = str;
  parseURI(str);
}
566
/**
 * Create an URI from the given components
 *
 * The components are quoted where necessary, joined into a single
 * string and that string is then parsed.  A component that is
 * <code>null</code> (or <code>-1</code> for the port) is left out.
 * The `//' that introduces the authority is emitted as soon as any of
 * user information, host or port is given.
 *
 * @param scheme The scheme name
 * @param userInfo The username and authorization info
 * @param host The hostname
 * @param port The port number, or -1 if undefined
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the string built from the
 *            components cannot be parsed as a URI
 */
public URI(String scheme, String userInfo, String host, int port,
           String path, String query, String fragment)
  throws URISyntaxException
{
  this((scheme == null ? "" : scheme + ":")
       + (userInfo == null && host == null && port == -1 ? "" : "//")
       + (userInfo == null ? "" : quoteUserInfo(userInfo) + "@")
       + (host == null ? "" : quoteHost(host))
       + (port == -1 ? "" : ":" + String.valueOf(port))
       + (path == null ? "" : quotePath(path))
       + (query == null ? "" : "?" + quote(query))
       + (fragment == null ? "" : "#" + quote(fragment)));
}
593
/**
 * Create an URI from the given components
 *
 * The components are quoted where necessary, joined into a single
 * string and that string is then parsed.  A component that is
 * <code>null</code> is left out.
 *
 * @param scheme The scheme name
 * @param authority The authority
 * @param path The path
 * @param query The query
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the string built from the
 *            components cannot be parsed as a URI
 */
public URI(String scheme, String authority, String path, String query,
           String fragment) throws URISyntaxException
{
  this((scheme == null ? "" : scheme + ":")
       + (authority == null ? "" : "//" + quoteAuthority(authority))
       + (path == null ? "" : quotePath(path))
       + (query == null ? "" : "?" + quote(query))
       + (fragment == null ? "" : "#" + quote(fragment)));
}
614
/**
 * Create an URI from the given components
 *
 * This is a shorthand for the seven-argument constructor with no user
 * information, no port and no query.
 *
 * @param scheme The scheme name
 * @param host The hostname
 * @param path The path
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the string built from the
 *            components cannot be parsed as a URI
 */
public URI(String scheme, String host, String path, String fragment)
  throws URISyntaxException
{
  this(scheme, null, host, -1, path, null, fragment);
}
630
/**
 * Create an URI from the given components
 *
 * The scheme-specific part and the fragment are quoted where
 * necessary, joined with the scheme into a single string and that
 * string is then parsed.  A component that is <code>null</code> is
 * left out.
 *
 * @param scheme The scheme name
 * @param ssp The scheme specific part
 * @param fragment The fragment
 *
 * @exception URISyntaxException If the string built from the
 *            components cannot be parsed as a URI
 */
public URI(String scheme, String ssp, String fragment)
  throws URISyntaxException
{
  this((scheme == null ? "" : scheme + ":")
       + (ssp == null ? "" : quote(ssp))
       + (fragment == null ? "" : "#" + quote(fragment)));
}
647
648 /**
649 * Create an URI from the given string
650 *
651 * @param str The string to create the URI from
652 *
653 * @exception IllegalArgumentException If the given string violates RFC 2396
654 * @exception NullPointerException If str is null
655 */
656 public static URI create(String str)
657 {
658 try
659 {
660 return new URI(str);
661 }
662 catch (URISyntaxException e)
663 {
664 throw (IllegalArgumentException) new IllegalArgumentException()
665 .initCause(e);
666 }
667 }
668
669 /**
670 * Attempts to parse this URI's authority component, if defined,
671 * into user-information, host, and port components. The purpose
672 * of this method was to disambiguate between some authority sections,
673 * which form invalid server-based authories, but valid registry
674 * based authorities. In the updated RFC 3986, the authority section
675 * is defined differently, with registry-based authorities part of
676 * the host section. Thus, this method is now simply an explicit
677 * way of parsing any authority section.
678 *
679 * @return the URI, with the authority section parsed into user
680 * information, host and port components.
681 * @throws URISyntaxException if the given string violates RFC 2396
682 */
683 public URI parseServerAuthority() throws URISyntaxException
684 {
685 if (rawAuthority != null)
686 {
687 Matcher matcher = AUTHORITY_PATTERN.matcher(rawAuthority);
688
689 if (matcher.matches())
690 {
691 rawUserInfo = getURIGroup(matcher, AUTHORITY_USERINFO_GROUP);
692 rawHost = getURIGroup(matcher, AUTHORITY_HOST_GROUP);
693
694 String portStr = getURIGroup(matcher, AUTHORITY_PORT_GROUP);
695
696 if (portStr != null && ! portStr.isEmpty())
697 try
698 {
699 port = Integer.parseInt(portStr);
700 }
701 catch (NumberFormatException e)
702 {
703 URISyntaxException use =
704 new URISyntaxException
705 (string, "doesn't match URI regular expression");
706 use.initCause(e);
707 throw use;
708 }
709 }
710 else
711 throw new URISyntaxException(string,
712 "doesn't match URI regular expression");
713 }
714 return this;
715 }
716
717 /**
718 * <p>
719 * Returns a normalized version of the URI. If the URI is opaque,
720 * or its path is already in normal form, then this URI is simply
721 * returned. Otherwise, the following transformation of the path
722 * element takes place:
723 * </p>
724 * <ol>
725 * <li>All `.' segments are removed.</li>
726 * <li>Each `..' segment which can be paired with a prior non-`..' segment
727 * is removed along with the preceding segment.</li>
728 * <li>A `.' segment is added to the front if the first segment contains
729 * a colon (`:'). This is a deviation from the RFC, which prevents
730 * confusion between the path and the scheme.</li>
731 * </ol>
732 * <p>
733 * The resulting URI will be free of `.' and `..' segments, barring those
734 * that were prepended or which couldn't be paired, respectively.
735 * </p>
736 *
737 * @return the normalized URI.
738 */
739 public URI normalize()
740 {
741 if (isOpaque() || path.indexOf("/./") == -1 && path.indexOf("/../") == -1)
742 return this;
743 try
744 {
745 return new URI(scheme, authority, normalizePath(path), query,
746 fragment);
747 }
748 catch (URISyntaxException e)
749 {
750 throw (Error) new InternalError("Normalized URI variant could not "+
751 "be constructed").initCause(e);
752 }
753 }
754
755 /**
756 * <p>
757 * Normalize the given path. The following transformation takes place:
758 * </p>
759 * <ol>
760 * <li>All `.' segments are removed.</li>
761 * <li>Each `..' segment which can be paired with a prior non-`..' segment
762 * is removed along with the preceding segment.</li>
763 * <li>A `.' segment is added to the front if the first segment contains
764 * a colon (`:'). This is a deviation from the RFC, which prevents
765 * confusion between the path and the scheme.</li>
766 * </ol>
767 * <p>
768 * The resulting URI will be free of `.' and `..' segments, barring those
769 * that were prepended or which couldn't be paired, respectively.
770 * </p>
771 *
772 * @param relativePath the relative path to be normalized.
773 * @return the normalized path.
774 */
775 private String normalizePath(String relativePath)
776 {
777 /*
778 This follows the algorithm in section 5.2.4. of RFC3986,
779 but doesn't modify the input buffer.
780 */
781 StringBuffer input = new StringBuffer(relativePath);
782 StringBuffer output = new StringBuffer();
783 int start = 0;
784 while (start < input.length())
785 {
786 /* A */
787 if (input.indexOf("../",start) == start)
788 {
789 start += 3;
790 continue;
791 }
792 if (input.indexOf("./",start) == start)
793 {
794 start += 2;
795 continue;
796 }
797 /* B */
798 if (input.indexOf("/./",start) == start)
799 {
800 start += 2;
801 continue;
802 }
803 if (input.indexOf("/.",start) == start
804 && input.charAt(start + 2) != '.')
805 {
806 start += 1;
807 input.setCharAt(start,'/');
808 continue;
809 }
810 /* C */
811 if (input.indexOf("/../",start) == start)
812 {
813 start += 3;
814 removeLastSegment(output);
815 continue;
816 }
817 if (input.indexOf("/..",start) == start)
818 {
819 start += 2;
820 input.setCharAt(start,'/');
821 removeLastSegment(output);
822 continue;
823 }
824 /* D */
825 if (start == input.length() - 1 && input.indexOf(".",start) == start)
826 {
827 input.delete(0,1);
828 continue;
829 }
830 if (start == input.length() - 2 && input.indexOf("..",start) == start)
831 {
832 input.delete(0,2);
833 continue;
834 }
835 /* E */
836 int indexOfSlash = input.indexOf("/",start);
837 while (indexOfSlash == start)
838 {
839 output.append("/");
840 ++start;
841 indexOfSlash = input.indexOf("/",start);
842 }
843 if (indexOfSlash == -1)
844 indexOfSlash = input.length();
845 output.append(input.substring(start, indexOfSlash));
846 start = indexOfSlash;
847 }
848 return output.toString();
849 }
850
851 /**
852 * Removes the last segment of the path from the specified buffer.
853 *
854 * @param buffer the buffer containing the path.
855 */
856 private void removeLastSegment(StringBuffer buffer)
857 {
858 int lastSlash = buffer.lastIndexOf("/");
859 if (lastSlash == -1)
860 buffer.setLength(0);
861 else
862 buffer.setLength(lastSlash);
863 }
864
865 /**
866 * Resolves the given URI against this URI
867 *
868 * @param uri The URI to resolve against this URI
869 *
870 * @return The resulting URI, or null when it couldn't be resolved
871 * for some reason.
872 *
873 * @throws NullPointerException if uri is null
874 */
875 public URI resolve(URI uri)
876 {
877 if (uri.isAbsolute())
878 return uri;
879 if (uri.isOpaque())
880 return uri;
881
882 String scheme = uri.getScheme();
883 String schemeSpecificPart = uri.getSchemeSpecificPart();
884 String authority = uri.getAuthority();
885 String path = uri.getPath();
886 String query = uri.getQuery();
887 String fragment = uri.getFragment();
888
889 try
890 {
891 if (fragment != null && path != null && path.equals("")
892 && scheme == null && authority == null && query == null)
893 return new URI(this.scheme, this.schemeSpecificPart, fragment);
894
895 if (authority == null)
896 {
897 authority = this.authority;
898 if (path == null)
899 path = "";
900 if (! (path.startsWith("/")))
901 {
902 StringBuffer basepath = new StringBuffer(this.path);
903 int i = this.path.lastIndexOf('/');
904
905 if (i >= 0)
906 basepath.delete(i + 1, basepath.length());
907
908 basepath.append(path);
909 path = normalizePath(basepath.toString());
910 }
911 }
912 return new URI(this.scheme, authority, path, query, fragment);
913 }
914 catch (URISyntaxException e)
915 {
916 throw (Error) new InternalError("Resolved URI variant could not "+
917 "be constructed").initCause(e);
918 }
919 }
920
/**
 * Resolves the given URI string against this URI
 *
 * The string is first turned into a URI with <code>create</code>, and
 * that URI is then resolved as in <code>resolve(URI)</code>.
 *
 * @param str The URI as string to resolve against this URI
 *
 * @return The resulting URI
 *
 * @throws IllegalArgumentException If the given string cannot be
 *         parsed as a URI
 * @throws NullPointerException If str is null
 */
public URI resolve(String str) throws IllegalArgumentException
{
  return resolve(create(str));
}
936
937 /**
938 * <p>
939 * Relativizes the given URI against this URI. The following
940 * algorithm is used:
941 * </p>
942 * <ul>
943 * <li>If either URI is opaque, the given URI is returned.</li>
944 * <li>If the schemes of the URIs differ, the given URI is returned.</li>
945 * <li>If the authority components of the URIs differ, then the given
946 * URI is returned.</li>
947 * <li>If the path of this URI is not a prefix of the supplied URI,
948 * then the given URI is returned.</li>
949 * <li>If all the above conditions hold, a new URI is created using the
950 * query and fragment components of the given URI, along with a path
951 * computed by removing the path of this URI from the start of the path
952 * of the supplied URI.</li>
953 * </ul>
954 *
955 * @param uri the URI to relativize agsint this URI
956 * @return the resulting URI
957 * @throws NullPointerException if the uri is null
958 */
959 public URI relativize(URI uri)
960 {
961 if (isOpaque() || uri.isOpaque())
962 return uri;
963 if (scheme == null && uri.getScheme() != null)
964 return uri;
965 if (scheme != null && !(scheme.equals(uri.getScheme())))
966 return uri;
967 if (rawAuthority == null && uri.getRawAuthority() != null)
968 return uri;
969 if (rawAuthority != null && !(rawAuthority.equals(uri.getRawAuthority())))
970 return uri;
971 String basePath = rawPath;
972 if (!(uri.getRawPath().equals(rawPath)))
973 {
974 if (!(basePath.endsWith("/")))
975 basePath = basePath.concat("/");
976 if (!(uri.getRawPath().startsWith(basePath)))
977 return uri;
978 }
979 try
980 {
981 return new URI(null, null,
982 uri.getRawPath().substring(basePath.length()),
983 uri.getRawQuery(), uri.getRawFragment());
984 }
985 catch (URISyntaxException e)
986 {
987 throw (Error) new InternalError("Relativized URI variant could not "+
988 "be constructed").initCause(e);
989 }
990 }
991
992 /**
993 * Creates an URL from an URI
994 *
995 * @throws MalformedURLException If a protocol handler for the URL could
996 * not be found, or if some other error occurred while constructing the URL
997 * @throws IllegalArgumentException If the URI is not absolute
998 */
999 public URL toURL() throws IllegalArgumentException, MalformedURLException
1000 {
1001 if (isAbsolute())
1002 return new URL(this.toString());
1003
1004 throw new IllegalArgumentException("not absolute");
1005 }
1006
1007 /**
1008 * Returns the scheme of the URI
1009 */
1010 public String getScheme()
1011 {
1012 return scheme;
1013 }
1014
1015 /**
1016 * Tells whether this URI is absolute or not
1017 */
1018 public boolean isAbsolute()
1019 {
1020 return scheme != null;
1021 }
1022
1023 /**
1024 * Tell whether this URI is opaque or not
1025 */
1026 public boolean isOpaque()
1027 {
1028 return ((scheme != null) && ! (schemeSpecificPart.startsWith("/")));
1029 }
1030
1031 /**
1032 * Returns the raw scheme specific part of this URI.
1033 * The scheme-specific part is never undefined, though it may be empty
1034 */
1035 public String getRawSchemeSpecificPart()
1036 {
1037 return rawSchemeSpecificPart;
1038 }
1039
1040 /**
1041 * Returns the decoded scheme specific part of this URI.
1042 */
1043 public String getSchemeSpecificPart()
1044 {
1045 return schemeSpecificPart;
1046 }
1047
1048 /**
1049 * Returns the raw authority part of this URI
1050 */
1051 public String getRawAuthority()
1052 {
1053 return rawAuthority;
1054 }
1055
1056 /**
1057 * Returns the decoded authority part of this URI
1058 */
1059 public String getAuthority()
1060 {
1061 return authority;
1062 }
1063
1064 /**
1065 * Returns the raw user info part of this URI
1066 */
1067 public String getRawUserInfo()
1068 {
1069 return rawUserInfo;
1070 }
1071
1072 /**
1073 * Returns the decoded user info part of this URI
1074 */
1075 public String getUserInfo()
1076 {
1077 return userInfo;
1078 }
1079
1080 /**
1081 * Returns the hostname of the URI
1082 */
1083 public String getHost()
1084 {
1085 return host;
1086 }
1087
1088 /**
1089 * Returns the port number of the URI
1090 */
1091 public int getPort()
1092 {
1093 return port;
1094 }
1095
1096 /**
1097 * Returns the raw path part of this URI
1098 */
1099 public String getRawPath()
1100 {
1101 return rawPath;
1102 }
1103
1104 /**
1105 * Returns the path of the URI
1106 */
1107 public String getPath()
1108 {
1109 return path;
1110 }
1111
1112 /**
1113 * Returns the raw query part of this URI
1114 */
1115 public String getRawQuery()
1116 {
1117 return rawQuery;
1118 }
1119
1120 /**
1121 * Returns the query of the URI
1122 */
1123 public String getQuery()
1124 {
1125 return query;
1126 }
1127
1128 /**
1129 * Return the raw fragment part of this URI
1130 */
1131 public String getRawFragment()
1132 {
1133 return rawFragment;
1134 }
1135
1136 /**
1137 * Returns the fragment of the URI
1138 */
1139 public String getFragment()
1140 {
1141 return fragment;
1142 }
1143
1144 /**
1145 * <p>
1146 * Compares the URI with the given object for equality. If the
1147 * object is not a <code>URI</code>, then the method returns false.
1148 * Otherwise, the following criteria are observed:
1149 * </p>
1150 * <ul>
1151 * <li>The scheme of the URIs must either be null (undefined) in both cases,
1152 * or equal, ignorant of case.</li>
1153 * <li>The raw fragment of the URIs must either be null (undefined) in both
1154 * cases, or equal, ignorant of case.</li>
1155 * <li>Both URIs must be of the same type (opaque or hierarchial)</li>
1156 * <li><strong>For opaque URIs:</strong></li>
1157 * <ul>
1158 * <li>The raw scheme-specific parts must be equal.</li>
1159 * </ul>
1160 * <li>For hierarchical URIs:</li>
1161 * <ul>
1162 * <li>The raw paths must be equal, ignorant of case.</li>
1163 * <li>The raw queries are either both undefined or both equal, ignorant
1164 * of case.</li>
1165 * <li>The raw authority sections are either both undefined or:</li>
1166 * <li><strong>For registry-based authorities:</strong></li>
1167 * <ul><li>they are equal.</li></ul>
1168 * <li><strong>For server-based authorities:</strong></li>
1169 * <ul>
1170 * <li>the hosts are equal, ignoring case</li>
1171 * <li>the ports are equal</li>
1172 * <li>the user information components are equal</li>
1173 * </ul>
1174 * </ul>
1175 * </ul>
1176 *
1177 * @param obj the obj to compare the URI with.
1178 * @return <code>true</code> if the objects are equal, according to
1179 * the specification above.
1180 */
1181 public boolean equals(Object obj)
1182 {
1183 if (!(obj instanceof URI))
1184 return false;
1185 URI uriObj = (URI) obj;
1186 if (scheme == null)
1187 {
1188 if (uriObj.getScheme() != null)
1189 return false;
1190 }
1191 else
1192 if (!(scheme.equalsIgnoreCase(uriObj.getScheme())))
1193 return false;
1194 if (rawFragment == null)
1195 {
1196 if (uriObj.getRawFragment() != null)
1197 return false;
1198 }
1199 else
1200 if (!(rawFragment.equalsIgnoreCase(uriObj.getRawFragment())))
1201 return false;
1202 boolean opaqueThis = isOpaque();
1203 boolean opaqueObj = uriObj.isOpaque();
1204 if (opaqueThis && opaqueObj)
1205 return rawSchemeSpecificPart.equals(uriObj.getRawSchemeSpecificPart());
1206 else if (!opaqueThis && !opaqueObj)
1207 {
1208 boolean common = rawPath.equalsIgnoreCase(uriObj.getRawPath())
1209 && ((rawQuery == null && uriObj.getRawQuery() == null)
1210 || rawQuery.equalsIgnoreCase(uriObj.getRawQuery()));
1211 if (rawAuthority == null && uriObj.getRawAuthority() == null)
1212 return common;
1213 if (host == null)
1214 return common
1215 && rawAuthority.equalsIgnoreCase(uriObj.getRawAuthority());
1216 return common
1217 && host.equalsIgnoreCase(uriObj.getHost())
1218 && port == uriObj.getPort()
1219 && (rawUserInfo == null ?
1220 uriObj.getRawUserInfo() == null :
1221 rawUserInfo.equalsIgnoreCase(uriObj.getRawUserInfo()));
1222 }
1223 else
1224 return false;
1225 }
1226
1227 /**
1228 * Computes the hashcode of the URI
1229 */
1230 public int hashCode()
1231 {
1232 return (getScheme() == null ? 0 : 13 * getScheme().hashCode())
1233 + 17 * getRawSchemeSpecificPart().hashCode()
1234 + (getRawFragment() == null ? 0 : 21 + getRawFragment().hashCode());
1235 }
1236
1237 /**
1238 * Compare the URI with another URI.
1239 * Undefined components are taken to be less than any other component.
1240 * The following criteria are observed:
1241 * </p>
1242 * <ul>
1243 * <li>Two URIs with different schemes are compared according to their
1244 * scheme, regardless of case.</li>
1245 * <li>A hierarchical URI is less than an opaque URI with the same
1246 * scheme.</li>
1247 * <li><strong>For opaque URIs:</strong></li>
1248 * <ul>
1249 * <li>URIs with differing scheme-specific parts are ordered according
1250 * to the ordering of the scheme-specific part.</li>
1251 * <li>URIs with the same scheme-specific part are ordered by the
1252 * raw fragment.</li>
1253 * </ul>
1254 * <li>For hierarchical URIs:</li>
1255 * <ul>
1256 * <li>URIs are ordered according to their raw authority sections,
1257 * if they are unequal.</li>
1258 * <li><strong>For registry-based authorities:</strong></li>
1259 * <ul><li>they are ordered according to the ordering of the authority
1260 * component.</li></ul>
1261 * <li><strong>For server-based authorities:</strong></li>
1262 * <ul>
1263 * <li>URIs are ordered according to the raw user information.</li>
1264 * <li>URIs with the same user information are ordered by the host,
1265 * ignoring case.</li>
1266 * <lI>URIs with the same host are ordered by the port.</li>
1267 * </ul>
1268 * <li>URIs with the same authority section are ordered by the raw path.</li>
1269 * <li>URIs with the same path are ordered by their raw query.</li>
1270 * <li>URIs with the same query are ordered by their raw fragments.</li>
1271 * </ul>
1272 * </ul>
1273 *
1274 * @param uri The other URI to compare this URI with
1275 * @return a negative integer, zero or a positive integer depending
1276 * on whether this URI is less than, equal to or greater
1277 * than that supplied, respectively.
1278 */
1279 public int compareTo(URI uri)
1280 throws ClassCastException
1281 {
1282 if (scheme == null && uri.getScheme() != null)
1283 return -1;
1284 if (scheme != null)
1285 {
1286 int sCompare = scheme.compareToIgnoreCase(uri.getScheme());
1287 if (sCompare != 0)
1288 return sCompare;
1289 }
1290 boolean opaqueThis = isOpaque();
1291 boolean opaqueObj = uri.isOpaque();
1292 if (opaqueThis && !opaqueObj)
1293 return 1;
1294 if (!opaqueThis && opaqueObj)
1295 return -1;
1296 if (opaqueThis)
1297 {
1298 int ssCompare =
1299 rawSchemeSpecificPart.compareTo(uri.getRawSchemeSpecificPart());
1300 if (ssCompare == 0)
1301 return compareFragments(uri);
1302 else
1303 return ssCompare;
1304 }
1305 if (rawAuthority == null && uri.getRawAuthority() != null)
1306 return -1;
1307 if (rawAuthority != null)
1308 {
1309 int aCompare = rawAuthority.compareTo(uri.getRawAuthority());
1310 if (aCompare != 0)
1311 {
1312 if (host == null)
1313 return aCompare;
1314 if (rawUserInfo == null && uri.getRawUserInfo() != null)
1315 return -1;
1316 int uCompare = rawUserInfo.compareTo(uri.getRawUserInfo());
1317 if (uCompare != 0)
1318 return uCompare;
1319 if (host == null && uri.getHost() != null)
1320 return -1;
1321 int hCompare = host.compareTo(uri.getHost());
1322 if (hCompare != 0)
1323 return hCompare;
1324 return new Integer(port).compareTo(new Integer(uri.getPort()));
1325 }
1326 }
1327 if (rawPath == null && uri.getRawPath() != null)
1328 return -1;
1329 if (rawPath != null)
1330 {
1331 int pCompare = rawPath.compareTo(uri.getRawPath());
1332 if (pCompare != 0)
1333 return pCompare;
1334 }
1335 if (rawQuery == null && uri.getRawQuery() != null)
1336 return -1;
1337 if (rawQuery != null)
1338 {
1339 int qCompare = rawQuery.compareTo(uri.getRawQuery());
1340 if (qCompare != 0)
1341 return qCompare;
1342 }
1343 return compareFragments(uri);
1344 }
1345
1346 /**
1347 * Compares the fragment of this URI with that of the supplied URI.
1348 *
1349 * @param uri the URI to compare with this one.
1350 * @return a negative integer, zero or a positive integer depending
1351 * on whether this uri's fragment is less than, equal to
1352 * or greater than the fragment of the uri supplied, respectively.
1353 */
1354 private int compareFragments(URI uri)
1355 {
1356 if (rawFragment == null && uri.getRawFragment() != null)
1357 return -1;
1358 else if (rawFragment == null)
1359 return 0;
1360 else
1361 return rawFragment.compareTo(uri.getRawFragment());
1362 }
1363
1364 /**
1365 * Returns the URI as a String. If the URI was created using a constructor,
1366 * then this will be the same as the original input string.
1367 *
1368 * @return a string representation of the URI.
1369 */
1370 public String toString()
1371 {
1372 return (scheme == null ? "" : scheme + ":")
1373 + rawSchemeSpecificPart
1374 + (rawFragment == null ? "" : "#" + rawFragment);
1375 }
1376
1377 /**
1378 * Returns the URI as US-ASCII string. This is the same as the result
1379 * from <code>toString()</code> for URIs that don't contain any non-US-ASCII
1380 * characters. Otherwise, the non-US-ASCII characters are replaced
1381 * by their percent-encoded representations.
1382 *
1383 * @return a string representation of the URI, containing only US-ASCII
1384 * characters.
1385 */
1386 public String toASCIIString()
1387 {
1388 String strRep = toString();
1389 boolean inNonAsciiBlock = false;
1390 StringBuffer buffer = new StringBuffer();
1391 StringBuffer encBuffer = null;
1392 for (int i = 0; i < strRep.length(); i++)
1393 {
1394 char c = strRep.charAt(i);
1395 if (c <= 127)
1396 {
1397 if (inNonAsciiBlock)
1398 {
1399 buffer.append(escapeCharacters(encBuffer.toString()));
1400 inNonAsciiBlock = false;
1401 }
1402 buffer.append(c);
1403 }
1404 else
1405 {
1406 if (!inNonAsciiBlock)
1407 {
1408 encBuffer = new StringBuffer();
1409 inNonAsciiBlock = true;
1410 }
1411 encBuffer.append(c);
1412 }
1413 }
1414 return buffer.toString();
1415 }
1416
1417 /**
1418 * Converts the non-ASCII characters in the supplied string
1419 * to their equivalent percent-encoded representations.
1420 * That is, they are replaced by "%" followed by their hexadecimal value.
1421 *
1422 * @param str a string including non-ASCII characters.
1423 * @return the string with the non-ASCII characters converted to their
1424 * percent-encoded representations.
1425 */
1426 private static String escapeCharacters(String str)
1427 {
1428 try
1429 {
1430 StringBuffer sb = new StringBuffer();
1431 // this is far from optimal, but it works
1432 byte[] utf8 = str.getBytes("utf-8");
1433 for (int j = 0; j < utf8.length; j++)
1434 {
1435 sb.append('%');
1436 sb.append(HEX.charAt((utf8[j] & 0xff) / 16));
1437 sb.append(HEX.charAt((utf8[j] & 0xff) % 16));
1438 }
1439 return sb.toString();
1440 }
1441 catch (java.io.UnsupportedEncodingException x)
1442 {
1443 throw (Error) new InternalError("Escaping error").initCause(x);
1444 }
1445 }
1446
1447 }