View Javadoc

1   // ========================================================================
2   // Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // All rights reserved. This program and the accompanying materials
5   // are made available under the terms of the Eclipse Public License v1.0
6   // and Apache License v2.0 which accompanies this distribution.
7   // The Eclipse Public License is available at 
8   // http://www.eclipse.org/legal/epl-v10.html
9   // The Apache License v2.0 is available at
10  // http://www.opensource.org/licenses/apache2.0.php
11  // You may elect to redistribute this code under either of these licenses. 
12  // ========================================================================
13  
14  package org.eclipse.jetty.http;
15  
16  import java.io.UnsupportedEncodingException;
17  
18  import org.eclipse.jetty.util.MultiMap;
19  import org.eclipse.jetty.util.StringUtil;
20  import org.eclipse.jetty.util.TypeUtil;
21  import org.eclipse.jetty.util.URIUtil;
22  import org.eclipse.jetty.util.UrlEncoded;
23  import org.eclipse.jetty.util.Utf8StringBuilder;
24  
25  
26  /* ------------------------------------------------------------ */
27  /** Http URI.
28   * Parse a HTTP URI from a string or byte array.  Given a URI
29   * <code>http://user@host:port/path/info;param?query#fragment</code>
30   * this class will split it into the following undecoded optional elements:<ul>
31   * <li>{@link #getScheme()} - http:</li>
32   * <li>{@link #getAuthority()} - //name@host:port</li>
33   * <li>{@link #getHost()} - host</li>
34   * <li>{@link #getPort()} - port</li>
35   * <li>{@link #getPath()} - /path/info</li>
36   * <li>{@link #getParam()} - param</li>
37   * <li>{@link #getQuery()} - query</li>
38   * <li>{@link #getFragment()} - fragment</li>
39   * </ul>
40   * 
41   */
42  public class HttpURI
43  {
44      private static final byte[] __empty={}; 
45      private final static int 
46      START=0,
47      AUTH_OR_PATH=1,
48      SCHEME_OR_PATH=2,
49      AUTH=4,
50      IPV6=5,
51      PORT=6,
52      PATH=7,
53      PARAM=8,
54      QUERY=9,
55      ASTERISK=10;
56      
57      boolean _partial=false;
58      byte[] _raw=__empty;
59      String _rawString;
60      int _scheme;
61      int _authority;
62      int _host;
63      int _port;
64      int _portValue;
65      int _path;
66      int _param;
67      int _query;
68      int _fragment;
69      int _end;
70      boolean _encoded=false;
71      
72      final Utf8StringBuilder _utf8b = new Utf8StringBuilder(64);
73      
74      public HttpURI()
75      {
76          
77      } 
78      
79      /* ------------------------------------------------------------ */
80      /**
81       * @param parsePartialAuth If True, parse auth without prior scheme, else treat all URIs starting with / as paths
82       */
83      public HttpURI(boolean parsePartialAuth)
84      {
85          _partial=parsePartialAuth;
86      }
87      
88      public HttpURI(String raw)
89      {
90          _rawString=raw;
91          byte[] b = raw.getBytes();
92          parse(b,0,b.length);
93      }
94      
95      public HttpURI(byte[] raw,int offset, int length)
96      {
97          parse2(raw,offset,length);
98      }
99      
100     public void parse(String raw)
101     {
102         byte[] b = raw.getBytes();
103         parse2(b,0,b.length);
104         _rawString=raw;
105     }
106     
107     public void parse(byte[] raw,int offset, int length)
108     {
109         _rawString=null;
110         parse2(raw,offset,length);
111     }
112     
113     private void parse2(byte[] raw,int offset, int length)
114     {
115         _encoded=false;
116         _raw=raw;
117         int i=offset;
118         int e=offset+length;
119         int state=START;
120         int m=offset;
121         _end=offset+length;
122         _scheme=offset;
123         _authority=offset;
124         _host=offset;
125         _port=offset;
126         _portValue=-1;
127         _path=offset;
128         _param=_end;
129         _query=_end;
130         _fragment=_end;
131         while (i<e)
132         {
133             char c=(char)(0xff&_raw[i]);
134             int s=i++;
135             
136             state: switch (state)
137             {
138                 case START:
139                 {
140                     m=s;
141                     switch(c)
142                     {
143                         case '/':
144                             state=AUTH_OR_PATH;
145                             break;
146                         case ';':
147                             _param=s;
148                             state=PARAM;
149                             break;
150                         case '?':
151                             _param=s;
152                             _query=s;
153                             state=QUERY;
154                             break;
155                         case '#':
156                             _param=s;
157                             _query=s;
158                             _fragment=s;
159                             break;
160                         case '*':
161                             _path=s;
162                             state=ASTERISK;
163                             break;
164                             
165                         default:
166                             if (Character.isLetterOrDigit(c))
167                                 state=SCHEME_OR_PATH;
168                             else
169                                 throw new IllegalArgumentException("!(SCHEME|PATH|AUTH):"+StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
170                     }
171                     
172                     continue;
173                 }
174 
175                 case AUTH_OR_PATH:
176                 {
177                     if ((_partial||_scheme!=_authority) && c=='/')
178                     {
179                         _host=i;
180                         _port=_end;
181                         _path=_end;
182                         state=AUTH;
183                     }
184                     else if (c==';' || c=='?' || c=='#')
185                     {
186                         i--;
187                         state=PATH;
188                     }  
189                     else
190                     {
191                         _host=m;
192                         _port=m;
193                         state=PATH;
194                     }  
195                     continue;
196                 }
197                 
198                 case SCHEME_OR_PATH:
199                 {
200                     // short cut for http and https
201                     if (length>6 && c=='t')
202                     {
203                         if (_raw[offset+3]==':')
204                         {
205                             s=offset+3;
206                             i=offset+4;
207                             c=':';
208                         }
209                         else if (_raw[offset+4]==':')
210                         {
211                             s=offset+4;
212                             i=offset+5;
213                             c=':';
214                         }
215                         else if (_raw[offset+5]==':')
216                         {
217                             s=offset+5;
218                             i=offset+6;
219                             c=':';
220                         }
221                     }
222                     
223                     switch (c)
224                     {
225                         case ':':
226                         {
227                             m = i++;
228                             _authority = m;
229                             _path = m;
230                             c = (char)(0xff & _raw[i]);
231                             if (c == '/')
232                                 state = AUTH_OR_PATH;
233                             else
234                             {
235                                 _host = m;
236                                 _port = m;
237                                 state = PATH;
238                             }
239                             break;
240                         }
241                         
242                         case '/':
243                         {
244                             state = PATH;
245                             break;
246                         }
247                         
248                         case ';':
249                         {
250                             _param = s;
251                             state = PARAM;
252                             break;
253                         }
254                         
255                         case '?':
256                         {
257                             _param = s;
258                             _query = s;
259                             state = QUERY;
260                             break;
261                         }
262                         
263                         case '#':
264                         {
265                             _param = s;
266                             _query = s;
267                             _fragment = s;
268                             break;
269                         }
270                     }
271                     continue;
272                 }
273                 
274                 case AUTH:
275                 {
276                     switch (c)
277                     {
278 
279                         case '/':
280                         {
281                             m = s;
282                             _path = m;
283                             _port = _path;
284                             state = PATH;
285                             break;
286                         }
287                         case '@':
288                         {
289                             _host = i;
290                             break;
291                         }
292                         case ':':
293                         {
294                             _port = s;
295                             state = PORT;
296                             break;
297                         }
298                         case '[':
299                         {
300                             state = IPV6;
301                             break;
302                         }
303                     }
304                     continue;
305                 }
306 
307                 case IPV6:
308                 {
309                     switch (c)
310                     {
311                         case '/':
312                         {
313                             throw new IllegalArgumentException("No closing ']' for " + StringUtil.toString(_raw,offset,length,URIUtil.__CHARSET));
314                         }
315                         case ']':
316                         {
317                             state = AUTH;
318                             break;
319                         }
320                     }
321 
322                     continue;
323                 }
324                 
325                 case PORT:
326                 {
327                     if (c=='/')
328                     {
329                         m=s;
330                         _path=m;
331                         if (_port<=_authority)
332                             _port=_path;
333                         state=PATH;
334                     }
335                     continue;
336                 }
337                 
338                 case PATH:
339                 {
340                     switch (c)
341                     {
342                         case ';':
343                         {
344                             _param = s;
345                             state = PARAM;
346                             break;
347                         }
348                         case '?':
349                         {
350                             _param = s;
351                             _query = s;
352                             state = QUERY;
353                             break;
354                         }
355                         case '#':
356                         {
357                             _param = s;
358                             _query = s;
359                             _fragment = s;
360                             break state;
361                         }
362                         case '%':
363                         {
364                             _encoded=true;
365                         }
366                     }
367                     continue;
368                 }
369                 
370                 case PARAM:
371                 {
372                     switch (c)
373                     {
374                         case '?':
375                         {
376                             _query = s;
377                             state = QUERY;
378                             break;
379                         }
380                         case '#':
381                         {
382                             _query = s;
383                             _fragment = s;
384                             break state;
385                         }
386                     }
387                     continue;
388                 }
389                 
390                 case QUERY:
391                 {
392                     if (c=='#')
393                     {
394                         _fragment=s;
395                         break state;
396                     }
397                     continue;
398                 }
399                 
400                 case ASTERISK:
401                 {
402                     throw new IllegalArgumentException("only '*'");
403                 }
404             }
405         }
406 
407         if (_port<_path)
408             _portValue=TypeUtil.parseInt(_raw, _port+1, _path-_port-1,10);
409     }
410     
411     private String toUtf8String(int offset,int length)
412     {
413         _utf8b.reset();
414         _utf8b.append(_raw,offset,length);
415         return _utf8b.toString();
416     }
417     
418     public String getScheme()
419     {
420         if (_scheme==_authority)
421             return null;
422         int l=_authority-_scheme;
423         if (l==5 && 
424             _raw[_scheme]=='h' && 
425             _raw[_scheme+1]=='t' && 
426             _raw[_scheme+2]=='t' && 
427             _raw[_scheme+3]=='p' )
428             return HttpSchemes.HTTP;
429         if (l==6 && 
430             _raw[_scheme]=='h' && 
431             _raw[_scheme+1]=='t' && 
432             _raw[_scheme+2]=='t' && 
433             _raw[_scheme+3]=='p' && 
434             _raw[_scheme+4]=='s' )
435             return HttpSchemes.HTTPS;
436         
437         return toUtf8String(_scheme,_authority-_scheme-1);
438     }
439     
440     public String getAuthority()
441     {
442         if (_authority==_path)
443             return null;
444         return toUtf8String(_authority,_path-_authority);
445     }
446     
447     public String getHost()
448     {
449         if (_host==_port)
450             return null;
451         return toUtf8String(_host,_port-_host);
452     }
453     
454     public int getPort()
455     {
456         return _portValue;
457     }
458     
459     public String getPath()
460     {
461         if (_path==_param)
462             return null;
463         return toUtf8String(_path,_param-_path);
464     }
465     
466     public String getDecodedPath()
467     {
468         if (_path==_param)
469             return null;
470 
471         int length = _param-_path;
472         byte[] bytes=null;
473         int n=0;
474 
475         for (int i=_path;i<_param;i++)
476         {
477             byte b = _raw[i];
478             
479             if (b=='%')
480             {
481                 if ((i+2)>=_param)
482                     throw new IllegalArgumentException("Bad % encoding: "+this);
483                 b=(byte)(0xff&TypeUtil.parseInt(_raw,i+1,2,16));
484                 i+=2;
485             }
486             else if (bytes==null)
487             {
488                 n++;
489                 continue;
490             }
491             
492             if (bytes==null)
493             {
494                 bytes=new byte[length];
495                 System.arraycopy(_raw,_path,bytes,0,n);
496             }
497             
498             bytes[n++]=b;
499         }
500 
501         if (bytes==null)
502             return toUtf8String(_path,length);
503 
504         _utf8b.reset();
505         _utf8b.append(bytes,0,n);
506         return _utf8b.toString();
507     }
508     
509     public String getPathAndParam()
510     {
511         if (_path==_query)
512             return null;
513         return toUtf8String(_path,_query-_path);
514     }
515     
516     public String getCompletePath()
517     {
518         if (_path==_end)
519             return null;
520         return toUtf8String(_path,_end-_path);
521     }
522     
523     public String getParam()
524     {
525         if (_param==_query)
526             return null;
527         return toUtf8String(_param+1,_query-_param-1);
528     }
529     
530     public String getQuery()
531     {
532         if (_query==_fragment)
533             return null;
534         return toUtf8String(_query+1,_fragment-_query-1);
535     }
536     
537     public String getQuery(String encoding)
538     {
539         if (_query==_fragment)
540             return null;
541         return StringUtil.toString(_raw,_query+1,_fragment-_query-1,encoding);
542     }
543     
544     public boolean hasQuery()
545     {
546         return (_fragment>_query);
547     }
548     
549     public String getFragment()
550     {
551         if (_fragment==_end)
552             return null;
553         return toUtf8String(_fragment+1,_end-_fragment-1);
554     }
555 
556     public void decodeQueryTo(MultiMap parameters) 
557     {
558         if (_query==_fragment)
559             return;
560         _utf8b.reset();
561         UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters,_utf8b);
562     }
563 
564     public void decodeQueryTo(MultiMap parameters, String encoding) 
565         throws UnsupportedEncodingException
566     {
567         if (_query==_fragment)
568             return;
569        
570         if (encoding==null || StringUtil.isUTF8(encoding))
571             UrlEncoded.decodeUtf8To(_raw,_query+1,_fragment-_query-1,parameters);
572         else
573             UrlEncoded.decodeTo(toUtf8String(_query+1,_fragment-_query-1),parameters,encoding);
574     }
575 
576     public void clear()
577     {
578         _scheme=_authority=_host=_port=_path=_param=_query=_fragment=_end=0;
579         _raw=__empty;
580         _rawString="";
581         _encoded=false;
582     }
583     
584     @Override
585     public String toString()
586     {
587         if (_rawString==null)
588             _rawString=toUtf8String(_scheme,_end-_scheme);
589         return _rawString;
590     }
591     
592     public void writeTo(Utf8StringBuilder buf)
593     {
594         buf.append(_raw,_scheme,_end-_scheme);
595     }
596     
597 }