Source Code of org.exoplatform.services.rest.impl.uri.UriComponent

/*
 * Copyright (C) 2009 eXo Platform SAS.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
package org.exoplatform.services.rest.impl.uri;


import org.exoplatform.services.rest.impl.MultivaluedMapImpl;


import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;


import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.PathSegment;


/**
 * @author <a href="mailto:andrew00x@gmail.com">Andrey Parfonov</a>
 * @version $Id: $
 */
public final class UriComponent
{


   /**
    * Constructor.
    */
   private UriComponent()
   {
   }


   // Components of URI, see http://gbiv.com/protocols/uri/rfc/rfc3986.htm
   /**
    * Scheme URI component.
    */
   public static final int SCHEME = 0;


   /**
    * UserInfo URI component.
    */
   public static final int USER_INFO = 1;


   /**
    * Host URI component.
    */
   public static final int HOST = 2;


   /**
    * Port URI component.
    */
   public static final int PORT = 3;


   /**
    * Path segment URI sub-component, it can't contains '/'.
    */
   public static final int PATH_SEGMENT = 4;


   /**
    * Path URI components, consists of path-segments.
    */
   public static final int PATH = 5;


   /**
    * Query string.
    */
   public static final int QUERY = 6;


   /**
    * Fragment.
    */
   public static final int FRAGMENT = 7;


   /**
    * Scheme-specific part.
    */
   public static final int SSP = 8;


   // very mess :( part


   /**
    * The letters of the basic Latin alphabet.
    */
   private static final String ALPHA = fillTable("A-Z") + fillTable("a-z");


   /**
    * Digits.
    */
   private static final String DIGIT = fillTable("0-9");


   /**
    * Characters that are allowed in a URI but do not have a reserved purpose are
    * called unreserved. These include uppercase and lowercase letters, decimal
    * digits, hyphen, period, underscore, and tilde.
    * <p>
    * Unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~'
    */
   private static final String UNRESERVED = ALPHA + DIGIT + "-._~";


   /**
    * The subset of the reserved characters (gen-delims) is used as delimiters of
    * the generic URI components.
    */
   private static final String GEN_DELIM = ":/?#[]@";


   /**
    * Sub-delims characters.
    */
   private static final String SUB_DELIM = "!$&'()*+,;=";


   // --------------------


   /**
    * Characters that used for percent encoding.
    */
   private static final String HEX_DIGITS = "0123456789ABCDEF";


   /**
    * Array of legal characters for each component of URI.
    */
   private static final String[] ENCODING = new String[9];


   // fill table
   static
   {
      ENCODING[SCHEME] = ALPHA + DIGIT + "+-.";
      ENCODING[USER_INFO] = UNRESERVED + SUB_DELIM + ":";
      ENCODING[HOST] = UNRESERVED + SUB_DELIM;
      ENCODING[PORT] = DIGIT;
      ENCODING[PATH_SEGMENT] = UNRESERVED + SUB_DELIM + ":@";
      ENCODING[PATH] = ENCODING[PATH_SEGMENT] + "/";
      ENCODING[QUERY] = ENCODING[PATH] + "?";
      ENCODING[FRAGMENT] = ENCODING[QUERY];
      ENCODING[SSP] = UNRESERVED + SUB_DELIM + GEN_DELIM;
   }


   /**
    * UTF-8 Charset.
    */
   private static final Charset UTF8 = Charset.forName("UTF-8");


   /**
    * For processing statements such as 'a-z', '0-9', etc.
    * 
    * @param statement statement
    * @return string abcd...zABCD...Z0123456789
    */
   private static String fillTable(String statement)
   {
      StringBuffer sb = new StringBuffer();
      if (statement.length() != 3 || statement.charAt(1) != '-')
         throw new IllegalArgumentException("Illegal format of source string, e. g. A-Z");


      char end = statement.charAt(2);


      for (char c = statement.charAt(0); c <= end; c++)
         sb.append(c);


      return sb.toString();
   }


   /**
    * Encode given URI string.
    * 
    * @param str the URI string
    * @param containsUriParams true if the source string contains URI parameters
    * @param component component of URI, scheme, host, port, etc
    * @return encoded string
    */
   // TODO encoding for IPv6
   public static String encode(String str, int component, boolean containsUriParams)
   {
      if (str == null)
         throw new IllegalArgumentException();
      return encodingInt(str, component, containsUriParams, false);
   }


   /**
    * Validate content of percent-encoding string.
    * 
    * @param str the string which must be validate
    * @param component component of URI, scheme, host, port, etc
    * @param containsUriParams true if the source string contains URI parameters
    * @return the source string
    */
   // TODO validation for IPv6
   public static String validate(String str, int component, boolean containsUriParams)
   {
      for (int i = 0; i < str.length(); i++)
      {
         char ch = str.charAt(i);


         if ((ch < 128 && !needEncode(ch, component)) || ((ch == '{' || ch == '}') && containsUriParams) || ch == '%')
            continue;


         throw new IllegalArgumentException("Illegal character, index " + i + ": " + str);
      }


      return str;
   }


   /**
    * Check string and if it does not contains any '%' characters validate it for
    * contains only valid characters. If it contains '%' then check does
    * following two character is valid hex numbers, if not then encode '%' to
    * '%25' otherwise keep characters without change, there is no double encoding.
    * 
    * @param str source string
    * @param component part of URI, e. g. schema, host, path
    * @param containsUriParams does string may contains URI templates
    * @return valid string
    */
   public static String recognizeEncode(String str, int component, boolean containsUriParams)
   {
      if (str == null)
         throw new IllegalArgumentException();
      return encodingInt(str, component, containsUriParams, true);
   }


   /**
    * @param str source string
    * @param component part of URI, e. g. schema, host, path
    * @param containsUriParams does string may contains URI templates
    * @param recognizeEncoded must check string to avoid double encoding
    * @return valid string
    */
   private static String encodingInt(String str, int component, boolean containsUriParams, boolean recognizeEncoded)
   {
      StringBuffer sb = null;
      int l = str.length();
      for (int i = 0; i < l; i++)
      {
         char ch = str.charAt(i);


         if (ch == '%' && recognizeEncoded)
         {
            if (UriComponent.checkHexCharacters(str, i))
            {


               if (sb != null)
                  sb.append(ch).append(str.charAt(++i)).append(str.charAt(++i));


            }
            else
            {


               if (sb == null)
               {
                  sb = new StringBuffer();
                  sb.append(str.substring(0, i));
               }
               addPercentEncoded(ch, sb); // in fact add '%25'


            }
         }
         else if (ch < 128 && !needEncode(ch, component))
         {


            if (sb != null)
               sb.append(ch);


         }
         else
         {


            if ((ch == '{' || ch == '}') && containsUriParams)
            {


               if (sb != null)
                  sb.append(ch);


            }
            else
            {


               if (sb == null)
               {
                  sb = new StringBuffer();
                  sb.append(str.substring(0, i));
               }


               if (ch < 128)
                  addPercentEncoded(ch, sb);
               else
                  addUTF8Encoded(ch, sb);


            }


         }
      }


      return sb != null ? sb.toString() : str;
   }


   /**
    * Decode percent encoded URI string.
    * 
    * @param str the source percent encoded string
    * @param component component of URI, scheme, host, port, etc. NOTE type of
    *          component is not used currently but will be used for decoding IPv6
    *          addresses
    * @return decoded string
    */
   // TODO decoding for IPv6
   public static String decode(String str, int component)
   {
      if (str == null)
         throw new IllegalArgumentException("Decoded string is null");


      int p = 0;
      int l = str.length();
      StringBuffer sb = new StringBuffer();


      /* NOTE spaces can be encoded with '+' */
      //    if ((p = str.indexOf('%')) < 0)
      //      return str; // nothing to do


      //    if (l < 3)
      if (l < 3 && str.indexOf('%') > 0)
         throw new IllegalArgumentException("Mailformed string " + str);


      //    if ((p = str.lastIndexOf('%')) > l - 3)
      p = str.lastIndexOf('%');
      if (p > 0 && p > l - 3)
         throw new IllegalArgumentException("Mailformed string at index " + p);


      ByteArrayOutputStream out = new ByteArrayOutputStream();
      p = 0; // reset pointer
      while (p < l)
      {
         char c = str.charAt(p);


         if (c != '%')
         {


            // NOTE can be potential problem but we can't ignore this
            if (c == '+')
               sb.append(' ');
            else
               sb.append(c);


            p++;


         }
         else
         {


            p = percentDecode(str, p, out);


            byte[] buff = out.toByteArray();


            if (buff.length == 1 && (buff[0] & 0xFF) < 128)
               sb.append((char)buff[0]);
            else
               addUTF8Decoded(buff, sb);


            out.reset();
         }
      }
      return sb.toString();
   }


   /**
    * Check must charter be encoded.
    * 
    * @param ch character
    * @param component the URI component
    * @return true if character must be encoded false otherwise
    */
   private static boolean needEncode(char ch, int component)
   {
      return ENCODING[component].indexOf(ch) == -1;
   }


   /**
    * Append percent encoded character in StringBuffer.
    * 
    * @param c character which must be encoded
    * @param sb StringBuffer to add character
    */
   private static void addPercentEncoded(int c, StringBuffer sb)
   {
      sb.append('%');
      sb.append(HEX_DIGITS.charAt(c >> 4));
      sb.append(HEX_DIGITS.charAt(c & 0x0F));
   }


   /**
    * Append UTF-8 encoded character in StringBuffer.
    * 
    * @param c character which must be encoded
    * @param sb StringBuffer to add character
    */
   private static void addUTF8Encoded(char c, StringBuffer sb)
   {
      ByteBuffer buf = UTF8.encode("" + c);
      while (buf.hasRemaining())
         addPercentEncoded(buf.get() & 0xFF, sb);
   }


   /**
    * Decode percent encoded string.
    * 
    * @param str the source string
    * @param p start position in string
    * @param out output buffer for decoded characters
    * @return current position in source string
    */
   private static int percentDecode(String str, int p, ByteArrayOutputStream out)
   {
      int l = str.length();
      for (;;)
      {
         char hc = getHexCharacter(str, ++p); // higher char
         char lc = getHexCharacter(str, ++p); // lower char


         int r =
            (Character.isDigit(hc) ? hc - '0' : hc - 'A' + 10) << 4
               | (Character.isDigit(lc) ? lc - '0' : lc - 'A' + 10);


         out.write((byte)r);
         p++;


         if (p == l || str.charAt(p) != '%')
            break;
      }


      return p;
   }


   /**
    * Check does two next characters after '%' represent percent-encoded
    * character.
    * 
    * @param s source string
    * @param p position of character in string
    * @return true is two characters after '%' represent percent-encoded
    *         character false otherwise
    */
   public static boolean checkHexCharacters(String s, int p)
   {
      if (p > s.length() - 3)
         return false;
      try
      {
         getHexCharacter(s, ++p);
         getHexCharacter(s, ++p);
         return true;
      }
      catch (IllegalArgumentException e)
      {
         return false;
      }
   }


   /**
    * Extract character from given string and check is it one of valid for hex
    * sequence.
    * 
    * @param s source string
    * @param p position of character in string
    * @return character
    */
   private static char getHexCharacter(String s, int p)
   {
      char c = s.charAt(p);
      if (Character.isLetter(c))
         c = Character.toUpperCase(c);


      if (HEX_DIGITS.indexOf(c) == -1)
         throw new IllegalArgumentException("Mailformed string at index " + p);


      return c;
   }


   /**
    * Decodes bytes to characters using the UTF-8 decoding and add them to a
    * StringBuffer.
    * 
    * @param buff source bytes
    * @param sb StringBuffer for append characters
    */
   private static void addUTF8Decoded(byte[] buff, StringBuffer sb)
   {
      CharBuffer cbuff = UTF8.decode(ByteBuffer.wrap(buff));
      sb.append(cbuff.toString());
   }


   /**
    * Parse path segments.
    * 
    * @param path the relative path
    * @param decode true if character must be decoded false otherwise
    * @return List of {@link PathSegment}
    */
   public static List<PathSegment> parsePathSegments(String path, boolean decode)
   {
      List<PathSegment> l = new ArrayList<PathSegment>();
      if (path == null || path.length() == 0)
         return l;


      // remove leading slash
      if (path.charAt(0) == '/')
         path = path.substring(1);


      int p = 0;
      int n = 0;
      while (n < path.length())
      {
         n = path.indexOf('/', p);
         if (n == -1)
            n = path.length();


         l.add(PathSegmentImpl.fromString(path.substring(p, n), decode));
         p = n + 1;


      }


      return l;
   }


   /**
    * Parse encoded query string.
    * 
    * @param rawQuery source query string
    * @param decode if true then query parameters will be decoded
    * @return {@link MultivaluedMap} with query parameters
    */
   public static MultivaluedMap<String, String> parseQueryString(String rawQuery, boolean decode)
   {
      MultivaluedMap<String, String> m = new MultivaluedMapImpl();
      if (rawQuery == null || rawQuery.length() == 0)
         return m;


      int p = 0;
      int n = 0;
      while (n < rawQuery.length())
      {
         n = rawQuery.indexOf('&', p);
         if (n == -1)
            n = rawQuery.length();


         String pair = rawQuery.substring(p, n);
         if (pair.length() == 0)
            continue;


         String name;
         String value = ""; // default value
         int eq = pair.indexOf('=');
         if (eq == -1) // no value, default is ""
            name = pair;
         else
         {
            name = pair.substring(0, eq);
            value = pair.substring(eq + 1);
         }


         m.add(decode ? decode(name, QUERY) : name, decode ? decode(value, QUERY) : value);


         p = n + 1;
      }


      return m;
   }


}
Source Code of org.exoplatform.services.rest.impl.uri.UriComponent

Related Classes of org.exoplatform.services.rest.impl.uri.UriComponent