Source Code of org.apache.sis.internal.storage.IOUtilities

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.sis.internal.storage;


import java.util.Set;
import java.util.List;
import java.util.HashSet;
import java.util.Collections;
import java.util.Arrays;
import java.util.Locale;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.net.URLDecoder;
import java.net.URISyntaxException;
import java.net.MalformedURLException;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import org.apache.sis.util.logging.Logging;
import org.apache.sis.util.CharSequences;
import org.apache.sis.util.Exceptions;
import org.apache.sis.util.Static;
import org.apache.sis.util.resources.Errors;


// Related to JDK7
import org.apache.sis.internal.jdk7.Files;
import org.apache.sis.internal.jdk7.StandardCharsets;
import org.apache.sis.internal.jdk7.StandardOpenOption;




/**
 * Utility methods related to I/O operations. Many methods in this class accept arbitrary {@link Object} argument
 * and perform a sequence of {@code instanceof} checks. Since this approach provides no type safety and since the
 * sequence of {@code instanceof} checks is somewhat arbitrary, those methods can not be in public API.
 *
 * <p>Unless otherwise specified, giving an instance of unknown type or a {@code null} value cause the methods to
 * return {@code null}. No exception is thrown for unknown type - callers must check that the return value is not
 * null. However exceptions may be thrown for malformed URI or URL.</p>
 *
 * @author  Martin Desruisseaux (Geomatys)
 * @author  Johann Sorel (Geomatys)
 * @since   0.3 (derived from geotk-3.00)
 * @version 0.4
 * @module
 */
public final class IOUtilities extends Static {
    /**
     * Options to be rejected by {@link #open(Object, String, OpenOption[])} for safety reasons.
     */
    private static final List<String> ILLEGAL_OPTIONS = Arrays.asList( // EnumSet of StandardOpenOption on JDK7 branch
            StandardOpenOption.APPEND, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.DELETE_ON_CLOSE);


    /**
     * Do not allow instantiation of this class.
     */
    private IOUtilities() {
    }


    /**
     * Returns the filename from a {@link Path}, {@link File}, {@link URL}, {@link URI} or {@link CharSequence}
     * instance. If the given argument is specialized type like {@code Path} or {@code File}, then this method uses
     * dedicated API like {@link Path#getFileName()}. Otherwise this method gets a string representation of the path
     * and returns the part after the last {@code '/'} or platform-dependent name separator character, if any.
     *
     * @param  path The path as an instance of one of the above-cited types, or {@code null}.
     * @return The filename in the given path, or {@code null} if the given object is null or of unknown type.
     */
    public static String filename(final Object path) {
        return part(path, false);
    }


    /**
     * Returns the filename extension from a {@link Path}, {@link File}, {@link URL}, {@link URI} or
     * {@link CharSequence} instance. If no extension is found, returns an empty string. If the given
     * object is of unknown type, return {@code null}.
     *
     * @param  path The path as an instance of one of the above-cited types, or {@code null}.
     * @return The extension in the given path, or an empty string if none, or {@code null}
     *         if the given object is null or of unknown type.
     */
    public static String extension(final Object path) {
        return part(path, true);
    }


    /**
     * Implementation of {@link #filename(Object)} and {@link #extension(Object)} methods.
     */
    private static String part(final Object path, final boolean extension) {
        int fromIndex = 0;
        final String name;
        if (path instanceof File) {
            name = ((File) path).getName();
        } else {
            char separator = '/';
            if (path instanceof URL) {
                name = ((URL) path).getPath();
            } else if (path instanceof URI) {
                final URI uri = (URI) path;
                name = uri.isOpaque() ? uri.getSchemeSpecificPart() : uri.getPath();
            } else if (path instanceof CharSequence) {
                name = path.toString();
                separator = File.separatorChar;
            } else {
                return null;
            }
            fromIndex = name.lastIndexOf('/') + 1;
            if (separator != '/') {
                // Search for platform-specific character only if the object is neither a URL or a URI.
                fromIndex = Math.max(fromIndex, CharSequences.lastIndexOf(name, separator, fromIndex, name.length()) + 1);
            }
        }
        if (extension) {
            fromIndex = CharSequences.lastIndexOf(name, '.', fromIndex, name.length()) + 1;
            if (fromIndex <= 1) { // If the dot is the first character, do not consider as a filename extension.
                return "";
            }
        }
        return name.substring(fromIndex);
    }


    /**
     * Returns a string representation of the given path, or {@code null} if none. The current implementation
     * recognizes only the {@link Path}, {@link File}, {@link URL}, {@link URI} or {@link CharSequence} types.
     *
     * @param  path The path for which to return a string representation.
     * @return The string representation, or {@code null} if none.
     */
    public static String toString(final Object path) {
        // For the following types, the string that we want can be obtained only by toString(),
        // or the class is final so we know that the toString(à behavior can not be changed.
        if (path instanceof CharSequence || path instanceof URL || path instanceof URI) {
            return path.toString();
        }
        // While toString() would work too on the default implementation, the following
        // type is not final. So we are better to invoke the dedicated method.
        if (path instanceof File) {
            return ((File) path).getPath();
        }
        return null;
    }


    /**
     * Encodes the characters that are not legal for the {@link URI#URI(String)} constructor.
     * Note that in addition to unreserved characters ("{@code _-!.~'()*}"), the reserved
     * characters ("{@code ?/[]@}") and the punctuation characters ("{@code ,;:$&+=}")
     * are left unchanged, so they will be processed with their special meaning by the
     * URI constructor.
     *
     * <p>The current implementations replaces only the space characters, control characters
     * and the {@code %} character. Future versions may replace more characters as we learn
     * from experience.</p>
     *
     * @param  path The path to encode, or {@code null}.
     * @return The encoded path, or {@code null} if and only if the given path was null.
     */
    public static String encodeURI(final String path) {
        if (path == null) {
            return null;
        }
        StringBuilder buffer = null;
        final int length = path.length();
        for (int i=0; i<length;) {
            final int c = path.codePointAt(i);
            final int n = Character.charCount(c);
            if (!Character.isSpaceChar(c) && !Character.isISOControl(c) && c != '%') {
                /*
                 * The character is valid, or is punction character, or is a reserved character.
                 * All those characters should be handled properly by the URI(String) constructor.
                 */
                if (buffer != null) {
                    buffer.appendCodePoint(c);
                }
            } else {
                /*
                 * The character is invalid, so we need to escape it. Note that the encoding
                 * is fixed to UTF-8 as of java.net.URI specification (see its class javadoc).
                 */
                if (buffer == null) {
                    buffer = new StringBuilder(path);
                    buffer.setLength(i);
                }
                for (final byte b : path.substring(i, i+n).getBytes(StandardCharsets.UTF_8)) {
                    buffer.append('%');
                    final String hex = Integer.toHexString(b & 0xFF).toUpperCase(Locale.ROOT);
                    if (hex.length() < 2) {
                        buffer.append('0');
                    }
                    buffer.append(hex);
                }
            }
            i += n;
        }
        return (buffer != null) ? buffer.toString() : path;
    }


    /**
     * Converts a {@link URL} to a {@link URI}. This is equivalent to a call to the standard {@link URL#toURI()}
     * method, except for the following functionalities:
     *
     * <ul>
     *   <li>Optionally decodes the {@code "%XX"} sequences, where {@code "XX"} is a number.</li>
     *   <li>Converts various exceptions into subclasses of {@link IOException}.</li>
     * </ul>
     *
     * @param  url The URL to convert, or {@code null}.
     * @param  encoding If the URL is encoded in a {@code application/x-www-form-urlencoded}
     *         MIME format, the character encoding (normally {@code "UTF-8"}). If the URL is
     *         not encoded, then {@code null}.
     * @return The URI for the given URL, or {@code null} if the given URL was null.
     * @throws IOException if the URL can not be converted to a URI.
     *
     * @see URI#URI(String)
     */
    public static URI toURI(final URL url, final String encoding) throws IOException {
        if (url == null) {
            return null;
        }
        /*
         * Convert the URL to an URI, taking in account the encoding if any.
         *
         * Note: URL.toURI() is implemented as new URI(URL.toString()) where toString()
         * delegates to toExternalForm(), and all those methods are final. So we really
         * don't lost anything by doing those steps ourself.
         */
        String path = url.toExternalForm();
        if (encoding != null) {
            path = URLDecoder.decode(path, encoding);
        }
        path = encodeURI(path);
        try {
            return new URI(path);
        } catch (URISyntaxException cause) {
            /*
             * Occurs only if the URL is not compliant with RFC 2396. Otherwise every URL
             * should succeed, so a failure can actually be considered as a malformed URL.
             */
            final MalformedURLException e = new MalformedURLException(Exceptions.formatChainedMessages(
                    null, Errors.format(Errors.Keys.IllegalArgumentValue_2, "URL", path), cause));
            e.initCause(cause);
            throw e;
        }
    }


    /**
     * Converts a {@link URL} to a {@link File}. This is equivalent to a call to the standard
     * {@link URL#toURI()} method followed by a call to the {@link File#File(URI)} constructor,
     * except for the following functionalities:
     *
     * <ul>
     *   <li>Optionally decodes the {@code "%XX"} sequences, where {@code "XX"} is a number.</li>
     *   <li>Converts various exceptions into subclasses of {@link IOException}.</li>
     * </ul>
     *
     * @param  url The URL to convert, or {@code null}.
     * @param  encoding If the URL is encoded in a {@code application/x-www-form-urlencoded}
     *         MIME format, the character encoding (normally {@code "UTF-8"}). If the URL is
     *         not encoded, then {@code null}.
     * @return The file for the given URL, or {@code null} if the given URL was null.
     * @throws IOException if the URL can not be converted to a file.
     *
     * @see File#File(URI)
     */
    public static File toFile(final URL url, final String encoding) throws IOException {
        if (url == null) {
            return null;
        }
        final URI uri = toURI(url, encoding);
        /*
         * We really want to call the File constructor expecting a URI argument,
         * not the constructor expecting a String argument, because the one for
         * the URI argument performs additional platform-specific parsing.
         */
        try {
            return new File(uri);
        } catch (IllegalArgumentException cause) {
            /*
             * Typically happen when the URI scheme is not "file". But may also happen if the
             * URI contains fragment that can not be represented in a File (e.g. a Query part).
             * The IllegalArgumentException does not allow us to distinguish those cases.
             */
            throw new IOException(Exceptions.formatChainedMessages(null,
                    Errors.format(Errors.Keys.IllegalArgumentValue_2, "URL", url), cause), cause);
        }
    }


    /**
     * Parses the following path as a {@link File} if possible, or a {@link URL} otherwise.
     * In the special case where the given {@code path} is a URL using the {@code "file"} protocol,
     * the URL is converted to a {@link File} object using the given {@code encoding} for decoding
     * the {@code "%XX"} sequences, if any.
     *
     * {@section Rational}
     * A URL can represent a file, but {@link URL#openStream()} appears to return a {@code BufferedInputStream}
     * wrapping the {@link FileInputStream}, which is not a desirable feature when we want to obtain a channel.
     *
     * @param  path The path to convert, or {@code null}.
     * @param  encoding If the URL is encoded in a {@code application/x-www-form-urlencoded}
     *         MIME format, the character encoding (normally {@code "UTF-8"}). If the URL is
     *         not encoded, then {@code null}. This argument is ignored if the given path does
     *         not need to be converted from URL to {@code File}.
     * @return The path as a {@link File} if possible, or a {@link URL} otherwise.
     * @throws IOException If the given path is not a file and can't be parsed as a URL.
     */
    public static Object toFileOrURL(final String path, final String encoding) throws IOException {
        if (path == null) {
            return null;
        }
        /*
         * Check if the path seems to be a local file. Those paths are assumed never encoded.
         * The heuristic rules applied here may change in any future SIS version.
         */
        if (path.indexOf('?') < 0 && path.indexOf('#') < 0) {
            final int s = path.indexOf(':');
            /*
             * If the ':' character is found, the part before it is probably a protocol in a URL,
             * except in the particular case where there is just one letter before ':'. In such
             * case, it may be the drive letter of a Windows file.
             */
            if (s<0 || (s==1 && Character.isLetter(path.charAt(0)) && !path.regionMatches(2, "//", 0, 2))) {
                return new File(path);
            }
        }
        final URL url = new URL(path);
        final String scheme = url.getProtocol();
        if (scheme != null && scheme.equalsIgnoreCase("file")) {
            return toFile(url, encoding);
        }
        /*
         * Leave the URL in its original encoding on the assumption that this is the encoding expected by
         * the server. This is different than the policy for URI, because the later are always in UTF-8.
         * If a URI is needed, callers should use toURI(url, encoding).
         */
        return url;
    }


    /**
     * Returns a byte channel from the given input, or {@code null} if the input is of unknown type.
     * More specifically:
     *
     * <ul>
     *   <li>If the given input is {@code null}, then this method returns {@code null}.</li>
     *   <li>If the given input is a {@link ReadableByteChannel} or an {@link InputStream},
     *       then it is returned directly, or indirectly as a wrapper.</li>
     *   <li>If the given input if a {@link Path}, {@link File}, {@link URL}, {@link URI}
     *       or {@link CharSequence}, then a new channel is opened.</li>
     * </ul>
     *
     * The given options are used for opening the channel on a <em>best effort basis</em>.
     * In particular, even if the caller provided the {@code WRITE} option, he still needs
     * to verify if the returned channel implements {@link java.nio.channels.WritableByteChannel}.
     * This is because the channel may be opened by {@link URL#openStream()}, in which case the
     * options are ignored.
     *
     * <p>The following options are illegal and will cause an exception to be thrown if provided:
     * {@code APPEND}, {@code TRUNCATE_EXISTING}, {@code DELETE_ON_CLOSE}. We reject those options
     * because this method is primarily designed for readable channels, with optional data edition.
     * Since the write option is not guaranteed to be honored, we have to reject the options that
     * would alter significatively the channel behavior depending on whether we have been able to
     * honor the options or not.</p>
     *
     * @param  input The file to open, or {@code null}.
     * @param  encoding If the input is an encoded URL, the character encoding (normally {@code "UTF-8"}).
     *         If the URL is not encoded, then {@code null}. This argument is ignored if the given input
     *         does not need to be converted from URL to {@code File}.
     * @param  options The options to use for creating a new byte channel. Can be null or empty for read-only.
     * @return The channel for the given input, or {@code null} if the given input is of unknown type.
     * @throws IOException If an error occurred while opening the given file.
     */
    public static ReadableByteChannel open(Object input, final String encoding, Object... options) throws IOException {
        /*
         * Unconditionally verify the options, even if we may not use them.
         */
        final Set<Object> optionSet;
        if (options == null || options.length == 0) {
            optionSet = Collections.emptySet();
        } else {
            optionSet = new HashSet<Object>(Arrays.asList(options));
            optionSet.add(StandardOpenOption.READ);
            if (optionSet.removeAll(ILLEGAL_OPTIONS)) {
                throw new IllegalArgumentException(Errors.format(Errors.Keys.IllegalArgumentValue_2,
                        "options", Arrays.toString(options)));
            }
        }
        /*
         * Check for inputs that are already readable channels or input streams. We perform an explicit check
         * for FileInputStream even if  Channels.newChannel(InputStream)  does this check implicitly, because
         * Channels.newChannel(…) restricts itself to the exact FileInputStream class while we want to invoke
         * getChannel() for any subclasses.
         */
        if (input instanceof ReadableByteChannel) {
            return (ReadableByteChannel) input;
        }
        if (input instanceof InputStream) {
            if (input instanceof FileInputStream) {
                return ((FileInputStream) input).getChannel();
            }
            return Channels.newChannel((InputStream) input);
        }
        // NOTE: Many comments below this point actually apply to the JDK7 branch.
        //       We keep them here for making easier the synchonization between the branches.
        /*
         * In the following cases, we will try hard to convert to Path objects before to fallback
         * on File, URL or URI, because only Path instances allow us to use the given OpenOptions.
         */
        if (input instanceof URL) {
            try {
                input = toFile((URL) input, encoding);
            } catch (IOException e) {
                // This is normal if the URL uses HTTP or FTP protocol for instance.
                // Log the exception at FINE level without stack trace. We will open
                // the channel later using the URL instead than using the Path.
                recoverableException(e);
            }
        } else if (input instanceof URI) {
            /*
             * If the user gave us a URI, try to convert to a Path before to fallback to URL, in order to be
             * able to use the given OpenOptions.  Note that the conversion to Path is likely to fail if the
             * URL uses HTTP or FTP protocols, because JDK7 does not provide file systems for them by default.
             */
            final URI uri = (URI) input;
            if (!uri.isAbsolute()) {
                // All methods invoked in this block throws IllegalArgumentException if the URI has no scheme,
                // so we are better to check now and provide a more appropriate exception for this method.
                throw new IOException(Errors.format(Errors.Keys.MissingSchemeInURI));
            } else try {
                input = new File(uri);
            } catch (IllegalArgumentException e) {
                input = uri.toURL();
                // We have been able to convert to URL, but the given OpenOptions may not be used.
                // Log the exception at FINE level without stack trace, because the exception is
                // probably a normal behavior in this context.
                recoverableException(e);
            }
        } else {
            if (input instanceof CharSequence) { // Needs to be before the check for File or URL.
                input = toFileOrURL(input.toString(), encoding);
            }
        }
        /*
         * One last check for URL. The URL may be either the given input if we have not been able
         * to convert it to a Path, or a URI, File or CharSequence input converted to URL. Do not
         * try to convert the URL to a Path, because this has already been tried before this point.
         */
        if (input instanceof URL) {
            return Channels.newChannel(((URL) input).openStream());
        }
        if (input instanceof File) {
            return Files.newByteChannel((File) input, optionSet);
        }
        return null;
    }


    /**
     * Invoked for reporting exceptions that may be normal behavior. This method logs
     * the exception at {@link java.util.logging.Level#FINE} without stack trace.
     */
    private static void recoverableException(final Exception warning) {
        Logging.recoverableException(Logging.getLogger("org.apache.sis.storage"), IOUtilities.class, "open", warning);
    }
}
Source Code of org.apache.sis.internal.storage.IOUtilities

Related Classes of org.apache.sis.internal.storage.IOUtilities