Package com.facebook.presto.operator.scalar

Source Code of com.facebook.presto.operator.scalar.JsonExtract$JsonValueJsonExtractor

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.operator.scalar;

import com.facebook.presto.util.ThreadLocalCache;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.io.SerializedString;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.base.Supplier;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

import javax.annotation.Nullable;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT;
import static com.facebook.presto.util.Failures.checkCondition;
import static com.fasterxml.jackson.core.JsonFactory.Feature.CANONICALIZE_FIELD_NAMES;
import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.FIELD_NAME;
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

/**
* Extracts values from JSON
* <p/>
* Supports the following JSON path primitives:
* <pre>
*    $ : Root object
*    . : Child operator
*   [] : Subscript operator for array
* </pre>
* <p/>
* Supported JSON Path Examples:
* <pre>
*    { "store": {
*        "book": [
*          { "category": "reference",
*            "author": "Nigel Rees",
*            "title": "Sayings of the Century",
*            "price": 8.95,
*            "contributors": [["Adam", "Levine"], ["Bob", "Strong"]]
*          },
*          { "category": "fiction",
*            "author": "Evelyn Waugh",
*            "title": "Sword of Honour",
*            "price": 12.99,
*            "isbn": "0-553-21311-3",
*            "last_owner": null
*          }
*        ],
*        "bicycle": {
*          "color": "red",
*          "price": 19.95
*        }
*      }
*    }
* </pre>
* <p/>
* With only scalar values:
* <pre>
*    $.store.book[0].author => Nigel Rees
*    $.store.bicycle.price => 19.95
*    $.store.book[0].isbn => NULL (Doesn't exist becomes java null)
*    $.store.book[1].last_owner => NULL (json null becomes java null)
*    $.store.book[0].contributors[0][1] => Levine
* </pre>
* <p/>
* With json values:
* <pre>
*    $.store.book[0].author => "Nigel Rees"
*    $.store.bicycle.price => 19.95
*    $.store.book[0].isbn => NULL (Doesn't exist becomes java null)
*    $.store.book[1].last_owner => null (json null becomes the string "null")
*    $.store.book[0].contributors[0] => ["Adam", "Levine"]
*    $.store.bicycle => {"color": "red", "price": 19.95}
* </pre>
*/
public final class JsonExtract
{
    // Accepted path grammar: a leading '$', optional [n] subscripts, then any number of
    // ".name" segments, each optionally followed by [n] subscripts. A name may not contain
    // any of the characters '@', '.', '[', ']', '$' or '*'.
    private static final Pattern EXPECTED_PATH = Pattern.compile("\\$(\\[\\d+\\])*(\\.[^@\\.\\[\\]\\$\\*]+(\\[\\d+\\])*)*");
    // Size estimate handed to the DynamicSliceOutput that receives re-serialized JSON values
    private static final int ESTIMATED_JSON_OUTPUT_SIZE = 512;

    // Applied in order by tokenizePath: "[" becomes ".[" and "]" is removed, so every
    // subscript ends up as its own dot-separated token that starts with "["
    private static final List<StringReplacer> PATH_STRING_REPLACERS = ImmutableList.of(
            new StringReplacer("[", ".["),
            new StringReplacer("]", "")
    );

    // Splits the rewritten path on '.' and trims whitespace around each resulting token
    private static final Splitter DOT_SPLITTER = Splitter.on(".").trimResults();
    // Shared factory for all parsers and generators in this class; CANONICALIZE_FIELD_NAMES is turned off
    private static final JsonFactory JSON_FACTORY = new JsonFactory()
            .disable(CANONICALIZE_FIELD_NAMES);

    // Cache of path -> extractor chains (max size 20 per thread) whose terminal step is a
    // ScalarValueJsonExtractor; used by extractScalar
    private static final JsonExtractCache<Slice> SCALAR_CACHE = new JsonExtractCache<>(20, new Supplier<JsonExtractor<Slice>>() {
        @Override
        public JsonExtractor<Slice> get()
        {
            return new ScalarValueJsonExtractor();
        }
    });

    // Cache of path -> extractor chains (max size 20 per thread) whose terminal step is a
    // JsonValueJsonExtractor; used by extractJson
    private static final JsonExtractCache<Slice> JSON_CACHE = new JsonExtractCache<>(20, new Supplier<JsonExtractor<Slice>>() {
        @Override
        public JsonExtractor<Slice> get()
        {
            return new JsonValueJsonExtractor();
        }
    });

    // Cache of path -> extractor chains (max size 20 per thread) whose terminal step is a
    // JsonSizeExtractor; used by extractSize
    private static final JsonExtractCache<Long> JSON_SIZE_CACHE = new JsonExtractCache<>(20, new Supplier<JsonExtractor<Long>>() {
        @Override
        public JsonExtractor<Long> get()
        {
            return new JsonSizeExtractor();
        }
    });

    // Static utility holder: not instantiable
    private JsonExtract() {}

    /**
     * Main scalar extraction entry point.
     * <p/>
     * Delegates to {@link #extract(ThreadLocalCache, Slice, Slice)} with the scalar extractor cache.
     * A JSON parse failure is not propagated: the delegate catches {@link JsonParseException}
     * and returns null instead.
     *
     * @param jsonInput - Slice representation of a JSON object to inspect; a null input yields null
     * @param jsonPath - Slice representation of the extraction path; must not be null
     * @return extracted scalar value as Slice, or null on mismatch or when jsonInput is malformed
     * @throws IOException - on failures other than a JSON parse error
     */
    public static Slice extractScalar(@Nullable Slice jsonInput, Slice jsonPath)
            throws IOException
    {
        return extract(SCALAR_CACHE, jsonInput, jsonPath);
    }

    /**
     * Main json extraction entry point.
     * <p/>
     * Delegates to {@link #extract(ThreadLocalCache, Slice, Slice)} with the json-value extractor cache.
     * A JSON parse failure is not propagated: the delegate catches {@link JsonParseException}
     * and returns null instead.
     *
     * @param jsonInput - Slice representation of a JSON object to inspect; a null input yields null
     * @param jsonPath - Slice representation of the extraction path; must not be null
     * @return extracted json value as Slice, or null on mismatch or when jsonInput is malformed
     * @throws IOException - on failures other than a JSON parse error
     */
    public static Slice extractJson(@Nullable Slice jsonInput, Slice jsonPath)
            throws IOException
    {
        return extract(JSON_CACHE, jsonInput, jsonPath);
    }

    public static Slice extract(ThreadLocalCache<Slice, JsonExtractor<Slice>> cache, @Nullable Slice jsonInput, Slice jsonPath)
            throws IOException
    {
        checkNotNull(jsonPath, "jsonPath is null");
        if (jsonInput == null) {
            return null;
        }

        try {
            return extractInternal(jsonInput, cache.get(jsonPath));
        }
        catch (JsonParseException e) {
            // Return null if we failed to parse something
            return null;
        }
    }

    public static Slice extract(Slice jsonInput, JsonExtractor<Slice> jsonExtractor)
            throws IOException
    {
        try {
            return extractInternal(jsonInput, jsonExtractor);
        }
        catch (JsonParseException e) {
            // Return null if we failed to parse something
            return null;
        }
    }

    @VisibleForTesting
    static Slice extractInternal(Slice jsonInput, JsonExtractor<Slice> jsonExtractor)
            throws IOException
    {
        checkNotNull(jsonInput, "jsonInput is null");
        try (JsonParser jsonParser = JSON_FACTORY.createJsonParser(jsonInput.getInput())) {
            // Initialize by advancing to first token and make sure it exists
            if (jsonParser.nextToken() == null) {
                throw new JsonParseException("Missing starting token", jsonParser.getCurrentLocation());
            }

            return jsonExtractor.extract(jsonParser);
        }
    }

    /**
     * Main size extraction entry point.
     * <p/>
     * Delegates to {@link #extractSize(ThreadLocalCache, Slice, Slice)} with the size extractor cache.
     * The delegate returns null for a null jsonInput and when a {@link JsonParseException} occurs.
     *
     * @param jsonInput - Slice representation of a JSON object to inspect
     * @param jsonPath - Slice representation of the extraction path; must not be null
     * @return size reported by the size extractor for the value at jsonPath, or null on mismatch
     * @throws IOException - on failures other than a JSON parse error
     */
    public static Long extractSize(Slice jsonInput, Slice jsonPath)
            throws IOException
    {
        return extractSize(JSON_SIZE_CACHE, jsonInput, jsonPath);
    }

    public static Long extractSize(ThreadLocalCache<Slice, JsonExtractor<Long>> cache, @Nullable Slice jsonInput, Slice jsonPath)
            throws IOException
    {
        checkNotNull(jsonPath, "jsonPath is null");
        if (jsonInput == null) {
            return null;
        }

        try {
            return extractSizeInternal(jsonInput, cache.get(jsonPath));
        }
        catch (JsonParseException e) {
            // Return null if we failed to parse something
            return null;
        }
    }

    public static Long extractSize(Slice jsonInput, JsonExtractor<Long> jsonExtractor)
            throws IOException
    {
        try {
            return extractSizeInternal(jsonInput, jsonExtractor);
        }
        catch (JsonParseException e) {
            // Return null if we failed to parse something
            return null;
        }
    }

    @VisibleForTesting
    static Long extractSizeInternal(Slice jsonInput, JsonExtractor<Long> jsonExtractor)
            throws IOException
    {
        checkNotNull(jsonInput, "jsonInput is null");
        try (JsonParser jsonParser = JSON_FACTORY.createJsonParser(jsonInput.getInput())) {
            // Initialize by advancing to first token and make sure it exists
            if (jsonParser.nextToken() == null) {
                throw new JsonParseException("Missing starting token", jsonParser.getCurrentLocation());
            }

            return jsonExtractor.extract(jsonParser);
        }
    }

    private static Iterable<String> tokenizePath(String path)
    {
        checkCondition(EXPECTED_PATH.matcher(path).matches(), INVALID_FUNCTION_ARGUMENT, "Invalid/unsupported JSON path: '%s'", path);
        // This performs the following transformation:
        // $.blah[0].fuu[1][2].bar => $.blah.[0.fuu.[1.[2.bar
        for (StringReplacer replacer : PATH_STRING_REPLACERS) {
            path = replacer.replace(path);
        }
        return DOT_SPLITTER.split(path);
    }

    public static <T> JsonExtractor<T> generateExtractor(String path, JsonExtractor<T> rootExtractor)
    {
        Iterator<String> iterator = tokenizePath(path).iterator();
        checkCondition(iterator.hasNext() && iterator.next().equals("$"), INVALID_FUNCTION_ARGUMENT, "JSON path must begin with root: '$'");
        return generateExtractor(iterator, rootExtractor);
    }

    private static <T> JsonExtractor<T> generateExtractor(Iterator<String> filters, JsonExtractor<T> rootExtractor)
    {
        if (!filters.hasNext()) {
            return rootExtractor;
        }

        String filter = filters.next();
        if (filter.startsWith("[")) {
            int index = Integer.parseInt(filter.substring(1).trim());
            return new ArrayElementJsonExtractor<>(index, generateExtractor(filters, rootExtractor));
        }
        else {
            return new ObjectFieldJsonExtractor<>(filter, generateExtractor(filters, rootExtractor));
        }
    }

    public interface JsonExtractor<T>
    {
        /**
         * Executes the extraction on the existing content of the JsonParser and outputs the match.
         * <p/>
         * Notes:
         * <ul>
         * <li>JsonParser must be on the FIRST token of the value to be processed when extract is called</li>
         * <li>INVARIANT: when extract() returns, the current token of the parser will be the LAST token of the value</li>
         * </ul>
         *
         * @param jsonParser parser positioned on the first token of the value to process
         * @return the value, or null if not applicable
         * @throws IOException if reading from the parser fails
         */
        T extract(JsonParser jsonParser)
                throws IOException;
    }

    public static class ObjectFieldJsonExtractor<T>
            implements JsonExtractor<T>
    {
        private final SerializedString fieldName;
        private final JsonExtractor<? extends T> delegate;

        public ObjectFieldJsonExtractor(String fieldName, JsonExtractor<? extends T> delegate)
        {
            this.fieldName = new SerializedString(checkNotNull(fieldName, "fieldName is null"));
            this.delegate = checkNotNull(delegate, "delegate is null");
        }

        @Override
        public T extract(JsonParser jsonParser)
                throws IOException
        {
            if (jsonParser.getCurrentToken() != START_OBJECT) {
                throw new JsonParseException("Expected a Json object", jsonParser.getCurrentLocation());
            }

            while (!jsonParser.nextFieldName(fieldName)) {
                if (!jsonParser.hasCurrentToken()) {
                    throw new JsonParseException("Unexpected end of object", jsonParser.getCurrentLocation());
                }
                if (jsonParser.getCurrentToken() == END_OBJECT) {
                    // Unable to find matching field
                    return null;
                }
                jsonParser.skipChildren(); // Skip nested structure if currently at the start of one
            }

            jsonParser.nextToken(); // Shift to first token of the value

            return delegate.extract(jsonParser);
        }
    }

    public static class ArrayElementJsonExtractor<T>
            implements JsonExtractor<T>
    {
        private final int index;
        private final JsonExtractor<? extends T> delegate;

        public ArrayElementJsonExtractor(int index, JsonExtractor<? extends T> delegate)
        {
            checkArgument(index >= 0, "index must be greater than or equal to zero: %s", index);
            checkNotNull(delegate, "delegate is null");
            this.index = index;
            this.delegate = delegate;
        }

        @Override
        public T extract(JsonParser jsonParser)
                throws IOException
        {
            if (jsonParser.getCurrentToken() != START_ARRAY) {
                throw new JsonParseException("Expected a Json array", jsonParser.getCurrentLocation());
            }

            int currentIndex = 0;
            while (true) {
                JsonToken token = jsonParser.nextToken();
                if (token == null) {
                    throw new JsonParseException("Unexpected end of array", jsonParser.getCurrentLocation());
                }
                if (token == END_ARRAY) {
                    // Index out of bounds
                    return null;
                }
                if (currentIndex == index) {
                    break;
                }
                currentIndex++;
                jsonParser.skipChildren(); // Skip nested structure if currently at the start of one
            }

            return delegate.extract(jsonParser);
        }
    }

    public static class ScalarValueJsonExtractor
            implements JsonExtractor<Slice>
    {
        @Override
        public Slice extract(JsonParser jsonParser)
                throws IOException
        {
            JsonToken token = jsonParser.getCurrentToken();
            if (token == null) {
                throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation());
            }
            if (!token.isScalarValue() || token == VALUE_NULL) {
                return null;
            }
            return Slices.wrappedBuffer(jsonParser.getText().getBytes(Charsets.UTF_8));
        }
    }

    public static class JsonValueJsonExtractor
            implements JsonExtractor<Slice>
    {
        @Override
        public Slice extract(JsonParser jsonParser)
                throws IOException
        {
            if (!jsonParser.hasCurrentToken()) {
                throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation());
            }

            DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE);
            try (JsonGenerator jsonGenerator = JSON_FACTORY.createJsonGenerator(dynamicSliceOutput)) {
                jsonGenerator.copyCurrentStructure(jsonParser);
            }
            return dynamicSliceOutput.slice();
        }
    }

    public static class JsonSizeExtractor
            implements JsonExtractor<Long>
    {
        @Override
        public Long extract(JsonParser jsonParser)
                throws IOException
        {
            if (!jsonParser.hasCurrentToken()) {
                throw new JsonParseException("Unexpected end of value", jsonParser.getCurrentLocation());
            }

            if (jsonParser.getCurrentToken() == START_ARRAY) {
                long length = 0;
                while (true) {
                    JsonToken token = jsonParser.nextToken();
                    if (token == null) {
                        return null;
                    }
                    if (token == END_ARRAY) {
                        return length;
                    }
                    jsonParser.skipChildren();

                    length++;
                }
            }
            else if (jsonParser.getCurrentToken() == START_OBJECT) {
                long length = 0;
                while (true) {
                    JsonToken token = jsonParser.nextToken();
                    if (token == null) {
                        return null;
                    }
                    if (token == END_OBJECT) {
                        return length;
                    }

                    if (token == FIELD_NAME) {
                        length++;
                    }
                    else {
                        jsonParser.skipChildren();
                    }
                }
            }
            else {
                return 0L;
            }
        }
    }

    private static class StringReplacer
    {
        private final Pattern pattern;
        private final String replacement;

        private StringReplacer(String original, String replacement)
        {
            this.pattern = Pattern.compile(original, Pattern.LITERAL);
            this.replacement = Matcher.quoteReplacement(replacement);
        }

        public String replace(String target)
        {
            return pattern.matcher(target).replaceAll(replacement);
        }
    }

    public static class JsonExtractCache<T>
            extends ThreadLocalCache<Slice, JsonExtractor<T>>
    {
        private final Supplier<JsonExtractor<T>> rootSupplier;

        public JsonExtractCache(int maxSizePerThread, Supplier<JsonExtractor<T>> rootSupplier)
        {
            super(maxSizePerThread);
            this.rootSupplier = rootSupplier;
        }

        @Override
        protected JsonExtractor<T> load(Slice jsonPath)
        {
            return generateExtractor(jsonPath.toString(Charsets.UTF_8), rootSupplier.get());
        }
    }
}
TOP

Related Classes of com.facebook.presto.operator.scalar.JsonExtract$JsonValueJsonExtractor

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.