Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


            return q;
        } else if ("*".equals(this.type.getSubtype())
                && type.getType().equals(this.type.getType())) {
            return q;
        } else {
            MediaType superType = registry.getSupertype(type);
            if (superType != null) {
                return match(superType, registry);
            } else {
                return 0.0;
            }
View Full Code Here


    public String resolve(String... types) {
        int maxIndex = 0;
        double[] qs = new double[types.length];
        for (int i = 0; i < types.length; i++) {
            MediaType type = registry.normalize(MediaType.parse(types[i]));
            for (MediaRange range : ranges) {
                qs[i] = Math.max(qs[i], range.match(type, registry));
            }
            if (qs[i] > qs[maxIndex]) {
                maxIndex = i;
View Full Code Here

        this.parser = parser;
        this.detector = detector;
    }

    public boolean isSupported(TikaInputStream input) throws IOException {
        MediaType type = detector.detect(input, new Metadata());
        return parser.getSupportedTypes(new ParseContext()).contains(type);
    }
View Full Code Here

            public void parse(InputStream stream, ContentHandler ignored,
                    Metadata metadata, ParseContext context)
                    throws IOException, SAXException, TikaException {
                // Figure out what we have to process
                String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
                MediaType type;
                if(metadata.get(Metadata.CONTENT_TYPE) != null) {
                   type = MediaType.parse( metadata.get(Metadata.CONTENT_TYPE) );
                } else {
                   if(! stream.markSupported()) {
                      stream = TikaInputStream.get(stream);
View Full Code Here

        this(Arrays.asList(detectors));
    }

    public MediaType detect(InputStream input, Metadata metadata)
            throws IOException {
        MediaType type = MediaType.OCTET_STREAM;
        for (Detector detector : detectors) {
            MediaType detected = detector.detect(input, metadata);
            if (registry.isSpecializationOf(detected, type)) {
                type = detected;
            }
        }
        return type;
View Full Code Here

                NodeList mimes = node.getElementsByTagName("mime");
                if (mimes.getLength() > 0) {
                    Set<MediaType> types = new HashSet<MediaType>();
                    for (int j = 0; j < mimes.getLength(); j++) {
                        String mime = getText(mimes.item(j));
                        MediaType type = MediaType.parse(mime);
                        if (type != null) {
                            types.add(type);
                        } else {
                            throw new TikaException(
                                    "Invalid media type name: " + mime);
View Full Code Here

        return getParser(metadata, new ParseContext());
    }

    protected Parser getParser(Metadata metadata, ParseContext context) {
        Map<MediaType, Parser> map = getParsers(context);
        MediaType type = MediaType.parse(metadata.get(Metadata.CONTENT_TYPE));
        while (type != null) {
            Parser parser = map.get(type);
            if (parser != null) {
                return parser;
            }
View Full Code Here

           // We need (reliable!) mark support for type detection before parsing
           stream = new BufferedInputStream(stream);
        }

        // Automatically detect the MIME type of the document
        MediaType type = detector.detect(stream, metadata);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // TIKA-216: Zip bomb prevention
        CountingInputStream count = new CountingInputStream(stream);
        SecureContentHandler secure = new SecureContentHandler(handler, count);
View Full Code Here

        assert representations != null && representations.length > 0;
        int maxIndex = 0;
        double maxQ = 0.0;
        for (int i = 0; i < representations.length; i++) {
            double q = 0.0;
            MediaType type = registry.normalize(representations[i].getType());
            for (MediaRange range : ranges) {
                q = Math.max(q, range.match(type, registry));
            }
            if (q > maxQ) {
                maxIndex = i;
View Full Code Here

    public void computeEnhancements(ContentItem ci) throws EngineException {
        MediaTypeAndStream mtas = extractMediaType(ci);
        if(mtas.mediaType == null){
            return; //unable to parse and detect content type
        }
        MediaType plainMediaType = mtas.mediaType.getBaseType();
        if(plainMediaType.equals(MediaType.TEXT_PLAIN)){
            return; //we need not to process plain text!
        }
        final ParseContext context = new ParseContext();
        context.set(Parser.class,parser);
        Set<MediaType> supproted = parser.getSupportedTypes(context);
        if(supproted.contains(plainMediaType)) {
            final InputStream in;
            if(mtas.in == null){
                in = ci.getStream();
            } else {
                in = mtas.in;
            }
            final Metadata metadata = new Metadata();
            //set the already parsed contentType
            metadata.set(Metadata.CONTENT_TYPE, mtas.mediaType.toString());
            //also explicitly set the charset as contentEncoding
            String charset = mtas.mediaType.getParameters().get("charset");
            if(charset != null){
                metadata.set(Metadata.CONTENT_ENCODING, charset);
            }
            ContentSink plainTextSink;
            try {
                plainTextSink = ciFactory.createContentSink(TEXT_PLAIN +"; charset="+UTF8.name());
            } catch (IOException e) {
                IOUtils.closeQuietly(in); //close the input stream
                throw new EngineException("Error while initialising Blob for" +
                    "writing the text/plain version of the parsed content",e);
            }
            final Writer plainTextWriter = new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8);
            final ContentHandler textHandler = new BodyContentHandler( //only the Body
                new PlainTextHandler(plainTextWriter, false,skipLinebreaks)); //skip ignoreable
            final ToXMLContentHandler xhtmlHandler;
            final ContentHandler mainHandler;
            ContentSink xhtmlSink = null;
            try {
                if(!plainMediaType.equals(XHTML)){ //do not parse XHTML from XHTML
                    try {
                        xhtmlSink = ciFactory.createContentSink(XHTML +"; charset="+UTF8.name());
                    } catch (IOException e) {
                        throw new EngineException("Error while initialising Blob for" +
                                "writing the application/xhtml+xml version of the parsed content",e);
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.