Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


            log.warn("Encounterd illegal formatted mediaType '{}'  -> will try " +
                "to detect the mediaType based on the parsed content!",
                blob.getMimeType());
            return null;
        } else {
            return new MediaType(mediaTypeArray[0], mediaTypeArray[1],
                blob.getParameter());
        }
    }
View Full Code Here


      String encoding = null;
      if(inputType == null) {
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, sourceName);
        DefaultDetector detector = new DefaultDetector();
        MediaType type = detector.detect(is, metadata);
   
        ContentHandler contenthandler = new BodyContentHandler();
        AutoDetectParser parser = new AutoDetectParser();
        try {
          parser.parse(is, contenthandler, metadata);
        } catch (SAXException | TikaException e) {
          logger.error("Unable to parse stream: " + e.getMessage());
          throw new KarmaException("Unable to parse stream: "
              + e.getMessage());
        }
        MediaTypeRegistry registry = MimeTypes.getDefaultMimeTypes()
            .getMediaTypeRegistry();
        registry.addSuperType(new MediaType("text", "csv"), new MediaType(
            "text", "plain"));
        MediaType parsedType = MediaType.parse(metadata
            .get(Metadata.CONTENT_TYPE));
   
        if (registry.isSpecializationOf(registry.normalize(type), registry
            .normalize(parsedType).getBaseType())) {
          metadata.set(Metadata.CONTENT_TYPE, type.toString());
View Full Code Here

        for (MediaType type : registry.getTypes()) {
            System.out.println(type);
            for (MediaType alias : registry.getAliases(type)) {
                System.out.println("  alias:     " + alias);
            }
            MediaType supertype = registry.getSupertype(type);
            if (supertype != null) {
                System.out.println("  supertype: " + supertype);
            }
            Parser p = parsers.get(type);
            if (p != null) {
View Full Code Here

            if (name == null) {
                name = "file" + count++;
            }

            MediaType contentType = detector.detect(inputStream, metadata);

            if (name.indexOf('.')==-1 && contentType!=null) {
                try {
                    name += config.getMimeRepository().forName(
                            contentType.toString()).getExtension();
                } catch (MimeTypeException e) {
                    e.printStackTrace();
                }
            }
View Full Code Here

        TikaInputStream tis = TikaInputStream.cast(input);
        if (tis != null) {
            try {
                ZipFile zip = new ZipFile(tis.getFile());
                try {
                    MediaType type = detectOpenDocument(zip);
                    if (type == null) {
                        type = detectOfficeOpenXML(zip, tis);
                    }
                    if (type == null) {
                        type = detectIWork(zip);
View Full Code Here

            // It's OOXML
            TikaInputStream stream = TikaInputStream.get(
                    new DocumentInputStream((DocumentEntry) ooxml));
            try {
                ZipContainerDetector detector = new ZipContainerDetector();
                MediaType type = detector.detect(stream, new Metadata());
                handleEmbeddedResource(stream, null, type.toString(), xhtml, true);
                return;
            } finally {
                stream.close();
            }
        } catch(FileNotFoundException e) {
            // It's regular OLE2
        }

       // Need to dump the directory out to a new temp file, so
       //  it's standalone
       POIFSFileSystem newFS = new POIFSFileSystem();
       copy(dir, newFS.getRoot());

       File tmpFile = File.createTempFile("tika", ".ole2");
       try {
           FileOutputStream out = new FileOutputStream(tmpFile);
           newFS.writeFilesystem(out);
           out.close();

           // What kind of document is it?
           Metadata metadata = new Metadata();
           POIFSDocumentType type = POIFSDocumentType.detectType(dir);

           TikaInputStream embedded;

           if (type==POIFSDocumentType.OLE10_NATIVE) {
               Entry entry = dir.getEntry(Ole10Native.OLE10_NATIVE);
               ByteArrayOutputStream bos = new ByteArrayOutputStream();
               IOUtils.copy(new DocumentInputStream((DocumentEntry) entry), bos);
               byte[] data = bos.toByteArray();

               try {
                    Ole10Native ole = new Ole10Native(data, 0);
                    byte[] dataBuffer = ole.getDataBuffer();

                    metadata.set("resourceName", dir.getName() + '/' + ole.getLabel());

                    embedded = TikaInputStream.get(dataBuffer);
               } catch (Ole10NativeException ex) {
                 embedded = TikaInputStream.get(data);
               }
           } else {
               metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
               metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '.' + type.getExtension());

               embedded = TikaInputStream.get(tmpFile);
           }

           try {
View Full Code Here

        CharsetDetector detector = new CharsetDetector();
        String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
        String incomingType = metadata.get(Metadata.CONTENT_TYPE);
        if (incomingCharset == null && incomingType != null) {
            // TIKA-341: Use charset in content-type
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                String charset = mt.getParameters().get("charset");
                if ((charset != null) && Charset.isSupported(charset)) {
                    incomingCharset = charset;
                }
            }
        }
View Full Code Here

        CharsetDetector detector = new CharsetDetector();
        String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
        String incomingType = metadata.get(Metadata.CONTENT_TYPE);
        if (incomingCharset == null && incomingType != null) {
            // TIKA-341: Use charset in content-type
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                incomingCharset = mt.getParameters().get("charset");
            }
        }

        if (incomingCharset != null) {
            detector.setDeclaredEncoding(incomingCharset);
View Full Code Here

        TemporaryResources tmp = new TemporaryResources();
        try {
            TikaInputStream tis = TikaInputStream.get(stream, tmp);

            // Automatically detect the MIME type of the document
            MediaType type = detector.detect(tis, metadata);
            metadata.set(Metadata.CONTENT_TYPE, type.toString());

            // TIKA-216: Zip bomb prevention
            SecureContentHandler sch = new SecureContentHandler(handler, tis);
            try {
                // Parse the document
View Full Code Here

                NodeList mimes = node.getElementsByTagName("mime");
                if (mimes.getLength() > 0) {
                    Set<MediaType> types = new HashSet<MediaType>();
                    for (int j = 0; j < mimes.getLength(); j++) {
                        String mime = getText(mimes.item(j));
                        MediaType type = MediaType.parse(mime);
                        if (type != null) {
                            types.add(type);
                        } else {
                            throw new TikaException(
                                    "Invalid media type name: " + mime);
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.