Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


    errHeader = "ExtractingDocumentLoader: " + stream.getSourceInfo();
    Parser parser = null;
    String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null);
    if (streamType != null) {
      //Cache?  Parsers are lightweight to construct and thread-safe, so I'm told
      MediaType mt = MediaType.parse(streamType.trim().toLowerCase());
      parser = config.getParser(mt);
    } else {
      parser = autoDetectParser;
    }
    if (parser != null) {
View Full Code Here


    errHeader = "ExtractingDocumentLoader: " + stream.getSourceInfo();
    Parser parser = null;
    String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null);
    if (streamType != null) {
      //Cache?  Parsers are lightweight to construct and thread-safe, so I'm told
      MediaType mt = MediaType.parse(streamType.trim().toLowerCase());
      parser = config.getParser(mt);
    } else {
      parser = autoDetectParser;
    }
    if (parser != null) {
View Full Code Here

        if (!stream.markSupported()) {
            stream = new BufferedInputStream(stream);
        }

        // Automatically detect the MIME type of the document
        MediaType type = detector.detect(stream, metadata);
        metadata.set(Metadata.CONTENT_TYPE, type.toString());

        // TIKA-216: Zip bomb prevention
        CountingInputStream count = new CountingInputStream(stream);
        SecureContentHandler secure = new SecureContentHandler(handler, count);
View Full Code Here

        CharsetDetector detector = new CharsetDetector();
        String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
        String incomingType = metadata.get(Metadata.CONTENT_TYPE);
        if (incomingCharset == null && incomingType != null) {
            // TIKA-341: Use charset in content-type
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                String charset = mt.getParameters().get("charset");
                if ((charset != null) && Charset.isSupported(charset)) {
                    incomingCharset = charset;
                }
            }
        }
View Full Code Here

          // It's OOXML
          TikaInputStream ooxmlStream = TikaInputStream.get(
                new DocumentInputStream((DocumentEntry)ooxml)
          );
          ZipContainerDetector detector = new ZipContainerDetector();
          MediaType type = detector.detect(ooxmlStream, new Metadata());
          handleEmbeddedResource(ooxmlStream, null, type.toString(), xhtml, true);
          return;
       } catch(FileNotFoundException e) {
          // It's regular OLE2
       }

       // Need to dump the directory out to a new temp file, so
       //  it's standalone
       POIFSFileSystem newFS = new POIFSFileSystem();
       copy(dir, newFS.getRoot());

       File tmpFile = File.createTempFile("tika", ".ole2");
       try {
           FileOutputStream out = new FileOutputStream(tmpFile);
           newFS.writeFilesystem(out);
           out.close();

           // What kind of document is it?
           Metadata metadata = new Metadata();
           POIFSDocumentType type = POIFSDocumentType.detectType(dir);
           metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());

           // Trigger for the document itself
           TikaInputStream embedded = TikaInputStream.get(tmpFile);
           try {
               if (extractor.shouldParseEmbedded(metadata)) {
View Full Code Here

        CharsetDetector detector = new CharsetDetector();
        String incomingCharset = metadata.get(Metadata.CONTENT_ENCODING);
        String incomingType = metadata.get(Metadata.CONTENT_TYPE);
        if (incomingCharset == null && incomingType != null) {
            // TIKA-341: Use charset in content-type
            MediaType mt = MediaType.parse(incomingType);
            if (mt != null) {
                incomingCharset = mt.getParameters().get("charset");
            }
        }

        if (incomingCharset != null) {
            detector.setDeclaredEncoding(incomingCharset);
View Full Code Here

     * Does container-detector based detection, handling
     *  fallback in case of the default.
     */
    private MediaType detect(TikaInputStream input, Metadata metadata,
               ContainerDetector detector) throws IOException {
       MediaType detected = detector.detect(input, metadata);
       MediaType defaultType = detector.getDefault();
       if(! detected.equals(defaultType)) {
          return detected;
       }
      
       // See if the fallback can do better
View Full Code Here

        for (MediaType type : registry.getTypes()) {
            System.out.println(type);
            for (MediaType alias : registry.getAliases(type)) {
                System.out.println("  alias:     " + alias);
            }
            MediaType supertype = registry.getSupertype(type);
            if (supertype != null) {
                System.out.println("  supertype: " + supertype);
            }
            Parser parser = parsers.get(type);
            if (parser != null) {
View Full Code Here

        // First up a truncated OOXML (zip) file
        InputStream input = getTestDoc("testEXCEL.xlsx");
        byte [] buffer = new byte[300];
        assertEquals(300,input.read(buffer));
        Metadata metadata = new Metadata();
        MediaType mt = detector.detect(new ByteArrayInputStream(buffer), metadata);
        // no exception should be thrown
        assertEquals(MediaType.application("x-tika-ooxml"),mt);
       
        // Now a truncated OLE2 file
        input = getTestDoc("testEXCEL.xls");
View Full Code Here

         if(type != POIFSDocumentType.UNKNOWN) {
            return type.getType();
         }
        
         // Is it one of the Corel formats which use OLE2?
         MediaType mt = detectCorel(fs.getRoot());
         if(mt != null) return mt;
        
         // We don't know, sorry
         return DEFAULT;
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.