Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


        // Automatically detect the character encoding
        AutoDetectReader reader = new AutoDetectReader(
                new CloseShieldInputStream(stream), metadata, LOADER);
        try {
            Charset charset = reader.getCharset();
            MediaType type = new MediaType(MediaType.TEXT_PLAIN, charset);
            metadata.set(Metadata.CONTENT_TYPE, type.toString());
            // deprecated, see TIKA-431
            metadata.set(Metadata.CONTENT_ENCODING, charset.name());

            XHTMLContentHandler xhtml =
                    new XHTMLContentHandler(handler, metadata);
View Full Code Here


                metadata.set(Metadata.LONGITUDE, m.group(2));
            } else {
                metadata.set("ICBM", value);
            }
        } else if (name.equalsIgnoreCase(Metadata.CONTENT_TYPE)){
            MediaType type = MediaType.parse(value);
            if (type != null) {
                metadata.set(Metadata.CONTENT_TYPE, type.toString());
            } else {
                metadata.set(Metadata.CONTENT_TYPE, value);
            }
        } else {
            metadata.set(name, value);
View Full Code Here

                new CloseShieldInputStream(stream), metadata, LOADER);
        try {
            Charset charset = reader.getCharset();
            String previous = metadata.get(Metadata.CONTENT_TYPE);
            if (previous == null || previous.startsWith("text/html")) {
                MediaType type = new MediaType(MediaType.TEXT_HTML, charset);
                metadata.set(Metadata.CONTENT_TYPE, type.toString());
            }
            // deprecated, see TIKA-431
            metadata.set(Metadata.CONTENT_ENCODING, charset.name());

            // Get the HTML mapper from the parse context
View Full Code Here

            ais = factory.createArchiveInputStream(stream);
        } catch (ArchiveException e) {
            throw new TikaException("Unable to unpack document stream", e);
        }

        MediaType type = getMediaType(ais);
        if (!type.equals(MediaType.OCTET_STREAM)) {
            metadata.set(CONTENT_TYPE, type.toString());
        }

        // Use the delegate parser to parse the contained document
        EmbeddedDocumentExtractor extractor = context.get(
                EmbeddedDocumentExtractor.class,
View Full Code Here

       };
      
       // Check we found the parser
       CompositeParser parser = (CompositeParser)tika.getParser();
       for (String type : mimetypes) {
          MediaType mt = MediaType.parse(type);
          assertNotNull("Parser not found for " + type, parser.getParsers().get(mt) );
       }
      
       // Have each file parsed, and check
       for (int i=0; i<testFiles.length; i++) {
View Full Code Here

             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
       // OfficeParser won't handle it
View Full Code Here

               return WPS;
            } else if (names.contains("CONTENTS") && names.contains("\u0001CompObj")) {
               // CompObj is a general kind of OLE2 embedding, but this may be an old Works file
               // If we have the Directory, check
               if (root != null) {
                  MediaType type = processCompObjFormatType(root);
                  if (type == WPS) {
                     return WPS;
                  } else {
                     // Assume it's a general CompObj embedded resource
                     return COMP_OBJ;
View Full Code Here

        Parser parser = (Parser) obj;
        this.parsers.add(parser);

        List<String> mediaTypes = getConfigs().getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.<String>emptyList());
        for (String mediaTypeStr : mediaTypes) {
          MediaType mediaType = parseMediaType(mediaTypeStr);
          addSupportedMimeType(mediaTypeStr);
          this.mediaTypeToParserMap.put(mediaType, parser);
        }
       
        if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) {
          for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) {
            mediaType = mediaType.getBaseType();
            addSupportedMimeType(mediaType.toString());
            this.mediaTypeToParserMap.put(mediaType, parser);
          }       
          List<String> extras = getConfigs().getStringList(parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.<String>emptyList());
          for (String mediaTypeStr : extras) {
            MediaType mediaType = parseMediaType(mediaTypeStr);
            addSupportedMimeType(mediaTypeStr);
            this.mediaTypeToParserMap.put(mediaType, parser);           
          }
        }
      }
View Full Code Here

        return null;
      }
      String mediaTypeStr = (String) record.getFirstValue(Fields.ATTACHMENT_MIME_TYPE); //ExtractingParams.STREAM_TYPE);
      assert mediaTypeStr != null;
     
      MediaType mediaType = parseMediaType(mediaTypeStr).getBaseType();
      Parser parser = mediaTypeToParserMap.get(mediaType); // fast path
      if (parser != null) {
        return parser;
      }
      // wildcard matching
View Full Code Here

     
      return true;
    }

    private MediaType parseMediaType(String mediaTypeStr) {
      MediaType mediaType = MediaType.parse(mediaTypeStr.trim().toLowerCase(Locale.ROOT));
      return mediaType.getBaseType();
    };
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapicom. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.