Package org.apache.tika.mime

Examples of org.apache.tika.mime.MediaType


                return COMP_OBJ;
            } else if (names.contains("CONTENTS") && names.contains("\u0001CompObj")) {
               // CompObj is a general kind of OLE2 embedding, but this may be an old Works file
               // If we have the Directory, check
               if (root != null) {
                  MediaType type = processCompObjFormatType(root);
                  if (type == WPS) {
                     return WPS;
                  } else {
                     // Assume it's a general CompObj embedded resource
                     return COMP_OBJ;
View Full Code Here


                NodeList mimes = node.getElementsByTagName("mime");
                if (mimes.getLength() > 0) {
                    Set<MediaType> types = new HashSet<MediaType>();
                    for (int j = 0; j < mimes.getLength(); j++) {
                        String mime = getText(mimes.item(j));
                        MediaType type = MediaType.parse(mime);
                        if (type != null) {
                            types.add(type);
                        } else {
                            throw new TikaException(
                                    "Invalid media type name: " + mime);
View Full Code Here

        for (MediaType type : registry.getTypes()) {
            System.out.println(type);
            for (MediaType alias : registry.getAliases(type)) {
                System.out.println("  alias:     " + alias);
            }
            MediaType supertype = registry.getSupertype(type);
            if (supertype != null) {
                System.out.println("  supertype: " + supertype);
            }
            Parser p = parsers.get(type);
            if (p != null) {
View Full Code Here

            if (name == null) {
                name = "file" + count++;
            }

            MediaType contentType = detector.detect(inputStream, metadata);

            if (name.indexOf('.')==-1 && contentType!=null) {
                try {
                    name += config.getMimeRepository().forName(
                            contentType.toString()).getExtension();
                } catch (MimeTypeException e) {
                    e.printStackTrace();
                }
            }
View Full Code Here

            TikaInputStream tis = TikaInputStream.get(input, tmp);

            byte[] prefix = new byte[1024]; // enough for all known formats
            int length = tis.peek(prefix);

            MediaType type = detectArchiveFormat(prefix, length);
            if (PackageParser.isZipArchive(type)
                    && TikaInputStream.isTikaInputStream(input)) {
                return detectZipFormat(tis);
            } else if (!type.equals(MediaType.OCTET_STREAM)) {
                return type;
            } else {
                return detectCompressorFormat(prefix, length);
            }
        } finally {
View Full Code Here

    private static MediaType detectZipFormat(TikaInputStream tis) {
        try {
            ZipFile zip = new ZipFile(tis.getFile()); // TODO: hasFile()?
            try {
                MediaType type = detectOpenDocument(zip);
                if (type == null) {
                    type = detectOfficeOpenXML(zip, tis);
                }
                if (type == null) {
                    type = detectIWork(zip);
View Full Code Here

       };
      
       // Check we found the parser
       CompositeParser parser = (CompositeParser)tika.getParser();
       for (String type : mimetypes) {
          MediaType mt = MediaType.parse(type);
          assertNotNull("Parser not found for " + type, parser.getParsers().get(mt) );
       }
      
       // Have each file parsed, and check
       for (int i=0; i<testFiles.length; i++) {
View Full Code Here

    public void testPowerpointImages() throws Exception {
        ContainerExtractor extractor = new ParserContainerExtractor();
        TrackingHandler handler;

        handler = process("pictures.ppt", extractor, false);
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "jpeg")));
        assertTrue(handler.mediaTypes.contains(new MediaType("image", "png")));
    }
View Full Code Here

             "/test-documents/testEXCEL.xlsb");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
       } finally {
          input.close();
       }
      
       // OfficeParser won't handle it
View Full Code Here

             "/test-documents/testEXCEL_95.xls");
       Metadata m = new Metadata();
       m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
      
       // Should be detected correctly
       MediaType type = null;
       try {
          type = detector.detect(input, m);
          assertEquals("application/vnd.ms-excel", type.toString());
       } finally {
          input.close();
       }
      
       // OfficeParser will claim to handle it
View Full Code Here

TOP

Related Classes of org.apache.tika.mime.MediaType

Copyright © 2018 www.massapicom. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.