Package org.apache.tika.exception

Examples of org.apache.tika.exception.TikaException


    public static List<Parser> getParsersFromZip(URL zip, TikaConfig config)
            throws TikaException, IOException {
        String zipMimeType = config.getMimeRepository().getMimeType(zip)
        .getName();
        if (!zipMimeType.equalsIgnoreCase("application/zip")) {
            throw new TikaException("The file you are using is note a zip file");
        }
        return getParsersFromZip(zip.openStream(), config);
    }
View Full Code Here


            StringWriter writer = new StringWriter();
            parser.parse(
                    stream, new WriteOutContentHandler(writer), new Metadata());
            return writer.toString();
        } catch (SAXException e) {
            throw new TikaException("Unexpected SAX error", e);
        }
    }
View Full Code Here

        Parser parser = config.getParser(type.getName());
        if (parser == null) {
            parser = config.getParser(MimeTypes.DEFAULT);
        }
        if (parser == null) {
            throw new TikaException("No parsers available: " + type.getName());
        }

        // Parse the document
        parser.parse(stream, handler, metadata);
    }
View Full Code Here

            new PDF2XHTML(handler, metadata).getText(document);
        } catch (IOException e) {
            if (e.getCause() instanceof SAXException) {
                throw (SAXException) e.getCause();
            } else {
                throw new TikaException("Unable to extract PDF content", e);
            }
        }
    }
View Full Code Here

        din.read(header);
        din.close();

        int info = LittleEndian.getShort(header, 0xa);
        if ((info & 0x4) != 0) {
            throw new TikaException("Fast-saved files are unsupported");
        }
        if ((info & 0x100) != 0) {
            throw new TikaException("This document is password protected");
        }

        // determine the version of Word this document came from.
        int nFib = LittleEndian.getShort(header, 0x2);
        switch (nFib) {
View Full Code Here

                    Tag tag = (Tag)tags.next();
                    metadata.set(tag.getTagName(), tag.getDescription());
                }
            }
        } catch (JpegProcessingException e) {
            throw new TikaException("Can't read JPEG metadata", e);
        } catch (MetadataException e) {
            throw new TikaException("Can't read JPEG metadata", e);
        }
    }
View Full Code Here

                    metadata.set("height", Integer.toString(reader.getHeight(0)));
                    metadata.set("width", Integer.toString(reader.getWidth(0)));
                    reader.dispose();
                }
            } catch (IIOException e) {
                throw new TikaException(type + " parse error", e);
            }
        }

        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
View Full Code Here

        // Return the remaining substring
        try {
            return new String(buffer, start, end - start, "ISO-8859-1");
        } catch (UnsupportedEncodingException e) {
            throw new TikaException("ISO-8859-1 encoding is not available", e);
        }
    }
View Full Code Here

            return parser;
        } else {
            try {
                return getSAXParserFactory(context).newSAXParser();
            } catch (ParserConfigurationException e) {
                throw new TikaException("Unable to configure a SAX parser", e);
            } catch (SAXException e) {
                throw new TikaException("Unable to create a SAX parser", e);
            }
        }
    }
View Full Code Here

            }
        }

        String encoding = metadata.get(Metadata.CONTENT_ENCODING);
        if (encoding == null) {
            throw new TikaException(
                    "Text encoding could not be detected and no encoding"
                    + " hint is available in document metadata");
        }

        // TIKA-341: Only stomp on content-type after we're done trying to
        // use it to guess at the charset.
        metadata.set(Metadata.CONTENT_TYPE, "text/plain");

        try {
            Reader reader =
                new BufferedReader(new InputStreamReader(stream, encoding));

            // TIKA-240: Drop the BOM when extracting plain text
            reader.mark(1);
            int bom = reader.read();
            if (bom != '\ufeff') { // zero-width no-break space
                reader.reset();
            }

            XHTMLContentHandler xhtml =
                new XHTMLContentHandler(handler, metadata);
            xhtml.startDocument();

            xhtml.startElement("p");
            char[] buffer = new char[4096];
            int n = reader.read(buffer);
            while (n != -1) {
                xhtml.characters(buffer, 0, n);
                n = reader.read(buffer);
            }
            xhtml.endElement("p");

            xhtml.endDocument();
        } catch (UnsupportedEncodingException e) {
            throw new TikaException(
                    "Unsupported text encoding: " + encoding, e);
        }
    }
View Full Code Here

TOP

Related Classes of org.apache.tika.exception.TikaException

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.