Package org.apache.pdfbox.pdmodel.graphics.xobject

Examples of org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage


            PDXObject object = entry.getValue();
            if (object instanceof PDXObjectForm) {
                extractImages(((PDXObjectForm) object).getResources());
            } else if (object instanceof PDXObjectImage) {

                PDXObjectImage image = (PDXObjectImage) object;

                Metadata metadata = new Metadata();
                String extension = "";
                if (image instanceof PDJpeg) {
                    metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
                    extension = ".jpg";
                } else if (image instanceof PDCcitt) {
                    metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
                    extension = ".tif";
                } else if (image instanceof PDPixelMap) {
                    metadata.set(Metadata.CONTENT_TYPE, "image/png");
                    extension = ".png";
                }

                Integer imageNumber = processedInlineImages.get(entry.getKey());
                if (imageNumber == null) {
                    imageNumber = inlineImageCounter++;
                }
                String fileName = "image"+imageNumber+extension;
                metadata.set(Metadata.RESOURCE_NAME_KEY, fileName);

                // Output the img tag
                AttributesImpl attr = new AttributesImpl();
                attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName);
                attr.addAttribute("", "alt", "alt", "CDATA", fileName);
                handler.startElement("img", attr);
                handler.endElement("img");

                //Do we only want to process unique COSObject ids?
                //If so, have we already processed this one?
                if (config.getExtractUniqueInlineImagesOnly() == true) {
                    String cosObjectId = entry.getKey();
                    if (processedInlineImages.containsKey(cosObjectId)){
                        continue;
                    }
                    processedInlineImages.put(cosObjectId, imageNumber);
                }

                metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
                        TikaCoreProperties.EmbeddedResourceType.INLINE.toString());

                EmbeddedDocumentExtractor extractor =
                        getEmbeddedDocumentExtractor();
                if (extractor.shouldParseEmbedded(metadata)) {
                    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                    try {
                        image.write2OutputStream(buffer);
                        image.clear();
                        extractor.parseEmbedded(
                                new ByteArrayInputStream(buffer.toByteArray()),
                                new EmbeddedContentHandler(handler),
                                metadata, false);
                    } catch (IOException e) {
View Full Code Here

TOP

Related Classes of org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.