Package org.apache.pdfbox.util

Examples of org.apache.pdfbox.util.PDFTextStripper.writeText()


        try {
            // create a writer for output
            PDFTextStripper stripper = null;
            writer = new CharBuffer();
            stripper = new PDFTextStripper();
            stripper.writeText(pdfDoc, writer); // may throw a NPE
            pdfDoc.close();
            writer.close();
        } catch (final IOException e) {
            // close the writer
            if (writer != null) try { writer.close(); } catch (final Exception ex) {}
View Full Code Here


                  }

                  PDFTextStripper stripper = new PDFTextStripper();
                  stripper.setStartPage(1);
                  stripper.setEndPage(Integer.MAX_VALUE);
                  stripper.writeText(pdDocument, sw);
               }
               finally
               {
                  if (pdDocument != null)
                     try
View Full Code Here

                {
                    System.err.println("Writing to "+outputFile);
                }
               
                // Extract text for main document:
                stripper.writeText( document, output );
               
                // ... also for any embedded PDFs:
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentNameDictionary names = catalog.getNames();   
                if (names != null)
View Full Code Here

                                    {
                                        fis.close();
                                    }
                                    try
                                    {
                                        stripper.writeText( subDoc, output );
                                    }
                                    finally
                                    {
                                        subDoc.close();
                                    }
View Full Code Here

                {
                    System.err.println("Writing to "+outputFile);
                }
               
                // Extract text for main document:
                stripper.writeText( document, output );
               
                // ... also for any embedded PDFs:
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentNameDictionary names = catalog.getNames();   
                if (names != null)
View Full Code Here

                                    {
                                        fis.close();
                                    }
                                    try
                                    {
                                        stripper.writeText( subDoc, output );
                                    }
                                    finally
                                    {
                                        subDoc.close();
                                    }
View Full Code Here

                {
                    System.err.println("Writing to "+outputFile);
                }
               
                // Extract text for main document:
                stripper.writeText( document, output );
               
                // ... also for any embedded PDFs:
                PDDocumentCatalog catalog = document.getDocumentCatalog();
                PDDocumentNameDictionary names = catalog.getNames();   
                if (names != null)
View Full Code Here

                                    {
                                        fis.close();
                                    }
                                    try
                                    {
                                        stripper.writeText( subDoc, output );
                                    }
                                    finally
                                    {
                                        subDoc.close();
                                    }
View Full Code Here

                stripper.setShouldSeparateByBeads( separateBeads );
                stripper.setStartPage( startPage );
                stripper.setEndPage( endPage );

                startTime = startProcessing("Starting text extraction");
                stripper.writeText( document, output );
                stopProcessing("Time for extraction: ", startTime);
            }
            finally
            {
                if( output != null )
View Full Code Here

    }

    public static String extractTextFrom(PDDocument document) throws IOException {
        Writer output = new StringWriter();
        PDFTextStripper stripper = new PDFTextStripper();
        stripper.writeText(document, output);
        return output.toString().trim();
    }

    public static String getDocumentMetadataValue(PDDocument document, COSName name) {
        PDDocumentInformation info = document.getDocumentInformation();
View Full Code Here

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.