Examples of PowerPointExtractor


Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                new ExcelExtractor().parse(filesystem, xhtml);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (!outlookExtracted && name.startsWith("__substg1.0_")) {
                // TODO: Cleaner mechanism for detecting Outlook
                outlookExtracted = true;
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

                return new Word6Extractor(poifsDir);
            }
        }

        if (poifsDir.hasEntry("PowerPoint Document")) {
            return new PowerPointExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("VisioDocument")) {
            return new VisioTextExtractor(poifsDir);
        }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

          } catch(OldWordFileFormatException e) {
              return new Word6Extractor(poifsDir, fs);
          }
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

          } catch(OldWordFileFormatException e) {
              return new Word6Extractor(poifsDir, fs);
          }
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

         if (is.available() == 0)
         {
            return "";
         }
        
         PowerPointExtractor ppe;
         try
         {
            ppe = new PowerPointExtractor(is);
         }
         catch (IOException e)
         {
            throw new DocumentReadException("Can't open presentation.", e);
         }
         return ppe.getText(true, true);
      }
      finally
      {
         if (is != null)
         {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

        try {
            /*
             * create new PowerPointExtractor and extract text and notes
             * of the document
             */
            final PowerPointExtractor pptExtractor = new PowerPointExtractor(new BufferedInputStream(source));
            final String contents = pptExtractor.getText(true, true).trim();
            String title = contents.replaceAll("\r"," ").replaceAll("\n"," ").replaceAll("\t"," ").trim();
            if (title.length() > 80) title = title.substring(0, 80);
            int l = title.length();
            while (true) {
                title = title.replaceAll("  ", " ");
                if (title.length() == l) break;
                l = title.length();
            }
           
            /*
             * create the plasmaParserDocument for the database
             * and set shortText and bodyText properly
             */
            final Document[] docs = new Document[]{new Document(
                    location,
                    mimeType,
                    "UTF-8",
                    this,
                    null,
                    null,
                    title,
                    "", // TODO: AUTHOR
                    pptExtractor.getDocSummaryInformation().getCompany(),
                    null,
                    null,
                    0.0f, 0.0f,
                    UTF8.getBytes(contents),
                    null,
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

         if (is.available() == 0)
         {
            return "";
         }

         PowerPointExtractor ppe;
         try
         {
            ppe = new PowerPointExtractor(is);
         }
         catch (IOException e)
         {
            throw new DocumentReadException("Can't open presentation.", e);
         }
         return ppe.getText(true, true);
      }
      finally
      {
         if (is != null)
         {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

    }

    protected void parse(
            POIFSFileSystem filesystem, XHTMLContentHandler xhtml)
            throws IOException, SAXException, TikaException {
        PowerPointExtractor powerPointExtractor =
            new PowerPointExtractor(filesystem);
        xhtml.element("p", powerPointExtractor.getText(true, true));

        List<OLEShape> shapeList = powerPointExtractor.getOLEShapes();
        for (OLEShape shape : shapeList) {
            TikaInputStream stream =
                TikaInputStream.get(shape.getObjectData().getData());
            try {
                String mediaType = null;
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and owned by ORACLE Inc. Contact coftware#gmail.com.