Examples of PowerPointExtractor


Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

 
  public IndexDocument getIndexedDocument(File2Index fileData)
      throws SolrException {
    try {
      POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
      PowerPointExtractor extractor = new PowerPointExtractor(fs);
      String ppText = extractor.getText();

      return new IndexDocument(fileData.path, ppText, null);
    } catch (IOException e) {
      String msg = "Failed to write to the index";
      log.error(msg, e);
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

                resourceURL = new URL(url);
                is = resourceURL.openStream();
            }

            POIFSFileSystem fs = new POIFSFileSystem(is);
            PowerPointExtractor extractor = new PowerPointExtractor(fs);
            String ppText = extractor.getText();

            Document document = new Document();
            document.add(new Field("id", id, Field.Store.YES, Field.Index.TOKENIZED));
            document.add(
                    new Field("content", ppText, Field.Store.NO, Field.Index.TOKENIZED));
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

                return new Word6Extractor(poifsDir);
            }
        }

        if (poifsDir.hasEntry("PowerPoint Document")) {
            return new PowerPointExtractor(poifsDir);
        }

        if (poifsDir.hasEntry("VisioDocument")) {
            return new VisioTextExtractor(poifsDir);
        }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
    }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(poifsDir, fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(poifsDir, fs);
      }
         if(entry.getName().equals("Quill")) {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

     */
    public Reader extractText(InputStream stream,
                              String type,
                              String encoding) throws IOException {
        try {
            PowerPointExtractor extractor = new PowerPointExtractor(stream);
            return new StringReader(extractor.getText(true, true));
        } catch (RuntimeException e) {
            logger.warn("Failed to extract PowerPoint text content", e);
            return new StringReader("");
        } finally {
            try {
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

                for (String paragraph : extractor.getParagraphText()) {
                    xhtml.element("p", paragraph);
                }
            } else if ("PowerPoint Document".equals(name)) {
                setType(metadata, "application/vnd.ms-powerpoint");
                PowerPointExtractor extractor =
                    new PowerPointExtractor(filesystem);
                xhtml.element("p", extractor.getText(true, true));
            } else if ("Workbook".equals(name)) {
                setType(metadata, "application/vnd.ms-excel");
                new ExcelExtractor().parse(filesystem, xhtml);
            } else if ("VisioDocument".equals(name)) {
                setType(metadata, "application/vnd.visio");
                VisioTextExtractor extractor =
                    new VisioTextExtractor(filesystem);
                for (String text : extractor.getAllText()) {
                    xhtml.element("p", text);
                }
            } else if (name.startsWith("__substg1.0_")) {
                setType(metadata, "application/vnd.ms-outlook");
                new OutlookExtractor(filesystem).parse(xhtml, metadata);
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      throws DocumentHandlerException {

    String bodyText = null;

    try {
      PowerPointExtractor ppe = new PowerPointExtractor(is);
      bodyText = ppe.getText();
    } catch (Exception e) {
      throw new DocumentHandlerException(
          "Cannot extract text from a ppt document", e);
    }
View Full Code Here

Examples of org.apache.poi.hslf.extractor.PowerPointExtractor

      }
      if(entry.getName().equals("WordDocument")) {
        return new WordExtractor(fs);
      }
      if(entry.getName().equals("PowerPoint Document")) {
        return new PowerPointExtractor(fs);
      }
      if(entry.getName().equals("VisioDocument")) {
        return new VisioTextExtractor(fs);
      }
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., and is owned by ORACLE Inc. Contact coftware#gmail.com.