Examples of DocumentReader


Examples of org.exoplatform.services.document.DocumentReader

            {
               List<ValueData> values = prop.getValues();
               ValueData mimeValue = values.get(0);
               String mime = new String(mimeValue.getAsByteArray());

               DocumentReader dreader = extractor.getDocumentReader(mime);

               InputStream is = null;
               try
               {
                  is = internalValue.getAsStream();

                  // check the jcr:encoding property
                  PropertyData encProp =
                           (PropertyData) stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));
                  if (encProp != null)
                  {
                     ValueData encValue = encProp.getValues().get(0);
                     text = dreader.getContentAsText(is, new String(encValue.getAsByteArray()));
                  }
                  else
                  {
                     text = dreader.getContentAsText(is);
                  }
               }
               finally
               {
                  try
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

      fis.close();
      fis = new FileInputStream(url.getFile());
      DocumentReaderService extr =
         (DocumentReaderService)session.getContainer().getComponentInstanceOfType(DocumentReaderService.class);

      DocumentReader dreader = extr.getDocumentReader("application/excel");
      assertNotNull(dreader);

      System.out.println(dreader);

      if (dreader instanceof MSExcelDocumentReader)
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

          if (pmime != null)
          {
             // index if have jcr:mimeType sibling for this binary property only
             try
             {
                DocumentReader dreader =
                   extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                // ok, have a reader
                // if the prop was obtained from the cache it will already contain its values,
                // otherwise read the prop with its values from the DM
                data =
                   prop.getValues().size() > 0 ? prop.getValues() : ((PropertyData)stateProvider.getItemData(node,
                      new QPathEntry(Constants.JCR_DATA, 0))).getValues();
                if (data == null)
                   log.warn("null value found at property " + prop.getQPath().getAsString());

                // check the jcr:encoding property
                PropertyData encProp =
                   (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                if (encProp != null)
                {
                   // encoding parameter used
                   String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                   for (ValueData pvd : data)
                   {
                      InputStream is = null;
                      try
                      {
                        
                         is = pvd.getAsStream();
                         Reader  reader = new StringReader(dreader.getContentAsText(is, encoding));
                         doc.add(createFulltextField(reader));
                        
                      }
                      finally
                      {
                         try
                         {
                            is.close();
                         }
                         catch (Throwable e)
                         {
                         }
                      }
                   }
                }
                else
                {
                   // no encoding parameter
                   for (ValueData pvd : data)
                   {
                      InputStream is = null;
                      try
                      {
                         doc.add(createFulltextField(dreader.getContentAsText(is = pvd.getAsStream())));
                      }
                      finally
                      {
                         try
                         {
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(ValueDataUtil.getString(pmime.getValues().get(0)));

                  data = propData.getValues();

                  if (data == null)
                  {
                     LOG.warn("null value found at property " + prop.getQPath().getAsString());
                     return;
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null && !node.containAllProperties())
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     ValueDataUtil.getString(encProp.getValues().get(0));
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // it's a Tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   public void testPDFDocumentReaderServiceXMPMetadataTikasFile() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/tikaTestPDF.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Document title");
         etalon.put(DCMetaData.CREATOR, "Document author");
         evalProps(etalon, testprops);
      }
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/test.html");
      try
      {
         String mimeType = mimetypeResolver.getMimeType("test.html");

         DocumentReader dr = service.getDocumentReader(mimeType);
         String text = dr.getContentAsText(is);
         assertTrue(text.contains("This is the third maintenance release of the redesigned 2.0"));
      }
      finally
      {
         is.close();
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   {

      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/ch-core.html");
      String mimeType = mimetypeResolver.getMimeType("ch-core.html");

      DocumentReader dr = service.getDocumentReader(mimeType);
      String text = dr.getContentAsText(is);

      assertTrue((normalizeWhitespaces(text))
         .contains("The eXo Core is a set of common services that are used by eXo products and modules, it also can be used in the business logic. It's Authentication and Security, Organization, Database, Logging, JNDI, LDAP, Document reader and other services."));
   }
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/test.html");
      try
      {
         String mimeType = mimetypeResolver.getMimeType("test.html");

         DocumentReader dr = service.getDocumentReader(mimeType);
         String text = dr.getContentAsText(is);
         assertTrue(text.contains("This is the third maintenance release of the redesigned 2.0"));
      }
      finally
      {
         is.close();
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   public void testXHTMLGetContentAsString() throws Exception
   {
      InputStream is = TestHtmlDocumentReader.class.getResourceAsStream("/testXHTML.html");
      try
      {
         DocumentReader dr = service.getDocumentReader("application/xhtml+xml");
         String text = dr.getContentAsText(is);
         assertTrue(text
            .contains("This document tests the ability of Apache Tika to extract content from an XHTML document."));
      }
      finally
      {
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.