Examples of DocumentReader


Examples of org.exoplatform.services.document.DocumentReader

   public void testPDFDocumentReaderServiceXMPMetadata() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/MyTest.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Test de convertion de fichier tif");
         etalon.put(DCMetaData.CREATOR, "Christian Klaus");
         etalon.put(DCMetaData.DESCRIPTION, "20080901 TEST Christian Etat OK");
         //         Calendar c = ISO8601.parseEx("2008-09-01T08:01:10+00:00");
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/pfs_accapp.pdf");
      try
      {

         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "Personal Account Opening Form VN");
         etalon.put(DCMetaData.CREATOR, "mr");
         etalon.put(DCMetaData.PUBLISHER, "Adobe LiveCycle Designer ES 8.2");
         evalProps(etalon, testprops, false);
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

   public void testPDFDocumentReaderServiceXMPUsecase1() throws Exception
   {
      InputStream is = TestPropertiesExtracting.class.getResourceAsStream("/Trait_union.06.Mai_2009.pdf");
      try
      {
         DocumentReader rdr = service.getDocumentReader("application/pdf");
         Properties testprops = rdr.getProperties(is);
         Properties etalon = new Properties();
         etalon.put(DCMetaData.TITLE, "journal interne mai 2009.qxp");
         etalon.put(DCMetaData.CREATOR, "presse");
         evalProps(etalon, testprops, false);
      }
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                        ItemType.PROPERTY);

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

                     Constants.JCR_DATA, 0)));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray()));

                  data = propData.getValues();

                  if (data == null)
                     log.warn("null value found at property " + prop.getQPath().getAsString());

                  // check the jcr:encoding property
                  PropertyData encProp =
                     (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0));

                  if (encProp != null)
                  {
                     // encoding parameter used
                     String encoding = new String(encProp.getValues().get(0).getAsByteArray());
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is, encoding));
                           doc.add(createFulltextField(reader));

                        }
                        finally
                        {
                           try
                           {
                              is.close();
                           }
                           catch (Throwable e)
                           {
                           }
                        }
                     }
                  }
                  else
                  {
                     // no encoding parameter
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader = new StringReader(dreader.getContentAsText(is));
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
                           try
View Full Code Here

Examples of org.exoplatform.services.document.DocumentReader

                     Constants.JCR_DATA, 0), ItemType.PROPERTY));

               // index if have jcr:mimeType sibling for this binary property only
               try
               {
                  DocumentReader dreader =
                     extractor.getDocumentReader(new String(pmime.getValues().get(0).getAsByteArray(),
                        Constants.DEFAULT_ENCODING));

                  data = propData.getValues();

                  if (data == null)
                  {
                     log.warn("null value found at property " + prop.getQPath().getAsString());
                  }

                  // check the jcr:encoding property
                  PropertyData encProp = node.getProperty(Constants.JCR_ENCODING.getAsString());
                  if (encProp == null)
                  {
                     encProp =
                        (PropertyData)stateProvider.getItemData(node, new QPathEntry(Constants.JCR_ENCODING, 0),
                           ItemType.PROPERTY);
                  }

                  String encoding = null;
                  if (encProp != null)
                  {
                     // encoding parameter used
                     encoding = new String(encProp.getValues().get(0).getAsByteArray(), Constants.DEFAULT_ENCODING);
                  }

                  if (dreader instanceof AdvancedDocumentReader)
                  {
                     // its a tika document reader that supports getContentAsReader
                     for (ValueData pvd : data)
                     {
                        // tikaDocumentReader will close inputStream, so no need to close it at finally
                        // statement

                        InputStream is = null;
                        is = pvd.getAsStream();
                        Reader reader;
                        if (encoding != null)
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is, encoding);
                        }
                        else
                        {
                           reader = ((AdvancedDocumentReader)dreader).getContentAsReader(is);
                        }
                        doc.add(createFulltextField(reader));
                     }
                  }
                  else
                  {
                     // old-style document reader
                     for (ValueData pvd : data)
                     {
                        InputStream is = null;
                        try
                        {
                           is = pvd.getAsStream();
                           Reader reader;
                           if (encoding != null)
                           {
                              reader = new StringReader(dreader.getContentAsText(is, encoding));
                           }
                           else
                           {
                              reader = new StringReader(dreader.getContentAsText(is));
                           }
                           doc.add(createFulltextField(reader));
                        }
                        finally
                        {
View Full Code Here

Examples of org.fife.io.DocumentReader

    }

    try {
      SAXParser sp = spf.newSAXParser();
      Handler handler = new Handler();
      DocumentReader r = new DocumentReader(doc);
      InputSource input = new InputSource(r);
      sp.parse(input, handler);
      r.close();
    } catch (SAXParseException spe) {
      // A fatal parse error - ignore; a ParserNotice was already created.
    } catch (Exception e) {
      e.printStackTrace();
      result.addNotice(new DefaultParserNotice(this,
View Full Code Here

Examples of org.fife.io.DocumentReader

    }

    try {
      SAXParser sp = spf.newSAXParser();
      Handler handler = new Handler(doc);
      DocumentReader r = new DocumentReader(doc);
      InputSource input = new InputSource(r);
      sp.parse(input, handler);
      r.close();
    } catch (SAXParseException spe) {
      // A fatal parse error - ignore; a ParserNotice was already created.
    } catch (Exception e) {
      //e.printStackTrace(); // Will print if DTD specified and can't be found
      result.addNotice(new DefaultParserNotice(this,
View Full Code Here

Examples of org.fnlp.data.reader.DocumentReader

   
    InstanceSet trainset = new InstanceSet(pp,af);
    InstanceSet testset = new InstanceSet(pp,af);
   
    // Use a different Reader to read files of the corresponding format
    Reader reader = new DocumentReader(trainDataPath);
   
    // Read in the data and run it through the processing pipes
    trainset.loadThruStagePipes(reader);
   
    // Reuse the same variable for a reader over the test data path
    reader = new DocumentReader(testDataPath);
     
    testset.loadThruStagePipes(reader);
   
   
    /**
 
View Full Code Here

Examples of org.foray.common.sax.DocumentReader

     */
    public FOrayDocument(final FOraySession session,
            final Document domDocument) throws FOrayException {
        this(session);
        this.inputSource = new DocumentInputSource(domDocument);
        this.parser = new DocumentReader();
        this.parser.setContentHandler(this.treeBuilder);
    }
View Full Code Here
TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.