Package org.apache.nutch.searcher

Examples of org.apache.nutch.searcher.HitDetails


      NutchBean.LOG.info("request from " + request.getRemoteAddr());
    }

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
                      request.getParameter("id"));
    HitDetails details = bean.getDetails(hit);

    // raw bytes
    byte[] bytes = bean.getContent(details);

    // pass all original headers? only these for now.
View Full Code Here


      final Element urlElement = (Element) doc.getElementsByTagName("url").item(0);
      final Element snippetElement = (Element) doc.getElementsByTagName("snippet").item(0);
      final Element titleElement = (Element) doc.getElementsByTagName("title").item(0);

      summaries.add(toText(titleElement) + " " + toText(snippetElement));
      hitDetails.add(new HitDetails(
          new String [] {"url"},
          new String [] {toText(urlElement)}));
    }

    HitsCluster [] clusters = c.clusterHits(
View Full Code Here

    }

    int detailsLength = in.readInt();
    details = new HitDetails[detailsLength];
    for (int i = 0; i < detailsLength; i++) {
      details[i] = new HitDetails();
      details[i].readFields(in);
    }

    int summariesLength = in.readInt();
    summaries = new Summary[summariesLength];
View Full Code Here

            + urlQuery + "&hitsPerDup=" + 0 + params);
      }

      for (int i = 0; i < length; i++) {
        Hit hit = show[i];
        HitDetails detail = details[i];
        String title = detail.getValue("title");
        String url = detail.getValue("url");
        String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();

        if (title == null || title.equals("")) {
          // use url for docs w/o title
          title = url;
        }

        Element item = addNode(doc, channel, "item");

        addNode(doc, item, "title", title);
        addNode(doc, item, "description", summaries[i].toString());
        addNode(doc, item, "link", url);

        addNode(doc, item, "nutch", "site", hit.getDedupValue());

        addNode(doc, item, "nutch", "cache", base + "/cached.do?" + id);
        addNode(doc, item, "nutch", "explain", base + "/explain.do?" + id
            + "&query=" + urlQuery);

        if (hit.moreFromDupExcluded()) {
          addNode(doc, item, "nutch", "moreFromSite", requestUrl
              + "?query="
              + URLEncoder.encode("site:" + hit.getDedupValue() + " "
                  + queryString, "UTF-8") + "&hitsPerSite=" + 0 + params);
        }

        for (int j = 0; j < detail.getLength(); j++) { // add all from detail
          String field = detail.getField(j);
          if (!SKIP_DETAILS.contains(field)) {
            addNode(doc, item, "nutch", field, detail.getValue(j));
          }
        }
      }

      // dump DOM tree
View Full Code Here

    LOG.info("Cache request from " + request.getRemoteAddr());

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
                      Integer.parseInt(request.getParameter("id")));

    HitDetails details = bean.getDetails(hit);
    String id = "idx=" + hit.getIndexNo() + "&id=" + hit.getIndexDocNo();

    Metadata metaData = bean.getParseData(details).getContentMeta();

    String content = null;
    String contentType = (String) metaData.get(Metadata.CONTENT_TYPE);


    if (contentType.startsWith("text/html")) {
      // FIXME : it's better to emit the original 'byte' sequence
      // with 'charset' set to the value of 'CharEncoding',
      // but I don't know how to emit 'byte sequence' in JSP.
      // out.getOutputStream().write(bean.getContent(details)) may work,
      // but I'm not sure.
      String encoding = (String) metaData.get("CharEncodingForConversion");
      if (encoding != null) {
        try {
          content = new String(bean.getContent(details), encoding);
        } catch (UnsupportedEncodingException e) {
          //fallback to configured charset
          content = new String(bean.getContent(details), locator
              .getConfiguration().get("parser.character.encoding.default"));
        }
      } else {
        //construct String with system default encoding
        content = new String(bean.getContent(details));
      }
    }

    // page content
    request.setAttribute("content", content);
    // page content type
    request.setAttribute("contentType", contentType);
    // page url
    request.setAttribute("url", details.getValue("url"));
    // page id
    request.setAttribute("id", id);
    // page content if html
    request.setAttribute("isHtml", new Boolean(contentType
        .startsWith("text/html")));
View Full Code Here

    LOG.info("anchors request from " + request.getRemoteAddr());
    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
        .parseInt(request.getParameter("id")));

    HitDetails details = bean.getDetails(hit);

    String[] anchors = bean.getAnchors(details);
    ArrayList anchorVector = new ArrayList();
    if (anchors != null) {
      for (int i = 0; i < anchors.length; i++) {
View Full Code Here

    NutchBean.LOG.info("request from " + request.getRemoteAddr());

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
        .parseInt(request.getParameter("id")));
    HitDetails details = getServiceLocator().getNutchBean().getDetails(hit);

    // raw bytes
    byte[] bytes = getServiceLocator().getNutchBean().getContent(details);

    // pass all original headers? only these for now.
View Full Code Here

    ServiceLocator locator = getServiceLocator(request);
    NutchBean bean = locator.getNutchBean();

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")), Integer
        .parseInt(request.getParameter("id")));
    HitDetails details = bean.getDetails(hit);
    Query query = Query.parse(request.getParameter("query"), locator
        .getConfiguration());

    // put explanation and hitDetails into request so view can access them
    request.setAttribute("explanation", bean.getExplanation(query, hit));
    request.setAttribute("hitDetails", details.toHtml());
    request.setAttribute("query", query);
  }
View Full Code Here

      LOG.debug("request from " + request.getRemoteAddr());
    //}

    Hit hit = new Hit(Integer.parseInt(request.getParameter("idx")),
                      Integer.parseInt(request.getParameter("id")));
    HitDetails details = bean.getDetails(hit);

    // raw bytes
    byte[] bytes = bean.getContent(details);

    // pass all original headers? only these for now.
View Full Code Here

    * @param h
    * @return
    */
  protected HitDetails getCollectionQualifiedHitDetails(final HitDetails h)
  {
    return new HitDetails(h.getValue("segment"),
      Nutchwax.generateWaxKey(h.getValue("url"),
      h.getValue("collection")).toString());
  }
View Full Code Here

TOP

Related Classes of org.apache.nutch.searcher.HitDetails

Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by ORACLE Inc. Contact coftware#gmail.com.