Package com.commafeed.backend.favicon

Source Code of com.commafeed.backend.favicon.DefaultFaviconFetcher

package com.commafeed.backend.favicon;

import java.nio.charset.StandardCharsets;

import javax.inject.Inject;
import javax.inject.Singleton;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.commafeed.backend.HttpGetter;
import com.commafeed.backend.HttpGetter.HttpResult;
import com.commafeed.backend.feed.FeedUtils;
import com.commafeed.backend.model.Feed;

/**
* Inspired/Ported from https://github.com/potatolondon/getfavicon
*
*/
@Slf4j
@RequiredArgsConstructor(onConstructor = @__({ @Inject }))
@Singleton
public class DefaultFaviconFetcher extends AbstractFaviconFetcher {

  private final HttpGetter getter;

  @Override
  public byte[] fetch(Feed feed) {
    String url = feed.getLink() != null ? feed.getLink() : feed.getUrl();

    if (url == null) {
      log.debug("url is null");
      return null;
    }

    int doubleSlash = url.indexOf("//");
    if (doubleSlash == -1) {
      doubleSlash = 0;
    } else {
      doubleSlash += 2;
    }
    int firstSlash = url.indexOf('/', doubleSlash);
    if (firstSlash != -1) {
      url = url.substring(0, firstSlash);
    }

    byte[] icon = getIconAtRoot(url);

    if (icon == null) {
      icon = getIconInPage(url);
    }

    return icon;
  }

  private byte[] getIconAtRoot(String url) {
    byte[] bytes = null;
    String contentType = null;

    try {
      url = FeedUtils.removeTrailingSlash(url) + "/favicon.ico";
      log.debug("getting root icon at {}", url);
      HttpResult result = getter.getBinary(url, TIMEOUT);
      bytes = result.getContent();
      contentType = result.getContentType();
    } catch (Exception e) {
      log.debug("Failed to retrieve iconAtRoot for url {}: ", url, e);
    }

    if (!isValidIconResponse(bytes, contentType)) {
      bytes = null;
    }
    return bytes;
  }

  private byte[] getIconInPage(String url) {

    Document doc = null;
    try {
      HttpResult result = getter.getBinary(url, TIMEOUT);
      doc = Jsoup.parse(new String(result.getContent()), url);
    } catch (Exception e) {
      log.debug("Failed to retrieve page to find icon", e);
      return null;
    }

    Elements icons = doc.select("link[rel~=(?i)^(shortcut|icon|shortcut icon)$]");

    if (icons.isEmpty()) {
      log.debug("No icon found in page {}", url);
      return null;
    }

    String href = icons.get(0).attr("abs:href");
    if (StringUtils.isBlank(href)) {
      log.debug("No icon found in page");
      return null;
    }

    log.debug("Found unconfirmed iconInPage at {}", href);

    byte[] bytes = null;
    String contentType = null;
    try {
      HttpResult result = getter.getBinary(href, TIMEOUT);
      bytes = result.getContent();
      contentType = result.getContentType();
    } catch (Exception e) {
      log.debug("Failed to retrieve icon found in page {}", href, e);
      return null;
    }

    if (!isValidIconResponse(bytes, contentType)) {
      log.debug("Invalid icon found for {}", href);
      return null;
    }

    return bytes;
  }
}
TOP

Related Classes of com.commafeed.backend.favicon.DefaultFaviconFetcher

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc. and is owned by Oracle Inc. Contact coftware#gmail.com.