/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2012 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.dependency;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.owasp.dependencycheck.utils.DependencyVersion;
import org.owasp.dependencycheck.utils.DependencyVersionUtil;
import org.owasp.dependencycheck.utils.Filter;
import org.owasp.dependencycheck.utils.UrlStringUtils;
/**
 * Used to maintain a collection of Evidence.
 *
 * <p>
 * Evidence is stored in a {@link TreeSet} created without a comparator, so ordering and de-duplication follow the
 * natural ordering of {@code Evidence}. A separate set of "weighted" strings is kept alongside the evidence.</p>
 *
 * @author Jeremy Long &lt;jeremy.long@owasp.org&gt;
 */
public class EvidenceCollection implements Serializable, Iterable<Evidence> {

    /**
     * The logger.
     */
    private static final Logger LOGGER = Logger.getLogger(EvidenceCollection.class.getName());
    /**
     * Matches the characters (whitespace, underscore and hyphen) that are stripped from an evidence value before it
     * is searched. Compiled once here instead of on every comparison.
     */
    private static final Pattern IGNORED_CHARACTERS = Pattern.compile("[\\s_-]");
    /**
     * Used to iterate over highest confidence evidence contained in the collection.
     */
    private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
        public boolean passes(Evidence evidence) {
            return evidence.getConfidence() == Confidence.HIGHEST;
        }
    };
    /**
     * Used to iterate over high confidence evidence contained in the collection.
     */
    private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
        public boolean passes(Evidence evidence) {
            return evidence.getConfidence() == Confidence.HIGH;
        }
    };
    /**
     * Used to iterate over medium confidence evidence contained in the collection.
     */
    private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
        public boolean passes(Evidence evidence) {
            return evidence.getConfidence() == Confidence.MEDIUM;
        }
    };
    /**
     * Used to iterate over low confidence evidence contained in the collection.
     */
    private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
        public boolean passes(Evidence evidence) {
            return evidence.getConfidence() == Confidence.LOW;
        }
    };
    /**
     * Used to iterate over evidence that was used (aka read) from the collection.
     */
    private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
        public boolean passes(Evidence evidence) {
            return evidence.isUsed();
        }
    };
    /**
     * A collection of evidence.
     */
    private final Set<Evidence> list;
    /**
     * A collection of strings used to adjust Lucene's term weighting.
     */
    private final Set<String> weightedStrings;

    /**
     * Creates a new, empty EvidenceCollection.
     */
    public EvidenceCollection() {
        list = new TreeSet<Evidence>();
        weightedStrings = new HashSet<String>();
    }

    /**
     * Used to iterate over evidence of the specified confidence.
     *
     * <p>
     * Any value other than HIGHEST, HIGH or MEDIUM (including {@code null}) selects the LOW confidence filter.</p>
     *
     * @param confidence the confidence level for the evidence to be iterated over.
     * @return an {@code Iterable<Evidence>} over the evidence passing the selected confidence filter
     */
    public final Iterable<Evidence> iterator(Confidence confidence) {
        if (confidence == Confidence.HIGHEST) {
            return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
        } else if (confidence == Confidence.HIGH) {
            return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
        } else if (confidence == Confidence.MEDIUM) {
            return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
        } else {
            return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
        }
    }

    /**
     * Adds evidence to the collection.
     *
     * @param e Evidence.
     */
    public void addEvidence(Evidence e) {
        list.add(e);
    }

    /**
     * Creates an Evidence object from the parameters and adds the resulting object to the collection.
     *
     * @param source the source of the Evidence.
     * @param name the name of the Evidence.
     * @param value the value of the Evidence.
     * @param confidence the confidence of the Evidence.
     */
    public void addEvidence(String source, String name, String value, Confidence confidence) {
        final Evidence e = new Evidence(source, name, value, confidence);
        addEvidence(e);
    }

    /**
     * Adds term to the weighting collection. The terms added here are used later to boost the score of other terms.
     * This is a way of combining evidence from multiple sources to boost the confidence of the given evidence.
     *
     * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the
     * package names within the JAR file we may add these package names to the "weighted" strings collection to boost
     * the score in the Lucene query. That way when we construct the Lucene query we find the term Apache in the
     * collection AND in the weighted strings; as such, we will boost the confidence of the term Apache.
     *
     * @param str to add to the weighting collection.
     */
    public void addWeighting(String str) {
        weightedStrings.add(str);
    }

    /**
     * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in
     * another location. The returned set is the collection's own backing set, not a copy.
     *
     * @return the {@code Set<String>} of weighted terms
     */
    public Set<String> getWeighting() {
        return weightedStrings;
    }

    /**
     * Returns the set of evidence. The returned set is the collection's own backing set, not a copy.
     *
     * @return the set of evidence.
     */
    public Set<Evidence> getEvidence() {
        return list;
    }

    /**
     * Returns the set of evidence from a given source.
     *
     * @param source the source of the evidence
     * @return a new set holding the matching evidence, or {@code null} if {@code source} is {@code null}.
     */
    public Set<Evidence> getEvidence(String source) {
        if (source == null) {
            return null;
        }
        final Set<Evidence> ret = new HashSet<Evidence>();
        for (Evidence e : list) {
            if (source.equals(e.getSource())) {
                ret.add(e);
            }
        }
        return ret;
    }

    /**
     * Returns the set of evidence from a given source and name.
     *
     * @param source the source of the evidence
     * @param name the name of the evidence to return
     * @return a new set holding the matching evidence, or {@code null} if either argument is {@code null}.
     */
    public Set<Evidence> getEvidence(String source, String name) {
        if (source == null || name == null) {
            return null;
        }
        final Set<Evidence> ret = new HashSet<Evidence>();
        for (Evidence e : list) {
            if (source.equals(e.getSource()) && name.equals(e.getName())) {
                ret.add(e);
            }
        }
        return ret;
    }

    /**
     * Implements the iterator interface for the Evidence Collection.
     *
     * @return an {@code Iterator<Evidence>} over all evidence in the collection.
     */
    public Iterator<Evidence> iterator() {
        return list.iterator();
    }

    /**
     * Used to determine if a given string was used (aka read).
     *
     * <p>
     * Only evidence flagged as used is searched. Each value is lower-cased, passed through the URL correction, and
     * stripped of whitespace, underscores and hyphens before a substring test against the lower-cased
     * {@code text}. Lower-casing uses a fixed locale so the result does not depend on the JVM default locale.</p>
     *
     * @param text the string to search for.
     * @return whether or not the string was used.
     */
    public boolean containsUsedString(String text) {
        if (text == null) {
            return false;
        }
        final String textToTest = text.toLowerCase(Locale.ENGLISH);

        for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
            final String raw = e.getValue();
            if (raw == null) {
                // nothing to search in; avoids a NullPointerException on evidence without a value
                continue;
            }
            //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
            final String corrected = urlCorrection(raw.toLowerCase(Locale.ENGLISH));
            final String value = IGNORED_CHARACTERS.matcher(corrected).replaceAll("");
            if (value.contains(textToTest)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Used to determine if a given version was used (aka read) from the EvidenceCollection.
     *
     * @param version the version to search for within the collected evidence.
     * @return whether or not the version was used.
     */
    public boolean containsUsedVersion(DependencyVersion version) {
        if (version == null) {
            return false;
        }

        for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
            final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
            if (value != null && value.matchesAtLeastThreeLevels(version)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether or not the collection contains evidence of a specified Confidence.
     *
     * @param confidence A Confidence value.
     * @return {@code true} if at least one evidence item has the given confidence; {@code false} otherwise,
     * including when {@code confidence} is {@code null}.
     */
    public boolean contains(Confidence confidence) {
        if (confidence == null) {
            return false;
        }
        for (Evidence e : list) {
            // compare from the non-null argument so evidence without a confidence cannot cause a NullPointerException
            if (confidence.equals(e.getConfidence())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new
     * EvidenceCollection. Weighted strings are not copied. {@code null} collections are skipped.
     *
     * @param ec One or more EvidenceCollections.
     * @return a new EvidenceCollection containing the used evidence.
     */
    public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
        final EvidenceCollection ret = new EvidenceCollection();
        if (ec == null) {
            return ret;
        }
        for (EvidenceCollection col : ec) {
            if (col == null) {
                continue;
            }
            for (Evidence e : col.list) {
                if (e.isUsed()) {
                    ret.addEvidence(e);
                }
            }
        }
        return ret;
    }

    /**
     * Merges multiple EvidenceCollections together, copying both the evidence and the weighted strings.
     * {@code null} collections are skipped.
     *
     * @param ec One or more EvidenceCollections.
     * @return a new EvidenceCollection.
     */
    public static EvidenceCollection merge(EvidenceCollection... ec) {
        final EvidenceCollection ret = new EvidenceCollection();
        if (ec == null) {
            return ret;
        }
        for (EvidenceCollection col : ec) {
            if (col == null) {
                continue;
            }
            ret.list.addAll(col.list);
            ret.weightedStrings.addAll(col.weightedStrings);
        }
        return ret;
    }

    /**
     * Merges multiple EvidenceCollections together; flattening all of the evidence items by removing the confidence.
     * Only used evidence is included; each item is re-created with a {@code null} confidence and flagged as used.
     * {@code null} collections are skipped.
     *
     * @param ec One or more EvidenceCollections
     * @return new set of evidence resulting from merging the evidence in the collections
     */
    public static Set<Evidence> mergeForDisplay(EvidenceCollection... ec) {
        final Set<Evidence> ret = new TreeSet<Evidence>();
        if (ec == null) {
            return ret;
        }
        for (EvidenceCollection col : ec) {
            if (col == null) {
                continue;
            }
            for (Evidence e : col) {
                if (e.isUsed()) {
                    final Evidence newEvidence = new Evidence(e.getSource(), e.getName(), e.getValue(), null);
                    newEvidence.setUsed(true);
                    ret.add(newEvidence);
                }
            }
        }
        return ret;
    }

    /**
     * Returns a string of evidence 'values'; each value is followed by a single space.
     *
     * @return a string containing the evidence.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        for (Evidence e : this.list) {
            sb.append(e.getValue()).append(' ');
        }
        return sb.toString();
    }

    /**
     * Returns the number of elements in the EvidenceCollection.
     *
     * @return the number of elements in the collection.
     */
    public int size() {
        return list.size();
    }

    /**
     * <p>
     * Takes a string that may contain a fully qualified domain and it will return the string having removed the query
     * string, the protocol, the sub-domain of 'www', and the file extension of the path.</p>
     * <p>
     * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file
     * extension, etc. may produce false positives.</p>
     * <p>
     * Example, given the following input:</p>
     * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
     * <p>
     * The function would return:</p>
     * <code>'Please visit somedomain path1 path2 file'</code>
     * <p>
     * A whitespace-separated part that looks like a URL but cannot be parsed is logged at FINE level and kept
     * unchanged.</p>
     *
     * @param value the value that may contain a url
     * @return the modified string; {@code value} itself when it is {@code null} or contains no URL
     */
    private static String urlCorrection(String value) {
        if (value == null || !UrlStringUtils.containsUrl(value)) {
            return value;
        }
        final StringBuilder sb = new StringBuilder(value.length());
        final String[] parts = value.split("\\s");
        for (String part : parts) {
            // every part is prefixed with a space; the leading one is removed by the final trim()
            sb.append(' ');
            if (UrlStringUtils.isUrl(part)) {
                try {
                    final List<String> data = UrlStringUtils.extractImportantUrlData(part);
                    sb.append(StringUtils.join(data, ' '));
                } catch (MalformedURLException ex) {
                    LOGGER.log(Level.FINE, "error parsing " + part, ex);
                    sb.append(part);
                }
            } else {
                sb.append(part);
            }
        }
        return sb.toString().trim();
    }
}