Package org.apache.mahout.cf.taste.impl.similarity

Source Code of org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mahout.cf.taste.impl.similarity;

import java.util.Collection;

import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import com.google.common.base.Preconditions;

/**
* <p>
* Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of
* preference values themselves. That is, each user's preferences are sorted and then assign a rank as their
* preference value, with 1 being assigned to the least preferred item.
* </p>
*/
public final class SpearmanCorrelationSimilarity implements UserSimilarity {
 
  private final DataModel dataModel;
 
  public SpearmanCorrelationSimilarity(DataModel dataModel) {
    Preconditions.checkArgument(dataModel != null, "dataModel is null");
    this.dataModel = dataModel;
  }
 
  @Override
  public double userSimilarity(long userID1, long userID2) throws TasteException {
    PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1);
    PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2);
    int xLength = xPrefs.length();
    int yLength = yPrefs.length();
   
    if (xLength <= 1 || yLength <= 1) {
      return Double.NaN;
    }
   
    // Copy prefs since we need to modify pref values to ranks
    xPrefs = xPrefs.clone();
    yPrefs = yPrefs.clone();
   
    // First sort by values from low to high
    xPrefs.sortByValue();
    yPrefs.sortByValue();
   
    // Assign ranks from low to high
    float nextRank = 1.0f;
    for (int i = 0; i < xLength; i++) {
      // ... but only for items that are common to both pref arrays
      if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) {
        xPrefs.setValue(i, nextRank);
        nextRank += 1.0f;
      }
      // Other values are bogus but don't matter
    }
    nextRank = 1.0f;
    for (int i = 0; i < yLength; i++) {
      if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) {
        yPrefs.setValue(i, nextRank);
        nextRank += 1.0f;
      }
    }
   
    xPrefs.sortByItem();
    yPrefs.sortByItem();
   
    long xIndex = xPrefs.getItemID(0);
    long yIndex = yPrefs.getItemID(0);
    int xPrefIndex = 0;
    int yPrefIndex = 0;
   
    double sumXYRankDiff2 = 0.0;
    int count = 0;
   
    while (true) {
      int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0;
      if (compare == 0) {
        double diff = xPrefs.getValue(xPrefIndex) - yPrefs.getValue(yPrefIndex);
        sumXYRankDiff2 += diff * diff;
        count++;
      }
      if (compare <= 0) {
        if (++xPrefIndex >= xLength) {
          break;
        }
        xIndex = xPrefs.getItemID(xPrefIndex);
      }
      if (compare >= 0) {
        if (++yPrefIndex >= yLength) {
          break;
        }
        yIndex = yPrefs.getItemID(yPrefIndex);
      }
    }
   
    if (count <= 1) {
      return Double.NaN;
    }
   
    // When ranks are unique, this formula actually gives the Pearson correlation
    return 1.0 - 6.0 * sumXYRankDiff2 / (count * (count * count - 1));
  }
 
  @Override
  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
    throw new UnsupportedOperationException();
  }
 
  @Override
  public void refresh(Collection<Refreshable> alreadyRefreshed) {
    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
    RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel);
  }
 
}
TOP

Related Classes of org.apache.mahout.cf.taste.impl.similarity.SpearmanCorrelationSimilarity

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code is the property of its respective owners. Java is a trademark of Sun Microsystems, Inc., owned by Oracle Inc. Contact coftware#gmail.com.