RankSGD, PRankD added

guoguibing · Jun 16, 2014 · 7ca8296 · 7ca8296
1 parent 438f880
commit 7ca8296
Show file tree

Hide file tree

Showing 4 changed files with 330 additions and 2 deletions.
diff --git a/librec/src/main/java/librec/ext/PRankD.java b/librec/src/main/java/librec/ext/PRankD.java
@@ -0,0 +1,190 @@
+// Copyright (C) 2014 Guibing Guo
+//
+// This file is part of LibRec.
+//
+// LibRec is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// LibRec is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with LibRec. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package librec.ext;
+
+import happy.coding.io.KeyValPair;
+import happy.coding.io.Lists;
+import happy.coding.math.Randoms;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import librec.data.DenseVector;
+import librec.data.SparseMatrix;
+import librec.data.SparseVector;
+import librec.data.SymmMatrix;
+import librec.data.VectorEntry;
+import librec.ranking.RankSGD;
+
+/**
+ * Neil Hurley, <strong>Personalised ranking with diversity</strong>, RecSys
+ * 2013.
+ * 
+ * <p>
+ * Builds on {@link RankSGD}: for every rated pair (u, i) an unrated item j is
+ * sampled by popularity, and the pairwise error is weighted by the relative
+ * importance of j and by a dissimilarity term derived from the item
+ * correlations.
+ * </p>
+ * 
+ * <p>
+ * Related Work:
+ * <ul>
+ * <li>Jahrer and Toscher, Collaborative Filtering Ensemble for Ranking, JMLR,
+ * 2012 (KDD Cup 2011 Track 2).</li>
+ * </ul>
+ * </p>
+ * 
+ * @author guoguibing
+ * 
+ */
+public class PRankD extends RankSGD {
+
+	// item importance: number of training users of an item divided by the
+	// largest such number over all items
+	private DenseVector s;
+
+	// item correlations
+	private SymmMatrix itemCorrs;
+
+	// similarity filter: multiplies the similarity before tanh is applied;
+	// read from the configuration key "PRankD.alpha"
+	private double alpha;
+
+	public PRankD(SparseMatrix trainMatrix, SparseMatrix testMatrix, int fold) {
+		super(trainMatrix, testMatrix, fold);
+
+		algoName = "PRankD";
+		isRankingPred = true;
+	}
+
+	/**
+	 * Initializes the model: item sampling probabilities, relative item
+	 * importance, the similarity filter and the item correlations.
+	 */
+	@Override
+	protected void initModel() {
+		super.initModel();
+
+		// compute item sampling probability
+		Map<Integer, Double> itemProbsMap = new HashMap<>();
+		double maxUsers = 0;
+
+		s = new DenseVector(numItems);
+		for (int j = 0; j < numItems; j++) {
+			int users = trainMatrix.columnSize(j);
+
+			if (maxUsers < users)
+				maxUsers = users;
+
+			s.set(j, users);
+
+			// sample items based on popularity; items without any rating are
+			// left out so that they can never be drawn
+			double prob = (users + 0.0) / numRates;
+			if (prob > 0)
+				itemProbsMap.put(j, prob);
+		}
+		itemProbs = Lists.sortMap(itemProbsMap);
+
+		// compute item relative importance; skipped when no item has any
+		// rating, because dividing by zero would fill s with NaN
+		if (maxUsers > 0) {
+			for (int j = 0; j < numItems; j++) {
+				s.set(j, s.get(j) / maxUsers);
+			}
+		}
+
+		alpha = cf.getDouble("PRankD.alpha");
+
+		// compute item correlations by cosine similarity
+		itemCorrs = buildCorrs(false);
+	}
+
+	/**
+	 * Transformed item similarity: the "cos-binary" correlation of the two
+	 * vectors, with NaN replaced by 0, passed through tanh(alpha * sim).
+	 * 
+	 * NOTE(review): meant to replace the similarity used while building the
+	 * correlations; the original carries no @Override, so confirm the
+	 * signature against the superclass.
+	 * 
+	 * @param iv
+	 *            first item vector
+	 * @param jv
+	 *            second item vector
+	 * @return tanh(alpha * sim)
+	 */
+	protected double correlation(SparseVector iv, SparseVector jv) {
+		double sim = correlation(iv, jv, "cos-binary");
+
+		if (Double.isNaN(sim))
+			sim = 0.0;
+
+		// to obtain a greater spread of diversity values
+		return Math.tanh(alpha * sim);
+	}
+
+	/**
+	 * Trains the factor matrices by stochastic gradient descent over all rated
+	 * (u, i) pairs, for at most maxIters iterations or until isConverged
+	 * returns true.
+	 */
+	@Override
+	protected void buildModel() {
+		for (int iter = 1; iter <= maxIters; iter++) {
+
+			errs = 0;
+			loss = 0;
+
+			// for each rated user-item (u,i) pair
+			for (int u : trainMatrix.rows()) {
+
+				SparseVector Ru = trainMatrix.row(u);
+				for (VectorEntry ve : Ru) {
+					// each rated item i
+					int i = ve.index();
+					double rui = ve.get();
+
+					// an item j that user u has not rated; its rating is
+					// taken to be 0
+					int j = sampleUnratedItem(Ru);
+					double ruj = 0;
+
+					// compute predictions
+					double pui = predict(u, i), puj = predict(u, j);
+
+					// dissimilarity of i and j, and importance of j
+					double dij = Math.sqrt(1 - itemCorrs.get(i, j));
+					double sj = s.get(j);
+
+					double e = sj * (pui - puj - dij * (rui - ruj));
+
+					errs += e * e;
+					loss += e * e;
+
+					// update vectors; the old factor values are read before
+					// any of them is modified
+					double ye = lRate * e;
+					for (int f = 0; f < numFactors; f++) {
+						double puf = P.get(u, f);
+						double qif = Q.get(i, f);
+						double qjf = Q.get(j, f);
+
+						P.add(u, f, -ye * (qif - qjf));
+						Q.add(i, f, -ye * puf);
+						Q.add(j, f, ye * puf);
+					}
+				}
+			}
+
+			errs *= 0.5;
+			loss *= 0.5;
+
+			if (isConverged(iter))
+				break;
+		}
+	}
+
+	/**
+	 * Draws an item with probability proportional to its popularity and
+	 * repeats until the drawn item is not contained in the given rating vector.
+	 * 
+	 * A draw can select no item at all when the accumulated probabilities stay
+	 * below the random number because of floating-point rounding; such a draw
+	 * is repeated instead of yielding the invalid index -1.
+	 * 
+	 * NOTE: the loop does not terminate if the user has rated every item in
+	 * itemProbs.
+	 * 
+	 * @param Ru
+	 *            ratings of the current user
+	 * @return index of an item that is not contained in Ru
+	 */
+	private int sampleUnratedItem(SparseVector Ru) {
+		while (true) {
+			int j = -1;
+
+			// draw an item j with probability proportional to popularity
+			double sum = 0, rand = Randoms.random();
+			for (KeyValPair<Integer> en : itemProbs) {
+				int k = en.getKey();
+				double prob = en.getValue();
+
+				sum += prob;
+				if (sum >= rand) {
+					j = k;
+					break;
+				}
+			}
+
+			// ensure that an item was drawn and that it is unrated by user u
+			if (j >= 0 && !Ru.contains(j))
+				return j;
+		}
+	}
+
+	@Override
+	public String toString() {
+		return super.toString() + "," + (float) alpha;
+	}
+}
diff --git a/librec/src/main/java/librec/main/LibRec.java b/librec/src/main/java/librec/main/LibRec.java
@@ -41,12 +41,14 @@
 import librec.ext.Hybrid;
 import librec.ext.NMF;
 import librec.ext.PD;
+import librec.ext.PRankD;
 import librec.ext.SlopeOne;
 import librec.intf.Recommender;
 import librec.intf.Recommender.Measure;
 import librec.ranking.BPRMF;
 import librec.ranking.CLiMF;
 import librec.ranking.RankALS;
+import librec.ranking.RankSGD;
 import librec.ranking.WRMF;
 import librec.rating.BPMF;
 import librec.rating.BiasedMF;
@@ -277,6 +279,8 @@ private static Recommender getRecommender(SparseMatrix[] data, int fold) throws
 			return new CLiMF(trainMatrix, testMatrix, fold);
 		case "rankals":
 			return new RankALS(trainMatrix, testMatrix, fold);
+		case "ranksgd":
+			return new RankSGD(trainMatrix, testMatrix, fold);
 		case "wrmf":
 			return new WRMF(trainMatrix, testMatrix, fold);
 		case "bprmf":
@@ -293,6 +297,8 @@ private static Recommender getRecommender(SparseMatrix[] data, int fold) throws
 			return new PD(trainMatrix, testMatrix, fold);
 		case "ar":
 			return new AR(trainMatrix, testMatrix, fold);
+		case "prankd":
+			return new PRankD(trainMatrix, testMatrix, fold);
 
 		default:
 			throw new Exception("No recommender is specified!");
@@ -309,10 +315,10 @@ private static void debugInfo() {
 		float ratio = (float) cf.getDouble("val.ratio");
 		int givenN = cf.getInt("num.given.n");
 		float givenRatio = cf.getFloat("val.given.ratio");
-		
+
 		String cvInfo = cf.isOn("is.cross.validation") ? cv : (ratio > 0 ? "ratio: " + ratio : "given: "
 				+ (givenN > 0 ? givenN : givenRatio));
-		
+
 		String testPath = cf.getPath("dataset.testing");
 		boolean isTestingFlie = !testPath.equals("-1");
 		String mode = isTestingFlie ? String.format("Testing:: %s.", Strings.last(testPath, 38)) : cvInfo;

diff --git a/librec/src/main/java/librec/ranking/RankSGD.java b/librec/src/main/java/librec/ranking/RankSGD.java
@@ -0,0 +1,131 @@
+// Copyright (C) 2014 Guibing Guo
+//
+// This file is part of LibRec.
+//
+// LibRec is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// LibRec is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with LibRec. If not, see <http://www.gnu.org/licenses/>.
+//
+
+package librec.ranking;
+
+import happy.coding.io.KeyValPair;
+import happy.coding.io.Strings;
+import happy.coding.math.Randoms;
+
+import java.util.List;
+
+import librec.data.SparseMatrix;
+import librec.data.SparseVector;
+import librec.data.VectorEntry;
+import librec.intf.IterativeRecommender;
+
+/**
+ * Jahrer and Toscher, Collaborative Filtering Ensemble for Ranking, JMLR, 2012
+ * (KDD Cup 2011 Track 2).
+ * 
+ * <p>
+ * For every rated pair (u, i) an unrated item j is sampled by popularity and
+ * the factors are updated by stochastic gradient descent on the squared
+ * difference between the predicted gap (pui - puj) and the rating gap
+ * (rui - ruj), where ruj is taken to be 0.
+ * </p>
+ * 
+ * @author guoguibing
+ * 
+ */
+public class RankSGD extends IterativeRecommender {
+
+	// item sampling probabilities, in the order returned by Lists.sortMap
+	protected List<KeyValPair<Integer>> itemProbs;
+
+	public RankSGD(SparseMatrix trainMatrix, SparseMatrix testMatrix, int fold) {
+		super(trainMatrix, testMatrix, fold);
+
+		algoName = "RankSGD";
+		isRankingPred = true;
+	}
+
+	/**
+	 * Initializes the model and the item sampling probabilities. Without the
+	 * latter, itemProbs would still be null when buildModel iterates over it.
+	 */
+	@Override
+	protected void initModel() {
+		super.initModel();
+
+		// pre-processing: binarize training data
+		// super.binary(trainMatrix);
+		// super.binary(testMatrix); 
+
+		// compute item sampling probability
+		java.util.Map<Integer, Double> itemProbsMap = new java.util.HashMap<>();
+		for (int j = 0; j < numItems; j++) {
+			int users = trainMatrix.columnSize(j);
+
+			// sample items based on popularity; items without any rating are
+			// left out so that they can never be drawn
+			double prob = (users + 0.0) / numRates;
+			if (prob > 0)
+				itemProbsMap.put(j, prob);
+		}
+		itemProbs = happy.coding.io.Lists.sortMap(itemProbsMap);
+	}
+
+	/**
+	 * Trains the factor matrices by stochastic gradient descent over all rated
+	 * (u, i) pairs, for at most maxIters iterations or until isConverged
+	 * returns true.
+	 */
+	@Override
+	protected void buildModel() {
+		for (int iter = 1; iter <= maxIters; iter++) {
+
+			errs = 0;
+			loss = 0;
+
+			// for each rated user-item (u,i) pair
+			for (int u : trainMatrix.rows()) {
+
+				SparseVector Ru = trainMatrix.row(u);
+				for (VectorEntry ve : Ru) {
+					// each rated item i
+					int i = ve.index();
+					double rui = ve.get();
+
+					// an item j that user u has not rated; its rating is
+					// taken to be 0
+					int j = sampleUnratedItem(Ru);
+					double ruj = 0;
+
+					// compute predictions
+					double pui = predict(u, i), puj = predict(u, j);
+
+					double e = (pui - puj) - (rui - ruj);
+
+					errs += e * e;
+					loss += e * e;
+
+					// update vectors; the old factor values are read before
+					// any of them is modified
+					double ye = lRate * e;
+					for (int f = 0; f < numFactors; f++) {
+						double puf = P.get(u, f);
+						double qif = Q.get(i, f);
+						double qjf = Q.get(j, f);
+
+						P.add(u, f, -ye * (qif - qjf));
+						Q.add(i, f, -ye * puf);
+						Q.add(j, f, ye * puf);
+					}
+				}
+			}
+
+			errs *= 0.5;
+			loss *= 0.5;
+
+			if (isConverged(iter))
+				break;
+		}
+	}
+
+	/**
+	 * Draws an item with probability proportional to its popularity and
+	 * repeats until the drawn item is not contained in the given rating vector.
+	 * 
+	 * A draw can select no item at all when the accumulated probabilities stay
+	 * below the random number because of floating-point rounding; such a draw
+	 * is repeated instead of yielding the invalid index -1.
+	 * 
+	 * NOTE: the loop does not terminate if the user has rated every item in
+	 * itemProbs.
+	 * 
+	 * @param Ru
+	 *            ratings of the current user
+	 * @return index of an item that is not contained in Ru
+	 */
+	private int sampleUnratedItem(SparseVector Ru) {
+		while (true) {
+			int j = -1;
+
+			// draw an item j with probability proportional to popularity
+			double sum = 0, rand = Randoms.random();
+			for (KeyValPair<Integer> en : itemProbs) {
+				int k = en.getKey();
+				double prob = en.getValue();
+
+				sum += prob;
+				if (sum >= rand) {
+					j = k;
+					break;
+				}
+			}
+
+			// ensure that an item was drawn and that it is unrated by user u
+			if (j >= 0 && !Ru.contains(j))
+				return j;
+		}
+	}
+
+	@Override
+	public String toString() {
+		return Strings.toString(new Object[] { binThold, (float) lRate, maxIters }, ",");
+	}
+}
diff --git a/librec/src/main/resources/librec.conf b/librec/src/main/resources/librec.conf
@@ -73,6 +73,7 @@ BPRMF.reg.j=0.00025
 Hybrid.lambda=0.5
 
 PD.sigma=2.5
+PRankD.alpha=20
 
 RankALS.is.sw=on
 RSTE.alpha=0.4