基于项亮编著的《推荐系统实践》一书中的基于用户的协同过滤算法(UserCF)。
使用矩阵表示用户-商品关系,并使用类似矩阵乘法的位运算计算用户之间的相似度。
代码:
package gt.small;

import java.util.Arrays;
import java.util.BitSet;

/**
 * Small demo of user-based collaborative filtering: builds a 4-user / 5-item
 * data set, computes the user-user similarities and prints one recommendation.
 */
public class UserSimilarity {

    /**
     * Runs the demo and prints the index of the recommended item
     * (or -1 when nothing can be recommended).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Example from "Recommender System in Practice" by Xiang Liang, page 47.
        UserCF ucf = new UserCF(4, 5);
        like(ucf, 0, 0, 1, 3);
        like(ucf, 1, 0, 2);
        like(ucf, 2, 1, 4);
        like(ucf, 3, 2, 3, 4);

        ucf.calculateSimilarity();
        int item = ucf.recommend(0, 3);
        System.out.println(item);
    }

    /** Marks every item in {@code items} as liked by the user at {@code userIndex}. */
    private static void like(UserCF ucf, int userIndex, int... items) {
        User u = ucf.getUser(userIndex);
        for (int item : items) {
            u.set(item);
        }
    }
}

/**
 * User-based collaborative filtering over a fixed number of users and items.
 *
 * <p>Each user's items are stored as a bit set; the similarity of two users is
 * {@code |A ∩ B| / sqrt(|A| * |B|)}, computed with bit operations.
 */
class UserCF {
    // Pairwise user similarities. Only cells (i, j) with i < j are written by
    // calculateSimilarity(); reads use (user, other) in either order.
    // NOTE(review): this presumes DTMatrix.get is symmetric and returns 0 for
    // cells that were never set -- DTMatrix is not visible here, please confirm.
    private DTMatrix similarity;
    private User[] users; // all users, addressed by index
    private int itemCount;

    /**
     * @param userCount number of users; each one starts with no items
     * @param itemCount number of items; item indexes are {@code 0 .. itemCount - 1}
     */
    public UserCF(int userCount, int itemCount) {
        similarity = new DTMatrix(userCount);
        users = new User[userCount];
        for (int i = 0; i < userCount; ++i) {
            users[i] = new User();
        }
        this.itemCount = itemCount;
    }

    /**
     * @param index user index
     * @return the (mutable) user stored at {@code index}
     */
    public User getUser(int index) {
        return users[index];
    }

    /**
     * Computes the similarity of every user pair {@code (i, j)} with
     * {@code i < j} and stores it in the similarity matrix.
     *
     * <p>A pair in which either user has no items gets similarity 0 instead of
     * the {@code NaN} that {@code 0 / sqrt(0)} would produce.
     */
    public void calculateSimilarity() {
        for (int i = 0; i < users.length; ++i) {
            User a = users[i];
            int na = a.getN();
            for (int j = i + 1; j < users.length; ++j) {
                User b = users[j];
                int nb = b.getN();
                int ni = a.intersect(b);
                // Multiply as double so large item counts cannot overflow int.
                double denominator = Math.sqrt((double) na * nb);
                similarity.set(i, j, denominator == 0 ? 0 : ni / denominator);
            }
        }
    }

    /**
     * Insertion-sort step: places {@code v} into {@code indexes[0 .. count]},
     * whose first {@code count} entries are ordered by descending similarity to
     * {@code user}. Entries with a strictly smaller similarity than {@code v}
     * are shifted one slot to the right; whatever was stored in
     * {@code indexes[count]} is overwritten.
     *
     * @param user    user the similarities are measured against
     * @param indexes neighbour indexes, sorted for the first {@code count} slots
     * @param v       user index to insert
     * @param count   number of valid entries; must be {@code < indexes.length}
     */
    private void insert(int user, int[] indexes, int v, int count) {
        double candidate = similarity.get(user, v);
        int pos = count;
        while (pos > 0 && similarity.get(user, indexes[pos - 1]) < candidate) {
            indexes[pos] = indexes[pos - 1];
            --pos;
        }
        indexes[pos] = v;
    }

    /**
     * Returns the users most similar to {@code user}, best first.
     *
     * <p>The user itself and users whose similarity is not positive are never
     * returned, so the result may hold fewer than {@code k} entries (it is
     * empty when {@code k <= 0}).
     *
     * @param user index of the user to find neighbours for
     * @param k    maximum number of neighbours
     * @return neighbour indexes ordered by descending similarity
     */
    private int[] topK(int user, int k) {
        int capacity = Math.min(k, users.length - 1);
        if (capacity <= 0) {
            return new int[0];
        }
        int[] indexes = new int[capacity];
        int count = 0;
        for (int i = 0; i < users.length; ++i) {
            if (i == user) {
                continue;
            }
            double s = similarity.get(user, i);
            if (!(s > 0)) { // also rejects NaN
                continue;
            }
            if (count < capacity) {
                // Still room: plain sorted insert.
                insert(user, indexes, i, count);
                ++count;
            } else if (s > similarity.get(user, indexes[capacity - 1])) {
                // Full: the current weakest neighbour (last slot) is dropped.
                insert(user, indexes, i, capacity - 1);
            }
        }
        return count == capacity ? indexes : Arrays.copyOf(indexes, count);
    }

    /**
     * Recommends one item to {@code user} using its {@code k} nearest
     * neighbours. An item's score is the sum of the similarities of the
     * neighbours that have it; items the user already has are not considered.
     * On equal scores the lowest item index wins.
     *
     * <p>{@link #calculateSimilarity()} must have been called beforehand.
     *
     * @param user index of the user to recommend for
     * @param k    number of nearest neighbours to consult
     * @return index of the best-scoring item, or -1 if no item has a positive score
     */
    public int recommend(int user, int k) {
        int[] neighbours = topK(user, k);

        // Candidates: everything the neighbours have that the user does not.
        BitSet candidates = new BitSet(itemCount);
        for (int neighbour : neighbours) {
            candidates.or(users[neighbour].getItems());
        }
        candidates.andNot(users[user].getItems());

        int item = -1;
        double weight = 0;
        for (int i = candidates.nextSetBit(0); i >= 0 && i < itemCount; i = candidates.nextSetBit(i + 1)) {
            double w = 0;
            for (int neighbour : neighbours) {
                if (users[neighbour].get(i)) {
                    w += similarity.get(user, neighbour);
                }
            }
            if (w > weight) {
                item = i;
                weight = w;
            }
        }
        return item;
    }
}

/** One user's item set, stored as a bit set indexed by item. */
class User {
    private final BitSet items = new BitSet(); // bit i set <=> user has item i
    // Cached items.cardinality(); -1 means "not computed / invalidated by set()".
    private int n = -1;

    /** Marks {@code item} as belonging to this user and invalidates the cached count. */
    public void set(int item) {
        items.set(item);
        n = -1;
    }

    /** @return whether this user has {@code item} */
    public boolean get(int item) {
        return items.get(item);
    }

    /**
     * @return the live internal bit set (not a copy); modifying it directly
     *         does not invalidate the count cached by {@link #getN()}
     */
    public BitSet getItems() {
        return items;
    }

    /** @return the number of items this user has, cached until the next {@link #set(int)} */
    public int getN() {
        if (n == -1) {
            n = items.cardinality();
        }
        return n;
    }

    /**
     * @param u the other user
     * @return the number of items both users have; 0 when {@code u} is this
     *         same instance (explicit special case)
     */
    public int intersect(User u) {
        if (u == this) {
            return 0;
        }
        BitSet tmp = (BitSet) items.clone();
        tmp.and(u.items);
        return tmp.cardinality();
    }

    /** @return an independent copy of this user's item set */
    public BitSet getCopy() {
        return (BitSet) this.items.clone();
    }
}