基于社交网络的用户与基于物品的协同过滤推荐算法-java

完整工程+数据源:https://github.com/scnuxiaotao/recom_sys
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;

public class itemcf {
	/*
	 * 
	 *     主函数~
	 * 
	 */
	/*public static void main(String[] args) throws IOException { 
		
		_Run();
		
	}*/
	
	/*
	 * 
	 *     基于物品的实现~
	 * 
	 */
	
	
	
	
	
	
	static int usersum = 20836;     // number of users (default value)
	static int itemsum = 200;	// total number of items (default value)
	static int N = 3;           // number of items recommended per user
	static int[][] train; // training-set user/item rating matrix; allocated as [itemsum][usersum] in _Run
	static int[][] test;// test-set user/item rating matrix; allocated as [itemsum][usersum] in _Run
	static double[][] trainuseritem; // training-set user/item interest-score matrix; allocated as [usersum][itemsum] in _Run
	static int[][] recommend;  // the N items recommended for each user; allocated as [usersum][N] in _Run
	static simi [][]simiItem; // item similarity matrix after sorting (each row is sorted by quickSort)
	static double [][]itemsim; // item similarity matrix before sorting
	static String road = "data/6 总评论情感分析结果/酒店-评论(已转化).txt";  // data path; line format is userId::itemId::rating
	static String road2 = "data/10 推荐/(已转化)天河酒店ID.txt";
	static String road3 = "data/10 推荐/物品推荐.txt";
	public static String road4 = "data/10 推荐/(已转化)用户ID.txt";
	// One cell of the similarity matrix: a similarity value paired with the item it refers to.
	public static class simi
	{
		double value; // similarity value
		int num;	 // number of the similar item
	};
	
	public static void _Run() throws IOException {
		
		get_user_hotel_num();
		System.out.println("usersum: "+usersum);
		System.out.println("itemsum: "+itemsum);
		train = new int[itemsum][usersum]; train[0][0] = 0; //训练集合user item rate矩阵
		test = new int[itemsum][usersum]; test[0][0] = 0;  //测试集合user item rate矩阵
		trainuseritem = 
				new double[usersum][itemsum]; trainuseritem[0][0] = 0.0; //训练集合user item 兴趣程度 矩阵
		recommend = new int[usersum][N]; recommend[0][0] = 0;  //为每个用户推荐N个物品
		simiItem = new simi[itemsum][itemsum]; //排序后的相似性矩阵
		
		itemsim = new double[itemsum][itemsum]; //未排序的相似性矩阵

		int i,j,k = 8;        //去用户的k个最近邻居(相似度最高)来计算推荐物品
		
		for(i = 0 ;i < itemsum;++i)
			for(j = 0 ;j < itemsum;++j) simiItem[i][j] = new simi();
		
		System.out.println("1.训练集");
		SplitData(8,1); 
	    //输出初始化的矩阵
		/*for (i=0;i<10;i++)
	 	{
	 		System.out.println("Item"+i+":  ");
	 		for (j=0;j<5;j++)
	 		{
	 			System.out.print(train[i][j]+"  ");
	 		}
	 		System.out.println();
	 	}*/
		
		
		System.out.println("2.计算物品之间相似性,得到相似性矩阵");
		for (i=0;i0&&ItemB[i]>0)
			{
				comUser++;//查找ItemA与ItemB的都被用户评论的用户个数
			}
			if (ItemA[i]>0){
				countIa++;//评论ItemA的用户数量
			}
			if (ItemB[i]>0){
				countIb++;//评论ItemB的用户数量
			}
		}
		double tem = Math.sqrt(countIa*countIb);
		//double tem = 1;
		//System.out.println(tem);
		if(tem == 0)
		{
			return 0;
		}
		else
		{
	    	simility = comUser/tem;
		    return simility;
		}
		
	}


	/**
	 * Sorts one row of the item similarity matrix in place, from the highest
	 * similarity to the lowest, using a recursive quicksort.
	 *
	 * Only the contents of the cells (value and num) are exchanged; the simi
	 * objects themselves stay at their positions in the row.
	 *
	 * @param x     row of simiItem to sort
	 * @param start first index of the range to sort (inclusive)
	 * @param end   last index of the range to sort (inclusive)
	 */
	public static void quickSort(int x, int start, int end) {
		// Ranges of zero or one element are already sorted.
		if (start >= end) {
			return;
		}

		simi[] row = simiItem[x];
		// The first element of the range serves as the pivot.
		double pivot = row[start].value;
		int left = start;
		int right = end;

		do {
			// Skip cells on the left that already belong before the pivot (larger values).
			while (left < end && row[left].value > pivot) {
				left++;
			}
			// Skip cells on the right that already belong after the pivot (smaller values).
			while (right > start && row[right].value < pivot) {
				right--;
			}
			if (left <= right) {
				// Exchange the payload of the two cells.
				double heldValue = row[left].value;
				int heldNum = row[left].num;
				row[left].value = row[right].value;
				row[left].num = row[right].num;
				row[right].value = heldValue;
				row[right].num = heldNum;
				left++;
				right--;
			}
		} while (left <= right);

		// Recurse into the two partitions that still hold more than one element.
		if (start < right) {
			quickSort(x, start, right);
		}
		if (left < end) {
			quickSort(x, left, end);
		}
	}
	public static int sort()
	{
		for (int i=0;i0)//若这个用户同样对相似物品也有过行为
			{
				trainuseritem[i][j]+=simiItem[j][x].value;
			}
		}
		return trainuseritem[i][j];
	}
	
	/*通过物品兴趣程度,推荐前N个*/ 
	public static int getRecommend() //有bug,已修改
	{
		int maxnum;//当前最感兴趣物品号
		for(int i=0;i


package WjPack;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * User-based collaborative filtering restricted to the social graph.
 *
 * For every user, the users they are linked to in the follow file are compared
 * by the Euclidean distance of their ratings on shared items. When a linked
 * user has exactly the same ratings on all shared items (distance 0), every
 * item that user rated and the target user did not is recommended.
 *
 * Entry point: {@link #run()}. The ratings file is loaded once when the class
 * is initialised.
 */
public class new_ojld_dis {

    static String road_main = "data";
    /** Ratings file; each line is userId::itemId::rating. */
    static String road = road_main + "/6 总评论情感分析结果/酒店-评论(已转化).txt";
    /** Follow relations; each line is userId::relatedUserId. */
    static String road2 = road_main + "/10 推荐/(已转化)用户-关注.txt";
    /** Hotel table; each line is parsed as hotelName::index by get_hotelid. */
    static String road3 = road_main + "/10 推荐/(已转化)天河酒店ID.txt";
    /** Output file written by run(). */
    static String road4 = road_main + "/10 推荐/用户推荐.txt";
    /** User table; only its line count is used (see get_user_hotel_num). */
    static String road5 = road_main + "/10 推荐/(已转化)用户ID.txt";

    /** Number of users; the default is replaced by get_user_hotel_num(). */
    static int usersum = 20836;
    /** Number of items; the default is replaced by get_user_hotel_num(). */
    static int itemsum = 200;

    /** Distance reported when two users share no rated item. */
    private static final double NO_SHARED_ITEMS = 100;

    /** userId -> (itemId -> rating). */
    static Map<String, Map<String, Integer>> score = new HashMap<>();
    static Set<String> userSet = new HashSet<>();
    static Set<String> filmSet = new HashSet<>();

    /** Writer of the current run(); only valid while run() is executing. */
    static FileWriter txtw;

    /** Recommendations collected for the user currently processed, as "::name::name...". */
    static String tjhotel = "";

    /** Every user id found in the ratings file, once each, in file order. */
    static ArrayList<String> arr = new ArrayList<>();

    /** Hotel names by index; grown on demand by get_hotelid. */
    static String[] hotel = new String[201];

    /** Follow relations of the file named by followCachePath; null until first use. */
    private static Map<String, Set<String>> followCache;
    private static String followCachePath;

    static {
        try {
            score = get_score_from_road();
        } catch (IOException e) {
            // Best effort, as before: the class stays usable with whatever was
            // loaded, but the failure is no longer silent.
            System.err.println("new_ojld_dis: could not load ratings from " + road + ": " + e);
        }
    }

    /**
     * Computes the recommendations of every loaded user and writes one line
     * "user" + userId + "::hotel::hotel..." to road4 for each user that
     * received at least one recommendation.
     *
     * @throws IOException if the hotel table, the follow file or the output
     *                     file cannot be read or written
     */
    public static void run() throws IOException {
        try (FileWriter out = new FileWriter(road4)) {
            txtw = out;
            get_hotelid(road3);
            // Iterate over the users that were actually loaded rather than a
            // hard-coded count.
            for (String user : arr) {
                tjhotel = "";
                outNearbyUserList(user);
                if (tjhotel.length() > 1) {
                    out.write("user" + user + tjhotel + "\r\n");
                }
            }
        }
    }

    /**
     * Refreshes usersum and itemsum from the data files and prints both.
     * A read failure is reported and leaves the previous values in place.
     */
    public static void init() {
        try {
            get_user_hotel_num();
        } catch (IOException e1) {
            e1.printStackTrace();
        }
        System.out.println(usersum);
        System.out.println(itemsum);
    }

    /**
     * Sets itemsum to the number of lines of road3 and usersum to the number
     * of lines of road5.
     *
     * @throws IOException if either file cannot be read
     */
    public static void get_user_hotel_num() throws IOException {
        itemsum = countLines(road3);
        usersum = countLines(road5);
    }

    /** Returns the number of lines in the given file. */
    private static int countLines(String path) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            int lines = 0;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        }
    }

    /**
     * Loads the ratings file (road) and registers every user in arr.
     *
     * Lines with fewer than three "::"-separated fields are skipped. Records of
     * one user that are not contiguous in the file are merged into one map, and
     * the user is added to arr only once.
     *
     * @return userId -> (itemId -> rating)
     * @throws IOException           if the ratings file cannot be read
     * @throws NumberFormatException if a rating is not an integer
     */
    public static Map<String, Map<String, Integer>> get_score_from_road() throws IOException {
        init();
        Map<String, Map<String, Integer>> loaded = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(road))) {
            String currentUser = null;
            Map<String, Integer> currentRatings = null;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 3) {
                    continue; // blank or malformed line
                }
                String user = fields[0];
                if (!user.equals(currentUser)) {
                    currentUser = user;
                    currentRatings = loaded.get(user);
                    if (currentRatings == null) {
                        currentRatings = new HashMap<>();
                        loaded.put(user, currentRatings);
                        arr.add(user);
                    }
                }
                currentRatings.put(fields[1], Integer.valueOf(fields[2]));
            }
        }
        return loaded;
    }

    /**
     * Compares the given user with every loaded user they are linked to in the
     * follow file. Recommendations found on the way are appended to tjhotel;
     * nothing is returned or printed.
     *
     * Users without ratings are ignored. The follow file is read on first use
     * and again only when road2 is changed, so edits made to the file on disk
     * afterwards are not picked up.
     *
     * @param user id of the user to build recommendations for
     * @throws IOException if the follow file cannot be read
     */
    public static void outNearbyUserList(String user) throws IOException {
        if (!score.containsKey(user)) {
            return;
        }
        Set<String> linked = linkedUsersOf(user);
        if (linked.isEmpty()) {
            return;
        }
        for (String candidate : arr) {
            if (candidate == null || candidate.equals(user) || !linked.contains(candidate)) {
                continue;
            }
            getOSScore(user, candidate);
        }
    }

    /** Returns the ids listed next to the given user in the follow file (possibly empty). */
    private static Set<String> linkedUsersOf(String user) throws IOException {
        if (followCache == null || !road2.equals(followCachePath)) {
            followCache = loadFollowRelations(road2);
            followCachePath = road2;
        }
        Set<String> linked = followCache.get(user);
        return linked != null ? linked : new HashSet<String>();
    }

    /** Reads a follow file into a map of first field -> set of second fields. */
    private static Map<String, Set<String>> loadFollowRelations(String path) throws IOException {
        Map<String, Set<String>> relations = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue; // blank or malformed line
                }
                Set<String> linked = relations.get(fields[0]);
                if (linked == null) {
                    linked = new HashSet<>();
                    relations.put(fields[0], linked);
                }
                linked.add(fields[1]);
            }
        }
        return relations;
    }

    /**
     * Euclidean distance between the ratings two users gave to the items they
     * both rated: sqrt(sum((a - b)^2)).
     *
     * The previous code overwrote the result for every shared item, so the
     * value reflected a single item chosen by HashMap iteration order; the
     * differences are now accumulated over all shared items.
     *
     * Side effect: when the distance is 0, the name of every item rated by
     * user2 but not by user1 is appended to tjhotel, each prefixed with "::".
     *
     * @return the distance, or 100 when either user has no ratings or the two
     *         users share no item
     * @throws NumberFormatException if an item id of user2 is not an integer
     */
    private static Double getOSScore(String user1, String user2) throws NumberFormatException, IOException {
        Map<String, Integer> ratings1 = score.get(user1);
        Map<String, Integer> ratings2 = score.get(user2);
        if (ratings1 == null || ratings2 == null) {
            return NO_SHARED_ITEMS;
        }

        long squaredDifferences = 0;
        int sharedItems = 0;
        for (Map.Entry<String, Integer> rated : ratings1.entrySet()) {
            Integer other = ratings2.get(rated.getKey());
            if (other == null) {
                continue;
            }
            long difference = rated.getValue().longValue() - other.longValue();
            squaredDifferences += difference * difference;
            sharedItems++;
        }
        if (sharedItems == 0) {
            return NO_SHARED_ITEMS;
        }

        double distance = Math.sqrt(squaredDifferences);
        if (distance == 0) {
            for (String film : ratings2.keySet()) {
                if (ratings1.get(film) == null) {
                    tjhotel += "::" + hotel[Integer.parseInt(film)];
                }
            }
        }
        return distance;
    }

    /**
     * Fills the hotel table from a file whose lines are hotelName::index.
     * Lines with fewer than two fields are skipped, and the table grows when an
     * index does not fit.
     *
     * @param r1 path of the hotel table file
     * @throws IOException           if the file cannot be read
     * @throws NumberFormatException if an index is not an integer
     */
    public static void get_hotelid(String r1) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(r1))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("::");
                if (fields.length < 2) {
                    continue; // blank or malformed line
                }
                int index = Integer.parseInt(fields[1]);
                if (index >= hotel.length) {
                    String[] grown = new String[index + 1];
                    System.arraycopy(hotel, 0, grown, 0, hotel.length);
                    hotel = grown;
                }
                hotel[index] = fields[0];
            }
        }
    }

}

这是课程设计时写的代码,可以正常使用。虽然没怎么写注释,但并不难看懂,先了解一下原理再看代码就差不多了~


PS:因为抓到的用户ID和酒店ID都是类似434132这么长的编号,为了方便用数组存储,我事先把它们全部转化为从1开始的编号。比如有两个ID 4654654、32131321,我就把它们转成1、2;推荐完成后再把1、2转回4654654、32131321。

你可能感兴趣的:(算法与数据结构)