基于物品的协同过滤算法

/**
 * Item-based collaborative filtering over novel tags.
 *
 * <p>Each novel carries a {@code |}-separated tag string. Two tags co-occur when one novel carries
 * both of them, and the similarity of tags {@code i} and {@code j} is
 * {@code gongXian[i][j] / sqrt(N[i] * N[j])}. For every tag of the input novel the most similar
 * other tag is looked up ({@link #getMaxTags()}), and the novels carrying the most of those tags
 * are returned ({@link #run()}).
 */
@Data
public class ItemCF {
    /** Number of book ids returned by {@link #run()}. */
    private static final int TOP_N = 5;

    private List<itemCFData> novelInfos;            // candidate novels
    private BiMap<Integer,String> allTags = new BiMap<>(new HashMap<>());      // matrix index <-> tag
    private itemCFData testInfos;                   // the input novel
    private int[][] gongXian;                       // tag co-occurrence matrix
    private int[][] gongXianTmp;                    // no longer used by the algorithm; kept so the generated accessors still exist
    private int[] N;                                // number of counted novels carrying each tag

    /**
     * @param testInfos  the novel to find recommendations for
     * @param novelInfos the candidate novels; also the source of the tag index
     */
    public ItemCF(itemCFData testInfos, List<itemCFData> novelInfos) {
        this.testInfos = testInfos;
        this.novelInfos = novelInfos;
        init(); // build the tag index and allocate the matrices
    }

    /**
     * Assigns a matrix index to every distinct tag found in {@code novelInfos} and allocates the
     * co-occurrence matrix and the per-tag counters. The counts themselves are filled in by
     * {@link #getMaxTags()}.
     */
    public void init() {
        HashSet<String> distinctTags = new HashSet<>();
        for (itemCFData novelInfo : novelInfos) {
            distinctTags.addAll(tagsOf(novelInfo));
        }
        int index = 0;
        for (String tag : distinctTags) {
            allTags.put(index, tag);
            index++;
        }
        int tagCount = allTags.size();
        gongXian = new int[tagCount][tagCount];
        gongXianTmp = new int[tagCount][tagCount];
        N = new int[tagCount];
    }

    /**
     * Returns, for each tag of the input novel, the other tag with the highest similarity.
     *
     * <p>The co-occurrence counts are rebuilt from scratch on every call, from all novels whose id
     * differs from the input novel's id. A tag of the input novel contributes nothing when it is
     * missing from the tag index, when no counted novel carries it, or when it never co-occurs
     * with another tag (similarity 0 everywhere).
     *
     * @return the most similar tags; may be empty and may contain the same tag more than once
     */
    public ArrayList<String> getMaxTags() {
        int tagCount = allTags.size();
        // Start from zero so that repeated calls do not keep adding to earlier counts.
        gongXian = new int[tagCount][tagCount];
        N = new int[tagCount];

        for (itemCFData novelInfo : novelInfos) {
            if (novelInfo.getOldBookid().equals(testInfos.getOldBookid())) {    // skip the input novel itself
                continue;
            }
            List<Integer> keys = indicesOf(tagsOf(novelInfo));
            for (int a = 0; a < keys.size(); a++) {
                int key1 = keys.get(a);
                N[key1]++;
                for (int b = a + 1; b < keys.size(); b++) {
                    int key2 = keys.get(b);
                    // symmetric matrix: count the pair in both directions
                    gongXian[key1][key2]++;
                    gongXian[key2][key1]++;
                }
            }
        }

        ArrayList<String> maxTags = new ArrayList<>();
        for (String tag : tagsOf(testInfos)) {
            Integer key = allTags.getKey(tag);
            if (key == null || N[key] == 0) {
                continue;   // tag unknown among the counted novels: no similarity can be computed
            }
            int i = key;
            int bestTag = -1;
            double bestScore = 0;
            for (int j = 0; j < tagCount; j++) {
                if (j == i || N[j] == 0) {
                    continue;   // N[j] == 0 would divide by zero
                }
                // Multiply as double: the int product N[i] * N[j] can overflow.
                double wij = gongXian[i][j] / Math.sqrt((double) N[i] * N[j]);
                if (wij > bestScore) {
                    bestScore = wij;
                    bestTag = j;
                }
            }
            if (bestTag >= 0) {
                maxTags.add(allTags.get(bestTag));
            }
        }
        return maxTags;
    }

    /**
     * Counts, for every novel, how many entries of {@link #getMaxTags()} it carries (exact tag
     * match) and returns the ids of the novels with the highest counts.
     *
     * <p>NOTE(review): the input novel is not excluded here, so it can appear among its own
     * recommendations - confirm whether callers filter it out.
     *
     * @return an array of length 5 ordered by descending count; only novels with a count above
     *         zero are included, unused trailing slots are 0
     */
    public int[] run() {
        ArrayList<String> maxTags = this.getMaxTags();

        // book id -> number of most-similar tags the novel carries
        Map<Integer,Integer> results = new HashMap<>();
        for (itemCFData novelInfo : novelInfos) {
            List<String> novelTags = tagsOf(novelInfo);
            int hits = 0;
            for (String maxTag : maxTags) {
                // Compare whole tags; a substring test on the raw string would let "A" match "AB".
                if (novelTags.contains(maxTag)) {
                    hits++;
                }
            }
            int bookId = novelInfo.getOldBookid();
            results.put(bookId, hits);
        }

        // Repeatedly take the remaining novel with the highest count.
        int[] maxBookids = new int[TOP_N];
        for (int i = 0; i < TOP_N; i++) {
            Integer bestId = null;
            int best = 0;
            for (Map.Entry<Integer, Integer> entry : results.entrySet()) {
                if (entry.getValue() > best) {
                    best = entry.getValue();
                    bestId = entry.getKey();
                }
            }
            if (bestId == null) {
                break;  // nothing left with a count above zero
            }
            results.remove(bestId);
            maxBookids[i] = bestId;
        }
        return maxBookids;
    }

    /** Splits a novel's tag string on {@code |}, dropping empty and repeated tags; a null tag string yields an empty list. */
    private static List<String> tagsOf(itemCFData novel) {
        List<String> tags = new ArrayList<>();
        String raw = novel.getTag();
        if (raw == null) {
            return tags;
        }
        for (String tag : raw.split("\\|")) {
            if (!tag.isEmpty() && !tags.contains(tag)) {
                tags.add(tag);
            }
        }
        return tags;
    }

    /** Maps tags to their matrix indices, skipping tags that are not in the index. */
    private List<Integer> indicesOf(List<String> tags) {
        List<Integer> keys = new ArrayList<>();
        for (String tag : tags) {
            Integer key = allTags.getKey(tag);
            if (key != null) {
                keys.add(key);
            }
        }
        return keys;
    }
}

调整改进版

/**
 * Item-based collaborative filtering over novel tags (revised version).
 *
 * <p>Each novel carries a {@code |}-separated tag string. Two tags co-occur when one novel carries
 * both of them, and the similarity of tags {@code i} and {@code j} is
 * {@code gongXian[i][j] / sqrt(N[i] * N[j])}. For every tag of the input novel the most similar
 * other tag is looked up ({@link #getMaxTags()}); {@link #run()} then picks at random among the
 * novels that carry the most of those tags.
 */
public class ItemCF {
    /** Number of book ids returned by {@link #run()}. */
    private static final int RECOMMEND_COUNT = 6;

    private List<itemCFData> novelInfos;            // candidate novels
    private BiMap<Integer,String> allTags = new BiMap<>(new HashMap<>());      // matrix index <-> tag
    private itemCFData testInfos;                   // the input novel
    private int[][] gongXian;                       // tag co-occurrence matrix
    private int[] N;                                // number of counted novels carrying each tag

    /**
     * @param testInfos  the novel to find recommendations for
     * @param novelInfos the candidate novels; also the source of the tag index
     */
    public ItemCF(itemCFData testInfos, List<itemCFData> novelInfos) {
        this.testInfos = testInfos;
        this.novelInfos = novelInfos;
        init(); // build the tag index and allocate the matrices
    }

    /**
     * Assigns a matrix index to every distinct tag found in {@code novelInfos} and allocates the
     * co-occurrence matrix and the per-tag counters. Novels without a tag string contribute no
     * tags. The counts themselves are filled in by {@link #getMaxTags()}.
     */
    public void init() {
        HashSet<String> distinctTags = new HashSet<>();
        for (itemCFData novelInfo : novelInfos) {
            distinctTags.addAll(tagsOf(novelInfo));
        }
        int index = 0;
        for (String tag : distinctTags) {
            allTags.put(index, tag);
            index++;
        }
        int tagCount = allTags.size();
        gongXian = new int[tagCount][tagCount];
        N = new int[tagCount];
    }

    /**
     * Returns, for each tag of the input novel, the other tag with the highest similarity.
     *
     * <p>The co-occurrence counts are rebuilt from scratch on every call, from all novels whose id
     * differs from the input novel's id. A tag of the input novel contributes nothing when it is
     * missing from the tag index, when no counted novel carries it, or when it never co-occurs
     * with another tag (similarity 0 everywhere).
     *
     * @return the most similar tags; may be empty and may contain the same tag more than once
     */
    public ArrayList<String> getMaxTags() {
        int tagCount = allTags.size();
        // Start from zero so that repeated calls do not keep adding to earlier counts.
        gongXian = new int[tagCount][tagCount];
        N = new int[tagCount];

        for (itemCFData novelInfo : novelInfos) {
            if (novelInfo.getOldBookid().equals(testInfos.getOldBookid())) {    // skip the input novel itself
                continue;
            }
            List<Integer> keys = indicesOf(tagsOf(novelInfo));
            for (int a = 0; a < keys.size(); a++) {
                int key1 = keys.get(a);
                N[key1]++;
                for (int b = a + 1; b < keys.size(); b++) {
                    int key2 = keys.get(b);
                    // symmetric matrix: count the pair in both directions
                    gongXian[key1][key2]++;
                    gongXian[key2][key1]++;
                }
            }
        }

        ArrayList<String> maxTags = new ArrayList<>();
        for (String tag : tagsOf(testInfos)) {
            Integer key = allTags.getKey(tag);
            if (key == null || N[key] == 0) {
                continue;   // tag unknown among the counted novels: no similarity can be computed
            }
            int i = key;
            int bestTag = -1;
            double bestScore = 0;
            for (int j = 0; j < tagCount; j++) {
                if (j == i || N[j] == 0) {
                    continue;   // N[j] == 0 would divide by zero
                }
                // Multiply as double: the int product N[i] * N[j] can overflow.
                double wij = gongXian[i][j] / Math.sqrt((double) N[i] * N[j]);
                if (wij > bestScore) {
                    bestScore = wij;
                    bestTag = j;
                }
            }
            if (bestTag >= 0) {
                maxTags.add(allTags.get(bestTag));
            }
        }
        return maxTags;
    }

    /**
     * Counts, for every novel, how many entries of {@link #getMaxTags()} it carries (exact tag
     * match), keeps only the novels that share the highest count, and returns up to six of them
     * chosen at random.
     *
     * <p>When no novel carries any of the tags, every novel ties at a count of 0 and the random
     * choice is made among all of them.
     *
     * <p>NOTE(review): the input novel is not excluded here, so it can appear among its own
     * recommendations - confirm whether callers filter it out.
     *
     * @return an array of length 6; unused trailing slots are 0
     */
    public int[] run() {
        ArrayList<String> maxTags = this.getMaxTags();

        // book id -> number of most-similar tags the novel carries
        Map<Integer,Integer> results = new HashMap<>();
        for (itemCFData novelInfo : novelInfos) {
            List<String> novelTags = tagsOf(novelInfo);
            int hits = 0;
            for (String maxTag : maxTags) {
                // Compare whole tags; a substring test on the raw string would let "A" match "AB".
                if (novelTags.contains(maxTag)) {
                    hits++;
                }
            }
            int bookId = novelInfo.getOldBookid();
            results.put(bookId, hits);
        }

        // Collect the ids that share the highest count. When a higher count shows up, the ids
        // gathered so far belong to a lower tier and are discarded.
        int max = 0;
        List<Integer> maxnovelids = new ArrayList<>();
        for (Map.Entry<Integer, Integer> entry : results.entrySet()) {
            int hits = entry.getValue();
            if (hits > max) {
                max = hits;
                maxnovelids.clear();
            }
            if (hits == max) {
                maxnovelids.add(entry.getKey());
            }
        }

        // randomEleSet rejects a request for more elements than the collection holds, so only
        // sample when there is a surplus; otherwise take everything.
        Set<Integer> integers;
        if (maxnovelids.size() > RECOMMEND_COUNT) {
            integers = RandomUtil.randomEleSet(maxnovelids, RECOMMEND_COUNT);
        } else {
            integers = new HashSet<>(maxnovelids);
        }

        int[] maxBookids = new int[RECOMMEND_COUNT];
        int filled = 0;
        for (Integer id : integers) {
            if (filled == RECOMMEND_COUNT) {
                break;
            }
            maxBookids[filled] = id;
            filled++;
        }
        return maxBookids;
    }

    /** Splits a novel's tag string on {@code |}, dropping empty and repeated tags; a null tag string yields an empty list. */
    private static List<String> tagsOf(itemCFData novel) {
        List<String> tags = new ArrayList<>();
        String raw = novel.getTag();
        if (raw == null) {
            return tags;
        }
        for (String tag : raw.split("\\|")) {
            if (!tag.isEmpty() && !tags.contains(tag)) {
                tags.add(tag);
            }
        }
        return tags;
    }

    /** Maps tags to their matrix indices, skipping tags that are not in the index. */
    private List<Integer> indicesOf(List<String> tags) {
        List<Integer> keys = new ArrayList<>();
        for (String tag : tags) {
            Integer key = allTags.getKey(tag);
            if (key != null) {
                keys.add(key);
            }
        }
        return keys;
    }
}