基于物品的协同过滤算法:给用户推荐与他之前喜欢的物品相似的物品;
基于用户的协同过滤算法:给用户推荐与他兴趣相似的用户喜欢的物品;
实现协同过滤算法,大致几个关键步骤:
1:根据历史数据收集用户偏好
2:过滤噪音
3:计算相似度(余弦相似度计算、欧氏距离计算、Jaccard系数、皮尔森相关系数),根据特定的业务进行计算
4:找到与目标最相邻(最相似)的K个用户(基于用户的userCF)或物品(基于物品的itemCF)
5:根据特定的业务规则对数据进行过滤,然后进行推荐
userCF和itemCF区别
userCF推荐是与用户兴趣相同的用户们喜欢的物品,哪怕用户根本不认识这些人
itemCF推荐是与用户之前感兴趣的物品相似的物品
userCF更横向更社会化
itemCF更纵向更个性化
userCF要维护一个用户相似度矩阵,适合用户较少的场合
itemCF要维护一个物品相似度矩阵,适合物品较少的场合
处理用户数据(构建"用户-物品"和"物品-用户"两张倒排表)
/** Pair of numeric weights attached to a (user, video) relation or to a similarity entry. */
case class Weight(hot: Double, score: Double)

/** One input record: `userId` is associated with `videoCode` with the given `weight`. */
case class UserBean(userId: String, videoCode: String, weight: Weight)

/** A video together with a weight. */
case class ItemBean(videoCode: String, weight: Weight)

// user -> (video -> weight): every video recorded for each user
val userMap = mutable.HashMap[String, mutable.HashMap[String, Weight]]()

// video -> users: every user recorded for each video
val itemMap = mutable.HashMap[String, mutable.HashSet[String]]()

/**
 * Loads the records into the two shared indexes `userMap` and `itemMap`.
 *
 * Records are added on top of whatever the indexes already contain; a repeated
 * (user, video) pair overwrites the previously stored weight.
 *
 * @param userArray records to index
 * @return the shared `userMap` instance (not a copy)
 */
def readData(userArray: Array[UserBean]): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
  for (record <- userArray) {
    val videosOfUser = userMap.getOrElseUpdate(record.userId, mutable.HashMap[String, Weight]())
    videosOfUser(record.videoCode) = record.weight

    val usersOfVideo = itemMap.getOrElseUpdate(record.videoCode, mutable.HashSet[String]())
    usersOfVideo += record.userId
  }
  userMap
}
itemCF数据计算
/**
 * Builds the item-to-item similarity matrix from the shared `userMap`.
 *
 * For every user, each ordered pair of distinct items (i, j) in that user's
 * history contributes to `matrix(i)(j)`:
 *
 *  - `penalizeActiveUsers = true` (default; the mode the code previously
 *    hard-coded): each co-occurrence adds `1 / ln(1 + |N(u)|)` to `hot`, where
 *    `|N(u)|` is the number of items in that user's history. Afterwards every
 *    entry is divided by `sqrt(|N(i)| * |N(j)|)`, where `|N(x)|` is the number
 *    of users whose history contains item x, and `score` is set to 0.
 *  - `penalizeActiveUsers = false`: each co-occurrence adds the min/max ratio
 *    of the two items' weights, computed separately for `hot` and `score`.
 *    No normalisation is applied in this mode.
 *
 * Other distance measures (cosine, Euclidean, Pearson, ...) could be plugged
 * in at the same place.
 *
 * @param penalizeActiveUsers selects the weighting scheme described above
 * @return item -> (co-occurring item -> similarity weight)
 */
def itemResult(penalizeActiveUsers: Boolean = true): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
  // min/max ratio of two weights; 0/0 is reported as 0 instead of NaN.
  def ratio(a: Double, b: Double): Double = {
    val hi = math.max(a, b)
    if (hi == 0.0) 0.0 else math.min(a, b) / hi
  }

  val itemMatrix = mutable.HashMap[String, mutable.HashMap[String, Weight]]()
  // item -> number of users whose history contains it
  val itemCount = mutable.HashMap[String, Double]()

  for ((_, history) <- userMap) {
    // Same for every pair of this user; only read when the history holds at least two items.
    val activityPenalty = 1.0 / math.log1p(history.size)
    for ((item, itemWeight) <- history) {
      val neighbours = itemMatrix.getOrElseUpdate(item, mutable.HashMap[String, Weight]())
      itemCount(item) = itemCount.getOrElse(item, 0.0) + 1
      for ((other, otherWeight) <- history if other != item) {
        val acc = neighbours.getOrElse(other, Weight(0.0, 0.0))
        neighbours(other) =
          if (penalizeActiveUsers) {
            Weight(activityPenalty + acc.hot, 0 + acc.score)
          } else {
            // hot is compared with hot and score with score (the two fields used to be mixed up here).
            Weight(
              ratio(itemWeight.hot, otherWeight.hot) + acc.hot,
              ratio(itemWeight.score, otherWeight.score) + acc.score
            )
          }
      }
    }
  }

  if (penalizeActiveUsers) {
    for ((item, neighbours) <- itemMatrix) {
      val usersOfItem = itemCount.getOrElse(item, 0.0)
      // Snapshot the normalised entries first so the map is never written while it is being traversed.
      val normalised = neighbours.toList.map { case (other, w) =>
        other -> Weight(w.hot / math.sqrt(usersOfItem * itemCount.getOrElse(other, 0.0)), 0.0)
      }
      neighbours ++= normalised
    }
  }
  itemMatrix
}
itemCF对用户进行推荐
/**
 * Prints up to 10 item-based recommendations for one user, best first,
 * formatted as `(video,Weight(hot,score))` entries joined by `;`.
 *
 * For each video in the user's history the `k` most similar videos (by `hot`)
 * are looked up in `itemMatrix`; candidates the user already has are skipped
 * and the `hot` similarities of the remaining ones are summed per candidate.
 *
 * @param itemMatrix item -> (similar item -> weight), e.g. the result of `itemResult`
 * @param userArray  the history records of a single user; `null` (which `main`
 *                   passes for an unknown user) is treated as an empty history
 * @param k          number of most-similar items considered per history entry
 */
def getItemRecommend(itemMatrix: mutable.HashMap[String, mutable.HashMap[String, Weight]], userArray: Array[UserBean], k: Int): Unit = {
  // Callers look users up with getOrElse(id, null), so tolerate a missing user instead of throwing.
  val history = Option(userArray).getOrElse(Array.empty[UserBean])
  val seen = history.map(_.videoCode).toSet
  val recommends = mutable.HashMap[String, Weight]()

  history.foreach { record =>
    itemMatrix.get(record.videoCode).foreach { similar =>
      // Keep the k highest-weighted similar items.
      similar.toList.sortBy(_._2.hot).reverse.take(k).foreach { case (video, w) =>
        if (!seen.contains(video)) {
          val previousHot = recommends.getOrElse(video, Weight(0.0, 0.0)).hot
          // Alternative: scale by the user's own weight for the source item, i.e.
          //   Weight((previousHot + w.hot) * record.weight.hot, ...)
          // NOTE(review): score is the square of the latest similarity score and is not
          // accumulated across history entries — kept as in the original; confirm intent.
          recommends += (video -> Weight(previousHot + w.hot, (0.0 + w.score) * w.score))
        }
      }
    }
  }
  println(recommends.toList.sortBy(_._2.hot).reverse.take(10).mkString(";"))
}
userCF数据计算
/**
 * Builds the user-to-user similarity matrix from the shared `itemMap`.
 *
 * For every item, each ordered pair of distinct users (u, v) that both hold
 * the item adds `1 / ln(1 + |N(i)|)` to `matrix(u)(v)`, where `|N(i)|` is the
 * number of users holding that item, so widely held items contribute less.
 * Afterwards every entry is divided by `sqrt(|N(u)| * |N(v)|)`, where `|N(x)|`
 * is the number of items held by user x.
 *
 * @return user -> (other user -> similarity)
 */
def userResult(): mutable.HashMap[String, mutable.HashMap[String, Double]] = {
  // user -> number of items in that user's history
  val userCount = mutable.HashMap[String, Double]()
  val userMatrix = mutable.HashMap[String, mutable.HashMap[String, Double]]()

  for ((_, holders) <- itemMap) {
    // Same for every pair of users sharing this item; only read when there are at least two holders.
    val popularityPenalty = 1.0 / math.log1p(holders.size * 1.0)
    for (user <- holders) {
      val neighbours = userMatrix.getOrElseUpdate(user, mutable.HashMap[String, Double]())
      userCount(user) = userCount.getOrElse(user, 0.0) + 1
      for (other <- holders if other != user) {
        neighbours(other) = popularityPenalty + neighbours.getOrElse(other, 0.0)
      }
    }
  }

  for ((user, neighbours) <- userMatrix) {
    val itemsOfUser = userCount.getOrElse(user, 0.0)
    // Snapshot the normalised entries first so the map is never written while it is being traversed.
    val normalised = neighbours.toList.map { case (other, similarity) =>
      other -> similarity / math.sqrt(itemsOfUser * userCount.getOrElse(other, 0.0))
    }
    neighbours ++= normalised
  }
  userMatrix
}
userCF对用户进行推荐
/**
 * Prints up to 10 user-based recommendations for one user, best first, as
 * `<userId> : (video,weight);(video,weight);...`.
 *
 * The `k` most similar users are taken from `userMatrix`; every video in their
 * histories (read from the shared `userMap`) that the target user does not
 * already have is a candidate, weighted by the sum of the similarities of the
 * neighbours that hold it.
 *
 * Nothing is printed when `userArray` is `null` or empty, or when the user has
 * no row in `userMatrix`.
 *
 * @param userMatrix user -> (other user -> similarity), e.g. the result of `userResult`
 * @param userArray  the history records of a single user; the user id is read
 *                   from the first record
 * @param k          number of most-similar users to draw candidates from
 */
def getUserRecommend(userMatrix: mutable.HashMap[String, mutable.HashMap[String, Double]], userArray: Array[UserBean], k: Int): Unit = {
  // Callers look users up with getOrElse(id, null); an unknown or empty history has nothing to recommend from.
  val history = Option(userArray).getOrElse(Array.empty[UserBean])

  history.headOption.foreach { first =>
    val userId = first.userId
    val seen = history.map(_.videoCode).toSet

    userMatrix.get(userId).foreach { neighbours =>
      val recommends = mutable.HashMap[String, Double]()
      // Draw candidates from the k most similar users.
      neighbours.toList.sortBy(_._2).reverse.take(k).foreach { case (neighbour, similarity) =>
        userMap.get(neighbour).foreach { videos =>
          videos.foreach { case (video, _) =>
            if (!seen.contains(video)) {
              // Other weightings are possible, e.g. adding the neighbour's own `hot` for the
              // video, or similarity * hot, instead of the plain similarity.
              recommends += (video -> (recommends.getOrElse(video, 0.0) + similarity))
            }
          }
        }
      }
      println(userId + " : " + recommends.toList.sortBy(_._2).reverse.take(10).mkString(";"))
    }
  }
}
数据测试
/**
 * Demo entry point: indexes a small hard-coded data set, then prints the
 * item-based and the user-based recommendations for users "1" to "4".
 */
def main(args: Array[String]): Unit = {
  val userArray = Array(
    UserBean("1", "A", Weight(2.0, 0.0)),
    UserBean("2", "B", Weight(3.0, 0.0)),
    UserBean("1", "B", Weight(4.0, 0.0)),
    UserBean("1", "C", Weight(1.0, 0.0)),
    UserBean("3", "C", Weight(1.0, 0.0)),
    UserBean("4", "H", Weight(1.0, 0.0)),
    UserBean("4", "A", Weight(1.0, 0.0)),
    UserBean("4", "B", Weight(1.0, 0.0)),
    UserBean("5", "E", Weight(1.0, 0.0)),
    UserBean("5", "A", Weight(1.0, 0.0))
  )
  readData(userArray)

  val targets = List("1", "2", "3", "4")
  val neighbourLimit = 20

  println("=======itemCF")
  val itemSimilarity = itemResult()
  val recordsByUser = userArray.groupBy(_.userId)
  for (id <- targets) {
    getItemRecommend(itemSimilarity, recordsByUser.getOrElse(id, null), neighbourLimit)
  }

  println("=======userCF")
  val userSimilarity = userResult()
  for (id <- targets) {
    getUserRecommend(userSimilarity, recordsByUser.getOrElse(id, null), neighbourLimit)
  }
}
Be First to Comment