Press "Enter" to skip to content

协同过滤算法代码(itemCF\userCF)

基于物品的协同过滤算法:给用户推荐与他之前喜欢的物品相似的物品;

 

基于用户的协同过滤算法:给用户推荐与他兴趣相似的用户喜欢的物品;

 

实现协同过滤算法,大致几个关键步骤:

 

1:根据历史数据收集用户偏好

 

2:过滤噪音

 

3:计算相似度(余弦相似度、欧氏距离、Jaccard系数、皮尔逊相关系数),根据特定的业务选择合适的计算方式

 

4:找到与目标最相似的K个用户(基于用户userCF)或物品(基于物品itemCF)

 

5:根据特定的业务对数据进行过滤,然后进行推荐

 

userCF和itemCF区别

 

userCF推荐的是与用户兴趣相同的用户们喜欢的物品,哪怕用户根本不认识这些人

 

itemCF推荐的是与用户之前感兴趣的物品相似的物品

 

userCF更横向更社会化

 

itemCF更纵向更个性化

 

userCF要维护一个用户相似度矩阵,适合用户较少的场合

 

itemCF要维护一个物品相似度矩阵,适合物品较少的场合

 

处理用户

 

/**
 * Pair of numeric weights attached to a user/item interaction or to a similarity entry.
 *
 * @param hot   primary weight; the similarity and recommendation code sorts and accumulates on it
 * @param score secondary weight; presumably an explicit rating or preference -- TODO confirm
 */
case class Weight(
    hot: Double,
    score: Double
)

  /**
   * One interaction record: `userId` interacted with `videoCode`, carrying `weight`.
   */
  case class UserBean(
      userId: String,
      videoCode: String,
      weight: Weight
  )

  /**
   * An item together with a weight. Not referenced by the code visible in this file.
   */
  case class ItemBean(
      videoCode: String,
      weight: Weight
  )
  // User index: userId -> (videoCode -> the Weight that user gave to that video).
  val userMap: mutable.HashMap[String, mutable.HashMap[String, Weight]] =
    mutable.HashMap.empty
  // Item index (inverted list): videoCode -> set of userIds that interacted with the video.
  val itemMap: mutable.HashMap[String, mutable.HashSet[String]] =
    mutable.HashMap.empty
  /**
   * Loads interaction records into the two shared indexes `userMap` and `itemMap`.
   *
   * For every record the user's (videoCode -> weight) entry is written into `userMap`
   * (a later record for the same user/video overwrites the earlier weight) and the
   * user id is added to the video's user set in `itemMap`.
   *
   * @param userArray interaction records to index
   * @return the shared `userMap` after all records have been applied
   */
  def readData(userArray: Array[UserBean]): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
    for (record <- userArray) {
      // Create the per-user map on first sight of the user, then store the weight.
      val videosOfUser = userMap.getOrElseUpdate(record.userId, mutable.HashMap[String, Weight]())
      videosOfUser(record.videoCode) = record.weight

      // Create the per-video user set on first sight of the video, then register the user.
      val usersOfVideo = itemMap.getOrElseUpdate(record.videoCode, mutable.HashSet[String]())
      usersOfVideo += record.userId
    }
    userMap
  }

 

itemCF数据计算

 

/**
 * Builds the item-to-item similarity matrix from the shared `userMap`.
 *
 * Two weighting modes are supported:
 *
 *  - Co-occurrence mode (default): every user who interacted with both items i and j
 *    contributes `1 / ln(1 + |N(u)|)` to `hot`, where `|N(u)|` is the number of items of
 *    that user, so users with long histories count less. Afterwards each entry is divided
 *    by `sqrt(|N(i)| * |N(j)|)`, where `|N(i)|` is the number of users of item i, and
 *    `score` is reset to 0.0.
 *  - Weighted mode: every such user contributes `min(a, b) / max(a, b)` of the two items'
 *    `hot` values to `hot`, and the same ratio of the two `score` values to `score`.
 *    No normalisation pass is applied in this mode. Other distance measures (cosine,
 *    Euclidean, Pearson, ...) could be substituted here.
 *
 * @param useCoOccurrence `true` (default) for co-occurrence mode, `false` for weighted mode
 * @return map of videoCode -> (other videoCode -> similarity weight)
 */
def itemResult(useCoOccurrence: Boolean = true): mutable.HashMap[String, mutable.HashMap[String, Weight]] = {
    val itemMatrix = mutable.HashMap[String, mutable.HashMap[String, Weight]]()
    // itemCount(i) = number of users that interacted with item i, i.e. |N(i)|.
    val itemCount = mutable.HashMap[String, Double]()

    // min/max ratio of two weights; yields 0.0 instead of NaN when both are zero.
    def ratio(a: Double, b: Double): Double = {
      val larger = math.max(a, b)
      if (larger == 0.0) 0.0 else math.min(a, b) / larger
    }

    userMap.foreach { case (_, watched) =>
      // Identical for every item pair of this user, so compute it once.
      val userContribution = 1.0 / math.log1p(watched.size.toDouble)
      watched.foreach { case (item, itemWeight) =>
        val neighbours = itemMatrix.getOrElseUpdate(item, mutable.HashMap[String, Weight]())
        itemCount.update(item, itemCount.getOrElse(item, 0.0) + 1)
        watched.foreach { case (other, otherWeight) =>
          if (item != other) {
            val acc = neighbours.getOrElse(other, Weight(0.0, 0.0))
            val updated =
              if (useCoOccurrence) {
                Weight(userContribution + acc.hot, acc.score)
              } else {
                // Compare hot with hot and score with score (the two were previously mixed up).
                Weight(
                  ratio(itemWeight.hot, otherWeight.hot) + acc.hot,
                  ratio(itemWeight.score, otherWeight.score) + acc.score
                )
              }
            neighbours.update(other, updated)
          }
        }
      }
    }

    if (useCoOccurrence) {
      itemMatrix.foreach { case (item, neighbours) =>
        val usersOfItem = itemCount.getOrElse(item, 0.0)
        // Iterate over a snapshot so the map is not updated while it is being traversed.
        neighbours.toList.foreach { case (other, raw) =>
          val norm = math.sqrt(usersOfItem * itemCount.getOrElse(other, 0.0))
          neighbours.update(other, Weight(raw.hot / norm, 0.0))
        }
      }
    }
    itemMatrix
  }

 

itemCF对用户进行推荐

 

/**
 * Prints up to ten item-based recommendations for one user.
 *
 * For every item in the user's history, the `k` most similar items (by `hot`) are taken
 * from `itemMatrix`; items the user already has are skipped and the `hot` similarities of
 * the remaining candidates are summed. The result is printed as a `;`-separated list,
 * highest accumulated `hot` first.
 *
 * @param itemMatrix item-to-item similarity matrix (videoCode -> other videoCode -> weight)
 * @param userArray  interaction records of a single user; `null` or empty is treated as
 *                   "no history" and prints an empty recommendation line
 * @param k          number of most-similar items considered per history item
 */
def getItemRecommend(itemMatrix: mutable.HashMap[String, mutable.HashMap[String, Weight]],
                     userArray: Array[UserBean],
                     k: Int): Unit = {
    // Callers look users up with `getOrElse(id, null)`, so null must not blow up here.
    val history: Seq[UserBean] = Option(userArray).map(_.toSeq).getOrElse(Seq.empty)
    val alreadySeen: Set[String] = history.map(_.videoCode).toSet
    val recommends = mutable.HashMap[String, Weight]()

    history.foreach { record =>
      itemMatrix.get(record.videoCode).foreach { similarItems =>
        // Take the k most similar items, highest `hot` first.
        similarItems.toList.sortBy(_._2.hot).reverse.take(k).foreach { case (candidate, similarity) =>
          if (!alreadySeen.contains(candidate)) {
            val accumulatedHot = recommends.getOrElse(candidate, Weight(0.0, 0.0)).hot
            // Only `hot` is accumulated. An alternative weighting multiplies by the user's
            // own weight for the source item (record.weight.hot).
            // NOTE(review): `score` is stored as the squared similarity score of the latest
            // contribution and is not accumulated -- confirm this is intended.
            recommends.update(
              candidate,
              Weight(accumulatedHot + similarity.hot, (0.0 + similarity.score) * similarity.score)
            )
          }
        }
      }
    }
    println(recommends.toList.sortBy(_._2.hot).reverse.take(10).mkString(";"))
  }

 

userCF数据计算

 

/**
 * Builds the user-to-user similarity matrix from the shared `itemMap`.
 *
 * Every item shared by users u and v contributes `1 / ln(1 + |N(item)|)` to their
 * similarity, where `|N(item)|` is the number of users of that item, so popular items
 * count less. Each entry is then divided by `sqrt(|N(u)| * |N(v)|)`, where `|N(u)|` is
 * the number of items of user u.
 *
 * @return map of userId -> (other userId -> similarity)
 */
def userResult(): mutable.HashMap[String, mutable.HashMap[String, Double]] = {
    val userMatrix = mutable.HashMap.empty[String, mutable.HashMap[String, Double]]
    // itemsPerUser(u) = number of items user u interacted with, i.e. |N(u)|.
    val itemsPerUser = mutable.HashMap.empty[String, Double]

    // Pass 1: accumulate the popularity-penalised co-occurrence weights.
    for ((_, fans) <- itemMap) {
      for (user <- fans) {
        val row = userMatrix.getOrElseUpdate(user, mutable.HashMap.empty[String, Double])
        itemsPerUser(user) = itemsPerUser.getOrElse(user, 0.0) + 1
        for (other <- fans if other != user) {
          row(other) = 1.0 / math.log1p(fans.size * 1.0) + row.getOrElse(other, 0.0)
        }
      }
    }

    // Pass 2: normalise every entry by the geometric mean of both users' item counts.
    for ((user, row) <- userMatrix; (other, raw) <- row.toList) {
      val norm = math.sqrt(itemsPerUser.getOrElse(user, 0.0) * itemsPerUser.getOrElse(other, 0.0))
      row(other) = raw / norm
    }

    userMatrix
  }

 

userCF对用户进行推荐

 

/**
 * Prints up to ten user-based recommendations for one user.
 *
 * The user id is taken from the first record of `userArray`. The `k` most similar users
 * are looked up in `userMatrix`; every item of those users that the target user does not
 * already have is scored with the sum of the similarities of the users who have it. The
 * result is printed as `<userId> : <item1>;<item2>;...`, highest score first.
 *
 * Nothing is printed when `userArray` is `null` or empty, or when the user has no row in
 * `userMatrix`.
 *
 * @param userMatrix user-to-user similarity matrix (userId -> other userId -> similarity)
 * @param userArray  interaction records of a single user
 * @param k          number of most-similar users considered
 */
def getUserRecommend(userMatrix: mutable.HashMap[String, mutable.HashMap[String, Double]],
                     userArray: Array[UserBean],
                     k: Int): Unit = {
    for {
      history      <- Option(userArray)       // tolerate null from `getOrElse(id, null)` callers
      firstRecord  <- history.headOption      // tolerate an empty history
      similarUsers <- userMatrix.get(firstRecord.userId)
    } {
      val userId = firstRecord.userId
      val alreadySeen: Set[String] = history.map(_.videoCode).toSet
      val recommends = mutable.HashMap[String, Double]()

      // Use the items of the k most similar users, highest similarity first.
      similarUsers.toList.sortBy(_._2).reverse.take(k).foreach { case (neighbour, similarity) =>
        userMap.get(neighbour).foreach { videos =>
          videos.foreach { case (video, _) =>
            if (!alreadySeen.contains(video)) {
              // Candidate score = sum of the similarities of the neighbours that have it.
              // Alternatives: add the neighbour's own weight for the item (weight.hot), or
              // add similarity * weight.hot.
              recommends.update(video, recommends.getOrElse(video, 0.0) + similarity)
            }
          }
        }
      }
      println(userId + " : " + recommends.toList.sortBy(_._2).reverse.take(10).mkString(";"))
    }
  }

 

数据测试

 

/**
 * Demo entry point: indexes a small hand-written data set, then prints itemCF and userCF
 * recommendations for users "1" to "4".
 */
def main(args: Array[String]): Unit = {
    // Sample interactions as (userId, videoCode, Weight(hot, score)).
    val userArray = Array(
      UserBean("1", "A", Weight(2.0, 0.0)),
      UserBean("2", "B", Weight(3.0, 0.0)),
      UserBean("1", "B", Weight(4.0, 0.0)),
      UserBean("1", "C", Weight(1.0, 0.0)),
      UserBean("3", "C", Weight(1.0, 0.0)),
      UserBean("4", "H", Weight(1.0, 0.0)),
      UserBean("4", "A", Weight(1.0, 0.0)),
      UserBean("4", "B", Weight(1.0, 0.0)),
      UserBean("5", "E", Weight(1.0, 0.0)),
      UserBean("5", "A", Weight(1.0, 0.0))
    )
    readData(userArray)

    // Users for which recommendations are printed, in output order.
    val targetUsers = List("1", "2", "3", "4")

    println("=======itemCF")
    val itemMatrix = itemResult()
    val recordsByUser = userArray.groupBy(_.userId)
    for (id <- targetUsers) {
      getItemRecommend(itemMatrix, recordsByUser.getOrElse(id, null), 20)
    }

    println("=======userCF")
    val userMatrix = userResult()
    for (id <- targetUsers) {
      getUserRecommend(userMatrix, recordsByUser.getOrElse(id, null), 20)
    }
  }

Be First to Comment

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注