Collaborative Filtering协同过滤推荐算法

港控/mmm° 2022-09-21 05:43 310阅读 0赞
from math import sqrt
  2. dataset={
  3. 'Lisa Rose': {'Lady in the Water': 2.5,
  4. 'Snakes on a Plane': 3.5,
  5. 'Just My Luck': 3.0,
  6. 'Superman Returns': 3.5,
  7. 'You, Me and Dupree': 2.5,
  8. 'The Night Listener': 3.0},
  9. 'Gene Seymour': {'Lady in the Water': 3.0,
  10. 'Snakes on a Plane': 3.5,
  11. 'Just My Luck': 1.5,
  12. 'Superman Returns': 5.0,
  13. 'The Night Listener': 3.0,
  14. 'You, Me and Dupree': 3.5},
  15. 'Michael Phillips': {'Lady in the Water': 2.5,
  16. 'Snakes on a Plane': 3.0,
  17. 'Superman Returns': 3.5,
  18. 'The Night Listener': 4.0},
  19. 'Claudia Puig': {'Snakes on a Plane': 3.5,
  20. 'Just My Luck': 3.0,
  21. 'The Night Listener': 4.5,
  22. 'Superman Returns': 4.0,
  23. 'You, Me and Dupree': 2.5},
  24. 'Mick LaSalle': {'Lady in the Water': 3.0,
  25. 'Snakes on a Plane': 4.0,
  26. 'Just My Luck': 2.0,
  27. 'Superman Returns': 3.0,
  28. 'The Night Listener': 3.0,
  29. 'You, Me and Dupree': 2.0},
  30. 'Jack Matthews': {'Lady in the Water': 3.0,
  31. 'Snakes on a Plane': 4.0,
  32. 'The Night Listener': 3.0,
  33. 'Superman Returns': 5.0,
  34. 'You, Me and Dupree': 3.5},
  35. 'Toby': {'Snakes on a Plane':4.5,
  36. 'You, Me and Dupree':1.0,
  37. 'Superman Returns':4.0}}
  38. def similarity_score(person1,person2):
  39. # Returns ratio Euclidean distance score of person1 and person2
  40. both_viewed = {} # To get both rated items by person1 and person2
  41. for item in dataset[person1]:
  42. if item in dataset[person2]:
  43. both_viewed[item] = 1
  44. # Conditions to check they both have an common rating items
  45. if len(both_viewed) == 0:
  46. return 0
  47. # Finding Euclidean distance
  48. sum_of_eclidean_distance = []
  49. for item in dataset[person1]:
  50. if item in dataset[person2]:
  51. sum_of_eclidean_distance.append(pow(dataset[person1][item] - dataset[person2][item],2))
  52. sum_of_eclidean_distance = sum(sum_of_eclidean_distance)
  53. return 1/(1+sqrt(sum_of_eclidean_distance))
  54. def pearson_correlation(person1,person2):
  55. # To get both rated items
  56. both_rated = {}
  57. for item in dataset[person1]:
  58. if item in dataset[person2]:
  59. both_rated[item] = 1
  60. number_of_ratings = len(both_rated)
  61. # Checking for number of ratings in common
  62. if number_of_ratings == 0:
  63. return 0
  64. # Add up all the preferences of each user
  65. person1_preferences_sum = sum([dataset[person1][item] for item in both_rated])
  66. person2_preferences_sum = sum([dataset[person2][item] for item in both_rated])
  67. # Sum up the squares of preferences of each user
  68. person1_square_preferences_sum = sum([pow(dataset[person1][item],2) for item in both_rated])
  69. person2_square_preferences_sum = sum([pow(dataset[person2][item],2) for item in both_rated])
  70. # Sum up the product value of both preferences for each item
  71. product_sum_of_both_users = sum([dataset[person1][item] * dataset[person2][item] for item in both_rated])
  72. # Calculate the pearson score
  73. numerator_value = product_sum_of_both_users - (person1_preferences_sum*person2_preferences_sum/number_of_ratings)
  74. denominator_value = sqrt((person1_square_preferences_sum - pow(person1_preferences_sum,2)/number_of_ratings) * (person2_square_preferences_sum -pow(person2_preferences_sum,2)/number_of_ratings))
  75. if denominator_value == 0:
  76. return 0
  77. else:
  78. r = numerator_value/denominator_value
  79. return r
  80. def most_similar_users(person,number_of_users):
  81. # returns the number_of_users (similar persons) for a given specific person.
  82. scores = [(pearson_correlation(person,other_person),other_person) for other_person in dataset if other_person != person ]
  83. # Sort the similar persons so that highest scores person will appear at the first
  84. scores.sort()
  85. scores.reverse()
  86. return scores[0:number_of_users]
  87. def user_reommendations(person):
  88. # Gets recommendations for a person by using a weighted average of every other user's rankings
  89. totals = {}
  90. simSums = {}
  91. rankings_list =[]
  92. for other in dataset:
  93. # don't compare me to myself
  94. if other == person:
  95. continue
  96. sim = pearson_correlation(person,other)
  97. #print ">>>>>>>",sim
  98. # ignore scores of zero or lower
  99. if sim <=0:
  100. continue
  101. for item in dataset[other]:
  102. # only score movies i haven't seen yet
  103. if item not in dataset[person] or dataset[person][item] == 0:
  104. # Similrity * score
  105. totals.setdefault(item,0)
  106. totals[item] += dataset[other][item]* sim
  107. # sum of similarities
  108. simSums.setdefault(item,0)
  109. simSums[item]+= sim
  110. # Create the normalized list
  111. rankings = [(total/simSums[item],item) for item,total in totals.items()]
  112. rankings.sort()
  113. rankings.reverse()
  114. # returns the recommended items
  115. recommendataions_list = [recommend_item for score,recommend_item in rankings]
  116. return recommendataions_list
  117. print user_reommendations('Toby')

探索推荐引擎内部的秘密,第 2 部分: 深入推荐引擎相关算法 - 协同过滤

探索推荐引擎内部的秘密,第 3 部分: 深入推荐引擎相关算法 - 聚类

memory-based 协同过滤(CF)方法

推荐方法总结

机器学习相关——协同过滤

Collaborative filtering 学习总结

如何利用用户标签数据

基于标签的推荐系统

基于用户的最近邻推荐

发表评论

表情:
评论列表 (有 0 条评论,310人围观)

还没有评论,来说两句吧...

相关阅读

    相关 协同过滤推荐算法

    1.思想简介: 协同过滤,从字面上理解,包括协同和过滤两个操作。所谓协同就是利用群体的行为来做决策(推荐)。对于推荐系统来说,通过用户的持续协同作用,最终给用户的推荐...

    相关 协同过滤推荐算法总结

    推荐算法具有非常多的应用场景和商业价值,因此对推荐算法值得好好研究。推荐算法种类很多,但是目前应用最广泛的应该是协同过滤类别的推荐算法,本文就对协同过滤类别的推荐算法做一个概括