Skip to content

Instantly share code, notes, and snippets.

@andrija-naglic
Last active August 14, 2016 20:03
Show Gist options
  • Save andrija-naglic/bb077daf31c698efba564523500d8b97 to your computer and use it in GitHub Desktop.
Save andrija-naglic/bb077daf31c698efba564523500d8b97 to your computer and use it in GitHub Desktop.
The Bayesian estimate formula useful for sorting the ratings properly
/*
The problem is described here: http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
And the formula is from IMDB: http://stackoverflow.com/a/1411268/5108832
It's a Bayesian estimate.
This is the formula that is used to sort the ratings:
( (avg_num_votes * avg_rating) + (this_num_votes * this_rating) ) / (avg_num_votes + this_num_votes)
In this example, the sample data are 9 posts (much like the WordPress posts) with a 5 star ratings.
See it in action: http://sqlfiddle.com/#!9/3cdfe/1/2
*/
DROP TABLE posts;
DROP TABLE postmeta;
CREATE TABLE posts(
ID int,
title varchar(30)
);
CREATE TABLE postmeta (
post_id int,
meta_key varchar(30),
meta_value real
);
INSERT INTO posts VALUES
(1, 'Title 1'), -- 123 2.4
(2, 'Title 2'), -- 1612 4.2
(3, 'Title 3'), -- 2131 3.5
(4, 'Title 4'), -- 7512 1.3
(5, 'Title 5'), -- 3131 1.7
(6, 'Title 6'), -- 9999 1.0
(7, 'Title 7'), -- 1 5.0
(8, 'Title 8'), -- 100 4.5
(9, 'Title 9'); -- 110 4.4
INSERT INTO postmeta VALUES
(1,'this_num_votes', 123),
(2,'this_num_votes', 1612),
(3,'this_num_votes', 2131),
(4,'this_num_votes', 7512),
(5,'this_num_votes', 3131),
(6,'this_num_votes', 9999),
(7,'this_num_votes', 1),
(8,'this_num_votes', 100),
(9,'this_num_votes', 110),
(1,'this_rating', 2.4),
(2,'this_rating', 4.2),
(3,'this_rating', 3.5),
(4,'this_rating', 1.3),
(5,'this_rating', 1.7),
(6,'this_rating', 1.0),
(7,'this_rating', 5.0),
(8,'this_rating', 4.5),
(9,'this_rating', 4.4);
SET @avg_total_votes := (SELECT AVG(meta_value) FROM postmeta WHERE meta_key ='this_num_votes');
SET @avg_total_rating := (SELECT AVG(meta_value) FROM postmeta WHERE meta_key ='this_rating');
SELECT posts.ID,
posts.title,
getmeta_votes.meta_value AS votes,
getmeta_rating.meta_value AS rating,
( ( (@avg_total_votes * @avg_total_rating) + (getmeta_votes.meta_value * getmeta_rating.meta_value) ) / ( @avg_total_votes + getmeta_votes.meta_value ) )
AS factor
FROM posts
LEFT JOIN postmeta AS getmeta_votes ON posts.ID = getmeta_votes.post_id AND getmeta_votes.meta_key = 'this_num_votes'
LEFT JOIN postmeta AS getmeta_rating ON posts.ID = getmeta_rating.post_id AND getmeta_rating.meta_key = 'this_rating'
WHERE NOT getmeta_votes.meta_value = 0 AND NOT getmeta_rating.meta_value = 0
ORDER BY factor DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment