HyperLogLog UDF for MySQL
CREATE TABLE `uniqueUsersPerDay `(` day` DATE PRIMARY KEY , ` hll` BLOB);
SET @bits = 14 ; -- set storage size to 2^@bits bytes (standard error 0.81%)
SET @userId = 1 ;
INSERT INTO ` uniqueUsersPerDay` (` day` , ` hll` )
VALUES (CURDATE(), HLL_CREATE(@bits, @userId))
ON DUPLICATE KEY UPDATE ` hll` = HLL_ADD(` hll` , @userId)
;
SET @userId = 2 ;
INSERT INTO ` uniqueUsersPerDay` (` day` , ` hll` )
VALUES (CURDATE(), HLL_CREATE(@bits, @userId))
ON DUPLICATE KEY UPDATE ` hll` = HLL_ADD(` hll` , @userId)
;
SET @userId = 2 ;
INSERT INTO ` uniqueUsersPerDay` (` day` , ` hll` )
VALUES (CURDATE(), HLL_CREATE(@bits, @userId))
ON DUPLICATE KEY UPDATE ` hll` = HLL_ADD(` hll` , @userId)
;
SELECT HLL_COUNT(` hll` ) AS ` dau` FROM ` uniqueUsersPerDay` WHERE ` day` = CURDATE();
+------+
| dau |
+------+
| 2 |
+------+
COUNT DISTINCT
replacement
SELECT HLL_COUNT_DISTINCT(` key` ) AS ` uniq` FROM (
SELECT 1 AS ` key`
UNION SELECT 2
UNION SELECT 3
UNION SELECT 4
UNION SELECT 2
UNION SELECT 3
) AS t;
+------+
| uniq |
+------+
| 4 |
+------+
Installation (how to draw an owl)
Build and install plugin library
Install functions in MySQL
CREATE FUNCTION HLL_COUNT RETURNS INTEGER SONAME ' mysql-hll.so' ;
CREATE FUNCTION HLL_CREATE RETURNS STRING SONAME ' mysql-hll.so' ;
CREATE FUNCTION HLL_ADD RETURNS STRING SONAME ' mysql-hll.so' ;
CREATE FUNCTION HLL_MERGE RETURNS STRING SONAME ' mysql-hll.so' ;
CREATE AGGREGATE FUNCTION HLL_GROUP_COUNT RETURNS INTEGER SONAME ' mysql-hll.so' ;
CREATE AGGREGATE FUNCTION HLL_GROUP_MERGE RETURNS STRING SONAME ' mysql-hll.so' ;
CREATE AGGREGATE FUNCTION HLL_GROUP_CREATE RETURNS STRING SONAME ' mysql-hll.so' ;
CREATE AGGREGATE FUNCTION HLL_COUNT_DISTINCT RETURNS INTEGER SONAME ' mysql-hll.so' ;
blob HLL_CREATE(bits[, key1[, key2[, ...]]])
blob HLL_ADD(blob, key1[, key2[, ...]])
int HLL_COUNT(blob)
blob HLL_MERGE(blob1[, blob2[, ...]])
blob HLL_GROUP_CREATE(bits, *keys)
int HLL_GROUP_COUNT(*blobs)
blob HLL_GROUP_MERGE(*blobs)
int HLL_COUNT_DISTINCT(*keys)