Morris Counters Overview

Counting the number of elements in a stream by limited means in the artist's view
Counting the number of elements in a stream by limited means in the artist's view

, Qrator Labs @dimak24 . Core —  hr@qrator.net.





1

- : , ; : n- 2 ^ n - 1. , , . , .





, , — .  , .





, ( 2). ( 3) , 1978 , . — . : , , , , .





2 ,

— . :





\ begin {equation *} C_ {n + 1} = \ left \ {\ begin {array} {ll} C_n + 1 & \ textit {with probability $ p = const $} \\ C_n & \ textit {with probability $ 1 -p $.} \ end {array} \ right.  \ end {equation *}

 C_n  n- . ,  C_n n . , C_n  :





\begin{align*}             C_n \sim Bin(&n, p) \\     {\sf E} C_n    = &np        \\     {\sf D} C_n    = &np(1-p). \end{align*}

n \widehat{n}(C) := C/p. , : {\sf E}\widehat{n}(C_n) = ({\sf E}C_n)/p = np/p = n.





: -, 1/p = const , , . -, n. , n=1 100%. :





\begin{align*}     {\sf CV}\widehat{n}(C_n) = \frac{\sqrt{{\sf D}\widehat{n}(C_n)}}{{\sf E} \widehat{n}(C_n)} = \sqrt{\frac{1-p}{pn}}. \end{align*}

3

, : 1, 1/2, 1/4 ..:





\begin{equation*}     C_{n+1} = \left\{\begin{array}{ll}          C_n + 1 & \textit{  $2^{-C_n}$} \\          C_n     & \textit{  $1 - 2^{-C_n}$.}      \end{array}\right. \end{equation*}

, ? x x + 1 2^x, :





\widehat{n}(C) := \sum_{i=0}^{C-1} 2^{i} = 2^C - 1.





  • , n n,





\begin{align*}         {\sf E}\left(2^{C_n} | C_{n-1} = y\right) =& \underbrace{2^{y + 1} 2^{-y}}_{\textit{ }} + \underbrace{2^y (1 - 2^{-y})}_{\textit{  }} = 2^y + 1 \implies \\         {\sf E}\left(2^{C_n}|C_{n-1}\right) =& 2^{C_{n-1}} + 1 \implies {\sf E}2^{C_n} = {\sf E}2^{C_{n-1}} + 1 = {\sf E}2^{C_{n-2}} + 2 = \dots     \end{align*}

0: C_0 = 0, {\sf E}2^{C_0} = 2^{0} = 1, {\sf E} 2^{C_n} = \dots = n + 1. {\sf E}\widehat{n}(C_n) = {\sf E}2^{C_n} - 1 = n.





  • n.





\widehat{n}(C_n). , {\sf D}\widehat{n} = {\sf E}\widehat{n}^2 - ({\sf E}\widehat{n})^2. , {\sf E}\widehat{n}(C_n) = n. {\sf E}\widehat{n}(C_n)^2. ,





\begin{align*}         {\sf E}\left(2^{C_n} - 1\right)^2 = {\sf E}2^{2C_n} - 2(n+1) + 1 = {\sf E}2^{2C_n} - 2n - 1.     \end{align*}

, , {\sf E}2^{2C_n}.





\begin{align*}         {\sf E}\left(2^{2C_n} | C_{n-1} = y\right) &=         \underbrace{2^{2(y + 1)} 2^{-y}}_{\textit{ }} +         \underbrace{2^{2y}(1 - 2^{-y})}_{\textit{  }} = \\         3 \cdot 2^y +2^{2y} &\implies         {\sf E}\left(2^{2C_n} | C_{n-1}\right) = 3 \cdot 2^{C_{n-1}} + 2^{2C_{n-1}} \implies \\         {\sf E} 2^{2C_n} = 3n &+ {\sf E}2^{2C_{n-1}} = 3n + 3(n-1) + {\sf E}2^{2C_{n-2}} = \dots = \sum_{i=1}^n 3i     \end{align*}





\begin{equation*}         {\sf D}\widehat{n}(C_n) = \frac{3n(n+1)}{2} - 2n - 1 - n^2 = \frac{n^2 - n - 2}{2}     \end{equation*} \begin{equation*} {\sf CV}\widehat{n}(C_n) = \frac{\sqrt{{\sf D}\widehat{n}(C_n)}}{{\sf E}\widehat{n}(C_n)} = \sqrt{\frac{n^2-n-2}{2n^2}} \sim \frac{1}{\sqrt{2}}.     \end{equation*}

, , n.





:





\begin{align*}         {\sf P}(|\widehat{n}(C_n) - n| \ge \varepsilon n) \le \frac{{\sf D}\widehat{n}(C_n)}{\varepsilon^2n^2} \sim \frac{1}{2\varepsilon^2}.     \end{align*}
  • , {\bf X} 2^{2^{\bf X}} - 1. , n \mathcal{O}(\log \log n) !





. 1:             (   sample_size=100     ,    n          n )
. 1: ( sample_size=100 , n n )

4

: , . — < > — .





, , , —   :





\begin{align*}     {\sf P}(n_{fail} = k) = (1 - 2^{-C})^k 2^{-C} \implies n_{fail} \sim Geom(2^{-C}). \end{align*}

, , :





5

. , , , .    , . , , - . , , EMA , .





: 1 . , C





\sum_{i=0}^{C-1} 2^i = 2^C - 1 =: n_0.

, 1 2^{C-1} - 1 = \frac{1}{2}n_0 - \frac{1}{2}. , 1 \sim 2! 2, \frac{1}{2} 1:





6

:  2 ,  {\bf X}. : , ,  , (-   ).





. 2:      (M=5 )   (E=3 )
. 2: (M=5 ) (E=3 )

:





\begin{align*} {\rm e}(C)  &= C \gg {\bf M} \\     {\rm m}(C)  &= C \&  (2^{\bf M} - 1). \end{align*}

:





\begin{align*}     C_{n+1} &= \left\{\begin{array}{ll}          C_n + 1, & \textit{  $2^{-{\rm e}(C)}$}  \\          C_n,     & \textit{  $1 - 2^{-{\rm e}(C)}$}     \end{array}\right. \\     \widehat{n}(C) &= (2^{{\rm e}(C)} - 1)2^{\bf M} + 2^{{\rm e}(C)}{\rm m}(C). \end{align*}

, 2 C = 89, {\rm m} = 25 {\rm e} = 2. \widehat{n} = (2^2 - 1) \times 2^5 + 2^2 \times 25 = 196.





, ( , ). ; , .





:









\begin{align*}         {\bf N}_{max} = 2^{2^{\bf E} + {\bf M}} - \left(2^{2^{\bf E} - 1} + 2^{\bf M}\right);     \end{align*}

, \widehat{n} {\rm m} = 2^{\bf M} - 1 {\rm e} = 2^{\bf E} - 1:





\begin{align*} {\bf N}_{max} = \left(2^{2^{\bf E} - 1} - 1\right)2^{\bf M} + 2^{2^{\bf E} - 1}\left(2^{\bf M} - 1\right) =                              2^{2^{\bf E} + {\bf M}} - \left(2^{2^{\bf E} - 1} + 2^{\bf M}\right) \end{align*}
  • \widehat{n} : {\sf E}\widehat{n}(C_n) = n;





{\sf E}(C_n | C_{n - 1} = y). y \equiv -1 \pmod{2^{\bf M}}( ) y \not\equiv -1 \pmod{2^{\bf M}}.





\begin{align*}             y \not\equiv -1 \pmod{2^{\bf M}} &\implies             {\rm e}(y + 1) = {\rm e}(y), \quad {\rm m}(y + 1) = {\rm m}(y) + 1 \\             &\implies \widehat{n}(y + 1) = \widehat{n}(y) + 2^{{\rm e}(y)} \\             y \equiv -1 \pmod{2^{\bf M}} & \implies {\rm e}(y + 1) = {\rm e}(y) + 1,\\             &\phantom{\implies} \phantom{Z} {\rm m}(y) = 2^{\bf M} - 1, \quad {\rm m}(y + 1) = 0 \implies \\             \widehat{n}(y + 1) = \widehat{n}(y) &- 2^{{\rm e}(y)}\left(2^{\bf M} - 1\right) + 2^{{\rm e}(y) + {\bf M}} = \widehat{n}(y) + 2^{{\rm e}(y)}         \end{align*}





\begin{align*}             {\sf E}(2^{C_n} | C_{n-1} = y) &= \widehat{n}(y)\left(1 - 2^{-{\rm e}(y)}\right) + \left(\widehat{n}(y) + 2^{{\rm e}(y)}\right)2^{-{\rm e}(y)} = \widehat{n}(y) + 1.         \end{align*}
  • ( ) ! , ( ):





\begin{align*}         {\sf CV}\widehat{n}(C_n) \le 2^{-\frac{{\bf M} + 1}{2}}.     \end{align*}
. 3:
. 3:

.





, 4, , , . ,  2^{\bf M}. ( ),  2^{\bf M}.   .





.  . , , , . , :





\begin{align*}     n \sim \underbrace{         NegativeBinomial\left(2^{-{\rm e}(C)}, 2^{\bf M} - {\rm m}(C)\right)}_{\textit{}}         + \underbrace{\left(2^{\bf M} - {\rm m}(C)\right)}_{\textit{}}. \end{align*}

, , :





, 1 ( , 2^{\bf M} ).





\begin{align*}     \widehat{n}(C - 2^{\bf M}) = (2^{{\rm e}(C) - 1} - 1)2^{\bf M} + 2^{{\rm e}(C) - 1}{\rm m}(C) = 1/2\widehat{n}(C) - 2^{{\bf M} - 1}. \end{align*}

, {\bf M} > 0( ) 2^{{\bf M} - 1} \in \mathbb{N}. , 2^{{\bf M} - 1}:





7

— ! !





, ...





8

F : C \mapsto [0, 1]:





\begin{equation*}     C_{n+1} = \left\{\begin{array}{ll}          C_n + 1 & \textit{  $F(C_n)$} \\          C_n     & \textit{  $1 - F(C_n)$.}      \end{array}\right. \end{equation*}

F(C_n) = 2^{-C_n}. :





F(C_n) = 1 - \frac{C_n}{C_{max}},

C_{max}— , . , .





, .   n m x_1, \dots, x_m—  n  1:





\begin{align*}         \mathcal{H}_n(x_1, \dots x_m) =          \sum_{\begin{array}{cc}                 & 0 \le i_j \le n \\                 & i_1 + \dots + i_m = n             \end{array}} x_1^{i_1} \cdots x_m^{i_m}. \end{align*}

, , —  . — : \genfrac\{\}{0pt}{}{n}{m}n- m . :





\begin{align*}     \genfrac\{\}{0pt}{}{n+m}{m} = \mathcal{H}_m(1, 2, \dots, m). \end{align*}

\mathcal{H} , n m:





\begin{align*}     {\sf P}(C_n = m) = \left[\prod_{k=0}^{m-1} F(k) \right] \mathcal{H}_{n-m}(1-F(1), \dots 1-F(m)). \end{align*}

, : , , m, m : 0 1, 1 2 .., — \prod_{k=0}^{m-1} F(k), n - m, m: i_1 1 2, i_2 2 3 .., \mathcal{H}_{n-m}.





, F(C) = 1 - \frac{C}{C_{max}}





\begin{equation*}     {\sf P}(C_n = m) = \genfrac\{\}{0pt}{}{n}{m} \frac{C_{max}!}{(C_{max} - m)!C_{max}^n}. \end{equation*}

. , ( ). — .





 C_n( , ,  {\sf P} ):





\begin{align*}     {\sf E}C_n &= C_{max}\left(1 - \left(1 - \frac{1}{C_{max}}\right)^n\right) \\     {\sf D}C_n &= C_{max}^2\left(\frac{1}{C_{max}}\left(1 - \frac{1}{C_{max}}\right)^n + \left(1 - \frac{1}{C_{max}}\right)\left(1 - \frac{2}{C_{max}}\right)^n - \left(1 - \frac{1}{C_{max}}\right)^{2n}\right). \end{align*}

:





\begin{equation*}     \widehat{n}_{MM} := \frac{\ln \left(1 - \frac{C_n}{C_{max}}\right)}{\ln \left(1 - \frac{1}{C_{max}}\right)} \end{equation*}

, , 1 + 1/2 + \dots + 1/n \sim \ln n:





\begin{equation*}     \widehat{n} := C_{max}\ln \left(\frac{1}{1 - \frac{C_n}{C_{max}}}\right). \end{equation*}

[1] Morris, R.Counting large numbers of events in small registersCommunications of the ACM, 1978 21(10), 840-842.





[2] http://gregorygundersen.com/blog/2019/11/11/morris-algorithm/ − 





[3] https://habr.com/ru/company/qrator/blog/334354/ − EMA-





[4] https://habr.com/ru/post/208268/ −








All Articles