Skip to content

Commit c485250

Browse files
committed
pushing after approximate counting chapter
1 parent bfb72f0 commit c485250

File tree

66 files changed

+2370
-46
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2370
-46
lines changed

contents/IFS/IFS.html

+14-1
Large diffs are not rendered by default.

contents/affine_transformations/affine_transformations.html

+14-1
Large diffs are not rendered by default.

contents/approximate_counting/approximate_counting.html

+1,405
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
using Test
2+
3+
"""
    n(v, a)

Return the approximate count that corresponds to a register value.

# Arguments
- `v`: value in register
- `a`: a scaling value for the logarithm based on Morris's paper
"""
n(v, a) = a*((1+1/a)^v-1)
10+
11+
"""
    increment(v, a)

Return the next register value: either `v + 1` or `v` unchanged, chosen at random.

# Arguments
- `v`: value in register
- `a`: a scaling value for the logarithm based on Morris's paper
"""
function increment(v, a)
    # The reciprocal of the gap between consecutive approximate counts is the
    # probability of incrementing our counter.
    gap = n(v+1, a)-n(v, a)
    return rand() <= 1/gap ? v + 1 : v
end
25+
26+
"""
    approximate_count(n_items, a)

Simulate counting `n_items` items and return `n(v, a)`, the approximate count.

# Arguments
- `n_items`: number of items to count and loop over
- `a`: a scaling value for the logarithm based on Morris's paper
"""
function approximate_count(n_items, a)
    # Start from an empty register and feed it one `increment` per item.
    register = foldl((v, _) -> increment(v, a), 1:n_items; init=0)
    return n(register, a)
end
38+
39+
"""
    test_approximate_count(n_trials, n_items, a, threshold)

Run `n_trials` counting simulations of `n_items` items each and `@test` that the
mean approximate count is within `threshold` of the true count.

# Arguments
- `n_trials`: the number of counting trials
- `n_items`: the number of items to count to
- `a`: a scaling value for the logarithm based on Morris's paper
- `threshold`: the maximum relative error allowed, as a fraction (0.1 means 10%)

# Returns
The result of the `@test` comparison.
"""
function test_approximate_count(n_trials, n_items, a, threshold)
    samples = [approximate_count(n_items, a) for _ in 1:n_trials]

    avg = sum(samples)/n_trials

    # Compare the magnitude of the relative error: without `abs`, any
    # underestimate (negative error) would pass regardless of its size.
    @test abs((avg - n_items) / n_items) < threshold
end
52+
53+
@testset "Counting Tests, 100 trials" begin
    # Argument order is (n_trials, n_items, a, threshold); the threshold is a
    # fraction, so 0.1 is a 10% error. Passing the threshold first would make
    # `1:n_trials` an empty range and the checks would pass without running
    # a single trial.
    println("testing 1,000, a = 30, 10% error")
    test_approximate_count(100, 1000, 30, 0.1)
    println("testing 12,345, a = 10, 10% error")
    test_approximate_count(100, 12345, 10, 0.1)
    # Note: with a lower a, we need more trials, so a higher % error here.
    # These are statistical checks on random data, so a rare failure is possible.
    println("testing 222,222, a = 0.5, 20% error")
    test_approximate_count(100, 222222, 0.5, 0.2)
end
Loading
Loading
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
using DelimitedFiles
2+
3+
"""
    random_count(n, prob; a=30, prob_calc=false)

Simulate a probabilistic counter over `n` events and return a length-`n` vector
holding the approximate count after each event.

# Arguments
- `n`: number of events
- `prob`: probability of incrementing counter (replaced internally when
  `prob_calc` is `true`)

# Keywords
- `a`: scaling value used to build the base `1 + 1/a`
- `prob_calc`: when `true`, the increment probability is recomputed at every
  event from the current register value instead of staying fixed at `prob`
"""
function random_count(n, prob; a=30, prob_calc=false)
    base = 1+1/a
    output = zeros(n)

    # In prob_calc mode the first event is counted with probability 1 and the
    # register starts at 1; otherwise the caller's probability is used as-is.
    v = prob_calc ? 1 : 0
    p = prob_calc ? 1 : prob

    if rand() < p
        output[1] = 1/p
    end

    if prob_calc
        for i in 2:n
            # Probability is the reciprocal of the gap between the next
            # representable count and the previous recorded count.
            p = 1/((a*(base^(v+1)-1))-output[i-1])
            if rand() <= p
                v += 1
            end
            output[i] = a*(base^v-1)
        end
    else
        for i in 2:n
            # Fixed probability: each accepted event stands for 1/p real events.
            output[i] = rand() <= p ? output[i-1]+1/p : output[i-1]
        end
    end

    return output
end
38+
39+
"""
    multi_count(n, m, l, prob; a=30, prob_calc=false, file_mod="",
                stops=[100000, 500000, 1000000])

Run `m` independent `random_count` trials and write summary data files.

# Arguments
- `n`: number of events per trial
- `m`: number of counting trials
- `l`: number of saved trials (chosen at random, without repetition, from `1:m`)
- `prob`: probability of incrementing counter, forwarded to `random_count`

# Keywords
- `a`, `prob_calc`: forwarded to `random_count`
- `file_mod`: text inserted before `.dat` in every output file name
- `stops`: event indices at which the count of every trial is sampled; one
  histogram is produced per entry (any number of entries is supported)

# Files written
- `out<file_mod>.dat`: the `l` saved trials, one per column
- `extremes<file_mod>.dat`: per-event minimum (column 1) and maximum (column 2)
  over all trials
- `histogram_<i><file_mod>.dat`: histogram for the `i`-th entry of `stops`

# Returns
A vector with one histogram per entry of `stops`. Bin `k` of a histogram counts
the trials whose floored value equals `min + k - 1`, where `min` is the smallest
floored value seen at that stop.

# Throws
- `ArgumentError` if `l > m` (not enough distinct trials to save) or if an
  entry of `stops` lies outside `1:n`.
"""
function multi_count(n, m, l, prob; a=30, prob_calc=false, file_mod="",
                     stops = [100000, 500000, 1000000])
    # Fail early: with l > m the selection loop below could never terminate,
    # and an out-of-range stop would only fail after a full trial had run.
    l <= m || throw(ArgumentError("cannot save l = $l distinct trials out of m = $m"))
    all(s -> 1 <= s <= n, stops) ||
        throw(ArgumentError("every entry of stops must lie in 1:$n, got $stops"))

    out = zeros(n, l)
    extremes = zeros(n, 2)

    # Pick l distinct trial numbers; 0 (the initial fill) is never a valid trial.
    chosen_set = zeros(Int, l)
    for i in 1:l
        chosen_number = rand(1:m)
        while chosen_number in chosen_set
            chosen_number = rand(1:m)
        end
        chosen_set[i] = chosen_number
    end

    histograms = zeros(Float64, m, length(stops))

    out_count = 1
    for i in 1:m
        current_dist = random_count(n, prob; a=a, prob_calc=prob_calc)

        # Track the running per-event minimum and maximum across trials.
        if i == 1
            extremes[:, 1] .= current_dist
            extremes[:, 2] .= current_dist
        else
            @views extremes[:, 1] .= min.(extremes[:, 1], current_dist)
            @views extremes[:, 2] .= max.(extremes[:, 2], current_dist)
        end

        for (j, stop) in enumerate(stops)
            histograms[i, j] = current_dist[stop]
        end

        if i in chosen_set
            out[:, out_count] = current_dist
            out_count += 1
        end
    end

    # writedlm accepts a file name and takes care of opening/closing the file.
    writedlm("out"*file_mod*".dat", out)
    writedlm("extremes"*file_mod*".dat", extremes)

    # Going through the histogram data to put it into the right format:
    # one vector of bin counts per stop, sized to the observed value range.
    formatted_histograms = Vector{Vector{Float64}}(undef, length(stops))
    for j in axes(histograms, 2)
        column = view(histograms, :, j)
        lo = floor(Int, minimum(column))
        hi = floor(Int, maximum(column))

        println(lo, '\t', hi, '\t', sum(column)/m)

        bins = zeros(hi - lo + 1)
        for value in column
            bins[floor(Int, value) - lo + 1] += 1
        end

        formatted_histograms[j] = bins
    end

    # output histograms into different files for each one
    for (i, histogram) in enumerate(formatted_histograms)
        writedlm("histogram_" * string(i)*file_mod* ".dat", histogram)
    end

    return formatted_histograms
end
123+
72.4 KB
Loading
77.8 KB
Loading
39.8 KB
Loading
46.9 KB
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Plots one histogram data file as a thick line into a standalone epslatex
# figure. Three variants (output file, title, x-range/tics, data file) are kept
# side by side; exactly one of each group should be uncommented at a time.
set terminal epslatex standalone color

# Output file for the selected variant
#set output "histexp_1.tex"
#set output "histexp_2.tex"
set output "histexp_3.tex"

set size square

# Title for the selected variant
#set title "True count of 10,000"
#set title "True count of 500,000"
set title "True count of 1,000,000"

set xlabel ''
#set xrange [-255:9745]
#set xtics ("6000" -255, "10,000" 3745, "16,000" 9745)

#set xrange [352000:644000]
#set xrange[-1458:538542]
#set xtics ("320,000" -1458, "500,000" 178542, "860,000" 538542)

#set xrange [808000:1240000]
set xrange[-19374:1100626]
# Tick positions are data-file offsets; labels give the corresponding counts
# (ticks are 400,000 and 720,000 apart, matching 600,000 / 1,000,000 / 1,720,000).
set xtics ("600,000" -19374, "1,000,000" 380626, "1,720,000" 1100626)

#set ylabel 'Approximate count $\left( \times 10^{5} \right)$'
#set ytics ("0" 0, "2" 200000, "4" 400000, "6" 600000, "8" 800000, "10" 1000000)

# Data file for the selected variant
#plot "histogram_1exp.dat" w l lw 10 title ""
#plot "histogram_2exp.dat" w l lw 10 title ""
plot "histogram_3exp.dat" w l lw 10 title ""
31+
41.3 KB
Loading
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Plots one histogram data file as a thick line into a standalone epslatex
# figure. Three variants (output file, title, x-range/tics, data file) are kept
# side by side; exactly one of each group should be uncommented at a time.
set terminal epslatex standalone color

# Output file for the selected variant
#set output "hist_1.tex"
#set output "hist_2.tex"
set output "hist_3.tex"

set size square

# Title for the selected variant
#set title "True count of 10,000"
#set title "True count of 500,000"
set title "True count of 1,000,000"

#set xlabel ''
#set xrange [0:40000]
#set xtics ("0" 0, "20,000" 20000, "40,000" 40000)

#set xrange [352000:644000]
#set xrange[10000:310000]
#set xtics ("350,000" 10000, "500,000" 160000, "650,000" 310000)

#set xrange [808000:1240000]
set xrange[-10000:490000]
# Tick positions are data-file offsets; labels give the corresponding counts
# (ticks are 250,000 apart, matching 750,000 / 1,000,000 / 1,250,000).
set xtics ("750,000" -10000, "1,000,000" 240000, "1,250,000" 490000)

#set ylabel 'Approximate count $\left( \times 10^{5} \right)$'
#set ytics ("0" 0, "2" 200000, "4" 400000, "6" 600000, "8" 800000, "10" 1000000)

# Data file for the selected variant
#plot "histogram_1.dat" w l lw 10 title ""
#plot "histogram_2.dat" w l lw 10 title ""
plot "histogram_3.dat" w l lw 10 title ""
31+
51.7 KB
Loading
51.6 KB
Loading
41.8 KB
Loading
94.3 KB
Loading
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Draws a gray band between columns 1 and 2 of the extremes file, ten traces
# from the first ten columns of the out file, and the dotted line y = x, all
# into a standalone epslatex figure.
set terminal epslatex standalone color
#set terminal pdf enhanced
#set style fill transparent solid 0.3

# Output file: switch together with the plot command at the bottom
#set output "check.tex"
set output "checkexp.tex"
#set size square

set title "Approximate counting of 1,000,000 items"

# Axis labels are in units of 10^5; tick positions are raw values
set xlabel 'True number of items $\left( \times 10^{5} \right)$'
set xtics ("0" 0, "2" 200000, "4" 400000, "6" 600000, "8" 800000, "10" 1000000)

set ylabel 'Approximate count $\left( \times 10^{5} \right)$'
set ytics ("0" 0, "2" 200000, "4" 400000, "6" 600000, "8" 800000, "10" 1000000, "12" 1200000, "14" 1400000)

#plot "extremes.dat" u 0:1:2 w filledcu lc "gray" title "", for [i=1:10] "out.dat" u 0:i w l lw 3 title "", x w l lw 3 dt 3 lc "black" title ""
plot "extremesexp.dat" using 0:1:2 with filledcurves linecolor "gray" title "", \
     for [i=1:10] "outexp.dat" using 0:i with lines linewidth 3 title "", \
     x with lines linewidth 3 dashtype 3 linecolor "black" title ""
19+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Plots the contents of a_change.dat as a line into a standalone epslatex figure.
set terminal epslatex standalone color

set output "a_change.tex"
#set size square

set title '$n_v$ vs $a$ for $v=255$'

# Tick positions 0..20 are relabelled as 25..35
set xlabel '$a$'
set xtics ("25" 0, "27.5" 5, "30" 10, "32.5" 15, "35" 20)

# Tick labels are in units of 10^5
set ylabel '$n_v (\times 10^5)$'
set ytics ("0" 0, "1" 100000, "2" 200000, "3" 300000, "4" 400000, "5" 500000)

# NOTE(review): the loop variable i is not used in the plot spec, so the same
# data is drawn ten times -- confirm whether a single plot was intended.
plot for [i=1:10] "a_change.dat" with lines linewidth 3 title ""
15+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Plots the contents of deltas.dat as a line, with a logarithmic y axis, into a
# standalone epslatex figure.
set terminal epslatex standalone color

set output "deltas.tex"
#set size square

set title '$\Delta$ vs $v$'

set xlabel '$v$'

set ylabel '$\Delta$ (logscale)'
set logscale y

# NOTE(review): the loop variable i is not used in the plot spec, so the same
# data is drawn ten times -- confirm whether a single plot was intended.
plot for [i=1:10] "deltas.dat" with lines linewidth 3 title ""
14+

0 commit comments

Comments
 (0)