-
Notifications
You must be signed in to change notification settings - Fork 15
/
q1_database_statistics.py
53 lines (45 loc) · 2.68 KB
/
q1_database_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
'''
Dumps out some statistics for the fonts.zip database and
makes plots, saved to files, of the characters for each fontVariant found in
the database.
Created on Jul 25, 2016
@author: richard
'''
import ocr_utils
import numpy as np
# Ask ocr_utils for the 'font' column; the loop below iterates over the result.
# NOTE(review): an empty tuple as a filter value presumably means "return this
# column without restricting it" -- confirm against ocr_utils.get_list.
df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

# Running collections of the distinct values seen so far, one per column.
# Only their lengths are reported in the final summary.
unique_fonts = []
unique_fontVariants = []
unique_m_labels = []
unique_strengths = []
unique_italics = []
unique_orientations = []

#############################################################################
# For every font: tally the distinct values of each column, print a per-font
# summary, then read and show the character images of each font variant
# (only the character label and the image are requested).
for font in df1:
    font_rows = ocr_utils.get_list(
        input_filters_dict={
            'font': font,
            'fontVariant': (),
            'm_label': (),
            'strength': (),
            'italic': (),
            'orientation': (),
        }
    )

    # Distinct values of each column for this font only.
    variants = np.unique(font_rows['fontVariant'])
    labels_here = np.unique(font_rows['m_label'])
    strengths_here = np.unique(font_rows['strength'])
    italics_here = np.unique(font_rows['italic'])
    orientations_here = np.unique(font_rows['orientation'])

    # Fold this font's values into the running collections; union1d yields
    # the sorted, de-duplicated combination of both inputs.
    unique_fonts = np.union1d(unique_fonts, font_rows['font'])
    unique_fontVariants = np.union1d(unique_fontVariants, variants)
    unique_m_labels = np.union1d(unique_m_labels, labels_here)
    unique_strengths = np.union1d(unique_strengths, strengths_here)
    unique_italics = np.union1d(unique_italics, italics_here)
    unique_orientations = np.union1d(unique_orientations, orientations_here)

    per_font_counts = (
        len(variants),
        len(labels_here),
        len(strengths_here),
        len(italics_here),
        len(orientations_here),
    )
    print('\n{}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}\n'.format(font[0], *per_font_counts))

    for fontVariant in variants:
        variant_filter = {'font': font, 'fontVariant': fontVariant}
        dataset = ocr_utils.read_data(
            input_filters_dict=variant_filter,
            output_feature_list=['m_label', 'image'],
            dtype=np.int32,
        )
        # Features come back in the order requested above: label, then image.
        labels, images = dataset.train.features

        # Reshape to (samples, num_rows, num_columns) before display.
        # NOTE(review): presumably each row of `images` is one flattened
        # character image -- confirm against ocr_utils.read_data.
        stacked_shape = (
            images.shape[0],
            dataset.train.num_rows,
            dataset.train.num_columns,
        )
        images_2d = np.reshape(images, stacked_shape)

        plot_title = '{}-{}'.format(font[0], fontVariant)
        ocr_utils.show_examples(images_2d, labels, title=plot_title)

# Database-wide summary: number of distinct values found for each column.
overall_counts = (
    len(unique_fonts),
    len(unique_fontVariants),
    len(unique_m_labels),
    len(unique_strengths),
    len(unique_italics),
    len(unique_orientations),
)
print('unique fonts={}, fontVariants={}, labels = {}, strengths = {}, italics = {}, orientations = {}'.format(*overall_counts))

print('\n########################### No Errors ####################################')