2
2
import pandas as pd
3
3
import pyerrors as pe
4
4
import pytest
5
+ import warnings
6
+
5
7
6
8
def test_df_export_import (tmp_path ):
7
9
my_dict = {"int" : 1 ,
8
- "float" : - 0.01 ,
9
- "Obs1" : pe .pseudo_Obs (87 , 21 , "test_ensemble" ),
10
- "Obs2" : pe .pseudo_Obs (- 87 , 21 , "test_ensemble2" )}
10
+ "float" : - 0.01 ,
11
+ "Obs1" : pe .pseudo_Obs (87 , 21 , "test_ensemble" ),
12
+ "Obs2" : pe .pseudo_Obs (- 87 , 21 , "test_ensemble2" )}
11
13
for gz in [True , False ]:
12
14
my_df = pd .DataFrame ([my_dict ] * 10 )
13
15
@@ -18,13 +20,166 @@ def test_df_export_import(tmp_path):
18
20
pe .input .pandas .load_df ((tmp_path / 'df_output.csv' ).as_posix (), gz = gz )
19
21
20
22
23
def test_null_first_line_df_export_import(tmp_path):
    """Round-trip a frame with None in rows 0 and 2 of "Obs1" via dump_df/load_df.

    The frame is written and re-read once with gz=True and once with
    gz=False. After each round trip the None entries must still be None and
    the untouched rows 1 and 3 must compare equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    for idx in (0, 2):
        frame.loc[idx, "Obs1"] = None

    target = (tmp_path / 'df_output').as_posix()
    for gz in (True, False):
        pe.input.pandas.dump_df(frame, target, gz=gz)
        restored = pe.input.pandas.load_df(target, auto_gamma=True, gz=gz)
        # Rows that were nulled out must come back as None.
        for idx in (0, 2):
            assert restored.loc[idx, "Obs1"] is None
        # Rows that were left alone must survive unchanged.
        for idx in (1, 3):
            assert np.all(restored.loc[idx] == frame.loc[idx])
38
+
39
+
40
def test_nan_df_export_import(tmp_path):
    """Round-trip a frame with a nan in the "int" column via dump_df/load_df.

    For gz=True and gz=False the dump/load round trip is expected to emit a
    UserWarning announcing that the nan is replaced by None. The reloaded
    entry must then be None, and the "float", "Obs1" and "Obs2" columns must
    compare equal to the originals.
    """
    my_dict = {"int": 1,
               "float": -0.01,
               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    my_df = pd.DataFrame([my_dict] * 4)
    my_df.loc[1, "int"] = np.nan
    fname = (tmp_path / 'df_output').as_posix()

    for gz in [True, False]:
        # The warning has to be raised by the library during the round trip.
        # Emitting it from the test itself inside pytest.warns would make the
        # check pass unconditionally.
        # NOTE(review): assumes dump_df or load_df issues this warning --
        # confirm against pyerrors.input.pandas.
        with pytest.warns(UserWarning, match="nan value in column int will be replaced by None"):
            pe.input.pandas.dump_df(my_df, fname, gz=gz)
            reconstructed_df = pe.input.pandas.load_df(fname, auto_gamma=True, gz=gz)
        assert reconstructed_df.loc[1, "int"] is None
        assert np.all(reconstructed_df.loc[:, "float"] == my_df.loc[:, "float"])
        assert np.all(reconstructed_df.loc[:, "Obs1"] == my_df.loc[:, "Obs1"])
        assert np.all(reconstructed_df.loc[:, "Obs2"] == my_df.loc[:, "Obs2"])
57
+
58
+
59
def test_null_second_line_df_export_import(tmp_path):
    """Round-trip a frame with None in row 1 of "Obs1" via dump_df/load_df.

    Runs once with gz=True and once with gz=False. The None entry must be
    preserved, and row 0 as well as rows 2 onward must compare equal to the
    originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None

    target = (tmp_path / 'df_output').as_posix()
    for gz in (True, False):
        pe.input.pandas.dump_df(frame, target, gz=gz)
        restored = pe.input.pandas.load_df(target, auto_gamma=True, gz=gz)
        assert restored.loc[1, "Obs1"] is None
        # Everything around the nulled row has to be untouched.
        head_matches = np.all(restored.loc[0] == frame.loc[0])
        assert head_matches
        tail_matches = np.all(restored.loc[2:] == frame.loc[2:])
        assert tail_matches
72
+
73
+
74
def test_null_first_line_df_gzsql_export_import(tmp_path):
    """Round-trip a frame with None in rows 0 and 2 of "Obs1" via to_sql(gz=True)/read_sql.

    The None entries must be read back as None and rows 1 and 3 must compare
    equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    for idx in (0, 2):
        frame.loc[idx, "Obs1"] = None

    database = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', database, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', database, auto_gamma=True)

    for idx in (0, 2):
        assert restored.loc[idx, "Obs1"] is None
    for idx in (1, 3):
        assert np.all(restored.loc[idx] == frame.loc[idx])
90
+
91
+
92
def test_null_second_line_df_gzsql_export_import(tmp_path):
    """Round-trip a frame with None in row 1 of "Obs1" via to_sql(gz=True)/read_sql.

    The None entry must be read back as None; row 0 and rows 2 onward must
    compare equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None

    database = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', database, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', database, auto_gamma=True)

    assert restored.loc[1, "Obs1"] is None
    head_matches = np.all(restored.loc[0] == frame.loc[0])
    assert head_matches
    tail_matches = np.all(restored.loc[2:] == frame.loc[2:])
    assert tail_matches
106
+
107
+
108
def test_null_first_line_df_sql_export_import(tmp_path):
    """Round-trip a frame with None in rows 0 and 2 of "Obs1" via to_sql(gz=False)/read_sql.

    The None entries must be read back as None and rows 1 and 3 must compare
    equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    for idx in (0, 2):
        frame.loc[idx, "Obs1"] = None

    database = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', database, gz=False)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', database, auto_gamma=True)

    for idx in (0, 2):
        assert restored.loc[idx, "Obs1"] is None
    for idx in (1, 3):
        assert np.all(restored.loc[idx] == frame.loc[idx])
124
+
125
+
126
def test_nan_sql_export_import(tmp_path):
    """Round-trip a frame with a nan in the "int" column via to_sql(gz=False)/read_sql.

    The reloaded entry must still be nan, and the "float", "Obs1" and "Obs2"
    columns must compare equal to the originals.
    """
    my_dict = {"int": 1,
               "float": -0.01,
               "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
               "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2")}
    my_df = pd.DataFrame([my_dict] * 4)
    my_df.loc[1, "int"] = np.nan
    db_path = (tmp_path / 'test.db').as_posix()

    pe.input.pandas.to_sql(my_df, 'test', db_path, gz=False)
    reconstructed_df = pe.input.pandas.read_sql('SELECT * FROM test', db_path, auto_gamma=True)

    # No warning check here. The former pytest.warns block raised the warning
    # from the test itself, so it could never fail, and its "replaced by None"
    # message contradicted the nan assertion below.
    assert np.isnan(reconstructed_df.loc[1, "int"])
    assert np.all(reconstructed_df.loc[:, "float"] == my_df.loc[:, "float"])
    assert np.all(reconstructed_df.loc[:, "Obs1"] == my_df.loc[:, "Obs1"])
    assert np.all(reconstructed_df.loc[:, "Obs2"] == my_df.loc[:, "Obs2"])
142
+
143
+
144
def test_nan_gzsql_export_import(tmp_path):
    """Round-trip a frame with a nan in the "int" column via to_sql(gz=True)/read_sql.

    The reloaded entry must still be nan, and the "float", "Obs1" and "Obs2"
    columns must compare equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "int"] = np.nan

    database = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', database, gz=True)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', database, auto_gamma=True)

    assert np.isnan(restored.loc[1, "int"])
    # The columns that never held a nan must be unchanged.
    for column in ("float", "Obs1", "Obs2"):
        assert np.all(restored.loc[:, column] == frame.loc[:, column])
158
+
159
+
160
def test_null_second_line_df_sql_export_import(tmp_path):
    """Round-trip a frame with None in row 1 of "Obs1" via to_sql(gz=False)/read_sql.

    The None entry must be read back as None; row 0 and rows 2 onward must
    compare equal to the originals.
    """
    row = {
        "int": 1,
        "float": -0.01,
        "Obs1": pe.pseudo_Obs(87, 21, "test_ensemble"),
        "Obs2": pe.pseudo_Obs(-87, 21, "test_ensemble2"),
    }
    frame = pd.DataFrame([row] * 4)
    frame.loc[1, "Obs1"] = None

    database = (tmp_path / 'test.db').as_posix()
    pe.input.pandas.to_sql(frame, 'test', database, gz=False)
    restored = pe.input.pandas.read_sql('SELECT * FROM test', database, auto_gamma=True)

    assert restored.loc[1, "Obs1"] is None
    head_matches = np.all(restored.loc[0] == frame.loc[0])
    assert head_matches
    tail_matches = np.all(restored.loc[2:] == frame.loc[2:])
    assert tail_matches
174
+
175
+
21
176
def test_df_Corr (tmp_path ):
22
177
23
178
my_corr = pe .Corr ([pe .pseudo_Obs (- 0.48 , 0.04 , "test" ), pe .pseudo_Obs (- 0.154 , 0.03 , "test" )])
24
179
25
180
my_dict = {"int" : 1 ,
26
- "float" : - 0.01 ,
27
- "Corr" : my_corr }
181
+ "float" : - 0.01 ,
182
+ "Corr" : my_corr }
28
183
my_df = pd .DataFrame ([my_dict ] * 5 )
29
184
30
185
pe .input .pandas .dump_df (my_df , (tmp_path / 'df_output' ).as_posix ())
@@ -76,8 +231,8 @@ def test_sql_if_exists_fail(tmp_path):
76
231
77
232
def test_Obs_list_sql (tmp_path ):
78
233
my_dict = {"int" : 1 ,
79
- "Obs1" : pe .pseudo_Obs (17 , 11 , "test_sql_if_exists_failnsemble" ),
80
- "Obs_list" : [[pe .pseudo_Obs (0.0 , 0.1 , "test_ensemble2" ), pe .pseudo_Obs (3.2 , 1.1 , "test_ensemble2" )]]}
234
+ "Obs1" : pe .pseudo_Obs (17 , 11 , "test_sql_if_exists_failnsemble" ),
235
+ "Obs_list" : [[pe .pseudo_Obs (0.0 , 0.1 , "test_ensemble2" ), pe .pseudo_Obs (3.2 , 1.1 , "test_ensemble2" )]]}
81
236
pe_df = pd .DataFrame (my_dict )
82
237
my_db = (tmp_path / "test_db.sqlite" ).as_posix ()
83
238
pe .input .pandas .to_sql (pe_df , "My_table" , my_db )
0 commit comments