From bdb22fe113fc2afaf697452ad4f8f02c6d537e2a Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 16:10:47 +0300 Subject: [PATCH 01/27] docstring for hr_to_mr function --- pydatarecognition/utils.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 54cf8a4..05704ed 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -213,4 +213,25 @@ def get_formatted_crossref_reference(doi): return ref, ref_date + +def hr_to_mr(number_esd): + ''' + splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and + estimated standard deviations (e.g. 343.44 and 0.45). + + Parameters + ---------- + number_esd array_like + array-like object that contains numbers with their estimated standard deviations as strings + in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] + + Returns + ------- + number numpy array + array with the numbers as floats + + esd numpy array + array with estimated standard deviations as floats + + ''' # End of file. From 2cbf60c9324ee5dba5cb9ffaf898d9d2d03239f7 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 16:39:19 +0300 Subject: [PATCH 02/27] test for hr_to_mr function --- tests/test_utils.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index caef472..5d19eb6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,8 @@ import pytest from datetime import date from habanero import Crossref -from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference +from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference, \ + hr_to_mr def test_data_sample(): test_cif_data = [[10.0413, 10.0913, 10.1413, 10.1913], @@ -69,4 +70,12 @@ def mockreturn(*args, **kwargs): actual = get_formatted_crossref_reference("test") assert actual == expected + +def test_hr_to_mr(): + number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] + actual = hr_to_mr(number_esd) + expected = np.array([343.44, 324908.435, 0.0783, 11, 51]), np.array([0.45, 0.067, 0.0001, 1, 13]) + assert np.allclose(actual[0], expected[0]) + assert np.allclose(actual[1], expected[1]) + # End of file. From f5b5f87a66c2f2cb3669062fcb8d1d61500af750 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 16:56:59 +0300 Subject: [PATCH 03/27] hr_to_mr function added to utils.py --- pydatarecognition/utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 05704ed..8a2e8f4 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -234,4 +234,19 @@ def hr_to_mr(number_esd): array with estimated standard deviations as floats ''' + number_esd = np.array(number_esd, dtype='str') + number = np.char.split(number_esd, sep="(") + esd = np.array([e[1].split(")")[0] for e in number], dtype='float') + number = np.array([e[0] for e in number], dtype='str') + esd_oom = [] + for i in range(len(number)): + if len(number[i].split(".")) == 1: + esd_oom.append(1) + else: + esd_oom.append(10**-len(number[i].split(".")[1])) + esd_oom = np.array(esd_oom, dtype='float') + number, esd = np.array(number, dtype='float'), np.array(esd * esd_oom, dtype='float') + + return number, esd + # End of file. From 8ae23f6fefdaf13e4a90ca68eb7bf093bb5ebeb9 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 17:11:56 +0300 Subject: [PATCH 04/27] docstring for mr_to_hr function --- pydatarecognition/utils.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 8a2e8f4..fb0f427 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -249,4 +249,26 @@ def hr_to_mr(number_esd): return number, esd + +def mr_to_hr(number, esd): + ''' + merges machine readable numbers and estimated standard deviations (e.g. 343.44 and 0.45) into human readable + numbers with estimated standard deviations (e.g. 343.44(45)). + + Parameters + ---------- + number array_like + array-like object that contains numbers + + esd array_like + array-like object that contains estimated standard deviations + + Returns + ------- + number_esd numpy array + numpy array that contains numbers (e.g. 343.44) with estimated standard deviations (e.g. 0.45) as strings + in the following format: "343.44(45)" + + ''' + # End of file. From c574b3269d2b6af77315b15a58469c7f64582095 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 17:19:44 +0300 Subject: [PATCH 05/27] test for mr_to_hr function --- tests/test_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 5d19eb6..ea7ccf4 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,7 @@ from datetime import date from habanero import Crossref from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference, \ - hr_to_mr + hr_to_mr, mr_to_hr def test_data_sample(): test_cif_data = [[10.0413, 10.0913, 10.1413, 10.1913], @@ -78,4 +78,11 @@ def test_hr_to_mr(): assert np.allclose(actual[0], expected[0]) assert np.allclose(actual[1], expected[1]) + +def test_mr_to_hr(): + number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] + actual = mr_to_hr(number, esd) + expected = np.array(["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"], dtype='str') + assert np.allclose(actual, expected) + # End of file. From c90c4f0fc23bee1b92ef8f237a15f5120f3edcee Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 9 Sep 2021 17:33:51 +0300 Subject: [PATCH 06/27] mr_to_hr function added to utils.py and testfor mr_to_hr edited, test passes --- pydatarecognition/utils.py | 13 +++++++++++++ tests/test_utils.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index fb0f427..13809be 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -270,5 +270,18 @@ def mr_to_hr(number, esd): in the following format: "343.44(45)" ''' + number, esd = np.array(number, dtype='float').astype('str'), np.array(esd, dtype='float').astype('str') + number_hr, esd_hr = [], [] + for i in range(len(number)): + if number[i].split(".")[1] == "0": + number_hr.append(number[i].split(".")[0]) + esd_hr.append(esd[i].split(".")[0]) + else: + number_hr.append(number[i]) + esd_hr.append(int(esd[i].split(".")[1])) + number, esd = np.array(number_hr, dtype='str'), np.array(esd_hr, dtype='str') + number_esd = np.array([f'{number[i]}({esd[i]})' for i in range(len(esd))]) + + return number_esd # End of file. diff --git a/tests/test_utils.py b/tests/test_utils.py index ea7ccf4..14512ce 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -83,6 +83,6 @@ def test_mr_to_hr(): number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] actual = mr_to_hr(number, esd) expected = np.array(["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"], dtype='str') - assert np.allclose(actual, expected) + assert np.array_equal(actual, expected) # End of file. From e2d9d8e2a1c831a1eeeba3106158f1f3cbd76fbf Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Mon, 13 Sep 2021 17:22:20 +0300 Subject: [PATCH 07/27] renamed functions and introduced numpy conventions to the docstrings --- pydatarecognition/utils.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 13809be..ccae215 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -214,24 +214,24 @@ def get_formatted_crossref_reference(doi): return ref, ref_date -def hr_to_mr(number_esd): +def hr_to_mr_number_and_esd(number_esd): ''' splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and estimated standard deviations (e.g. 343.44 and 0.45). Parameters ---------- - number_esd array_like - array-like object that contains numbers with their estimated standard deviations as strings + number_esd : array_like + The array-like object that contains numbers with their estimated standard deviations as strings in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] Returns ------- - number numpy array - array with the numbers as floats + number : numpy array + The array with the numbers as floats - esd numpy array - array with estimated standard deviations as floats + esd : numpy array + The array with estimated standard deviations as floats ''' number_esd = np.array(number_esd, dtype='str') @@ -250,23 +250,23 @@ def hr_to_mr(number_esd): return number, esd -def mr_to_hr(number, esd): +def mr_to_hr_number_and_esd(number, esd): ''' merges machine readable numbers and estimated standard deviations (e.g. 343.44 and 0.45) into human readable numbers with estimated standard deviations (e.g. 343.44(45)). Parameters ---------- - number array_like - array-like object that contains numbers + number : array_like + The array-like object that contains numbers - esd array_like - array-like object that contains estimated standard deviations + esd : array_like + The array-like object that contains estimated standard deviations Returns ------- - number_esd numpy array - numpy array that contains numbers (e.g. 343.44) with estimated standard deviations (e.g. 0.45) as strings + number_esd : numpy array + The numpy array that contains numbers (e.g. 343.44) with estimated standard deviations (e.g. 0.45) as strings in the following format: "343.44(45)" ''' From f2600bd9734f97d3c0fae0ad6d35f78bae78a237 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Mon, 13 Sep 2021 18:02:55 +0300 Subject: [PATCH 08/27] edited the parameters and returns in docstrings of both functions --- pydatarecognition/utils.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index ccae215..d8ee548 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -221,9 +221,11 @@ def hr_to_mr_number_and_esd(number_esd): Parameters ---------- - number_esd : array_like + number_esd : array_like or string The array-like object that contains numbers with their estimated standard deviations as strings - in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] + in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] or + The string that contains numbers with their estimated standard deviations separated by new line characters + in the following format: "343.44(45)\n324908.435(67)\n0.0783(1)" Returns ------- @@ -257,17 +259,21 @@ def mr_to_hr_number_and_esd(number, esd): Parameters ---------- - number : array_like - The array-like object that contains numbers + number : array_like or string + The array-like object that contains numbers in the following format: [343.44, 324908.435, 0.0783] or + The string that contains numbers in the following format: "343.44\n324908.435\n0.0783" - esd : array_like - The array-like object that contains estimated standard deviations + esd : array_like or string + The array-like object that contains estimated standard deviations in the following format: + [0.45, 0.067, 0.0001] + The string that contains estimated standard deviations in the following format: + "0.45\n0.067\n0.0001" Returns ------- - number_esd : numpy array - The numpy array that contains numbers (e.g. 343.44) with estimated standard deviations (e.g. 0.45) as strings - in the following format: "343.44(45)" + number_esd : list + The list of strings that contains the rounded numbers with estimated standard deviations + in the following format: ["343.4(5)", "324908.44(7)", "0.0783(1)" ] ''' number, esd = np.array(number, dtype='float').astype('str'), np.array(esd, dtype='float').astype('str') From 72b2687d3edc6e3117f2a628f312de1106f62985 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Mon, 13 Sep 2021 18:44:27 +0300 Subject: [PATCH 09/27] altered hr_to_mr_number_and_esd function and test to accept string input as well, test passes --- pydatarecognition/utils.py | 13 +++++++++---- tests/test_utils.py | 16 +++++++++++----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index d8ee548..de2a37b 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -236,10 +236,14 @@ def hr_to_mr_number_and_esd(number_esd): The array with estimated standard deviations as floats ''' - number_esd = np.array(number_esd, dtype='str') - number = np.char.split(number_esd, sep="(") - esd = np.array([e[1].split(")")[0] for e in number], dtype='float') - number = np.array([e[0] for e in number], dtype='str') + if isinstance(number_esd, str): + number_esd = number_esd.split("\n") + number = [e.split("(")[0] for e in number_esd] + # number_esd = np.array(number_esd, dtype='str') + # number = np.char.split(number_esd, sep="(") + esd = np.array([e.split("(")[1].split(")")[0] for e in number_esd], dtype="float") + # esd = np.array([e[1].split(")")[0] for e in number], dtype='float') + # number = np.array([e[0] for e in number], dtype='str') esd_oom = [] for i in range(len(number)): if len(number[i].split(".")) == 1: @@ -247,6 +251,7 @@ def hr_to_mr_number_and_esd(number_esd): else: esd_oom.append(10**-len(number[i].split(".")[1])) esd_oom = np.array(esd_oom, dtype='float') + print(esd, esd_oom) number, esd = np.array(number, dtype='float'), np.array(esd * esd_oom, dtype='float') return number, esd diff --git a/tests/test_utils.py b/tests/test_utils.py index 14512ce..c9b8ff7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,8 @@ from datetime import date from habanero import Crossref from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference, \ - hr_to_mr, mr_to_hr + hr_to_mr_number_and_esd, mr_to_hr_number_and_esd + def test_data_sample(): test_cif_data = [[10.0413, 10.0913, 10.1413, 10.1913], @@ -71,17 +72,22 @@ def mockreturn(*args, **kwargs): assert actual == expected -def test_hr_to_mr(): +def test_hr_to_mr_number_and_esd(): number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] - actual = hr_to_mr(number_esd) + actual = hr_to_mr_number_and_esd(number_esd) expected = np.array([343.44, 324908.435, 0.0783, 11, 51]), np.array([0.45, 0.067, 0.0001, 1, 13]) assert np.allclose(actual[0], expected[0]) assert np.allclose(actual[1], expected[1]) + number_esd = "343.44(45)\n324908.435(67)\n0.0783(1)\n11(1)\n51(13)" + actual = hr_to_mr_number_and_esd(number_esd) + assert np.allclose(actual[0], expected[0]) + assert np.allclose(actual[1], expected[1]) + -def test_mr_to_hr(): +def test_mr_to_hr_number_and_esd(): number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] - actual = mr_to_hr(number, esd) + actual = mr_to_hr_number_and_esd(number, esd) expected = np.array(["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"], dtype='str') assert np.array_equal(actual, expected) From 4aa333e3c48cdbb0c742172087a936361b047e31 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 14 Sep 2021 15:49:05 +0300 Subject: [PATCH 10/27] edited hr_to_mr function and test --- pydatarecognition/utils.py | 17 ++++------------- tests/test_utils.py | 5 ----- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index de2a37b..bdbb0ac 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -221,29 +221,21 @@ def hr_to_mr_number_and_esd(number_esd): Parameters ---------- - number_esd : array_like or string + number_esd : array_like The array-like object that contains numbers with their estimated standard deviations as strings - in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] or - The string that contains numbers with their estimated standard deviations separated by new line characters - in the following format: "343.44(45)\n324908.435(67)\n0.0783(1)" + in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] Returns ------- - number : numpy array + numpy array The array with the numbers as floats - esd : numpy array + numpy array The array with estimated standard deviations as floats ''' - if isinstance(number_esd, str): - number_esd = number_esd.split("\n") number = [e.split("(")[0] for e in number_esd] - # number_esd = np.array(number_esd, dtype='str') - # number = np.char.split(number_esd, sep="(") esd = np.array([e.split("(")[1].split(")")[0] for e in number_esd], dtype="float") - # esd = np.array([e[1].split(")")[0] for e in number], dtype='float') - # number = np.array([e[0] for e in number], dtype='str') esd_oom = [] for i in range(len(number)): if len(number[i].split(".")) == 1: @@ -251,7 +243,6 @@ def hr_to_mr_number_and_esd(number_esd): else: esd_oom.append(10**-len(number[i].split(".")[1])) esd_oom = np.array(esd_oom, dtype='float') - print(esd, esd_oom) number, esd = np.array(number, dtype='float'), np.array(esd * esd_oom, dtype='float') return number, esd diff --git a/tests/test_utils.py b/tests/test_utils.py index c9b8ff7..22116b9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -78,11 +78,6 @@ def test_hr_to_mr_number_and_esd(): expected = np.array([343.44, 324908.435, 0.0783, 11, 51]), np.array([0.45, 0.067, 0.0001, 1, 13]) assert np.allclose(actual[0], expected[0]) assert np.allclose(actual[1], expected[1]) - number_esd = "343.44(45)\n324908.435(67)\n0.0783(1)\n11(1)\n51(13)" - actual = hr_to_mr_number_and_esd(number_esd) - assert np.allclose(actual[0], expected[0]) - assert np.allclose(actual[1], expected[1]) - def test_mr_to_hr_number_and_esd(): From 11eb646000128f629fe5345ab18b9dc985e65faf Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 14 Sep 2021 16:44:44 +0300 Subject: [PATCH 11/27] edited mr_to_hr function and test --- pydatarecognition/utils.py | 21 +++++++++------------ tests/test_utils.py | 4 ++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index bdbb0ac..e543aad 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -255,34 +255,31 @@ def mr_to_hr_number_and_esd(number, esd): Parameters ---------- - number : array_like or string + number : array_like The array-like object that contains numbers in the following format: [343.44, 324908.435, 0.0783] or - The string that contains numbers in the following format: "343.44\n324908.435\n0.0783" - esd : array_like or string + esd : array_like The array-like object that contains estimated standard deviations in the following format: [0.45, 0.067, 0.0001] - The string that contains estimated standard deviations in the following format: - "0.45\n0.067\n0.0001" Returns ------- - number_esd : list + list The list of strings that contains the rounded numbers with estimated standard deviations in the following format: ["343.4(5)", "324908.44(7)", "0.0783(1)" ] ''' - number, esd = np.array(number, dtype='float').astype('str'), np.array(esd, dtype='float').astype('str') + #number, esd = np.array(number, dtype='float').astype('str'), np.array(esd, dtype='float').astype('str') + number, esd = [str(e) for e in number], [str(e) for e in esd] number_hr, esd_hr = [], [] for i in range(len(number)): - if number[i].split(".")[1] == "0": - number_hr.append(number[i].split(".")[0]) + if len(number[i].split(".")) == 1: + number_hr.append(number[i]) esd_hr.append(esd[i].split(".")[0]) else: number_hr.append(number[i]) - esd_hr.append(int(esd[i].split(".")[1])) - number, esd = np.array(number_hr, dtype='str'), np.array(esd_hr, dtype='str') - number_esd = np.array([f'{number[i]}({esd[i]})' for i in range(len(esd))]) + esd_hr.append(str(int(esd[i].split(".")[1]))) + number_esd = [f'{number_hr[i]}({esd_hr[i]})' for i in range(len(esd_hr))] return number_esd diff --git a/tests/test_utils.py b/tests/test_utils.py index 22116b9..2ec0953 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -83,7 +83,7 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] actual = mr_to_hr_number_and_esd(number, esd) - expected = np.array(["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"], dtype='str') - assert np.array_equal(actual, expected) + expected = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] + assert actual == expected # End of file. From 259bbf1d4a1f99395578246202e4ad7aae12e2b1 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 16:58:18 +0300 Subject: [PATCH 12/27] docstring for the (new) function round_number_esd --- pydatarecognition/utils.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index e543aad..ac236e8 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -283,4 +283,28 @@ def mr_to_hr_number_and_esd(number, esd): return number_esd +def round_number_esd(number, esd): + ''' + Rounds each element in number and each element in esd (estimated standard deviation) arrays. + + Esd is rounded to one significant figure if esd > 1.44E**x.(NB: 1.45E**x will become 2E**x) + If esd <= 1.44E**x the esd is rounded to two significant figures. + Number is rounded to the order of the rounded esd. + + Parameters + ---------- + number : array-like + The array containing numbers to be rounded. + esd : array-like + The array containing esds to be rounded. + + Returns + ------- + list + The list containing rounded numbers as floats and/or integers. + list + The list containing rounded esds as floats and/or integers. + + ''' + # End of file. From 8b05b713995bb23560fc09de0567740467a798d8 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 17:17:04 +0300 Subject: [PATCH 13/27] added test_round_number_esd function --- tests/test_utils.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 2ec0953..a03f651 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,7 @@ from datetime import date from habanero import Crossref from pydatarecognition.utils import data_sample, pearson_correlate, xy_resample, get_formatted_crossref_reference, \ - hr_to_mr_number_and_esd, mr_to_hr_number_and_esd + hr_to_mr_number_and_esd, mr_to_hr_number_and_esd, round_number_esd def test_data_sample(): @@ -86,4 +86,19 @@ def test_mr_to_hr_number_and_esd(): expected = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] assert actual == expected + +def test_round_number_esd(): + number = [123.45, 123.3, 123.35, 123.31, 123.12, 132.124, 19, 123, 145, 1234, + 1.99, 1, 2.145, 2.146, 2.144, 10, 11, 10.6] + esd = [0.45, 0.46, 0.3, 0.213, 0.125, 0.145, 2, 21, 14.5, 145, + 0.99, 1, 0.145, 0.146, 0.144, 100, 111, 1.72] + number_exp = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, + 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] + esd_exp = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, + 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] + actual = round_number_esd(number, esd) + expected = (number_exp, esd_exp) + assert actual == expected + + # End of file. From 67a0b4af39ed83fa712ed055f6c0f977a7943820 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 17:19:18 +0300 Subject: [PATCH 14/27] added round_number_esd function to utils.py, test passes --- pydatarecognition/utils.py | 68 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index ac236e8..1f53ab4 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -306,5 +306,73 @@ def round_number_esd(number, esd): The list containing rounded esds as floats and/or integers. ''' + # Empty lists to append to. + number_rounded, esd_rounded = [], [] + + # Loop through all elements in number (and esd) arrays. + for i in range(len(number)): + + # Getting the value and value error to be rounded. + val, val_err = number[i], esd[i] + + # Turn val_err into scientific notation. + val_err_sci = f"{val_err:.5E}" + + # Inspect first significant figure of val_err_sci. + # If the first significant figure is 1, we need to inspect the next. + # Else, we can set the number of significant figures to 1. + if int(val_err_sci[0]) == 1: + + # Inspect the second significant figure. + # Take care of cases where we always want 2 significant figures: + # val_err < 1.4E**x + if int(val_err_sci[2]) < 4: + sig_figs = 2 + + # Inspect the edge case of the second significant figure. + # Make sure that we round up, if val_err >= 1.45E**x, + # and set the number of significant figures to 1. + # Else round down (that is, if val_err < 1.45E**x). + # and the set number of significant figures to 2. + elif int(val_err_sci[2]) == 4: + if int(val_err_sci[3]) >= 5: + val_err_sci= f"{val_err_sci[0:2]}5{val_err_sci[3::]}" + val_err = float(val_err_sci) + sig_figs = 1 + else: + sig_figs = 2 + else: + sig_figs = 1 + else: + sig_figs = 1 + + # Get the order of magnitude of the val_err. + n = int(np.log10(val_err)) + + # Take into account if we need to 'correct' the order of magnitude. + # Related to the 'scale' below. + if val_err >= 1: + n += 1 + + # Set the scale, considering number of significant figures, + # and the order of magnitude. + scale = 10 ** (sig_figs - n) + + # Use floor rounding. Add 0.5 to make sure that we round up for halfs. + # (However, remember that np.floor always rounds down...) + val = np.floor(val * scale + 0.5) / scale + val_err = np.floor(val_err * scale + 0.5) / scale + + # Take into account, if the val_err >= 1. + # Then, we get rid of any decimals. + if val_err >= 1: + val, val_err = int(val), int(val_err) + + # Append to rounded number and esd to lists. + number_rounded.append(val) + esd_rounded.append(val_err) + + return number_rounded, esd_rounded + # End of file. From e294ce36667ec8b7f6b48f9d9dc901c493c45556 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 17:37:39 +0300 Subject: [PATCH 15/27] edited the mr_to_hr_number_and_esd docstring --- pydatarecognition/utils.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 1f53ab4..f62de89 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -250,39 +250,24 @@ def hr_to_mr_number_and_esd(number_esd): def mr_to_hr_number_and_esd(number, esd): ''' - merges machine readable numbers and estimated standard deviations (e.g. 343.44 and 0.45) into human readable - numbers with estimated standard deviations (e.g. 343.44(45)). + merges machine readable (rounded) numbers and (rounded) estimated standard deviations (e.g. 343.4 and 0.5) + into human readable numbers with estimated standard deviations (e.g. 343.4(5)). Parameters ---------- - number : array_like - The array-like object that contains numbers in the following format: [343.44, 324908.435, 0.0783] or - - esd : array_like - The array-like object that contains estimated standard deviations in the following format: - [0.45, 0.067, 0.0001] + number : array-like + The array that contains (rounded) numbers. + esd : array-like + The array that contains (rounded) esds. Returns ------- list - The list of strings that contains the rounded numbers with estimated standard deviations - in the following format: ["343.4(5)", "324908.44(7)", "0.0783(1)" ] + The list of strings with human readable (rounded) numbers and esds. ''' - #number, esd = np.array(number, dtype='float').astype('str'), np.array(esd, dtype='float').astype('str') - number, esd = [str(e) for e in number], [str(e) for e in esd] - number_hr, esd_hr = [], [] - for i in range(len(number)): - if len(number[i].split(".")) == 1: - number_hr.append(number[i]) - esd_hr.append(esd[i].split(".")[0]) - else: - number_hr.append(number[i]) - esd_hr.append(str(int(esd[i].split(".")[1]))) - number_esd = [f'{number_hr[i]}({esd_hr[i]})' for i in range(len(esd_hr))] - - return number_esd + def round_number_esd(number, esd): ''' Rounds each element in number and each element in esd (estimated standard deviation) arrays. From 1411959381119a74279e8c12de08ccecef5d4d59 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 17:43:10 +0300 Subject: [PATCH 16/27] edited test_mr_to_hr_number_and_esd --- tests/test_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index a03f651..06e121c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -83,7 +83,10 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] actual = mr_to_hr_number_and_esd(number, esd) - expected = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] + number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] + esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] + expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', + '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '10(100)', '10(110)', '11(2)'] assert actual == expected From fb1e2e5283cf6eb052a000a86e1e1f63c3767a11 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Thu, 23 Sep 2021 17:46:08 +0300 Subject: [PATCH 17/27] edited mr_to_hr_number_and_esd and the test, test passing --- pydatarecognition/utils.py | 11 ++++++++++- tests/test_utils.py | 3 +-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index f62de89..49444c9 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -266,8 +266,17 @@ def mr_to_hr_number_and_esd(number, esd): The list of strings with human readable (rounded) numbers and esds. ''' + esd_hr = [] + for e in esd: + if e < 1: + esd_hr.append(int(str(e).split(".")[1])) + else: + esd_hr.append(e) + number_esd = [f"{number[i]}({esd_hr[i]})" for i in range(len(number))] + + return number_esd + - def round_number_esd(number, esd): ''' Rounds each element in number and each element in esd (estimated standard deviation) arrays. diff --git a/tests/test_utils.py b/tests/test_utils.py index 06e121c..b481eb5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -81,10 +81,9 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): - number, esd = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] - actual = mr_to_hr_number_and_esd(number, esd) number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] + actual = mr_to_hr_number_and_esd(number, esd) expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '10(100)', '10(110)', '11(2)'] assert actual == expected From f91eef3462d1e73bb8c05411d212aa883e88785f Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Fri, 24 Sep 2021 15:59:10 +0300 Subject: [PATCH 18/27] Edited round_number_esd function and the test, test passes --- pydatarecognition/utils.py | 7 ++++++- tests/test_utils.py | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 49444c9..9f906c7 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -311,11 +311,16 @@ def round_number_esd(number, esd): # Turn val_err into scientific notation. val_err_sci = f"{val_err:.5E}" + + # If val < val_err than set the value to 0 and the significant figures to 1. + if val < val_err: + val == 0 + sig_figs = 1 # Inspect first significant figure of val_err_sci. # If the first significant figure is 1, we need to inspect the next. # Else, we can set the number of significant figures to 1. - if int(val_err_sci[0]) == 1: + elif int(val_err_sci[0]) == 1: # Inspect the second significant figure. # Take care of cases where we always want 2 significant figures: diff --git a/tests/test_utils.py b/tests/test_utils.py index b481eb5..fb41a4c 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -81,10 +81,10 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): - number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] + number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] actual = mr_to_hr_number_and_esd(number, esd) - expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', + expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '154(20)', '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '10(100)', '10(110)', '11(2)'] assert actual == expected @@ -95,9 +95,9 @@ def test_round_number_esd(): esd = [0.45, 0.46, 0.3, 0.213, 0.125, 0.145, 2, 21, 14.5, 145, 0.99, 1, 0.145, 0.146, 0.144, 100, 111, 1.72] number_exp = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, - 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] + 2, 1, 2.1, 2.1, 2.14, 0, 0, 11] esd_exp = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, - 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] + 1, 1, 0.2, 0.2, 0.14, 100, 100, 2] actual = round_number_esd(number, esd) expected = (number_exp, esd_exp) assert actual == expected From b5209ec39e252ceba1868c5272bfab9ccf846038 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Fri, 24 Sep 2021 16:04:15 +0300 Subject: [PATCH 19/27] Edited test_mr_to_hr_number_and_esd, test passes --- tests/test_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index fb41a4c..603cdf6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -81,11 +81,11 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): - number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 10, 10, 11] - esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 110, 2] + number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 0, 0, 11] + esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 100, 2] actual = mr_to_hr_number_and_esd(number, esd) expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '154(20)', - '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '10(100)', '10(110)', '11(2)'] + '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '0(100)', '0(100)', '11(2)'] assert actual == expected From ccd1faf136a8412533dc9633f632e76c94520473 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Fri, 24 Sep 2021 16:40:16 +0300 Subject: [PATCH 20/27] adjusted the cases where value is smaller than value error in utils.py and test_utils.py, test passes --- pydatarecognition/utils.py | 4 +--- tests/test_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 9f906c7..b4d52ee 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -314,14 +314,12 @@ def round_number_esd(number, esd): # If val < val_err than set the value to 0 and the significant figures to 1. if val < val_err: - val == 0 - sig_figs = 1 + val, sig_figs = 0, 1 # Inspect first significant figure of val_err_sci. # If the first significant figure is 1, we need to inspect the next. # Else, we can set the number of significant figures to 1. elif int(val_err_sci[0]) == 1: - # Inspect the second significant figure. # Take care of cases where we always want 2 significant figures: # val_err < 1.4E**x diff --git a/tests/test_utils.py b/tests/test_utils.py index 603cdf6..804fb9b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -92,10 +92,10 @@ def test_mr_to_hr_number_and_esd(): def test_round_number_esd(): number = [123.45, 123.3, 123.35, 123.31, 123.12, 132.124, 19, 123, 145, 1234, 1.99, 1, 2.145, 2.146, 2.144, 10, 11, 10.6] - esd = [0.45, 0.46, 0.3, 0.213, 0.125, 0.145, 2, 21, 14.5, 145, - 0.99, 1, 0.145, 0.146, 0.144, 100, 111, 1.72] + esd = [0.4521, 0.4673, 0.309, 0.213, 0.125, 0.145, 2.4, 21.32, 14.5, 145, + 0.99, 1.11, 0.145, 0.146, 0.144, 100.99, 111, 1.72] number_exp = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 150, 1200, - 2, 1, 2.1, 2.1, 2.14, 0, 0, 11] + 2, 0, 2.1, 2.1, 2.14, 0, 0, 11] esd_exp = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 100, 2] actual = round_number_esd(number, esd) From fc9d2664edba25156b2f513c6e8a761645ad12f2 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Fri, 24 Sep 2021 17:32:10 +0300 Subject: [PATCH 21/27] edited hr_to_mr_number_and_esd function and test --- pydatarecognition/utils.py | 17 +++++++++-------- tests/test_utils.py | 5 ++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index b4d52ee..2559fa7 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -217,7 +217,7 @@ def get_formatted_crossref_reference(doi): def hr_to_mr_number_and_esd(number_esd): ''' splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and - estimated standard deviations (e.g. 343.44 and 0.45). + estimated standard deviations (e.g. 343.44 and 0.45) without any rounding. Parameters ---------- @@ -227,15 +227,15 @@ def hr_to_mr_number_and_esd(number_esd): Returns ------- - numpy array - The array with the numbers as floats + list + The list with the numbers as floats, e.g. [343.44, 324908.435, 0.0783] - numpy array - The array with estimated standard deviations as floats + list + The list with estimated standard deviations as floats, e.g. [0.45, 0.067, 0.0001] ''' number = [e.split("(")[0] for e in number_esd] - esd = np.array([e.split("(")[1].split(")")[0] for e in number_esd], dtype="float") + esd = [e.split("(")[1].split(")")[0] for e in number_esd] esd_oom = [] for i in range(len(number)): if len(number[i].split(".")) == 1: @@ -243,9 +243,10 @@ def hr_to_mr_number_and_esd(number_esd): else: esd_oom.append(10**-len(number[i].split(".")[1])) esd_oom = np.array(esd_oom, dtype='float') - number, esd = np.array(number, dtype='float'), np.array(esd * esd_oom, dtype='float') + esd = list(np.array(esd, dtype='float') * np.array(esd_oom, dtype='float')) + number_floats = [float(e) for e in number] - return number, esd + return number_floats, esd def mr_to_hr_number_and_esd(number, esd): diff --git a/tests/test_utils.py b/tests/test_utils.py index 804fb9b..61e85e6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -75,9 +75,8 @@ def mockreturn(*args, **kwargs): def test_hr_to_mr_number_and_esd(): number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] actual = hr_to_mr_number_and_esd(number_esd) - expected = np.array([343.44, 324908.435, 0.0783, 11, 51]), np.array([0.45, 0.067, 0.0001, 1, 13]) - assert np.allclose(actual[0], expected[0]) - assert np.allclose(actual[1], expected[1]) + expected = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] + assert actual == expected def test_mr_to_hr_number_and_esd(): From fc72a66926505b07e53a3b0b33a5d90cbcfadf10 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 28 Sep 2021 16:24:36 +0300 Subject: [PATCH 22/27] moved round_number_esd upwards and used it in hr_to_mr_number_and_esd to round numbers --- pydatarecognition/utils.py | 132 ++++++++++++++++++------------------- tests/test_utils.py | 4 +- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 2559fa7..58dad02 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -214,70 +214,6 @@ def get_formatted_crossref_reference(doi): return ref, ref_date -def hr_to_mr_number_and_esd(number_esd): - ''' - splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and - estimated standard deviations (e.g. 343.44 and 0.45) without any rounding. - - Parameters - ---------- - number_esd : array_like - The array-like object that contains numbers with their estimated standard deviations as strings - in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] - - Returns - ------- - list - The list with the numbers as floats, e.g. [343.44, 324908.435, 0.0783] - - list - The list with estimated standard deviations as floats, e.g. [0.45, 0.067, 0.0001] - - ''' - number = [e.split("(")[0] for e in number_esd] - esd = [e.split("(")[1].split(")")[0] for e in number_esd] - esd_oom = [] - for i in range(len(number)): - if len(number[i].split(".")) == 1: - esd_oom.append(1) - else: - esd_oom.append(10**-len(number[i].split(".")[1])) - esd_oom = np.array(esd_oom, dtype='float') - esd = list(np.array(esd, dtype='float') * np.array(esd_oom, dtype='float')) - number_floats = [float(e) for e in number] - - return number_floats, esd - - -def mr_to_hr_number_and_esd(number, esd): - ''' - merges machine readable (rounded) numbers and (rounded) estimated standard deviations (e.g. 343.4 and 0.5) - into human readable numbers with estimated standard deviations (e.g. 343.4(5)). - - Parameters - ---------- - number : array-like - The array that contains (rounded) numbers. - esd : array-like - The array that contains (rounded) esds. - - Returns - ------- - list - The list of strings with human readable (rounded) numbers and esds. - - ''' - esd_hr = [] - for e in esd: - if e < 1: - esd_hr.append(int(str(e).split(".")[1])) - else: - esd_hr.append(e) - number_esd = [f"{number[i]}({esd_hr[i]})" for i in range(len(number))] - - return number_esd - - def round_number_esd(number, esd): ''' Rounds each element in number and each element in esd (estimated standard deviation) arrays. @@ -312,7 +248,7 @@ def round_number_esd(number, esd): # Turn val_err into scientific notation. val_err_sci = f"{val_err:.5E}" - + # If val < val_err than set the value to 0 and the significant figures to 1. if val < val_err: val, sig_figs = 0, 1 @@ -334,7 +270,7 @@ def round_number_esd(number, esd): # and the set number of significant figures to 2. elif int(val_err_sci[2]) == 4: if int(val_err_sci[3]) >= 5: - val_err_sci= f"{val_err_sci[0:2]}5{val_err_sci[3::]}" + val_err_sci = f"{val_err_sci[0:2]}5{val_err_sci[3::]}" val_err = float(val_err_sci) sig_figs = 1 else: @@ -373,4 +309,68 @@ def round_number_esd(number, esd): return number_rounded, esd_rounded +def hr_to_mr_number_and_esd(number_esd): + ''' + splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and + estimated standard deviations (e.g. 343.44 and 0.45) without any rounding. + + Parameters + ---------- + number_esd : array_like + The array-like object that contains numbers with their estimated standard deviations as strings + in the following format: ["343.44(45)", "324908.435(67)", "0.0783(1)"] + + Returns + ------- + list + The list with the numbers as floats, e.g. [343.44, 324908.435, 0.0783] + + list + The list with estimated standard deviations as floats, e.g. [0.45, 0.067, 0.0001] + + ''' + number = [e.split("(")[0] for e in number_esd] + esd = [e.split("(")[1].split(")")[0] for e in number_esd] + esd_oom = [] + for i in range(len(number)): + if len(number[i].split(".")) == 1: + esd_oom.append(1) + else: + esd_oom.append(10**-len(number[i].split(".")[1])) + esd_oom = np.array(esd_oom, dtype='float') + esd = list(np.array(esd, dtype='float') * np.array(esd_oom, dtype='float')) + number_floats = [float(e) for e in number] + number_rounded, esd_rounded = round_number_esd(number_floats, esd) + + return number_rounded, esd_rounded + + +def mr_to_hr_number_and_esd(number, esd): + ''' + merges machine readable (rounded) numbers and (rounded) estimated standard deviations (e.g. 343.4 and 0.5) + into human readable numbers with estimated standard deviations (e.g. 343.4(5)). + + Parameters + ---------- + number : array-like + The array that contains (rounded) numbers. + esd : array-like + The array that contains (rounded) esds. + + Returns + ------- + list + The list of strings with human readable (rounded) numbers and esds. + + ''' + esd_hr = [] + for e in esd: + if e < 1: + esd_hr.append(int(str(e).split(".")[1])) + else: + esd_hr.append(e) + number_esd = [f"{number[i]}({esd_hr[i]})" for i in range(len(number))] + + return number_esd + # End of file. diff --git a/tests/test_utils.py b/tests/test_utils.py index 61e85e6..3500f61 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -73,9 +73,9 @@ def mockreturn(*args, **kwargs): def test_hr_to_mr_number_and_esd(): - number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)"] + number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)", "243(6)", "3200(300)"] actual = hr_to_mr_number_and_esd(number_esd) - expected = [343.44, 324908.435, 0.0783, 11, 51], [0.45, 0.067, 0.0001, 1, 13] + expected = [343.4, 324908.44, 0.0783, 11, 51, 243, 3200], [0.5, 0.07, 0.0001, 1, 13, 6, 300] assert actual == expected From 70e6bd52288a638349b2cc620238404b961bc7ad Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 28 Sep 2021 16:29:46 +0300 Subject: [PATCH 23/27] edited docstring of hr_to_mr_number_and_esd --- pydatarecognition/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 58dad02..35f1e60 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -311,8 +311,8 @@ def round_number_esd(number, esd): def hr_to_mr_number_and_esd(number_esd): ''' - splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into machine readable numbers and - estimated standard deviations (e.g. 343.44 and 0.45) without any rounding. + splits human readable numbers with estimated standard deviations (e.g. 343.44(45)) into rounded machine readable + numbers and estimated standard deviations (e.g. 343.4 and 0.5). Parameters ---------- @@ -323,10 +323,10 @@ def hr_to_mr_number_and_esd(number_esd): Returns ------- list - The list with the numbers as floats, e.g. [343.44, 324908.435, 0.0783] + The list with the rounded numbers as floats, e.g. [343.4, 324908.44, 0.0783] list - The list with estimated standard deviations as floats, e.g. [0.45, 0.067, 0.0001] + The list with rounded estimated standard deviations as floats, e.g. [0.5, 0.07, 0.0001] ''' number = [e.split("(")[0] for e in number_esd] From c3e47093f2fb689c5c1c8d752ff02843502f6958 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 28 Sep 2021 16:40:58 +0300 Subject: [PATCH 24/27] edited docstring of mr_to_hr_number_and_esd, included proper rounding to this function and altered test_mr_to_hr_number_and_esd, test passes --- pydatarecognition/utils.py | 13 +++++++------ tests/test_utils.py | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pydatarecognition/utils.py b/pydatarecognition/utils.py index 35f1e60..99d0fa2 100644 --- a/pydatarecognition/utils.py +++ b/pydatarecognition/utils.py @@ -347,29 +347,30 @@ def hr_to_mr_number_and_esd(number_esd): def mr_to_hr_number_and_esd(number, esd): ''' - merges machine readable (rounded) numbers and (rounded) estimated standard deviations (e.g. 343.4 and 0.5) + rounds and merges machine readable numbers and estimated standard deviations (e.g. 343.4 and 0.5) into human readable numbers with estimated standard deviations (e.g. 343.4(5)). Parameters ---------- number : array-like - The array that contains (rounded) numbers. + The array that contains numbers. esd : array-like - The array that contains (rounded) esds. + The array that contains esds. Returns ------- list - The list of strings with human readable (rounded) numbers and esds. + The list of strings with human readable rounded numbers and esds. ''' + number_rounded, esd_rounded = round_number_esd(number, esd) esd_hr = [] - for e in esd: + for e in esd_rounded: if e < 1: esd_hr.append(int(str(e).split(".")[1])) else: esd_hr.append(e) - number_esd = [f"{number[i]}({esd_hr[i]})" for i in range(len(number))] + number_esd = [f"{number_rounded[i]}({esd_hr[i]})" for i in range(len(number_rounded))] return number_esd diff --git a/tests/test_utils.py b/tests/test_utils.py index 3500f61..dd75c8d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -83,7 +83,7 @@ def test_mr_to_hr_number_and_esd(): number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 0, 0, 11] esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 100, 2] actual = mr_to_hr_number_and_esd(number, esd) - expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '154(20)', + expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '0(100)', '0(100)', '11(2)'] assert actual == expected From 94a66a2878d05bb44e3f101109e59474ae16bc21 Mon Sep 17 00:00:00 2001 From: Simon Billinge Date: Thu, 30 Sep 2021 06:49:04 -0400 Subject: [PATCH 25/27] Update test_utils.py added some slightly more challenging tests --- tests/test_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index dd75c8d..c6049da 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -73,18 +73,18 @@ def mockreturn(*args, **kwargs): def test_hr_to_mr_number_and_esd(): - number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)", "243(6)", "3200(300)"] + number_esd = ["343.44(45)", "324908.435(67)", "0.0783(1)", "11(1)", "51(13)", "243(6)", "3205(300)"] actual = hr_to_mr_number_and_esd(number_esd) expected = [343.4, 324908.44, 0.0783, 11, 51, 243, 3200], [0.5, 0.07, 0.0001, 1, 13, 6, 300] assert actual == expected def test_mr_to_hr_number_and_esd(): - number = [123.5, 123.3, 123.4, 123.3, 123.12, 132.1, 19, 120, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 0, 0, 11] - esd = [0.5, 0.5, 0.3, 0.2, 0.13, 0.2, 2, 20, 20, 200, 1, 1, 0.2, 0.2, 0.14, 100, 100, 2] + number = [123.5, 123.3, 123.417, 123.367, 123.12, 132.1, 19, 125, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 7.26, 50, 11] + esd = [0.5, 0.5, 0.326, 0.2, 0.13, 0.236, 2, 20, 20, 207, 1, 1.4, 0.2, 0.25, 0.14, 100, 100, 2] actual = mr_to_hr_number_and_esd(number, esd) - expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.3(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', - '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(2)', '2.14(14)', '0(100)', '0(100)', '11(2)'] + expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.4(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', + '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(3)', '2.14(14)', '0(100)', '0(100)', '11(2)'] assert actual == expected From e4d6f78be74341a44c6a05fd3973aafe93932d92 Mon Sep 17 00:00:00 2001 From: Simon Billinge Date: Thu, 30 Sep 2021 07:01:04 -0400 Subject: [PATCH 26/27] Update test_utils.py simon corrected his bad test --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index c6049da..3c6e948 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -80,7 +80,7 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): - number = [123.5, 123.3, 123.417, 123.367, 123.12, 132.1, 19, 125, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 7.26, 50, 11] + number = [123.5, 123.3, 123.417, 123.367, 123.12, 132.1, 19, 123, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 7.26, 50, 11] esd = [0.5, 0.5, 0.326, 0.2, 0.13, 0.236, 2, 20, 20, 207, 1, 1.4, 0.2, 0.25, 0.14, 100, 100, 2] actual = mr_to_hr_number_and_esd(number, esd) expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.4(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', From 52b2288add7ef3396e63f2a1246aa86683d410a6 Mon Sep 17 00:00:00 2001 From: Berrak Ozer Date: Tue, 5 Oct 2021 17:35:33 +0300 Subject: [PATCH 27/27] fixed test_mr_to_hr_number_and_esd, test passes --- tests/test_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 3c6e948..987b09a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -80,11 +80,11 @@ def test_hr_to_mr_number_and_esd(): def test_mr_to_hr_number_and_esd(): - number = [123.5, 123.3, 123.417, 123.367, 123.12, 132.1, 19, 123, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 7.26, 50, 11] + number = [123.5, 123.3, 123.417, 123.367, 123.12, 132.1, 19, 125, 154, 1200, 2, 1, 2.1, 2.1, 2.14, 7.26, 50, 11] esd = [0.5, 0.5, 0.326, 0.2, 0.13, 0.236, 2, 20, 20, 207, 1, 1.4, 0.2, 0.25, 0.14, 100, 100, 2] actual = mr_to_hr_number_and_esd(number, esd) - expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.4(2)', '123.12(13)', '132.1(2)', '19(2)', '120(20)', '150(20)', - '1200(200)', '2(1)', '1(1)', '2.1(2)', '2.1(3)', '2.14(14)', '0(100)', '0(100)', '11(2)'] + expected = ['123.5(5)', '123.3(5)', '123.4(3)', '123.4(2)', '123.12(13)', '132.1(2)', '19(2)', '130(20)', '150(20)', + '1200(200)', '2(1)', '0(1)', '2.1(2)', '2.1(3)', '2.14(14)', '0(100)', '0(100)', '11(2)'] assert actual == expected