From e55b958d8c369b3d42c515c5db633462dec320d9 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Mon, 28 Aug 2023 16:25:06 +0200
Subject: [PATCH 1/7] IO - Change origin attribute when not find on system
---
Orange/data/io.py | 5 +-
Orange/data/io_util.py | 82 +++++++++++++++++++-
Orange/data/tests/test_io_util.py | 124 +++++++++++++++++++++++++++++-
Orange/tests/test_io.py | 30 +++++++-
4 files changed, 234 insertions(+), 7 deletions(-)
diff --git a/Orange/data/io.py b/Orange/data/io.py
index 07592d72bc0..6f61266ec74 100644
--- a/Orange/data/io.py
+++ b/Orange/data/io.py
@@ -24,7 +24,7 @@
import xlsxwriter
import openpyxl
-from Orange.data import _io, Table, Domain, ContinuousVariable
+from Orange.data import _io, Table, Domain, ContinuousVariable, update_origin
from Orange.data import Compression, open_compressed, detect_encoding, \
isnastr, guess_data_type, sanitize_variable
from Orange.data.io_base import FileFormatBase, Flags, DataTableMixin, PICKLE_PROTOCOL
@@ -179,6 +179,7 @@ def read(self):
('-' + str(endpos)) if (endpos - pos) > 1 else '')
warnings.warn(warning)
self.set_table_metadata(self.filename, data)
+ update_origin(data, self.filename)
return data
except Exception as e:
error = e
@@ -215,6 +216,7 @@ def read(self):
if not isinstance(table, Table):
raise TypeError("file does not contain a data table")
else:
+ update_origin(table, self.filename)
return table
@classmethod
@@ -264,6 +266,7 @@ def read(self):
try:
cells = self.get_cells()
table = self.data_table(cells)
+ update_origin(table, self.filename)
table.name = path.splitext(path.split(self.filename)[-1])[0]
if self.sheet and len(self.sheets) > 1:
table.name = '-'.join((table.name, self.sheet))
diff --git a/Orange/data/io_util.py b/Orange/data/io_util.py
index 163c0354abe..c1c8de402c5 100644
--- a/Orange/data/io_util.py
+++ b/Orange/data/io_util.py
@@ -1,17 +1,27 @@
+import os.path
import subprocess
from collections import defaultdict
+from typing import Tuple, Optional
import numpy as np
+import pandas as pd
from chardet.universaldetector import UniversalDetector
from Orange.data import (
is_discrete_values, MISSING_VALUES, Variable,
- DiscreteVariable, StringVariable, ContinuousVariable, TimeVariable,
+ DiscreteVariable, StringVariable, ContinuousVariable, TimeVariable, Table,
)
from Orange.misc.collections import natural_sorted
-__all__ = ["Compression", "open_compressed", "detect_encoding", "isnastr",
- "guess_data_type", "sanitize_variable"]
+__all__ = [
+ "Compression",
+ "open_compressed",
+ "detect_encoding",
+ "isnastr",
+ "guess_data_type",
+ "sanitize_variable",
+ "update_origin",
+]
class Compression:
@@ -207,3 +217,69 @@ def mapvalues(arr):
values = [_var.parse(i) for i in orig_values]
return values, var
+
+
+def _extract_new_origin(attr: Variable, table: Table, lookup_dirs: Tuple[str]) -> Optional[str]:
+ # origin exists
+ if os.path.exists(attr.attributes["origin"]):
+ return attr.attributes["origin"]
+
+ # last dir of origin in lookup dirs
+ dir_ = os.path.basename(os.path.normpath(attr.attributes["origin"]))
+ for ld in lookup_dirs:
+ new_dir = os.path.join(ld, dir_)
+ if os.path.isdir(new_dir):
+ return new_dir
+
+ # all column paths in lookup dirs
+ for ld in lookup_dirs:
+ if all(
+ os.path.exists(os.path.join(ld, attr.str_val(v)))
+ for v in table.get_column(attr)
+ if v and not pd.isna(v)
+ ):
+ return ld
+
+ return None
+
+
+def update_origin(table: Table, file_path: str):
+ """
+ When a dataset with file paths in the column is moved to another computer,
+ the absolute path may not be correct. This function updates the path for all
+ columns with an "origin" attribute.
+
+ The process consists of two steps. First, we identify directories to search
+ for files, and in the second step, we check if paths exist.
+
+ Lookup directories:
+ 1. The directory where the file from file_path is placed
+ 2. The parent directory of 1. The situation when the user places dataset
+ file in the directory with files (for example, workflow in a directory
+ with images)
+
+ Possible situations for file search:
+ 1. The last directory of origin (basedir) is in one of the lookup directories
+ 2. Origin doesn't exist in any lookup directories, but paths in a column can
+ be found in one of the lookup directories. This is usually a situation
+ when paths in a column are complex (e.g. a/b/c/d/file.txt).
+
+ Note: This function updates the existing table
+
+ Parameters
+ ----------
+ table
+ Orange Table to be updated if origin exits in any column
+ file_path
+ Path of the loaded dataset for reference. Only paths inside datasets
+ directory or its parent directory will be considered for new origin.
+ """
+ file_dir = os.path.dirname(file_path)
+ parent_dir = os.path.dirname(file_dir)
+ # if file_dir already root file_dir == parent_dir
+ lookup_dirs = tuple({file_dir: 0, parent_dir: 0})
+ for attr in table.domain.metas:
+ if "origin" in attr.attributes and (attr.is_string or attr.is_discrete):
+ new_orig = _extract_new_origin(attr, table, lookup_dirs)
+ if new_orig:
+ attr.attributes["origin"] = new_orig
diff --git a/Orange/data/tests/test_io_util.py b/Orange/data/tests/test_io_util.py
index 683132da8c5..8d6ec273768 100644
--- a/Orange/data/tests/test_io_util.py
+++ b/Orange/data/tests/test_io_util.py
@@ -1,6 +1,18 @@
+import os.path
import unittest
+from tempfile import TemporaryDirectory
-from Orange.data import ContinuousVariable, guess_data_type
+import numpy as np
+
+from Orange.data import (
+ ContinuousVariable,
+ guess_data_type,
+ Table,
+ Domain,
+ StringVariable,
+ DiscreteVariable,
+)
+from Orange.data.io_util import update_origin
class TestIoUtil(unittest.TestCase):
@@ -10,5 +22,115 @@ def test_guess_continuous_w_nans(self):
ContinuousVariable)
+class TestUpdateOrigin(unittest.TestCase):
+ FILE_NAMES = ["file1.txt", "file2.txt", "file3.txt"]
+
+ def setUp(self) -> None:
+ self.alt_dir = TemporaryDirectory() # pylint: disable=consider-using-with
+
+ self.var_string = var = StringVariable("Files")
+ files = self.FILE_NAMES + [var.Unknown]
+ self.table_string = Table.from_list(
+ Domain([], metas=[var]), np.array(files).reshape((-1, 1))
+ )
+ self.var_discrete = var = DiscreteVariable("Files", values=self.FILE_NAMES)
+ files = self.FILE_NAMES + [var.Unknown]
+ self.table_discrete = Table.from_list(
+ Domain([], metas=[var]), np.array(files).reshape((-1, 1))
+ )
+
+ def tearDown(self) -> None:
+ self.alt_dir.cleanup()
+
+ def __create_files(self):
+ for f in self.FILE_NAMES:
+ f = os.path.join(self.alt_dir.name, f)
+ with open(f, "w", encoding="utf8"):
+ pass
+ self.assertTrue(os.path.exists(f))
+
+ def test_origin_not_changed(self):
+ """
+ Origin exist; keep it unchanged, even though dataset path also includes
+ files from column.
+ """
+ with TemporaryDirectory() as dir_name:
+ self.var_string.attributes["origin"] = dir_name
+ update_origin(self.table_string, self.alt_dir.name)
+ self.assertEqual(
+ self.table_string.domain[self.var_string].attributes["origin"], dir_name
+ )
+
+ def test_origin_subdir(self):
+ """
+ Origin is wrong but last dir in origin exit in the dataset file's path
+ """
+ images_dir = os.path.join(self.alt_dir.name, "subdir")
+ os.mkdir(images_dir)
+
+ self.var_string.attributes["origin"] = "/a/b/subdir"
+ update_origin(self.table_string, os.path.join(self.alt_dir.name, "data.csv"))
+ self.assertEqual(
+ self.table_string.domain[self.var_string].attributes["origin"], images_dir
+ )
+
+ def test_origin_parents_subdir(self):
+ """
+ Origin is wrong but last dir in origin exit in the dataset file
+ parent's directory
+ """
+ # make the dir where dataset is placed
+ images_dir = os.path.join(self.alt_dir.name, "subdir")
+ os.mkdir(images_dir)
+
+ self.var_string.attributes["origin"] = "/a/b/subdir"
+ update_origin(self.table_string, os.path.join(images_dir, "data.csv"))
+ self.assertEqual(
+ self.table_string.domain[self.var_string].attributes["origin"], images_dir
+ )
+
+ def test_column_paths_subdir(self):
+ """
+ Origin dir not exiting but paths from column exist in dataset's dir
+ """
+ self.__create_files()
+
+ self.var_string.attributes["origin"] = "/a/b/non-exiting-dir"
+ update_origin(self.table_string, os.path.join(self.alt_dir.name, "data.csv"))
+ self.assertEqual(
+ self.table_string.domain[self.var_string].attributes["origin"],
+ self.alt_dir.name,
+ )
+
+ self.var_discrete.attributes["origin"] = "/a/b/non-exiting-dir"
+ update_origin(self.table_discrete, os.path.join(self.alt_dir.name, "data.csv"))
+ self.assertEqual(
+ self.table_discrete.domain[self.var_discrete].attributes["origin"],
+ self.alt_dir.name,
+ )
+
+ def test_column_paths_parents_subdir(self):
+ """
+ Origin dir not exiting but paths from column exist in dataset parent's dir
+ """
+ # make the dir where dataset is placed
+ dataset_dir = os.path.join(self.alt_dir.name, "subdir")
+ self.__create_files()
+
+ self.var_string.attributes["origin"] = "/a/b/non-exiting-dir"
+ update_origin(self.table_string, os.path.join(dataset_dir, "data.csv"))
+ self.assertEqual(
+ self.table_string.domain[self.var_string].attributes["origin"],
+ self.alt_dir.name,
+ )
+
+ self.var_discrete.attributes["origin"] = "/a/b/non-exiting-dir"
+ update_origin(self.table_discrete, os.path.join(dataset_dir, "data.csv"))
+ self.assertEqual(
+ self.table_discrete.domain[self.var_discrete].attributes["origin"],
+ self.alt_dir.name,
+ )
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/Orange/tests/test_io.py b/Orange/tests/test_io.py
index 3bcb71d9155..579a9819555 100644
--- a/Orange/tests/test_io.py
+++ b/Orange/tests/test_io.py
@@ -12,10 +12,10 @@
from Orange import data
-from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader
+from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader, ExcelReader
from Orange.data.io_base import PICKLE_PROTOCOL
from Orange.data.table import get_sample_datasets_dir
-from Orange.data import Table
+from Orange.data import Table, StringVariable, Domain
from Orange.tests import test_dirname
from Orange.util import OrangeDeprecationWarning
@@ -206,6 +206,32 @@ def test_pickle_version(self):
# we should not use a version that is not supported
self.assertLessEqual(PICKLE_PROTOCOL, pickle.HIGHEST_PROTOCOL)
+ def test_update_origin(self):
+ """
+ Test if origin attributes is changed if path doesn't exist. For example
+ when file moved to another computer. It tested only one scenario
+ all other scenarios are tested as part of update_origin function tests.
+ """
+ with tempfile.TemporaryDirectory() as dir_name:
+ os.mkdir(os.path.join(dir_name, "subdir"))
+
+ var = StringVariable("Files")
+ var.attributes["origin"] = "/a/b/c/d/subdir"
+ table = Table.from_list(Domain([], metas=[var]), ["f1", "f2"])
+
+ for reader in (CSVReader, TabReader, PickleReader, ExcelReader):
+ dataset = os.path.join(dir_name, f"dataset{reader.EXTENSIONS[0]}")
+ if reader is PickleReader:
+ reader.write_file(dataset, table)
+ else:
+ reader.write_file(dataset, table, with_annotations=True)
+
+ table = Table.from_file(dataset)
+ self.assertEqual(
+ os.path.join(dir_name, "subdir"),
+ table.domain["Files"].attributes["origin"],
+ )
+
if __name__ == "__main__":
unittest.main()
From 2b77093cd855d48fe0aa427f9a5d49d4955c9fd8 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 12:30:31 +0200
Subject: [PATCH 2/7] test_variable - Replace StringIO with TemporaryFile
---
Orange/data/tests/test_variable.py | 49 ++++++++++++++++++------------
1 file changed, 29 insertions(+), 20 deletions(-)
diff --git a/Orange/data/tests/test_variable.py b/Orange/data/tests/test_variable.py
index c8d9b027cd9..4021acafcd4 100644
--- a/Orange/data/tests/test_variable.py
+++ b/Orange/data/tests/test_variable.py
@@ -1,6 +1,7 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
# pylint: disable=protected-access
+import csv
import os
import sys
import math
@@ -10,7 +11,7 @@
import warnings
from datetime import datetime, timezone
-from io import StringIO
+from tempfile import NamedTemporaryFile, TemporaryDirectory
import numpy as np
import pandas as pd
@@ -714,27 +715,35 @@ def test_no_date_no_time(self):
self.assertEqual(TimeVariable('relative time').repr_val(1.6), '1.6')
def test_readwrite_timevariable(self):
- output_csv = StringIO()
- input_csv = StringIO("""\
-Date,Feature
-time,continuous
-,
-1920-12-12,1.0
-1920-12-13,3.0
-1920-12-14,5.5
-""")
- for stream in (output_csv, input_csv):
- stream.close = lambda: None # HACK: Prevent closing of streams
-
- table = CSVReader(input_csv).read()
- self.assertIsInstance(table.domain['Date'], TimeVariable)
- self.assertEqual(table[0, 'Date'], '1920-12-12')
+ content = [
+ ("Date", "Feature"),
+ ("time", "continuous"),
+ ("", ""),
+ ("1920-12-12", 1.0),
+ ("1920-12-13", 3.0),
+ ("1920-12-14", 5.5),
+ ]
+ with NamedTemporaryFile(
+ mode="w", delete=False, newline="", encoding="utf-8"
+ ) as input_csv:
+ csv.writer(input_csv, delimiter=",").writerows(content)
+
+ table = CSVReader(input_csv.name).read()
+ self.assertIsInstance(table.domain["Date"], TimeVariable)
+ self.assertEqual(table[0, "Date"], "1920-12-12")
# Dates before 1970 are negative
- self.assertTrue(all(inst['Date'] < 0 for inst in table))
+ self.assertTrue(all(inst["Date"] < 0 for inst in table))
- CSVReader.write_file(output_csv, table)
- self.assertEqual(input_csv.getvalue().splitlines(),
- output_csv.getvalue().splitlines())
+ with NamedTemporaryFile(mode="w", delete=False) as output_csv:
+ pass
+ CSVReader.write_file(output_csv.name, table)
+
+ with open(input_csv.name, encoding="utf-8") as in_f:
+ with open(output_csv.name, encoding="utf-8") as out_f:
+ self.assertEqual(in_f.read(), out_f.read())
+
+ os.unlink(input_csv.name)
+ os.unlink(output_csv.name)
def test_repr_value(self):
# https://github.com/biolab/orange3/pull/1760
From e3f49bc737d0972395410037fe2e9bedf2746c8d Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 12:31:06 +0200
Subject: [PATCH 3/7] test_tab_reader - Replace StringIO with TemporaryFile
---
Orange/tests/test_tab_reader.py | 115 ++++++++++++++++++++------------
1 file changed, 73 insertions(+), 42 deletions(-)
diff --git a/Orange/tests/test_tab_reader.py b/Orange/tests/test_tab_reader.py
index 85c1a8aa846..f0e98333b5d 100644
--- a/Orange/tests/test_tab_reader.py
+++ b/Orange/tests/test_tab_reader.py
@@ -2,12 +2,13 @@
# pylint: disable=missing-docstring
import io
+import os
from os import path, remove
import unittest
-import tempfile
import shutil
import time
from collections import OrderedDict
+from tempfile import NamedTemporaryFile, mkdtemp
import numpy as np
@@ -34,8 +35,10 @@ def test_read_easy(self):
2.0 \tM \t4 \t
"""
- file = io.StringIO(simplefile)
- table = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(simplefile)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
f1, f2, c1, c2 = table.domain.variables
self.assertIsInstance(f1, DiscreteVariable)
@@ -60,15 +63,24 @@ def test_read_save_quoted(self):
"""c\td"""\tk
'''
expected = ['"a"', '"b"', '"c\td"']
- f = io.StringIO(quoted)
- table = read_tab_file(f)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(quoted)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
self.assertSequenceEqual(table.metas[:, 0].tolist(), expected)
- f = io.StringIO()
- f.close = lambda: None
- TabReader.write_file(f, table)
- saved = f.getvalue()
- table1 = read_tab_file(io.StringIO(saved))
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ pass
+ TabReader.write_file(tmp.name, table)
+ with open(tmp.name, encoding="utf-8") as f:
+ saved = f.read()
+ os.unlink(tmp.name)
+
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(saved)
+ table1 = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
+
self.assertSequenceEqual(table1.metas[:, 0].tolist(), expected)
def test_read_and_save_attributes(self):
@@ -78,8 +90,10 @@ def test_read_and_save_attributes(self):
\ta=1 b=2 \tclass x=a\\ longer\\ string \tclass
1.0 \tM \t5 \trich
"""
- file = io.StringIO(samplefile)
- table = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(samplefile)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
_, f2, c1, _ = table.domain.variables
self.assertIsInstance(f2, DiscreteVariable)
@@ -89,13 +103,18 @@ def test_read_and_save_attributes(self):
self.assertIsInstance(c1, DiscreteVariable)
self.assertEqual(c1.name, "Class 1")
self.assertEqual(c1.attributes, {'x': 'a longer string'})
- outf = io.StringIO()
- outf.close = lambda: None
- TabReader.write_file(outf, table)
- saved = outf.getvalue()
- file = io.StringIO(saved)
- table = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ pass
+ TabReader.write_file(tmp.name, table)
+ with open(tmp.name, encoding="utf-8") as f:
+ saved = f.read()
+ os.unlink(tmp.name)
+
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(saved)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
_, f2, c1, _ = table.domain.variables
self.assertIsInstance(f2, DiscreteVariable)
@@ -108,12 +127,11 @@ def test_read_and_save_attributes(self):
spath = "/path/to/somewhere"
c1.attributes["path"] = spath
- outf = io.StringIO()
- outf.close = lambda: None
- TabReader.write_file(outf, table)
- outf.seek(0)
-
- table = read_tab_file(outf)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ pass
+ TabReader.write_file(tmp.name, table)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
_, _, c1, _ = table.domain.variables
self.assertEqual(c1.attributes["path"], spath)
@@ -123,8 +141,10 @@ def test_read_data_oneline_header(self):
0.1\t0.2\t0.3
1.1\t1.2\t1.5
"""
- file = io.StringIO(samplefile)
- table = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(samplefile)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
self.assertEqual(len(table), 2)
self.assertEqual(len(table.domain.variables), 3)
@@ -135,8 +155,10 @@ def test_read_data_no_header(self):
0.1\t0.2\t0.3
1.1\t1.2\t1.5
"""
- file = io.StringIO(samplefile)
- table = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(samplefile)
+ table = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
self.assertEqual(len(table), 2)
self.assertEqual(len(table.domain.variables), 3)
@@ -148,10 +170,14 @@ def test_read_data_no_header_feature_reuse(self):
0.1\t0.2\t0.3
1.1\t1.2\t1.5
"""
- file = io.StringIO(samplefile)
- t1 = read_tab_file(file)
- file = io.StringIO(samplefile)
- t2 = read_tab_file(file)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(samplefile)
+ t1 = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(samplefile)
+ t2 = read_tab_file(tmp.name)
+ os.unlink(tmp.name)
self.assertEqual(t1.domain[0], t2.domain[0])
def test_renaming(self):
@@ -160,7 +186,7 @@ def test_renaming(self):
c\t c\t c\t c\t c\t c\t c\t c\t c
\t \t \t \t class\t class\t \t \t meta
0\t 0\t 0\t 0\t 0\t 0\t 0\t 0 """
- file = tempfile.NamedTemporaryFile("wt", delete=False, suffix=".tab")
+ file = NamedTemporaryFile("wt", delete=False, suffix=".tab")
filename = file.name
try:
file.write(simplefile)
@@ -198,7 +224,7 @@ def test_sheets(self):
self.assertEqual(reader.sheets, [])
def test_attributes_saving(self):
- tempdir = tempfile.mkdtemp()
+ tempdir = mkdtemp()
try:
self.assertEqual(self.data.attributes, {})
self.data.attributes[1] = "test"
@@ -209,7 +235,7 @@ def test_attributes_saving(self):
shutil.rmtree(tempdir)
def test_attributes_saving_as_txt(self):
- tempdir = tempfile.mkdtemp()
+ tempdir = mkdtemp()
try:
self.data.attributes = OrderedDict()
self.data.attributes["a"] = "aa"
@@ -229,7 +255,7 @@ def test_data_name(self):
self.assertEqual(table2.name, 'iris')
def test_metadata(self):
- tempdir = tempfile.mkdtemp()
+ tempdir = mkdtemp()
try:
self.data.attributes = OrderedDict()
self.data.attributes["a"] = "aa"
@@ -241,7 +267,7 @@ def test_metadata(self):
shutil.rmtree(tempdir)
def test_no_metadata(self):
- tempdir = tempfile.mkdtemp()
+ tempdir = mkdtemp()
try:
self.data.attributes = OrderedDict()
fname = path.join(tempdir, "out.tab")
@@ -251,7 +277,7 @@ def test_no_metadata(self):
shutil.rmtree(tempdir)
def test_had_metadata_now_there_is_none(self):
- tempdir = tempfile.mkdtemp()
+ tempdir = mkdtemp()
try:
self.data.attributes["a"] = "aa"
fname = path.join(tempdir, "out.tab")
@@ -275,11 +301,16 @@ def test_number_of_decimals(self):
@staticmethod
def test_many_discrete():
- b = io.StringIO()
- b.write("Poser\nd\n\n")
- b.writelines("K" + str(i) + "\n" for i in range(30000))
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write("Poser\nd\n\n")
+ tmp.writelines("K" + str(i) + "\n" for i in range(30000))
start = time.time()
- _ = TabReader(b).read()
+ _ = TabReader(tmp.name).read()
elapsed = time.time() - start
+ os.unlink(tmp.name)
if elapsed > 2:
raise AssertionError()
+
+
+if __name__ == "__main__":
+ unittest.main()
From c62fc91959263265b5432248c3d640d51684576e Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 12:31:36 +0200
Subject: [PATCH 4/7] test_txt_reader - Replace StringIO with TemporaryFile
---
Orange/tests/test_txt_reader.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/Orange/tests/test_txt_reader.py b/Orange/tests/test_txt_reader.py
index dcd0dfc451d..2111143ee77 100644
--- a/Orange/tests/test_txt_reader.py
+++ b/Orange/tests/test_txt_reader.py
@@ -4,7 +4,6 @@
import unittest
from tempfile import NamedTemporaryFile
import os
-import io
import warnings
from Orange.data import Table, ContinuousVariable, DiscreteVariable
@@ -80,8 +79,11 @@ def test_read_csv(self):
self.read_easy(csv_file_nh, "Feature ")
def test_read_csv_with_na(self):
- c = io.StringIO(csv_file_missing)
- table = CSVReader(c).read()
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(csv_file_missing)
+
+ table = CSVReader(tmp.name).read()
+ os.unlink(tmp.name)
f1, f2 = table.domain.variables
self.assertIsInstance(f1, ContinuousVariable)
self.assertIsInstance(f2, DiscreteVariable)
@@ -130,3 +132,7 @@ def test_csv_sniffer(self):
data = reader.read()
self.assertEqual(len(data), 8)
self.assertEqual(len(data.domain.variables) + len(data.domain.metas), 15)
+
+
+if __name__ == "__main__":
+ unittest.main()
From 622046758427e5b5619d7e4a3338a24291125275 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 12:31:54 +0200
Subject: [PATCH 5/7] test_owpredictions - Replace StringIO with TemporaryFile
---
.../widgets/evaluate/tests/test_owpredictions.py | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/Orange/widgets/evaluate/tests/test_owpredictions.py b/Orange/widgets/evaluate/tests/test_owpredictions.py
index f48a5eb884b..762f4d2068b 100644
--- a/Orange/widgets/evaluate/tests/test_owpredictions.py
+++ b/Orange/widgets/evaluate/tests/test_owpredictions.py
@@ -1,8 +1,9 @@
"""Tests for OWPredictions"""
# pylint: disable=protected-access
-import io
+import os
import unittest
from functools import partial
+from tempfile import NamedTemporaryFile
from typing import Optional
from unittest.mock import Mock, patch
@@ -206,8 +207,10 @@ def test_bad_data(self):
child\tmale\tyes
child\tfemale\tyes
"""
- file1 = io.StringIO(filestr1)
- table = TabReader(file1).read()
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(filestr1)
+ table = TabReader(tmp.name).read()
+ os.unlink(tmp.name)
learner = TreeLearner()
tree = learner(table)
@@ -220,9 +223,11 @@ def test_bad_data(self):
child\tmale\tyes
child\tfemale\tunknown
"""
- file2 = io.StringIO(filestr2)
- bad_table = TabReader(file2).read()
+ with NamedTemporaryFile(mode="w", delete=False) as tmp:
+ tmp.write(filestr2)
+ bad_table = TabReader(tmp.name).read()
+ os.unlink(tmp.name)
self.send_signal(self.widget.Inputs.predictors, tree, 1)
with excepthook_catch():
From 0365ed691584027b9cd1c178784ea41e54b2989d Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 12:51:00 +0200
Subject: [PATCH 6/7] test_io - Replace StringIO with TemporaryFile
---
Orange/tests/test_io.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/Orange/tests/test_io.py b/Orange/tests/test_io.py
index 579a9819555..557f411d87c 100644
--- a/Orange/tests/test_io.py
+++ b/Orange/tests/test_io.py
@@ -1,7 +1,5 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
-
-import io
import os
import pickle
import shutil
@@ -124,12 +122,13 @@ def test_empty_columns(self):
1, 0,
1, 2,
"""
- c = io.StringIO(samplefile)
+ with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp:
+ tmp.write(samplefile)
with self.assertWarns(UserWarning) as cm:
- table = CSVReader(c).read()
+ table = CSVReader(tmp.name).read()
+ os.unlink(tmp.name)
self.assertEqual(len(table.domain.attributes), 2)
- self.assertEqual(cm.warning.args[0],
- "Columns with no headers were removed.")
+ self.assertEqual(cm.warning.args[0], "Columns with no headers were removed.")
def test_type_annotations(self):
class FooFormat(FileFormat):
From 72ccefe2193652654e56796df47fe1b067a28be4 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 29 Aug 2023 15:41:23 +0200
Subject: [PATCH 7/7] CSVReader - Remove string type check for file name
---
Orange/data/io.py | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/Orange/data/io.py b/Orange/data/io.py
index 6f61266ec74..0959bb725c2 100644
--- a/Orange/data/io.py
+++ b/Orange/data/io.py
@@ -164,14 +164,7 @@ def read(self):
skipinitialspace=True,
)
data = self.data_table(reader)
-
- # TODO: Name can be set unconditionally when/if
- # self.filename will always be a string with the file name.
- # Currently, some tests pass StringIO instead of
- # the file name to a reader.
- if isinstance(self.filename, str):
- data.name = path.splitext(
- path.split(self.filename)[-1])[0]
+ data.name = path.splitext(path.split(self.filename)[-1])[0]
if error and isinstance(error, UnicodeDecodeError):
pos, endpos = error.args[2], error.args[3]
warning = ('Skipped invalid byte(s) in position '