summary | refs | log | tree | commit | diff
path: root/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
diff options
context:
space:
mode:
Diffstat (limited to 'sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch')
-rw-r--r-- sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch 39
1 files changed, 39 insertions, 0 deletions
diff --git a/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
new file mode 100644
index 000000000000..01a872cb846a
--- /dev/null
+++ b/sci-libs/tokenizers/files/tokenizers-0.15.2-test.patch
@@ -0,0 +1,39 @@
+--- a/tests/bindings/test_trainers.py 2024-04-07 18:21:19.443506351 +0200
++++ b/tests/bindings/test_trainers.py 2024-04-07 18:21:54.893466083 +0200
+@@ -295,8 +295,8 @@
+ tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
+ [pre_tokenizers.Whitespace(), pre_tokenizers.Digits(individual_digits=True)]
+ )
+- tokenizer.train(files=["data/big.txt"], trainer=trainer)
++ tokenizer.train(files=["tests/data/big.txt"], trainer=trainer)
+
+- tokenizer.save("data/tokenizer.json")
++ tokenizer.save("tests/data/tokenizer.json")
+
+- tokenizer.from_file("data/tokenizer.json")
++ tokenizer.from_file("tests/data/tokenizer.json")
+--- a/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:08.653593406 +0200
++++ b/tests/documentation/test_tutorial_train_from_iterators.py 2024-04-07 18:19:39.206906910 +0200
+@@ -40,7 +40,7 @@
+ def setup_gzip_files(self, train_files):
+ with open(train_files["small"], "rt") as small:
+ for n in range(3):
+- path = f"data/my-file.{n}.gz"
++ path = f"tests/data/my-file.{n}.gz"
+ with gzip.open(path, "wt") as f:
+ f.write(small.read())
+
+@@ -87,11 +87,11 @@
+ # START single_gzip
+ import gzip
+
+- with gzip.open("data/my-file.0.gz", "rt") as f:
++ with gzip.open("tests/data/my-file.0.gz", "rt") as f:
+ tokenizer.train_from_iterator(f, trainer=trainer)
+ # END single_gzip
+ # START multi_gzip
+- files = ["data/my-file.0.gz", "data/my-file.1.gz", "data/my-file.2.gz"]
++ files = ["tests/data/my-file.0.gz", "tests/data/my-file.1.gz", "tests/data/my-file.2.gz"]
+
+ def gzip_iterator():
+ for path in files: