Skip to content

Commit 6a016ed

Browse files
committed
Move buffering decisions to _open functions, to make xopen.xopen less complex
1 parent e6652dd commit 6a016ed

File tree

2 files changed

+24
-23
lines changed

2 files changed

+24
-23
lines changed

src/xopen/__init__.py

Lines changed: 13 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -462,7 +462,11 @@ def _open_bz2(
462462
except OSError:
463463
pass # We try without threads.
464464

465-
return bz2.open(filename, mode, compresslevel)
465+
bz2_file = bz2.open(filename, mode, compresslevel)
466+
if "r" in mode:
467+
return bz2_file
468+
# Buffer writes on bz2.open to mitigate overhead of small writes
469+
return io.BufferedWriter(bz2_file) # type: ignore
466470

467471

468472
def _open_xz(
@@ -488,11 +492,10 @@ def _open_xz(
488492
except OSError:
489493
pass # We try without threads.
490494

491-
return lzma.open(
492-
filename,
493-
mode,
494-
preset=compresslevel if "r" not in mode else None,
495-
)
495+
if "r" in mode:
496+
return lzma.open(filename, mode)
497+
# Buffer writes on lzma.open to mitigate overhead of small writes
498+
return io.BufferedWriter(lzma.open(filename, mode, preset=compresslevel)) # type: ignore
496499

497500

498501
def _open_zst(
@@ -628,6 +631,9 @@ def _open_reproducible_gzip(filename, mode: str, compresslevel: int):
628631
# is called. This forces it to be closed.
629632
if closefd:
630633
gzip_file.myfileobj = fileobj
634+
if sys.version_info.major == 3 and sys.version_info.minor < 12 and "r" not in mode:
635+
# From version 3.12 onwards, gzip is properly internally buffered for writing.
636+
return io.BufferedWriter(gzip_file) # type: ignore
631637
return gzip_file
632638

633639

@@ -741,7 +747,7 @@ def xopen(
741747
...
742748

743749

744-
def xopen( # noqa: C901 # The function is complex, but readable.
750+
def xopen(
745751
filename: FileOrPath,
746752
mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r",
747753
compresslevel: Optional[int] = None,
@@ -824,18 +830,6 @@ def xopen( # noqa: C901 # The function is complex, but readable.
824830
else:
825831
opened_file, _ = _file_or_path_to_binary_stream(filename, binary_mode)
826832

827-
# The "write" method for GzipFile is very costly. Lots of python calls are
828-
# made. To a lesser extent this is true for LzmaFile and BZ2File. By
829-
# putting a buffer in between, the expensive write method is called much
830-
# less. The effect is very noticeable when writing small units such as
831-
# lines or FASTQ records.
832-
if (
833-
isinstance(opened_file, (gzip.GzipFile, bz2.BZ2File, lzma.LZMAFile)) # FIXME
834-
and "w" in mode
835-
):
836-
opened_file = io.BufferedWriter(
837-
opened_file, buffer_size=BUFFER_SIZE # type: ignore
838-
)
839833
if "t" in mode:
840834
return io.TextIOWrapper(opened_file, encoding, errors, newline)
841835
return opened_file

tests/test_xopen.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
Tests for the xopen.xopen function
33
"""
44
import bz2
5+
import sys
56
from contextlib import contextmanager
67
import functools
78
import gzip
@@ -386,16 +387,22 @@ def test_write_no_threads(tmp_path, ext):
386387
return
387388
klass = klasses[ext]
388389
with xopen(tmp_path / f"out{ext}", "wb", threads=0) as f:
389-
assert isinstance(f, io.BufferedWriter)
390-
if ext:
391-
assert isinstance(f.raw, klass), f
390+
if isinstance(f, io.BufferedWriter):
391+
if ext:
392+
assert isinstance(f.raw, klass), f
393+
else:
394+
if ext:
395+
assert isinstance(f, klass)
392396

393397

394398
def test_write_gzip_no_threads_no_isal(tmp_path, xopen_without_igzip):
395399
import gzip
396400

397401
with xopen_without_igzip(tmp_path / "out.gz", "wb", threads=0) as f:
398-
assert isinstance(f.raw, gzip.GzipFile), f
402+
if sys.version_info.major == 3 and sys.version_info.minor >= 12:
403+
assert isinstance(f, gzip.GzipFile), f
404+
else:
405+
assert isinstance(f.raw, gzip.GzipFile)
399406

400407

401408
def test_write_stdout():

0 commit comments

Comments
 (0)