Skip to content

Commit 3306269

Browse files
authored
Merge pull request #158 from pycompression/fixbug
Fix stdin bug
2 parents 63a10ff + 77778a3 commit 3306269

File tree

3 files changed

+104
-4
lines changed

3 files changed

+104
-4
lines changed

README.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,13 @@ To ensure that you get the correct ``zstandard`` version, you can specify the ``
184184
Changelog
185185
---------
186186

187+
in-development
188+
~~~~~~~~~~~~~~~~~~~
189+
+ #158: Fixed a bug where reading from stdin and other pipes would discard the
190+
first bytes from the input.
191+
+ #156: Zstd files compressed with the ``--long=31`` flag can now be opened
192+
without throwing errors.
193+
187194
v2.0.0 (2024-03-26)
188195
~~~~~~~~~~~~~~~~~~~
189196

src/xopen/__init__.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import dataclasses
1212
import gzip
13+
import stat
1314
import sys
1415
import io
1516
import os
@@ -701,8 +702,6 @@ def _file_or_path_to_binary_stream(
701702
file_or_path: FileOrPath, binary_mode: str
702703
) -> Tuple[BinaryIO, bool]:
703704
assert binary_mode in ("rb", "wb", "ab")
704-
if file_or_path == "-":
705-
return _open_stdin_or_out(binary_mode), False
706705
if isinstance(file_or_path, (str, bytes)) or hasattr(file_or_path, "__fspath__"):
707706
return open(os.fspath(file_or_path), binary_mode), True # type: ignore
708707
if isinstance(file_or_path, io.TextIOWrapper):
@@ -722,10 +721,23 @@ def _filepath_from_path_or_filelike(fileorpath: FileOrPath) -> str:
722721
except TypeError:
723722
pass
724723
if hasattr(fileorpath, "name"):
725-
return fileorpath.name
724+
name = fileorpath.name
725+
if isinstance(name, str):
726+
return name
727+
elif isinstance(name, bytes):
728+
return name.decode()
726729
return ""
727730

728731

732+
def _file_is_a_socket_or_pipe(filepath):
733+
try:
734+
mode = os.stat(filepath).st_mode
735+
# Treat anything that is not a regular file as special
736+
return not stat.S_ISREG(mode)
737+
except (OSError, TypeError): # Type error for unexpected types in stat.
738+
return False
739+
740+
729741
@overload
730742
def xopen(
731743
filename: FileOrPath,
@@ -756,7 +768,7 @@ def xopen(
756768
...
757769

758770

759-
def xopen(
771+
def xopen( # noqa: C901
760772
filename: FileOrPath,
761773
mode: Literal["r", "w", "a", "rt", "rb", "wt", "wb", "at", "ab"] = "r",
762774
compresslevel: Optional[int] = None,
@@ -819,6 +831,13 @@ def xopen(
819831
binary_mode = mode[0] + "b"
820832
filepath = _filepath_from_path_or_filelike(filename)
821833

834+
# Open non-regular files such as pipes and sockets here to force opening
835+
# them once.
836+
if filename == "-":
837+
filename = _open_stdin_or_out(binary_mode)
838+
elif _file_is_a_socket_or_pipe(filename):
839+
filename = open(filename, binary_mode) # type: ignore
840+
822841
if format not in (None, "gz", "xz", "bz2", "zst"):
823842
raise ValueError(
824843
f"Format not supported: {format}. "

tests/test_xopen.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
Tests for the xopen.xopen function
33
"""
44
import bz2
5+
import subprocess
56
import sys
7+
import tempfile
68
from contextlib import contextmanager
79
import functools
810
import gzip
@@ -634,3 +636,75 @@ def test_pass_bytesio_for_reading_and_writing(ext, threads):
634636
filelike.seek(0)
635637
with xopen(filelike, "rb", format=format, threads=threads) as fh:
636638
assert fh.readline() == first_line
639+
640+
641+
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_stdin(monkeypatch, ext, threads):
    """Opening "-" for reading must yield the content behind sys.stdin."""
    zstd_unavailable = zstandard is None
    if zstd_unavailable and ext == ".zst":
        return
    source_path = TEST_DIR / f"file.txt{ext}"
    # An explicit encoding is passed to suppress encoding warnings
    with open(source_path, "rt", encoding="latin-1") as fake_stdin:
        monkeypatch.setattr("sys.stdin", fake_stdin)
        with xopen("-", "rt", threads=threads) as reader:
            assert reader.read() == CONTENT
651+
652+
653+
def test_xopen_stdout(monkeypatch):
    """Writing to "-" must end up in whatever sys.stdout points at."""
    message = "Hello world!"
    # An explicit encoding is passed to suppress encoding warnings
    fake_stdout = tempfile.TemporaryFile(mode="w+t", encoding="latin-1")
    with fake_stdout:
        monkeypatch.setattr("sys.stdout", fake_stdout)
        with xopen("-", "wt") as writer:
            writer.write(message)
        # Rewind so that everything written so far can be read back.
        fake_stdout.seek(0)
        assert fake_stdout.read() == message
662+
663+
664+
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_read_from_pipe(ext, threads):
    """Read the test file for ``ext`` through the stdout pipe of ``cat``."""
    if ext == ".zst" and zstandard is None:
        return
    in_file = TEST_DIR / f"file.txt{ext}"
    # Using Popen as a context manager closes the pipe and waits for the
    # child on exit, also when reading raises, so a failing test does not
    # leak a process or a file descriptor.
    with subprocess.Popen(("cat", str(in_file)), stdout=subprocess.PIPE) as process:
        with xopen(process.stdout, "rt", threads=threads) as f:
            data = f.read()
    assert data == CONTENT
675+
676+
677+
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_write_to_pipe(threads, ext):
    """Write CONTENT into the stdin of ``cat`` and read it back from its stdout."""
    if ext == ".zst" and zstandard is None:
        return
    # An empty extension means no explicit format is passed.
    fmt = ext.lstrip(".") or None
    # Using Popen as a context manager closes both pipes and waits for the
    # child on exit, also when writing or reading raises.
    with subprocess.Popen(
        ("cat",), stdout=subprocess.PIPE, stdin=subprocess.PIPE
    ) as process:
        with xopen(process.stdin, "wt", threads=threads, format=fmt) as f:
            f.write(CONTENT)
        # Close stdin explicitly so that ``cat`` sees EOF before its output
        # is read to the end.
        process.stdin.close()
        with xopen(process.stdout, "rt", threads=threads) as f:
            data = f.read()
    assert data == CONTENT
693+
694+
695+
@pytest.mark.skipif(
    not os.path.exists("/dev/stdin"), reason="/dev/stdin does not exist"
)
@pytest.mark.parametrize("threads", (0, 1))
def test_xopen_dev_stdin_read(threads, ext):
    """Open "/dev/stdin" by path in a child interpreter whose stdin is a pipe."""
    if ext == ".zst" and zstandard is None:
        return
    file = str(Path(__file__).parent / f"file.txt{ext}")
    code = (
        "import xopen; "
        f'f=xopen.xopen("/dev/stdin", "rt", threads={threads});print(f.read())'
    )
    # Feed the child through a real pipe from ``cat`` without going through
    # a shell: the file path needs no quoting this way, and sys.executable
    # guarantees the child runs the same interpreter as the tests.
    with subprocess.Popen(("cat", file), stdout=subprocess.PIPE) as producer:
        result = subprocess.run(
            (sys.executable, "-c", code),
            stdin=producer.stdout,
            stdout=subprocess.PIPE,
            encoding="ascii",
        )
    assert result.stdout == CONTENT + "\n"

0 commit comments

Comments
 (0)