zipp.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. import io
  2. import posixpath
  3. import zipfile
  4. import itertools
  5. import contextlib
  6. import sys
  7. import pathlib
  8. if sys.version_info < (3, 7):
  9. from collections import OrderedDict
  10. else:
  11. OrderedDict = dict
# Names exported by ``from <module> import *``: only ``Path`` is public.
__all__ = ['Path']
  13. def _parents(path):
  14. """
  15. Given a path with elements separated by
  16. posixpath.sep, generate all parents of that path.
  17. >>> list(_parents('b/d'))
  18. ['b']
  19. >>> list(_parents('/b/d/'))
  20. ['/b']
  21. >>> list(_parents('b/d/f/'))
  22. ['b/d', 'b']
  23. >>> list(_parents('b'))
  24. []
  25. >>> list(_parents(''))
  26. []
  27. """
  28. return itertools.islice(_ancestry(path), 1, None)
  29. def _ancestry(path):
  30. """
  31. Given a path with elements separated by
  32. posixpath.sep, generate all elements of that path
  33. >>> list(_ancestry('b/d'))
  34. ['b/d', 'b']
  35. >>> list(_ancestry('/b/d/'))
  36. ['/b/d', '/b']
  37. >>> list(_ancestry('b/d/f/'))
  38. ['b/d/f', 'b/d', 'b']
  39. >>> list(_ancestry('b'))
  40. ['b']
  41. >>> list(_ancestry(''))
  42. []
  43. """
  44. path = path.rstrip(posixpath.sep)
  45. while path and path != posixpath.sep:
  46. yield path
  47. path, tail = posixpath.split(path)
# Alias for ``OrderedDict.fromkeys``: called with an iterable it returns a
# mapping whose keys are the distinct items (values are None).
_dedupe = OrderedDict.fromkeys
"""Deduplicate an iterable in original order"""
  50. def _difference(minuend, subtrahend):
  51. """
  52. Return items in minuend not in subtrahend, retaining order
  53. with O(1) lookup.
  54. """
  55. return itertools.filterfalse(set(subtrahend).__contains__, minuend)
  56. class CompleteDirs(zipfile.ZipFile):
  57. """
  58. A ZipFile subclass that ensures that implied directories
  59. are always included in the namelist.
  60. """
  61. @staticmethod
  62. def _implied_dirs(names):
  63. parents = itertools.chain.from_iterable(map(_parents, names))
  64. as_dirs = (p + posixpath.sep for p in parents)
  65. return _dedupe(_difference(as_dirs, names))
  66. def namelist(self):
  67. names = super(CompleteDirs, self).namelist()
  68. return names + list(self._implied_dirs(names))
  69. def _name_set(self):
  70. return set(self.namelist())
  71. def resolve_dir(self, name):
  72. """
  73. If the name represents a directory, return that name
  74. as a directory (with the trailing slash).
  75. """
  76. names = self._name_set()
  77. dirname = name + '/'
  78. dir_match = name not in names and dirname in names
  79. return dirname if dir_match else name
  80. @classmethod
  81. def make(cls, source):
  82. """
  83. Given a source (filename or zipfile), return an
  84. appropriate CompleteDirs subclass.
  85. """
  86. if isinstance(source, CompleteDirs):
  87. return source
  88. if not isinstance(source, zipfile.ZipFile):
  89. return cls(_pathlib_compat(source))
  90. # Only allow for FastLookup when supplied zipfile is read-only
  91. if 'r' not in source.mode:
  92. cls = CompleteDirs
  93. source.__class__ = cls
  94. return source
  95. class FastLookup(CompleteDirs):
  96. """
  97. ZipFile subclass to ensure implicit
  98. dirs exist and are resolved rapidly.
  99. """
  100. def namelist(self):
  101. with contextlib.suppress(AttributeError):
  102. return self.__names
  103. self.__names = super(FastLookup, self).namelist()
  104. return self.__names
  105. def _name_set(self):
  106. with contextlib.suppress(AttributeError):
  107. return self.__lookup
  108. self.__lookup = super(FastLookup, self)._name_set()
  109. return self.__lookup
  110. def _pathlib_compat(path):
  111. """
  112. For path-like objects, convert to a filename for compatibility
  113. on Python 3.6.1 and earlier.
  114. """
  115. try:
  116. return path.__fspath__()
  117. except AttributeError:
  118. return str(path)
  119. class Path:
  120. """
  121. A pathlib-compatible interface for zip files.
  122. Consider a zip file with this structure::
  123. .
  124. ├── a.txt
  125. └── b
  126. ├── c.txt
  127. └── d
  128. └── e.txt
  129. >>> data = io.BytesIO()
  130. >>> zf = zipfile.ZipFile(data, 'w')
  131. >>> zf.writestr('a.txt', 'content of a')
  132. >>> zf.writestr('b/c.txt', 'content of c')
  133. >>> zf.writestr('b/d/e.txt', 'content of e')
  134. >>> zf.filename = 'mem/abcde.zip'
  135. Path accepts the zipfile object itself or a filename
  136. >>> root = Path(zf)
  137. From there, several path operations are available.
  138. Directory iteration (including the zip file itself):
  139. >>> a, b = root.iterdir()
  140. >>> a
  141. Path('mem/abcde.zip', 'a.txt')
  142. >>> b
  143. Path('mem/abcde.zip', 'b/')
  144. name property:
  145. >>> b.name
  146. 'b'
  147. join with divide operator:
  148. >>> c = b / 'c.txt'
  149. >>> c
  150. Path('mem/abcde.zip', 'b/c.txt')
  151. >>> c.name
  152. 'c.txt'
  153. Read text:
  154. >>> c.read_text()
  155. 'content of c'
  156. existence:
  157. >>> c.exists()
  158. True
  159. >>> (b / 'missing.txt').exists()
  160. False
  161. Coercion to string:
  162. >>> import os
  163. >>> str(c).replace(os.sep, posixpath.sep)
  164. 'mem/abcde.zip/b/c.txt'
  165. At the root, ``name``, ``filename``, and ``parent``
  166. resolve to the zipfile. Note these attributes are not
  167. valid and will raise a ``ValueError`` if the zipfile
  168. has no filename.
  169. >>> root.name
  170. 'abcde.zip'
  171. >>> str(root.filename).replace(os.sep, posixpath.sep)
  172. 'mem/abcde.zip'
  173. >>> str(root.parent)
  174. 'mem'
  175. """
  176. __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
  177. def __init__(self, root, at=""):
  178. """
  179. Construct a Path from a ZipFile or filename.
  180. Note: When the source is an existing ZipFile object,
  181. its type (__class__) will be mutated to a
  182. specialized type. If the caller wishes to retain the
  183. original type, the caller should either create a
  184. separate ZipFile object or pass a filename.
  185. """
  186. self.root = FastLookup.make(root)
  187. self.at = at
  188. def open(self, mode='r', *args, pwd=None, **kwargs):
  189. """
  190. Open this entry as text or binary following the semantics
  191. of ``pathlib.Path.open()`` by passing arguments through
  192. to io.TextIOWrapper().
  193. """
  194. if self.is_dir():
  195. raise IsADirectoryError(self)
  196. zip_mode = mode[0]
  197. if not self.exists() and zip_mode == 'r':
  198. raise FileNotFoundError(self)
  199. stream = self.root.open(self.at, zip_mode, pwd=pwd)
  200. if 'b' in mode:
  201. if args or kwargs:
  202. raise ValueError("encoding args invalid for binary operation")
  203. return stream
  204. return io.TextIOWrapper(stream, *args, **kwargs)
  205. @property
  206. def name(self):
  207. return pathlib.Path(self.at).name or self.filename.name
  208. @property
  209. def suffix(self):
  210. return pathlib.Path(self.at).suffix or self.filename.suffix
  211. @property
  212. def suffixes(self):
  213. return pathlib.Path(self.at).suffixes or self.filename.suffixes
  214. @property
  215. def stem(self):
  216. return pathlib.Path(self.at).stem or self.filename.stem
  217. @property
  218. def filename(self):
  219. return pathlib.Path(self.root.filename).joinpath(self.at)
  220. def read_text(self, *args, **kwargs):
  221. with self.open('r', *args, **kwargs) as strm:
  222. return strm.read()
  223. def read_bytes(self):
  224. with self.open('rb') as strm:
  225. return strm.read()
  226. def _is_child(self, path):
  227. return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
  228. def _next(self, at):
  229. return self.__class__(self.root, at)
  230. def is_dir(self):
  231. return not self.at or self.at.endswith("/")
  232. def is_file(self):
  233. return self.exists() and not self.is_dir()
  234. def exists(self):
  235. return self.at in self.root._name_set()
  236. def iterdir(self):
  237. if not self.is_dir():
  238. raise ValueError("Can't listdir a file")
  239. subs = map(self._next, self.root.namelist())
  240. return filter(self._is_child, subs)
  241. def __str__(self):
  242. return posixpath.join(self.root.filename, self.at)
  243. def __repr__(self):
  244. return self.__repr.format(self=self)
  245. def joinpath(self, *other):
  246. next = posixpath.join(self.at, *map(_pathlib_compat, other))
  247. return self._next(self.root.resolve_dir(next))
  248. __truediv__ = joinpath
  249. @property
  250. def parent(self):
  251. if not self.at:
  252. return self.filename.parent
  253. parent_at = posixpath.dirname(self.at.rstrip('/'))
  254. if parent_at:
  255. parent_at += '/'
  256. return self._next(parent_at)