urls.py 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import os
  2. import string
  3. import urllib.parse
  4. import urllib.request
  5. from typing import Optional
  6. from .compat import WINDOWS
  7. def get_url_scheme(url: str) -> Optional[str]:
  8. if ":" not in url:
  9. return None
  10. return url.split(":", 1)[0].lower()
  11. def path_to_url(path: str) -> str:
  12. """
  13. Convert a path to a file: URL. The path will be made absolute and have
  14. quoted path parts.
  15. """
  16. path = os.path.normpath(os.path.abspath(path))
  17. url = urllib.parse.urljoin("file:", urllib.request.pathname2url(path))
  18. return url
  19. def url_to_path(url: str) -> str:
  20. """
  21. Convert a file: URL to a path.
  22. """
  23. assert url.startswith(
  24. "file:"
  25. ), f"You can only turn file: urls into filenames (not {url!r})"
  26. _, netloc, path, _, _ = urllib.parse.urlsplit(url)
  27. if not netloc or netloc == "localhost":
  28. # According to RFC 8089, same as empty authority.
  29. netloc = ""
  30. elif WINDOWS:
  31. # If we have a UNC path, prepend UNC share notation.
  32. netloc = "\\\\" + netloc
  33. else:
  34. raise ValueError(
  35. f"non-local file URIs are not supported on this platform: {url!r}"
  36. )
  37. path = urllib.request.url2pathname(netloc + path)
  38. # On Windows, urlsplit parses the path as something like "/C:/Users/foo".
  39. # This creates issues for path-related functions like io.open(), so we try
  40. # to detect and strip the leading slash.
  41. if (
  42. WINDOWS
  43. and not netloc # Not UNC.
  44. and len(path) >= 3
  45. and path[0] == "/" # Leading slash to strip.
  46. and path[1] in string.ascii_letters # Drive letter.
  47. and path[2:4] in (":", ":/") # Colon + end of string, or colon + absolute path.
  48. ):
  49. path = path[1:]
  50. return path