123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- # Extracted from https://github.com/pfmoore/pkg_metadata
- from email.header import Header, decode_header, make_header
- from email.message import Message
- from typing import Any, Dict, List, Union
# Table of the metadata header names handled by this module.
# Each entry pairs the header name with a flag that is True when the
# header may appear several times in one message (its values are then
# collected into a list) and False when it is a single-use header.
METADATA_FIELDS = [
    # (header name, multiple-use)
    ("Metadata-Version", False),
    ("Name", False),
    ("Version", False),
    ("Dynamic", True),
    ("Platform", True),
    ("Supported-Platform", True),
    ("Summary", False),
    ("Description", False),
    ("Description-Content-Type", False),
    ("Keywords", False),
    ("Home-page", False),
    ("Download-URL", False),
    ("Author", False),
    ("Author-email", False),
    ("Maintainer", False),
    ("Maintainer-email", False),
    ("License", False),
    ("Classifier", True),
    ("Requires-Dist", True),
    ("Requires-Python", False),
    ("Requires-External", True),
    ("Project-URL", True),
    ("Provides-Extra", True),
    ("Provides-Dist", True),
    ("Obsoletes-Dist", True),
]
def json_name(field: str) -> str:
    """Return the JSON key for a metadata header name.

    The name is lower-cased and every hyphen is turned into an underscore,
    e.g. ``"Home-page"`` becomes ``"home_page"``.
    """
    lowered = field.lower()
    # Splitting on "-" and re-joining with "_" swaps every hyphen.
    return "_".join(lowered.split("-"))
def msg_to_json(msg: Message) -> Dict[str, Any]:
    """Build a JSON-compatible dictionary from a metadata ``Message``.

    Every header listed in ``METADATA_FIELDS`` that is present in *msg* is
    stored under the key produced by ``json_name``. Multiple-use headers
    become a list of strings and single-use headers a string, except
    ``Keywords``, which is split into a list. A non-empty message payload is
    stored under ``"description"``, replacing any value taken from a
    ``Description`` header.
    """

    def sanitise_header(h: Union[Header, str]) -> str:
        """Return *h* as text, choosing a charset for unknown-8bit chunks."""
        if not isinstance(h, Header):
            return str(h)

        decoded = []
        for raw, charset in decode_header(h):
            if charset == "unknown-8bit":
                # Prefer UTF-8 when the bytes decode cleanly; otherwise fall
                # back to latin1, which at least will not fail.
                try:
                    raw.decode("utf-8")
                except UnicodeDecodeError:
                    charset = "latin1"
                else:
                    charset = "utf-8"
            decoded.append((raw, charset))
        return str(make_header(decoded))

    result: Dict[str, Any] = {}
    for field, multi in METADATA_FIELDS:
        if field not in msg:
            continue

        key = json_name(field)
        if multi:
            result[key] = [sanitise_header(item) for item in msg.get_all(field)]
            continue

        text = sanitise_header(msg.get(field))
        if key != "keywords":
            result[key] = text
        elif "," in text:
            # Accept both comma-separated and space-separated forms, for
            # better compatibility with old data.
            result[key] = [part.strip() for part in text.split(",")]
        else:
            result[key] = text.split()

    body = msg.get_payload()
    if body:
        result["description"] = body

    return result
|