# results.py
from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
import pprint
from weakref import ref as wkref
from typing import Tuple, Any
  6. str_type: Tuple[type, ...] = (str, bytes)
  7. _generator_type = type((_ for _ in ()))
  8. class _ParseResultsWithOffset:
  9. __slots__ = ["tup"]
  10. def __init__(self, p1, p2):
  11. self.tup = (p1, p2)
  12. def __getitem__(self, i):
  13. return self.tup[i]
  14. def __getstate__(self):
  15. return self.tup
  16. def __setstate__(self, *args):
  17. self.tup = args[0]
  18. class ParseResults:
  19. """Structured parse results, to provide multiple means of access to
  20. the parsed data:
  21. - as a list (``len(results)``)
  22. - by list index (``results[0], results[1]``, etc.)
  23. - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
  24. Example::
  25. integer = Word(nums)
  26. date_str = (integer.set_results_name("year") + '/'
  27. + integer.set_results_name("month") + '/'
  28. + integer.set_results_name("day"))
  29. # equivalent form:
  30. # date_str = (integer("year") + '/'
  31. # + integer("month") + '/'
  32. # + integer("day"))
  33. # parse_string returns a ParseResults object
  34. result = date_str.parse_string("1999/12/31")
  35. def test(s, fn=repr):
  36. print("{} -> {}".format(s, fn(eval(s))))
  37. test("list(result)")
  38. test("result[0]")
  39. test("result['month']")
  40. test("result.day")
  41. test("'month' in result")
  42. test("'minutes' in result")
  43. test("result.dump()", str)
  44. prints::
  45. list(result) -> ['1999', '/', '12', '/', '31']
  46. result[0] -> '1999'
  47. result['month'] -> '12'
  48. result.day -> '31'
  49. 'month' in result -> True
  50. 'minutes' in result -> False
  51. result.dump() -> ['1999', '/', '12', '/', '31']
  52. - day: '31'
  53. - month: '12'
  54. - year: '1999'
  55. """
  56. _null_values: Tuple[Any, ...] = (None, [], "", ())
  57. __slots__ = [
  58. "_name",
  59. "_parent",
  60. "_all_names",
  61. "_modal",
  62. "_toklist",
  63. "_tokdict",
  64. "__weakref__",
  65. ]
  66. class List(list):
  67. """
  68. Simple wrapper class to distinguish parsed list results that should be preserved
  69. as actual Python lists, instead of being converted to :class:`ParseResults`:
  70. LBRACK, RBRACK = map(pp.Suppress, "[]")
  71. element = pp.Forward()
  72. item = ppc.integer
  73. element_list = LBRACK + pp.delimited_list(element) + RBRACK
  74. # add parse actions to convert from ParseResults to actual Python collection types
  75. def as_python_list(t):
  76. return pp.ParseResults.List(t.as_list())
  77. element_list.add_parse_action(as_python_list)
  78. element <<= item | element_list
  79. element.run_tests('''
  80. 100
  81. [2,3,4]
  82. [[2, 1],3,4]
  83. [(2, 1),3,4]
  84. (2,3,4)
  85. ''', post_parse=lambda s, r: (r[0], type(r[0])))
  86. prints:
  87. 100
  88. (100, <class 'int'>)
  89. [2,3,4]
  90. ([2, 3, 4], <class 'list'>)
  91. [[2, 1],3,4]
  92. ([[2, 1], 3, 4], <class 'list'>)
  93. (Used internally by :class:`Group` when `aslist=True`.)
  94. """
  95. def __new__(cls, contained=None):
  96. if contained is None:
  97. contained = []
  98. if not isinstance(contained, list):
  99. raise TypeError(
  100. "{} may only be constructed with a list,"
  101. " not {}".format(cls.__name__, type(contained).__name__)
  102. )
  103. return list.__new__(cls)
  104. def __new__(cls, toklist=None, name=None, **kwargs):
  105. if isinstance(toklist, ParseResults):
  106. return toklist
  107. self = object.__new__(cls)
  108. self._name = None
  109. self._parent = None
  110. self._all_names = set()
  111. if toklist is None:
  112. self._toklist = []
  113. elif isinstance(toklist, (list, _generator_type)):
  114. self._toklist = (
  115. [toklist[:]]
  116. if isinstance(toklist, ParseResults.List)
  117. else list(toklist)
  118. )
  119. else:
  120. self._toklist = [toklist]
  121. self._tokdict = dict()
  122. return self
  123. # Performance tuning: we construct a *lot* of these, so keep this
  124. # constructor as small and fast as possible
  125. def __init__(
  126. self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
  127. ):
  128. self._modal = modal
  129. if name is not None and name != "":
  130. if isinstance(name, int):
  131. name = str(name)
  132. if not modal:
  133. self._all_names = {name}
  134. self._name = name
  135. if toklist not in self._null_values:
  136. if isinstance(toklist, (str_type, type)):
  137. toklist = [toklist]
  138. if asList:
  139. if isinstance(toklist, ParseResults):
  140. self[name] = _ParseResultsWithOffset(
  141. ParseResults(toklist._toklist), 0
  142. )
  143. else:
  144. self[name] = _ParseResultsWithOffset(
  145. ParseResults(toklist[0]), 0
  146. )
  147. self[name]._name = name
  148. else:
  149. try:
  150. self[name] = toklist[0]
  151. except (KeyError, TypeError, IndexError):
  152. if toklist is not self:
  153. self[name] = toklist
  154. else:
  155. self._name = name
  156. def __getitem__(self, i):
  157. if isinstance(i, (int, slice)):
  158. return self._toklist[i]
  159. else:
  160. if i not in self._all_names:
  161. return self._tokdict[i][-1][0]
  162. else:
  163. return ParseResults([v[0] for v in self._tokdict[i]])
  164. def __setitem__(self, k, v, isinstance=isinstance):
  165. if isinstance(v, _ParseResultsWithOffset):
  166. self._tokdict[k] = self._tokdict.get(k, list()) + [v]
  167. sub = v[0]
  168. elif isinstance(k, (int, slice)):
  169. self._toklist[k] = v
  170. sub = v
  171. else:
  172. self._tokdict[k] = self._tokdict.get(k, list()) + [
  173. _ParseResultsWithOffset(v, 0)
  174. ]
  175. sub = v
  176. if isinstance(sub, ParseResults):
  177. sub._parent = wkref(self)
  178. def __delitem__(self, i):
  179. if isinstance(i, (int, slice)):
  180. mylen = len(self._toklist)
  181. del self._toklist[i]
  182. # convert int to slice
  183. if isinstance(i, int):
  184. if i < 0:
  185. i += mylen
  186. i = slice(i, i + 1)
  187. # get removed indices
  188. removed = list(range(*i.indices(mylen)))
  189. removed.reverse()
  190. # fixup indices in token dictionary
  191. for name, occurrences in self._tokdict.items():
  192. for j in removed:
  193. for k, (value, position) in enumerate(occurrences):
  194. occurrences[k] = _ParseResultsWithOffset(
  195. value, position - (position > j)
  196. )
  197. else:
  198. del self._tokdict[i]
  199. def __contains__(self, k) -> bool:
  200. return k in self._tokdict
  201. def __len__(self) -> int:
  202. return len(self._toklist)
  203. def __bool__(self) -> bool:
  204. return not not (self._toklist or self._tokdict)
  205. def __iter__(self) -> Iterator:
  206. return iter(self._toklist)
  207. def __reversed__(self) -> Iterator:
  208. return iter(self._toklist[::-1])
  209. def keys(self):
  210. return iter(self._tokdict)
  211. def values(self):
  212. return (self[k] for k in self.keys())
  213. def items(self):
  214. return ((k, self[k]) for k in self.keys())
  215. def haskeys(self) -> bool:
  216. """
  217. Since ``keys()`` returns an iterator, this method is helpful in bypassing
  218. code that looks for the existence of any defined results names."""
  219. return bool(self._tokdict)
  220. def pop(self, *args, **kwargs):
  221. """
  222. Removes and returns item at specified index (default= ``last``).
  223. Supports both ``list`` and ``dict`` semantics for ``pop()``. If
  224. passed no argument or an integer argument, it will use ``list``
  225. semantics and pop tokens from the list of parsed tokens. If passed
  226. a non-integer argument (most likely a string), it will use ``dict``
  227. semantics and pop the corresponding value from any defined results
  228. names. A second default return value argument is supported, just as in
  229. ``dict.pop()``.
  230. Example::
  231. numlist = Word(nums)[...]
  232. print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
  233. def remove_first(tokens):
  234. tokens.pop(0)
  235. numlist.add_parse_action(remove_first)
  236. print(numlist.parse_string("0 123 321")) # -> ['123', '321']
  237. label = Word(alphas)
  238. patt = label("LABEL") + Word(nums)[1, ...]
  239. print(patt.parse_string("AAB 123 321").dump())
  240. # Use pop() in a parse action to remove named result (note that corresponding value is not
  241. # removed from list form of results)
  242. def remove_LABEL(tokens):
  243. tokens.pop("LABEL")
  244. return tokens
  245. patt.add_parse_action(remove_LABEL)
  246. print(patt.parse_string("AAB 123 321").dump())
  247. prints::
  248. ['AAB', '123', '321']
  249. - LABEL: 'AAB'
  250. ['AAB', '123', '321']
  251. """
  252. if not args:
  253. args = [-1]
  254. for k, v in kwargs.items():
  255. if k == "default":
  256. args = (args[0], v)
  257. else:
  258. raise TypeError(
  259. "pop() got an unexpected keyword argument {!r}".format(k)
  260. )
  261. if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
  262. index = args[0]
  263. ret = self[index]
  264. del self[index]
  265. return ret
  266. else:
  267. defaultvalue = args[1]
  268. return defaultvalue
  269. def get(self, key, default_value=None):
  270. """
  271. Returns named result matching the given key, or if there is no
  272. such name, then returns the given ``default_value`` or ``None`` if no
  273. ``default_value`` is specified.
  274. Similar to ``dict.get()``.
  275. Example::
  276. integer = Word(nums)
  277. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  278. result = date_str.parse_string("1999/12/31")
  279. print(result.get("year")) # -> '1999'
  280. print(result.get("hour", "not specified")) # -> 'not specified'
  281. print(result.get("hour")) # -> None
  282. """
  283. if key in self:
  284. return self[key]
  285. else:
  286. return default_value
  287. def insert(self, index, ins_string):
  288. """
  289. Inserts new element at location index in the list of parsed tokens.
  290. Similar to ``list.insert()``.
  291. Example::
  292. numlist = Word(nums)[...]
  293. print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
  294. # use a parse action to insert the parse location in the front of the parsed results
  295. def insert_locn(locn, tokens):
  296. tokens.insert(0, locn)
  297. numlist.add_parse_action(insert_locn)
  298. print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
  299. """
  300. self._toklist.insert(index, ins_string)
  301. # fixup indices in token dictionary
  302. for name, occurrences in self._tokdict.items():
  303. for k, (value, position) in enumerate(occurrences):
  304. occurrences[k] = _ParseResultsWithOffset(
  305. value, position + (position > index)
  306. )
  307. def append(self, item):
  308. """
  309. Add single element to end of ``ParseResults`` list of elements.
  310. Example::
  311. numlist = Word(nums)[...]
  312. print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
  313. # use a parse action to compute the sum of the parsed integers, and add it to the end
  314. def append_sum(tokens):
  315. tokens.append(sum(map(int, tokens)))
  316. numlist.add_parse_action(append_sum)
  317. print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
  318. """
  319. self._toklist.append(item)
  320. def extend(self, itemseq):
  321. """
  322. Add sequence of elements to end of ``ParseResults`` list of elements.
  323. Example::
  324. patt = Word(alphas)[1, ...]
  325. # use a parse action to append the reverse of the matched strings, to make a palindrome
  326. def make_palindrome(tokens):
  327. tokens.extend(reversed([t[::-1] for t in tokens]))
  328. return ''.join(tokens)
  329. patt.add_parse_action(make_palindrome)
  330. print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  331. """
  332. if isinstance(itemseq, ParseResults):
  333. self.__iadd__(itemseq)
  334. else:
  335. self._toklist.extend(itemseq)
  336. def clear(self):
  337. """
  338. Clear all elements and results names.
  339. """
  340. del self._toklist[:]
  341. self._tokdict.clear()
  342. def __getattr__(self, name):
  343. try:
  344. return self[name]
  345. except KeyError:
  346. if name.startswith("__"):
  347. raise AttributeError(name)
  348. return ""
  349. def __add__(self, other) -> "ParseResults":
  350. ret = self.copy()
  351. ret += other
  352. return ret
  353. def __iadd__(self, other) -> "ParseResults":
  354. if other._tokdict:
  355. offset = len(self._toklist)
  356. addoffset = lambda a: offset if a < 0 else a + offset
  357. otheritems = other._tokdict.items()
  358. otherdictitems = [
  359. (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
  360. for k, vlist in otheritems
  361. for v in vlist
  362. ]
  363. for k, v in otherdictitems:
  364. self[k] = v
  365. if isinstance(v[0], ParseResults):
  366. v[0]._parent = wkref(self)
  367. self._toklist += other._toklist
  368. self._all_names |= other._all_names
  369. return self
  370. def __radd__(self, other) -> "ParseResults":
  371. if isinstance(other, int) and other == 0:
  372. # useful for merging many ParseResults using sum() builtin
  373. return self.copy()
  374. else:
  375. # this may raise a TypeError - so be it
  376. return other + self
  377. def __repr__(self) -> str:
  378. return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())
  379. def __str__(self) -> str:
  380. return (
  381. "["
  382. + ", ".join(
  383. [
  384. str(i) if isinstance(i, ParseResults) else repr(i)
  385. for i in self._toklist
  386. ]
  387. )
  388. + "]"
  389. )
  390. def _asStringList(self, sep=""):
  391. out = []
  392. for item in self._toklist:
  393. if out and sep:
  394. out.append(sep)
  395. if isinstance(item, ParseResults):
  396. out += item._asStringList()
  397. else:
  398. out.append(str(item))
  399. return out
  400. def as_list(self) -> list:
  401. """
  402. Returns the parse results as a nested list of matching tokens, all converted to strings.
  403. Example::
  404. patt = Word(alphas)[1, ...]
  405. result = patt.parse_string("sldkj lsdkj sldkj")
  406. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  407. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  408. # Use as_list() to create an actual list
  409. result_list = result.as_list()
  410. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  411. """
  412. return [
  413. res.as_list() if isinstance(res, ParseResults) else res
  414. for res in self._toklist
  415. ]
  416. def as_dict(self) -> dict:
  417. """
  418. Returns the named parse results as a nested dictionary.
  419. Example::
  420. integer = Word(nums)
  421. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  422. result = date_str.parse_string('12/31/1999')
  423. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  424. result_dict = result.as_dict()
  425. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  426. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  427. import json
  428. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  429. print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
  430. """
  431. def to_item(obj):
  432. if isinstance(obj, ParseResults):
  433. return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
  434. else:
  435. return obj
  436. return dict((k, to_item(v)) for k, v in self.items())
  437. def copy(self) -> "ParseResults":
  438. """
  439. Returns a new copy of a :class:`ParseResults` object.
  440. """
  441. ret = ParseResults(self._toklist)
  442. ret._tokdict = self._tokdict.copy()
  443. ret._parent = self._parent
  444. ret._all_names |= self._all_names
  445. ret._name = self._name
  446. return ret
  447. def get_name(self):
  448. r"""
  449. Returns the results name for this token expression. Useful when several
  450. different expressions might match at a particular location.
  451. Example::
  452. integer = Word(nums)
  453. ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
  454. house_number_expr = Suppress('#') + Word(nums, alphanums)
  455. user_data = (Group(house_number_expr)("house_number")
  456. | Group(ssn_expr)("ssn")
  457. | Group(integer)("age"))
  458. user_info = user_data[1, ...]
  459. result = user_info.parse_string("22 111-22-3333 #221B")
  460. for item in result:
  461. print(item.get_name(), ':', item[0])
  462. prints::
  463. age : 22
  464. ssn : 111-22-3333
  465. house_number : 221B
  466. """
  467. if self._name:
  468. return self._name
  469. elif self._parent:
  470. par = self._parent()
  471. def find_in_parent(sub):
  472. return next(
  473. (
  474. k
  475. for k, vlist in par._tokdict.items()
  476. for v, loc in vlist
  477. if sub is v
  478. ),
  479. None,
  480. )
  481. return find_in_parent(self) if par else None
  482. elif (
  483. len(self) == 1
  484. and len(self._tokdict) == 1
  485. and next(iter(self._tokdict.values()))[0][1] in (0, -1)
  486. ):
  487. return next(iter(self._tokdict.keys()))
  488. else:
  489. return None
  490. def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
  491. """
  492. Diagnostic method for listing out the contents of
  493. a :class:`ParseResults`. Accepts an optional ``indent`` argument so
  494. that this string can be embedded in a nested display of other data.
  495. Example::
  496. integer = Word(nums)
  497. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  498. result = date_str.parse_string('1999/12/31')
  499. print(result.dump())
  500. prints::
  501. ['1999', '/', '12', '/', '31']
  502. - day: '31'
  503. - month: '12'
  504. - year: '1999'
  505. """
  506. out = []
  507. NL = "\n"
  508. out.append(indent + str(self.as_list()) if include_list else "")
  509. if full:
  510. if self.haskeys():
  511. items = sorted((str(k), v) for k, v in self.items())
  512. for k, v in items:
  513. if out:
  514. out.append(NL)
  515. out.append("{}{}- {}: ".format(indent, (" " * _depth), k))
  516. if isinstance(v, ParseResults):
  517. if v:
  518. out.append(
  519. v.dump(
  520. indent=indent,
  521. full=full,
  522. include_list=include_list,
  523. _depth=_depth + 1,
  524. )
  525. )
  526. else:
  527. out.append(str(v))
  528. else:
  529. out.append(repr(v))
  530. if any(isinstance(vv, ParseResults) for vv in self):
  531. v = self
  532. for i, vv in enumerate(v):
  533. if isinstance(vv, ParseResults):
  534. out.append(
  535. "\n{}{}[{}]:\n{}{}{}".format(
  536. indent,
  537. (" " * (_depth)),
  538. i,
  539. indent,
  540. (" " * (_depth + 1)),
  541. vv.dump(
  542. indent=indent,
  543. full=full,
  544. include_list=include_list,
  545. _depth=_depth + 1,
  546. ),
  547. )
  548. )
  549. else:
  550. out.append(
  551. "\n%s%s[%d]:\n%s%s%s"
  552. % (
  553. indent,
  554. (" " * (_depth)),
  555. i,
  556. indent,
  557. (" " * (_depth + 1)),
  558. str(vv),
  559. )
  560. )
  561. return "".join(out)
  562. def pprint(self, *args, **kwargs):
  563. """
  564. Pretty-printer for parsed results as a list, using the
  565. `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
  566. Accepts additional positional or keyword args as defined for
  567. `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
  568. Example::
  569. ident = Word(alphas, alphanums)
  570. num = Word(nums)
  571. func = Forward()
  572. term = ident | num | Group('(' + func + ')')
  573. func <<= ident + Group(Optional(delimited_list(term)))
  574. result = func.parse_string("fna a,b,(fnb c,d,200),100")
  575. result.pprint(width=40)
  576. prints::
  577. ['fna',
  578. ['a',
  579. 'b',
  580. ['(', 'fnb', ['c', 'd', '200'], ')'],
  581. '100']]
  582. """
  583. pprint.pprint(self.as_list(), *args, **kwargs)
  584. # add support for pickle protocol
  585. def __getstate__(self):
  586. return (
  587. self._toklist,
  588. (
  589. self._tokdict.copy(),
  590. self._parent is not None and self._parent() or None,
  591. self._all_names,
  592. self._name,
  593. ),
  594. )
  595. def __setstate__(self, state):
  596. self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
  597. self._all_names = set(inAccumNames)
  598. if par is not None:
  599. self._parent = wkref(par)
  600. else:
  601. self._parent = None
  602. def __getnewargs__(self):
  603. return self._toklist, self._name
  604. def __dir__(self):
  605. return dir(type(self)) + list(self.keys())
  606. @classmethod
  607. def from_dict(cls, other, name=None) -> "ParseResults":
  608. """
  609. Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
  610. name-value relations as results names. If an optional ``name`` argument is
  611. given, a nested ``ParseResults`` will be returned.
  612. """
  613. def is_iterable(obj):
  614. try:
  615. iter(obj)
  616. except Exception:
  617. return False
  618. else:
  619. return not isinstance(obj, str_type)
  620. ret = cls([])
  621. for k, v in other.items():
  622. if isinstance(v, Mapping):
  623. ret += cls.from_dict(v, name=k)
  624. else:
  625. ret += cls([v], name=k, asList=is_iterable(v))
  626. if name is not None:
  627. ret = cls([ret], name=name)
  628. return ret
  629. asList = as_list
  630. asDict = as_dict
  631. getName = get_name
  632. MutableMapping.register(ParseResults)
  633. MutableSequence.register(ParseResults)