inputs.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. # -*- coding: utf-8 -*-
  2. """
  3. This module provides some helpers for advanced types parsing.
  4. You can define your own parser using the same pattern:
  5. .. code-block:: python
  6. def my_type(value):
  7. if not condition:
  8. raise ValueError('This is not my type')
  9. return parse(value)
  10. # Swagger documentation
  11. my_type.__schema__ = {'type': 'string', 'format': 'my-custom-format'}
  12. The last line allows you to properly document the type in the Swagger documentation.
  13. """
  14. from __future__ import unicode_literals
  15. import re
  16. import socket
  17. from datetime import datetime, time, timedelta
  18. from email.utils import parsedate_tz, mktime_tz
  19. from six.moves.urllib.parse import urlparse
  20. import aniso8601
  21. import pytz
# Constants for upgrading date-based intervals to full datetimes.
# Both are timezone-aware (UTC) ``time`` objects; START_OF_DAY is what
# ``_normalize_interval`` combines with plain dates.
START_OF_DAY = time(0, 0, 0, tzinfo=pytz.UTC)
# NOTE(review): END_OF_DAY is not referenced anywhere in the visible part of
# this module -- presumably kept for external users; confirm before removing.
END_OF_DAY = time(23, 59, 59, 999999, tzinfo=pytz.UTC)

# Matches the network-location part of a URL (what ``urlparse`` exposes as
# ``netloc``): optional ``user[:password]@`` prefix, then exactly one of
# localhost / IPv4 / IPv6 / domain name, then an optional ``:port``.
# ``URL.__call__`` reads the named groups through ``groupdict()``.
# The pattern is case-insensitive and anchored at the end with ``$``.
netloc_regex = re.compile(
    r"(?:(?P<auth>[^:@]+?(?::[^:@]*?)?)@)?"  # basic auth
    r"(?:"
    r"(?P<localhost>localhost)|"  # localhost...
    r"(?P<ipv4>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|"  # ...or ipv4
    r"(?:\[?(?P<ipv6>[A-F0-9]*:[A-F0-9:]+)\]?)|"  # ...or ipv6
    r"(?P<domain>(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?))"  # domain...
    r")"
    r"(?::(?P<port>\d+))?"  # optional port
    r"$",
    re.IGNORECASE,
)

# Splits an email address into a ``local`` part (anything without ``@`` that
# does not end with a dot) and a ``server`` part (dot-separated labels
# containing neither ``@`` nor ``.``).  Consecutive dots are rejected
# separately by ``email.__call__``.
email_regex = re.compile(
    r"^" "(?P<local>[^@]*[^@.])" r"@" r"(?P<server>[^@\.]+(?:\.[^@\.]+)*)" r"$",
    re.IGNORECASE,
)

# Detects an ``HH:MM`` fragment; ``datetime_from_rfc822`` uses it to decide
# whether the input already carries a time component.
time_regex = re.compile(r"\d{2}:\d{2}")
  42. def ipv4(value):
  43. """Validate an IPv4 address"""
  44. try:
  45. socket.inet_aton(value)
  46. if value.count(".") == 3:
  47. return value
  48. except socket.error:
  49. pass
  50. raise ValueError("{0} is not a valid ipv4 address".format(value))
  51. ipv4.__schema__ = {"type": "string", "format": "ipv4"}
  52. def ipv6(value):
  53. """Validate an IPv6 address"""
  54. try:
  55. socket.inet_pton(socket.AF_INET6, value)
  56. return value
  57. except socket.error:
  58. raise ValueError("{0} is not a valid ipv4 address".format(value))
  59. ipv6.__schema__ = {"type": "string", "format": "ipv6"}
  60. def ip(value):
  61. """Validate an IP address (both IPv4 and IPv6)"""
  62. try:
  63. return ipv4(value)
  64. except ValueError:
  65. pass
  66. try:
  67. return ipv6(value)
  68. except ValueError:
  69. raise ValueError("{0} is not a valid ip".format(value))
  70. ip.__schema__ = {"type": "string", "format": "ip"}
  71. class URL(object):
  72. """
  73. Validate an URL.
  74. Example::
  75. parser = reqparse.RequestParser()
  76. parser.add_argument('url', type=inputs.URL(schemes=['http', 'https']))
  77. Input to the ``URL`` argument will be rejected
  78. if it does not match an URL with specified constraints.
  79. If ``check`` is True it will also be rejected if the domain does not exists.
  80. :param bool check: Check the domain exists (perform a DNS resolution)
  81. :param bool ip: Allow IP (both ipv4/ipv6) as domain
  82. :param bool local: Allow localhost (both string or ip) as domain
  83. :param bool port: Allow a port to be present
  84. :param bool auth: Allow authentication to be present
  85. :param list|tuple schemes: Restrict valid schemes to this list
  86. :param list|tuple domains: Restrict valid domains to this list
  87. :param list|tuple exclude: Exclude some domains
  88. """
  89. def __init__(
  90. self,
  91. check=False,
  92. ip=False,
  93. local=False,
  94. port=False,
  95. auth=False,
  96. schemes=None,
  97. domains=None,
  98. exclude=None,
  99. ):
  100. self.check = check
  101. self.ip = ip
  102. self.local = local
  103. self.port = port
  104. self.auth = auth
  105. self.schemes = schemes
  106. self.domains = domains
  107. self.exclude = exclude
  108. def error(self, value, details=None):
  109. msg = "{0} is not a valid URL"
  110. if details:
  111. msg = ". ".join((msg, details))
  112. raise ValueError(msg.format(value))
  113. def __call__(self, value):
  114. parsed = urlparse(value)
  115. netloc_match = netloc_regex.match(parsed.netloc)
  116. if not all((parsed.scheme, parsed.netloc)):
  117. if netloc_regex.match(
  118. parsed.netloc or parsed.path.split("/", 1)[0].split("?", 1)[0]
  119. ):
  120. self.error(value, "Did you mean: http://{0}")
  121. self.error(value)
  122. if parsed.scheme and self.schemes and parsed.scheme not in self.schemes:
  123. self.error(value, "Protocol is not allowed")
  124. if not netloc_match:
  125. self.error(value)
  126. data = netloc_match.groupdict()
  127. if data["ipv4"] or data["ipv6"]:
  128. if not self.ip:
  129. self.error(value, "IP is not allowed")
  130. else:
  131. try:
  132. ip(data["ipv4"] or data["ipv6"])
  133. except ValueError as e:
  134. self.error(value, str(e))
  135. if not self.local:
  136. if data["ipv4"] and data["ipv4"].startswith("127."):
  137. self.error(value, "Localhost is not allowed")
  138. elif data["ipv6"] == "::1":
  139. self.error(value, "Localhost is not allowed")
  140. if self.check:
  141. pass
  142. if data["auth"] and not self.auth:
  143. self.error(value, "Authentication is not allowed")
  144. if data["localhost"] and not self.local:
  145. self.error(value, "Localhost is not allowed")
  146. if data["port"]:
  147. if not self.port:
  148. self.error(value, "Custom port is not allowed")
  149. else:
  150. port = int(data["port"])
  151. if not 0 < port < 65535:
  152. self.error(value, "Port is out of range")
  153. if data["domain"]:
  154. if self.domains and data["domain"] not in self.domains:
  155. self.error(value, "Domain is not allowed")
  156. elif self.exclude and data["domain"] in self.exclude:
  157. self.error(value, "Domain is not allowed")
  158. if self.check:
  159. try:
  160. socket.getaddrinfo(data["domain"], None)
  161. except socket.error:
  162. self.error(value, "Domain does not exists")
  163. return value
  164. @property
  165. def __schema__(self):
  166. return {
  167. "type": "string",
  168. "format": "url",
  169. }
  170. #: Validate an URL
  171. #:
  172. #: Legacy validator, allows, auth, port, ip and local
  173. #: Only allows schemes 'http', 'https', 'ftp' and 'ftps'
  174. url = URL(
  175. ip=True, auth=True, port=True, local=True, schemes=("http", "https", "ftp", "ftps")
  176. )
  177. class email(object):
  178. """
  179. Validate an email.
  180. Example::
  181. parser = reqparse.RequestParser()
  182. parser.add_argument('email', type=inputs.email(dns=True))
  183. Input to the ``email`` argument will be rejected if it does not match an email
  184. and if domain does not exists.
  185. :param bool check: Check the domain exists (perform a DNS resolution)
  186. :param bool ip: Allow IP (both ipv4/ipv6) as domain
  187. :param bool local: Allow localhost (both string or ip) as domain
  188. :param list|tuple domains: Restrict valid domains to this list
  189. :param list|tuple exclude: Exclude some domains
  190. """
  191. def __init__(self, check=False, ip=False, local=False, domains=None, exclude=None):
  192. self.check = check
  193. self.ip = ip
  194. self.local = local
  195. self.domains = domains
  196. self.exclude = exclude
  197. def error(self, value, msg=None):
  198. msg = msg or "{0} is not a valid email"
  199. raise ValueError(msg.format(value))
  200. def is_ip(self, value):
  201. try:
  202. ip(value)
  203. return True
  204. except ValueError:
  205. return False
  206. def __call__(self, value):
  207. match = email_regex.match(value)
  208. if not match or ".." in value:
  209. self.error(value)
  210. server = match.group("server")
  211. if self.check:
  212. try:
  213. socket.getaddrinfo(server, None)
  214. except socket.error:
  215. self.error(value)
  216. if self.domains and server not in self.domains:
  217. self.error(value, "{0} does not belong to the authorized domains")
  218. if self.exclude and server in self.exclude:
  219. self.error(value, "{0} belongs to a forbidden domain")
  220. if not self.local and (
  221. server in ("localhost", "::1") or server.startswith("127.")
  222. ):
  223. self.error(value)
  224. if self.is_ip(server) and not self.ip:
  225. self.error(value)
  226. return value
  227. @property
  228. def __schema__(self):
  229. return {
  230. "type": "string",
  231. "format": "email",
  232. }
  233. class regex(object):
  234. """
  235. Validate a string based on a regular expression.
  236. Example::
  237. parser = reqparse.RequestParser()
  238. parser.add_argument('example', type=inputs.regex('^[0-9]+$'))
  239. Input to the ``example`` argument will be rejected if it contains anything
  240. but numbers.
  241. :param str pattern: The regular expression the input must match
  242. """
  243. def __init__(self, pattern):
  244. self.pattern = pattern
  245. self.re = re.compile(pattern)
  246. def __call__(self, value):
  247. if not self.re.search(value):
  248. message = 'Value does not match pattern: "{0}"'.format(self.pattern)
  249. raise ValueError(message)
  250. return value
  251. def __deepcopy__(self, memo):
  252. return regex(self.pattern)
  253. @property
  254. def __schema__(self):
  255. return {
  256. "type": "string",
  257. "pattern": self.pattern,
  258. }
  259. def _normalize_interval(start, end, value):
  260. """
  261. Normalize datetime intervals.
  262. Given a pair of datetime.date or datetime.datetime objects,
  263. returns a 2-tuple of tz-aware UTC datetimes spanning the same interval.
  264. For datetime.date objects, the returned interval starts at 00:00:00.0
  265. on the first date and ends at 00:00:00.0 on the second.
  266. Naive datetimes are upgraded to UTC.
  267. Timezone-aware datetimes are normalized to the UTC tzdata.
  268. Params:
  269. - start: A date or datetime
  270. - end: A date or datetime
  271. """
  272. if not isinstance(start, datetime):
  273. start = datetime.combine(start, START_OF_DAY)
  274. end = datetime.combine(end, START_OF_DAY)
  275. if start.tzinfo is None:
  276. start = pytz.UTC.localize(start)
  277. end = pytz.UTC.localize(end)
  278. else:
  279. start = start.astimezone(pytz.UTC)
  280. end = end.astimezone(pytz.UTC)
  281. return start, end
  282. def _expand_datetime(start, value):
  283. if not isinstance(start, datetime):
  284. # Expand a single date object to be the interval spanning
  285. # that entire day.
  286. end = start + timedelta(days=1)
  287. else:
  288. # Expand a datetime based on the finest resolution provided
  289. # in the original input string.
  290. time = value.split("T")[1]
  291. time_without_offset = re.sub("[+-].+", "", time)
  292. num_separators = time_without_offset.count(":")
  293. if num_separators == 0:
  294. # Hour resolution
  295. end = start + timedelta(hours=1)
  296. elif num_separators == 1:
  297. # Minute resolution:
  298. end = start + timedelta(minutes=1)
  299. else:
  300. # Second resolution
  301. end = start + timedelta(seconds=1)
  302. return end
  303. def _parse_interval(value):
  304. """
  305. Do some nasty try/except voodoo to get some sort of datetime
  306. object(s) out of the string.
  307. """
  308. try:
  309. return sorted(aniso8601.parse_interval(value))
  310. except ValueError:
  311. try:
  312. return aniso8601.parse_datetime(value), None
  313. except ValueError:
  314. return aniso8601.parse_date(value), None
  315. def iso8601interval(value, argument="argument"):
  316. """
  317. Parses ISO 8601-formatted datetime intervals into tuples of datetimes.
  318. Accepts both a single date(time) or a full interval using either start/end
  319. or start/duration notation, with the following behavior:
  320. - Intervals are defined as inclusive start, exclusive end
  321. - Single datetimes are translated into the interval spanning the
  322. largest resolution not specified in the input value, up to the day.
  323. - The smallest accepted resolution is 1 second.
  324. - All timezones are accepted as values; returned datetimes are
  325. localized to UTC. Naive inputs and date inputs will are assumed UTC.
  326. Examples::
  327. "2013-01-01" -> datetime(2013, 1, 1), datetime(2013, 1, 2)
  328. "2013-01-01T12" -> datetime(2013, 1, 1, 12), datetime(2013, 1, 1, 13)
  329. "2013-01-01/2013-02-28" -> datetime(2013, 1, 1), datetime(2013, 2, 28)
  330. "2013-01-01/P3D" -> datetime(2013, 1, 1), datetime(2013, 1, 4)
  331. "2013-01-01T12:00/PT30M" -> datetime(2013, 1, 1, 12), datetime(2013, 1, 1, 12, 30)
  332. "2013-01-01T06:00/2013-01-01T12:00" -> datetime(2013, 1, 1, 6), datetime(2013, 1, 1, 12)
  333. :param str value: The ISO8601 date time as a string
  334. :return: Two UTC datetimes, the start and the end of the specified interval
  335. :rtype: A tuple (datetime, datetime)
  336. :raises ValueError: if the interval is invalid.
  337. """
  338. if not value:
  339. raise ValueError("Expected a valid ISO8601 date/time interval.")
  340. try:
  341. start, end = _parse_interval(value)
  342. if end is None:
  343. end = _expand_datetime(start, value)
  344. start, end = _normalize_interval(start, end, value)
  345. except ValueError:
  346. msg = (
  347. "Invalid {arg}: {value}. {arg} must be a valid ISO8601 date/time interval."
  348. )
  349. raise ValueError(msg.format(arg=argument, value=value))
  350. return start, end
  351. iso8601interval.__schema__ = {"type": "string", "format": "iso8601-interval"}
  352. def date(value):
  353. """Parse a valid looking date in the format YYYY-mm-dd"""
  354. date = datetime.strptime(value, "%Y-%m-%d")
  355. return date
  356. date.__schema__ = {"type": "string", "format": "date"}
  357. def _get_integer(value):
  358. try:
  359. return int(value)
  360. except (TypeError, ValueError):
  361. raise ValueError("{0} is not a valid integer".format(value))
  362. def natural(value, argument="argument"):
  363. """Restrict input type to the natural numbers (0, 1, 2, 3...)"""
  364. value = _get_integer(value)
  365. if value < 0:
  366. msg = "Invalid {arg}: {value}. {arg} must be a non-negative integer"
  367. raise ValueError(msg.format(arg=argument, value=value))
  368. return value
  369. natural.__schema__ = {"type": "integer", "minimum": 0}
  370. def positive(value, argument="argument"):
  371. """Restrict input type to the positive integers (1, 2, 3...)"""
  372. value = _get_integer(value)
  373. if value < 1:
  374. msg = "Invalid {arg}: {value}. {arg} must be a positive integer"
  375. raise ValueError(msg.format(arg=argument, value=value))
  376. return value
  377. positive.__schema__ = {"type": "integer", "minimum": 0, "exclusiveMinimum": True}
  378. class int_range(object):
  379. """Restrict input to an integer in a range (inclusive)"""
  380. def __init__(self, low, high, argument="argument"):
  381. self.low = low
  382. self.high = high
  383. self.argument = argument
  384. def __call__(self, value):
  385. value = _get_integer(value)
  386. if value < self.low or value > self.high:
  387. msg = "Invalid {arg}: {val}. {arg} must be within the range {lo} - {hi}"
  388. raise ValueError(
  389. msg.format(arg=self.argument, val=value, lo=self.low, hi=self.high)
  390. )
  391. return value
  392. @property
  393. def __schema__(self):
  394. return {
  395. "type": "integer",
  396. "minimum": self.low,
  397. "maximum": self.high,
  398. }
  399. def boolean(value):
  400. """
  401. Parse the string ``"true"`` or ``"false"`` as a boolean (case insensitive).
  402. Also accepts ``"1"`` and ``"0"`` as ``True``/``False`` (respectively).
  403. If the input is from the request JSON body, the type is already a native python boolean,
  404. and will be passed through without further parsing.
  405. :raises ValueError: if the boolean value is invalid
  406. """
  407. if isinstance(value, bool):
  408. return value
  409. if value is None:
  410. raise ValueError("boolean type must be non-null")
  411. elif not value:
  412. return False
  413. value = str(value).lower()
  414. if value in ("true", "1", "on",):
  415. return True
  416. if value in ("false", "0",):
  417. return False
  418. raise ValueError("Invalid literal for boolean(): {0}".format(value))
  419. boolean.__schema__ = {"type": "boolean"}
  420. def datetime_from_rfc822(value):
  421. """
  422. Turns an RFC822 formatted date into a datetime object.
  423. Example::
  424. inputs.datetime_from_rfc822('Wed, 02 Oct 2002 08:00:00 EST')
  425. :param str value: The RFC822-complying string to transform
  426. :return: The parsed datetime
  427. :rtype: datetime
  428. :raises ValueError: if value is an invalid date literal
  429. """
  430. raw = value
  431. if not time_regex.search(value):
  432. value = " ".join((value, "00:00:00"))
  433. try:
  434. timetuple = parsedate_tz(value)
  435. timestamp = mktime_tz(timetuple)
  436. if timetuple[-1] is None:
  437. return datetime.fromtimestamp(timestamp).replace(tzinfo=pytz.utc)
  438. else:
  439. return datetime.fromtimestamp(timestamp, pytz.utc)
  440. except Exception:
  441. raise ValueError('Invalid date literal "{0}"'.format(raw))
  442. def datetime_from_iso8601(value):
  443. """
  444. Turns an ISO8601 formatted date into a datetime object.
  445. Example::
  446. inputs.datetime_from_iso8601("2012-01-01T23:30:00+02:00")
  447. :param str value: The ISO8601-complying string to transform
  448. :return: A datetime
  449. :rtype: datetime
  450. :raises ValueError: if value is an invalid date literal
  451. """
  452. try:
  453. try:
  454. return aniso8601.parse_datetime(value)
  455. except ValueError:
  456. date = aniso8601.parse_date(value)
  457. return datetime(date.year, date.month, date.day)
  458. except Exception:
  459. raise ValueError('Invalid date literal "{0}"'.format(value))
  460. datetime_from_iso8601.__schema__ = {"type": "string", "format": "date-time"}
  461. def date_from_iso8601(value):
  462. """
  463. Turns an ISO8601 formatted date into a date object.
  464. Example::
  465. inputs.date_from_iso8601("2012-01-01")
  466. :param str value: The ISO8601-complying string to transform
  467. :return: A date
  468. :rtype: date
  469. :raises ValueError: if value is an invalid date literal
  470. """
  471. return datetime_from_iso8601(value).date()
  472. date_from_iso8601.__schema__ = {"type": "string", "format": "date"}