You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1058 lines
38KB

  1. import datetime
  2. import io
  3. from os import linesep
  4. import re
  5. import sys
  6. from toml.tz import TomlTz
  7. if sys.version_info < (3,):
  8. _range = xrange # noqa: F821
  9. else:
  10. unicode = str
  11. _range = range
  12. basestring = str
  13. unichr = chr
  14. def _detect_pathlib_path(p):
  15. if (3, 4) <= sys.version_info:
  16. import pathlib
  17. if isinstance(p, pathlib.PurePath):
  18. return True
  19. return False
  20. def _ispath(p):
  21. if isinstance(p, (bytes, basestring)):
  22. return True
  23. return _detect_pathlib_path(p)
  24. def _getpath(p):
  25. if (3, 6) <= sys.version_info:
  26. import os
  27. return os.fspath(p)
  28. if _detect_pathlib_path(p):
  29. return str(p)
  30. return p
  31. try:
  32. FNFError = FileNotFoundError
  33. except NameError:
  34. FNFError = IOError
  35. TIME_RE = re.compile(r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")
  36. class TomlDecodeError(ValueError):
  37. """Base toml Exception / Error."""
  38. def __init__(self, msg, doc, pos):
  39. lineno = doc.count('\n', 0, pos) + 1
  40. colno = pos - doc.rfind('\n', 0, pos)
  41. emsg = '{} (line {} column {} char {})'.format(msg, lineno, colno, pos)
  42. ValueError.__init__(self, emsg)
  43. self.msg = msg
  44. self.doc = doc
  45. self.pos = pos
  46. self.lineno = lineno
  47. self.colno = colno
  48. # Matches a TOML number, which allows underscores for readability
  49. _number_with_underscores = re.compile('([0-9])(_([0-9]))*')
  50. class CommentValue(object):
  51. def __init__(self, val, comment, beginline, _dict):
  52. self.val = val
  53. separator = "\n" if beginline else " "
  54. self.comment = separator + comment
  55. self._dict = _dict
  56. def __getitem__(self, key):
  57. return self.val[key]
  58. def __setitem__(self, key, value):
  59. self.val[key] = value
  60. def dump(self, dump_value_func):
  61. retstr = dump_value_func(self.val)
  62. if isinstance(self.val, self._dict):
  63. return self.comment + "\n" + unicode(retstr)
  64. else:
  65. return unicode(retstr) + self.comment
  66. def _strictly_valid_num(n):
  67. n = n.strip()
  68. if not n:
  69. return False
  70. if n[0] == '_':
  71. return False
  72. if n[-1] == '_':
  73. return False
  74. if "_." in n or "._" in n:
  75. return False
  76. if len(n) == 1:
  77. return True
  78. if n[0] == '0' and n[1] not in ['.', 'o', 'b', 'x']:
  79. return False
  80. if n[0] == '+' or n[0] == '-':
  81. n = n[1:]
  82. if len(n) > 1 and n[0] == '0' and n[1] != '.':
  83. return False
  84. if '__' in n:
  85. return False
  86. return True
  87. def load(f, _dict=dict, decoder=None):
  88. """Parses named file or files as toml and returns a dictionary
  89. Args:
  90. f: Path to the file to open, array of files to read into single dict
  91. or a file descriptor
  92. _dict: (optional) Specifies the class of the returned toml dictionary
  93. decoder: The decoder to use
  94. Returns:
  95. Parsed toml file represented as a dictionary
  96. Raises:
  97. TypeError -- When f is invalid type
  98. TomlDecodeError: Error while decoding toml
  99. IOError / FileNotFoundError -- When an array with no valid (existing)
  100. (Python 2 / Python 3) file paths is passed
  101. """
  102. if _ispath(f):
  103. with io.open(_getpath(f), encoding='utf-8') as ffile:
  104. return loads(ffile.read(), _dict, decoder)
  105. elif isinstance(f, list):
  106. from os import path as op
  107. from warnings import warn
  108. if not [path for path in f if op.exists(path)]:
  109. error_msg = "Load expects a list to contain filenames only."
  110. error_msg += linesep
  111. error_msg += ("The list needs to contain the path of at least one "
  112. "existing file.")
  113. raise FNFError(error_msg)
  114. if decoder is None:
  115. decoder = TomlDecoder(_dict)
  116. d = decoder.get_empty_table()
  117. for l in f: # noqa: E741
  118. if op.exists(l):
  119. d.update(load(l, _dict, decoder))
  120. else:
  121. warn("Non-existent filename in list with at least one valid "
  122. "filename")
  123. return d
  124. else:
  125. try:
  126. return loads(f.read(), _dict, decoder)
  127. except AttributeError:
  128. raise TypeError("You can only load a file descriptor, filename or "
  129. "list")
  130. _groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')
  131. def loads(s, _dict=dict, decoder=None):
  132. """Parses string as toml
  133. Args:
  134. s: String to be parsed
  135. _dict: (optional) Specifies the class of the returned toml dictionary
  136. Returns:
  137. Parsed toml file represented as a dictionary
  138. Raises:
  139. TypeError: When a non-string is passed
  140. TomlDecodeError: Error while decoding toml
  141. """
  142. implicitgroups = []
  143. if decoder is None:
  144. decoder = TomlDecoder(_dict)
  145. retval = decoder.get_empty_table()
  146. currentlevel = retval
  147. if not isinstance(s, basestring):
  148. raise TypeError("Expecting something like a string")
  149. if not isinstance(s, unicode):
  150. s = s.decode('utf8')
  151. original = s
  152. sl = list(s)
  153. openarr = 0
  154. openstring = False
  155. openstrchar = ""
  156. multilinestr = False
  157. arrayoftables = False
  158. beginline = True
  159. keygroup = False
  160. dottedkey = False
  161. keyname = 0
  162. key = ''
  163. prev_key = ''
  164. line_no = 1
  165. for i, item in enumerate(sl):
  166. if item == '\r' and sl[i + 1] == '\n':
  167. sl[i] = ' '
  168. continue
  169. if keyname:
  170. key += item
  171. if item == '\n':
  172. raise TomlDecodeError("Key name found without value."
  173. " Reached end of line.", original, i)
  174. if openstring:
  175. if item == openstrchar:
  176. oddbackslash = False
  177. k = 1
  178. while i >= k and sl[i - k] == '\\':
  179. oddbackslash = not oddbackslash
  180. k += 1
  181. if not oddbackslash:
  182. keyname = 2
  183. openstring = False
  184. openstrchar = ""
  185. continue
  186. elif keyname == 1:
  187. if item.isspace():
  188. keyname = 2
  189. continue
  190. elif item == '.':
  191. dottedkey = True
  192. continue
  193. elif item.isalnum() or item == '_' or item == '-':
  194. continue
  195. elif (dottedkey and sl[i - 1] == '.' and
  196. (item == '"' or item == "'")):
  197. openstring = True
  198. openstrchar = item
  199. continue
  200. elif keyname == 2:
  201. if item.isspace():
  202. if dottedkey:
  203. nextitem = sl[i + 1]
  204. if not nextitem.isspace() and nextitem != '.':
  205. keyname = 1
  206. continue
  207. if item == '.':
  208. dottedkey = True
  209. nextitem = sl[i + 1]
  210. if not nextitem.isspace() and nextitem != '.':
  211. keyname = 1
  212. continue
  213. if item == '=':
  214. keyname = 0
  215. prev_key = key[:-1].rstrip()
  216. key = ''
  217. dottedkey = False
  218. else:
  219. raise TomlDecodeError("Found invalid character in key name: '" +
  220. item + "'. Try quoting the key name.",
  221. original, i)
  222. if item == "'" and openstrchar != '"':
  223. k = 1
  224. try:
  225. while sl[i - k] == "'":
  226. k += 1
  227. if k == 3:
  228. break
  229. except IndexError:
  230. pass
  231. if k == 3:
  232. multilinestr = not multilinestr
  233. openstring = multilinestr
  234. else:
  235. openstring = not openstring
  236. if openstring:
  237. openstrchar = "'"
  238. else:
  239. openstrchar = ""
  240. if item == '"' and openstrchar != "'":
  241. oddbackslash = False
  242. k = 1
  243. tripquote = False
  244. try:
  245. while sl[i - k] == '"':
  246. k += 1
  247. if k == 3:
  248. tripquote = True
  249. break
  250. if k == 1 or (k == 3 and tripquote):
  251. while sl[i - k] == '\\':
  252. oddbackslash = not oddbackslash
  253. k += 1
  254. except IndexError:
  255. pass
  256. if not oddbackslash:
  257. if tripquote:
  258. multilinestr = not multilinestr
  259. openstring = multilinestr
  260. else:
  261. openstring = not openstring
  262. if openstring:
  263. openstrchar = '"'
  264. else:
  265. openstrchar = ""
  266. if item == '#' and (not openstring and not keygroup and
  267. not arrayoftables):
  268. j = i
  269. comment = ""
  270. try:
  271. while sl[j] != '\n':
  272. comment += s[j]
  273. sl[j] = ' '
  274. j += 1
  275. except IndexError:
  276. break
  277. if not openarr:
  278. decoder.preserve_comment(line_no, prev_key, comment, beginline)
  279. if item == '[' and (not openstring and not keygroup and
  280. not arrayoftables):
  281. if beginline:
  282. if len(sl) > i + 1 and sl[i + 1] == '[':
  283. arrayoftables = True
  284. else:
  285. keygroup = True
  286. else:
  287. openarr += 1
  288. if item == ']' and not openstring:
  289. if keygroup:
  290. keygroup = False
  291. elif arrayoftables:
  292. if sl[i - 1] == ']':
  293. arrayoftables = False
  294. else:
  295. openarr -= 1
  296. if item == '\n':
  297. if openstring or multilinestr:
  298. if not multilinestr:
  299. raise TomlDecodeError("Unbalanced quotes", original, i)
  300. if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
  301. sl[i - 2] == sl[i - 1])):
  302. sl[i] = sl[i - 1]
  303. if sl[i - 3] == sl[i - 1]:
  304. sl[i - 3] = ' '
  305. elif openarr:
  306. sl[i] = ' '
  307. else:
  308. beginline = True
  309. line_no += 1
  310. elif beginline and sl[i] != ' ' and sl[i] != '\t':
  311. beginline = False
  312. if not keygroup and not arrayoftables:
  313. if sl[i] == '=':
  314. raise TomlDecodeError("Found empty keyname. ", original, i)
  315. keyname = 1
  316. key += item
  317. if keyname:
  318. raise TomlDecodeError("Key name found without value."
  319. " Reached end of file.", original, len(s))
  320. if openstring: # reached EOF and have an unterminated string
  321. raise TomlDecodeError("Unterminated string found."
  322. " Reached end of file.", original, len(s))
  323. s = ''.join(sl)
  324. s = s.split('\n')
  325. multikey = None
  326. multilinestr = ""
  327. multibackslash = False
  328. pos = 0
  329. for idx, line in enumerate(s):
  330. if idx > 0:
  331. pos += len(s[idx - 1]) + 1
  332. decoder.embed_comments(idx, currentlevel)
  333. if not multilinestr or multibackslash or '\n' not in multilinestr:
  334. line = line.strip()
  335. if line == "" and (not multikey or multibackslash):
  336. continue
  337. if multikey:
  338. if multibackslash:
  339. multilinestr += line
  340. else:
  341. multilinestr += line
  342. multibackslash = False
  343. closed = False
  344. if multilinestr[0] == '[':
  345. closed = line[-1] == ']'
  346. elif len(line) > 2:
  347. closed = (line[-1] == multilinestr[0] and
  348. line[-2] == multilinestr[0] and
  349. line[-3] == multilinestr[0])
  350. if closed:
  351. try:
  352. value, vtype = decoder.load_value(multilinestr)
  353. except ValueError as err:
  354. raise TomlDecodeError(str(err), original, pos)
  355. currentlevel[multikey] = value
  356. multikey = None
  357. multilinestr = ""
  358. else:
  359. k = len(multilinestr) - 1
  360. while k > -1 and multilinestr[k] == '\\':
  361. multibackslash = not multibackslash
  362. k -= 1
  363. if multibackslash:
  364. multilinestr = multilinestr[:-1]
  365. else:
  366. multilinestr += "\n"
  367. continue
  368. if line[0] == '[':
  369. arrayoftables = False
  370. if len(line) == 1:
  371. raise TomlDecodeError("Opening key group bracket on line by "
  372. "itself.", original, pos)
  373. if line[1] == '[':
  374. arrayoftables = True
  375. line = line[2:]
  376. splitstr = ']]'
  377. else:
  378. line = line[1:]
  379. splitstr = ']'
  380. i = 1
  381. quotesplits = decoder._get_split_on_quotes(line)
  382. quoted = False
  383. for quotesplit in quotesplits:
  384. if not quoted and splitstr in quotesplit:
  385. break
  386. i += quotesplit.count(splitstr)
  387. quoted = not quoted
  388. line = line.split(splitstr, i)
  389. if len(line) < i + 1 or line[-1].strip() != "":
  390. raise TomlDecodeError("Key group not on a line by itself.",
  391. original, pos)
  392. groups = splitstr.join(line[:-1]).split('.')
  393. i = 0
  394. while i < len(groups):
  395. groups[i] = groups[i].strip()
  396. if len(groups[i]) > 0 and (groups[i][0] == '"' or
  397. groups[i][0] == "'"):
  398. groupstr = groups[i]
  399. j = i + 1
  400. while ((not groupstr[0] == groupstr[-1]) or
  401. len(groupstr) == 1):
  402. j += 1
  403. if j > len(groups) + 2:
  404. raise TomlDecodeError("Invalid group name '" +
  405. groupstr + "' Something " +
  406. "went wrong.", original, pos)
  407. groupstr = '.'.join(groups[i:j]).strip()
  408. groups[i] = groupstr[1:-1]
  409. groups[i + 1:j] = []
  410. else:
  411. if not _groupname_re.match(groups[i]):
  412. raise TomlDecodeError("Invalid group name '" +
  413. groups[i] + "'. Try quoting it.",
  414. original, pos)
  415. i += 1
  416. currentlevel = retval
  417. for i in _range(len(groups)):
  418. group = groups[i]
  419. if group == "":
  420. raise TomlDecodeError("Can't have a keygroup with an empty "
  421. "name", original, pos)
  422. try:
  423. currentlevel[group]
  424. if i == len(groups) - 1:
  425. if group in implicitgroups:
  426. implicitgroups.remove(group)
  427. if arrayoftables:
  428. raise TomlDecodeError("An implicitly defined "
  429. "table can't be an array",
  430. original, pos)
  431. elif arrayoftables:
  432. currentlevel[group].append(decoder.get_empty_table()
  433. )
  434. else:
  435. raise TomlDecodeError("What? " + group +
  436. " already exists?" +
  437. str(currentlevel),
  438. original, pos)
  439. except TypeError:
  440. currentlevel = currentlevel[-1]
  441. if group not in currentlevel:
  442. currentlevel[group] = decoder.get_empty_table()
  443. if i == len(groups) - 1 and arrayoftables:
  444. currentlevel[group] = [decoder.get_empty_table()]
  445. except KeyError:
  446. if i != len(groups) - 1:
  447. implicitgroups.append(group)
  448. currentlevel[group] = decoder.get_empty_table()
  449. if i == len(groups) - 1 and arrayoftables:
  450. currentlevel[group] = [decoder.get_empty_table()]
  451. currentlevel = currentlevel[group]
  452. if arrayoftables:
  453. try:
  454. currentlevel = currentlevel[-1]
  455. except KeyError:
  456. pass
  457. elif line[0] == "{":
  458. if line[-1] != "}":
  459. raise TomlDecodeError("Line breaks are not allowed in inline"
  460. "objects", original, pos)
  461. try:
  462. decoder.load_inline_object(line, currentlevel, multikey,
  463. multibackslash)
  464. except ValueError as err:
  465. raise TomlDecodeError(str(err), original, pos)
  466. elif "=" in line:
  467. try:
  468. ret = decoder.load_line(line, currentlevel, multikey,
  469. multibackslash)
  470. except ValueError as err:
  471. raise TomlDecodeError(str(err), original, pos)
  472. if ret is not None:
  473. multikey, multilinestr, multibackslash = ret
  474. return retval
  475. def _load_date(val):
  476. microsecond = 0
  477. tz = None
  478. try:
  479. if len(val) > 19:
  480. if val[19] == '.':
  481. if val[-1].upper() == 'Z':
  482. subsecondval = val[20:-1]
  483. tzval = "Z"
  484. else:
  485. subsecondvalandtz = val[20:]
  486. if '+' in subsecondvalandtz:
  487. splitpoint = subsecondvalandtz.index('+')
  488. subsecondval = subsecondvalandtz[:splitpoint]
  489. tzval = subsecondvalandtz[splitpoint:]
  490. elif '-' in subsecondvalandtz:
  491. splitpoint = subsecondvalandtz.index('-')
  492. subsecondval = subsecondvalandtz[:splitpoint]
  493. tzval = subsecondvalandtz[splitpoint:]
  494. else:
  495. tzval = None
  496. subsecondval = subsecondvalandtz
  497. if tzval is not None:
  498. tz = TomlTz(tzval)
  499. microsecond = int(int(subsecondval) *
  500. (10 ** (6 - len(subsecondval))))
  501. else:
  502. tz = TomlTz(val[19:])
  503. except ValueError:
  504. tz = None
  505. if "-" not in val[1:]:
  506. return None
  507. try:
  508. if len(val) == 10:
  509. d = datetime.date(
  510. int(val[:4]), int(val[5:7]),
  511. int(val[8:10]))
  512. else:
  513. d = datetime.datetime(
  514. int(val[:4]), int(val[5:7]),
  515. int(val[8:10]), int(val[11:13]),
  516. int(val[14:16]), int(val[17:19]), microsecond, tz)
  517. except ValueError:
  518. return None
  519. return d
  520. def _load_unicode_escapes(v, hexbytes, prefix):
  521. skip = False
  522. i = len(v) - 1
  523. while i > -1 and v[i] == '\\':
  524. skip = not skip
  525. i -= 1
  526. for hx in hexbytes:
  527. if skip:
  528. skip = False
  529. i = len(hx) - 1
  530. while i > -1 and hx[i] == '\\':
  531. skip = not skip
  532. i -= 1
  533. v += prefix
  534. v += hx
  535. continue
  536. hxb = ""
  537. i = 0
  538. hxblen = 4
  539. if prefix == "\\U":
  540. hxblen = 8
  541. hxb = ''.join(hx[i:i + hxblen]).lower()
  542. if hxb.strip('0123456789abcdef'):
  543. raise ValueError("Invalid escape sequence: " + hxb)
  544. if hxb[0] == "d" and hxb[1].strip('01234567'):
  545. raise ValueError("Invalid escape sequence: " + hxb +
  546. ". Only scalar unicode points are allowed.")
  547. v += unichr(int(hxb, 16))
  548. v += unicode(hx[len(hxb):])
  549. return v
  550. # Unescape TOML string values.
  551. # content after the \
  552. _escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
  553. # What it should be replaced by
  554. _escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
  555. # Used for substitution
  556. _escape_to_escapedchars = dict(zip(_escapes, _escapedchars))
  557. def _unescape(v):
  558. """Unescape characters in a TOML string."""
  559. i = 0
  560. backslash = False
  561. while i < len(v):
  562. if backslash:
  563. backslash = False
  564. if v[i] in _escapes:
  565. v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
  566. elif v[i] == '\\':
  567. v = v[:i - 1] + v[i:]
  568. elif v[i] == 'u' or v[i] == 'U':
  569. i += 1
  570. else:
  571. raise ValueError("Reserved escape sequence used")
  572. continue
  573. elif v[i] == '\\':
  574. backslash = True
  575. i += 1
  576. return v
  577. class InlineTableDict(object):
  578. """Sentinel subclass of dict for inline tables."""
  579. class TomlDecoder(object):
  580. def __init__(self, _dict=dict):
  581. self._dict = _dict
  582. def get_empty_table(self):
  583. return self._dict()
  584. def get_empty_inline_table(self):
  585. class DynamicInlineTableDict(self._dict, InlineTableDict):
  586. """Concrete sentinel subclass for inline tables.
  587. It is a subclass of _dict which is passed in dynamically at load
  588. time
  589. It is also a subclass of InlineTableDict
  590. """
  591. return DynamicInlineTableDict()
  592. def load_inline_object(self, line, currentlevel, multikey=False,
  593. multibackslash=False):
  594. candidate_groups = line[1:-1].split(",")
  595. groups = []
  596. if len(candidate_groups) == 1 and not candidate_groups[0].strip():
  597. candidate_groups.pop()
  598. while len(candidate_groups) > 0:
  599. candidate_group = candidate_groups.pop(0)
  600. try:
  601. _, value = candidate_group.split('=', 1)
  602. except ValueError:
  603. raise ValueError("Invalid inline table encountered")
  604. value = value.strip()
  605. if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
  606. value[0] in '-0123456789' or
  607. value in ('true', 'false') or
  608. (value[0] == "[" and value[-1] == "]") or
  609. (value[0] == '{' and value[-1] == '}'))):
  610. groups.append(candidate_group)
  611. elif len(candidate_groups) > 0:
  612. candidate_groups[0] = (candidate_group + "," +
  613. candidate_groups[0])
  614. else:
  615. raise ValueError("Invalid inline table value encountered")
  616. for group in groups:
  617. status = self.load_line(group, currentlevel, multikey,
  618. multibackslash)
  619. if status is not None:
  620. break
  621. def _get_split_on_quotes(self, line):
  622. doublequotesplits = line.split('"')
  623. quoted = False
  624. quotesplits = []
  625. if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
  626. singlequotesplits = doublequotesplits[0].split("'")
  627. doublequotesplits = doublequotesplits[1:]
  628. while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
  629. singlequotesplits[-1] += '"' + doublequotesplits[0]
  630. doublequotesplits = doublequotesplits[1:]
  631. if "'" in singlequotesplits[-1]:
  632. singlequotesplits = (singlequotesplits[:-1] +
  633. singlequotesplits[-1].split("'"))
  634. quotesplits += singlequotesplits
  635. for doublequotesplit in doublequotesplits:
  636. if quoted:
  637. quotesplits.append(doublequotesplit)
  638. else:
  639. quotesplits += doublequotesplit.split("'")
  640. quoted = not quoted
  641. return quotesplits
  642. def load_line(self, line, currentlevel, multikey, multibackslash):
  643. i = 1
  644. quotesplits = self._get_split_on_quotes(line)
  645. quoted = False
  646. for quotesplit in quotesplits:
  647. if not quoted and '=' in quotesplit:
  648. break
  649. i += quotesplit.count('=')
  650. quoted = not quoted
  651. pair = line.split('=', i)
  652. strictly_valid = _strictly_valid_num(pair[-1])
  653. if _number_with_underscores.match(pair[-1]):
  654. pair[-1] = pair[-1].replace('_', '')
  655. while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
  656. pair[-1][0] != "'" and pair[-1][0] != '"' and
  657. pair[-1][0] != '[' and pair[-1][0] != '{' and
  658. pair[-1].strip() != 'true' and
  659. pair[-1].strip() != 'false'):
  660. try:
  661. float(pair[-1])
  662. break
  663. except ValueError:
  664. pass
  665. if _load_date(pair[-1]) is not None:
  666. break
  667. if TIME_RE.match(pair[-1]):
  668. break
  669. i += 1
  670. prev_val = pair[-1]
  671. pair = line.split('=', i)
  672. if prev_val == pair[-1]:
  673. raise ValueError("Invalid date or number")
  674. if strictly_valid:
  675. strictly_valid = _strictly_valid_num(pair[-1])
  676. pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
  677. if '.' in pair[0]:
  678. if '"' in pair[0] or "'" in pair[0]:
  679. quotesplits = self._get_split_on_quotes(pair[0])
  680. quoted = False
  681. levels = []
  682. for quotesplit in quotesplits:
  683. if quoted:
  684. levels.append(quotesplit)
  685. else:
  686. levels += [level.strip() for level in
  687. quotesplit.split('.')]
  688. quoted = not quoted
  689. else:
  690. levels = pair[0].split('.')
  691. while levels[-1] == "":
  692. levels = levels[:-1]
  693. for level in levels[:-1]:
  694. if level == "":
  695. continue
  696. if level not in currentlevel:
  697. currentlevel[level] = self.get_empty_table()
  698. currentlevel = currentlevel[level]
  699. pair[0] = levels[-1].strip()
  700. elif (pair[0][0] == '"' or pair[0][0] == "'") and \
  701. (pair[0][-1] == pair[0][0]):
  702. pair[0] = _unescape(pair[0][1:-1])
  703. k, koffset = self._load_line_multiline_str(pair[1])
  704. if k > -1:
  705. while k > -1 and pair[1][k + koffset] == '\\':
  706. multibackslash = not multibackslash
  707. k -= 1
  708. if multibackslash:
  709. multilinestr = pair[1][:-1]
  710. else:
  711. multilinestr = pair[1] + "\n"
  712. multikey = pair[0]
  713. else:
  714. value, vtype = self.load_value(pair[1], strictly_valid)
  715. try:
  716. currentlevel[pair[0]]
  717. raise ValueError("Duplicate keys!")
  718. except TypeError:
  719. raise ValueError("Duplicate keys!")
  720. except KeyError:
  721. if multikey:
  722. return multikey, multilinestr, multibackslash
  723. else:
  724. currentlevel[pair[0]] = value
  725. def _load_line_multiline_str(self, p):
  726. poffset = 0
  727. if len(p) < 3:
  728. return -1, poffset
  729. if p[0] == '[' and (p.strip()[-1] != ']' and
  730. self._load_array_isstrarray(p)):
  731. newp = p[1:].strip().split(',')
  732. while len(newp) > 1 and newp[-1][0] != '"' and newp[-1][0] != "'":
  733. newp = newp[:-2] + [newp[-2] + ',' + newp[-1]]
  734. newp = newp[-1]
  735. poffset = len(p) - len(newp)
  736. p = newp
  737. if p[0] != '"' and p[0] != "'":
  738. return -1, poffset
  739. if p[1] != p[0] or p[2] != p[0]:
  740. return -1, poffset
  741. if len(p) > 5 and p[-1] == p[0] and p[-2] == p[0] and p[-3] == p[0]:
  742. return -1, poffset
  743. return len(p) - 1, poffset
  744. def load_value(self, v, strictly_valid=True):
  745. if not v:
  746. raise ValueError("Empty value is invalid")
  747. if v == 'true':
  748. return (True, "bool")
  749. elif v.lower() == 'true':
  750. raise ValueError("Only all lowercase booleans allowed")
  751. elif v == 'false':
  752. return (False, "bool")
  753. elif v.lower() == 'false':
  754. raise ValueError("Only all lowercase booleans allowed")
  755. elif v[0] == '"' or v[0] == "'":
  756. quotechar = v[0]
  757. testv = v[1:].split(quotechar)
  758. triplequote = False
  759. triplequotecount = 0
  760. if len(testv) > 1 and testv[0] == '' and testv[1] == '':
  761. testv = testv[2:]
  762. triplequote = True
  763. closed = False
  764. for tv in testv:
  765. if tv == '':
  766. if triplequote:
  767. triplequotecount += 1
  768. else:
  769. closed = True
  770. else:
  771. oddbackslash = False
  772. try:
  773. i = -1
  774. j = tv[i]
  775. while j == '\\':
  776. oddbackslash = not oddbackslash
  777. i -= 1
  778. j = tv[i]
  779. except IndexError:
  780. pass
  781. if not oddbackslash:
  782. if closed:
  783. raise ValueError("Found tokens after a closed " +
  784. "string. Invalid TOML.")
  785. else:
  786. if not triplequote or triplequotecount > 1:
  787. closed = True
  788. else:
  789. triplequotecount = 0
  790. if quotechar == '"':
  791. escapeseqs = v.split('\\')[1:]
  792. backslash = False
  793. for i in escapeseqs:
  794. if i == '':
  795. backslash = not backslash
  796. else:
  797. if i[0] not in _escapes and (i[0] != 'u' and
  798. i[0] != 'U' and
  799. not backslash):
  800. raise ValueError("Reserved escape sequence used")
  801. if backslash:
  802. backslash = False
  803. for prefix in ["\\u", "\\U"]:
  804. if prefix in v:
  805. hexbytes = v.split(prefix)
  806. v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
  807. prefix)
  808. v = _unescape(v)
  809. if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
  810. v[1] == v[2]):
  811. v = v[2:-2]
  812. return (v[1:-1], "str")
  813. elif v[0] == '[':
  814. return (self.load_array(v), "array")
  815. elif v[0] == '{':
  816. inline_object = self.get_empty_inline_table()
  817. self.load_inline_object(v, inline_object)
  818. return (inline_object, "inline_object")
  819. elif TIME_RE.match(v):
  820. h, m, s, _, ms = TIME_RE.match(v).groups()
  821. time = datetime.time(int(h), int(m), int(s), int(ms) if ms else 0)
  822. return (time, "time")
  823. else:
  824. parsed_date = _load_date(v)
  825. if parsed_date is not None:
  826. return (parsed_date, "date")
  827. if not strictly_valid:
  828. raise ValueError("Weirdness with leading zeroes or "
  829. "underscores in your number.")
  830. itype = "int"
  831. neg = False
  832. if v[0] == '-':
  833. neg = True
  834. v = v[1:]
  835. elif v[0] == '+':
  836. v = v[1:]
  837. v = v.replace('_', '')
  838. lowerv = v.lower()
  839. if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
  840. if '.' in v and v.split('.', 1)[1] == '':
  841. raise ValueError("This float is missing digits after "
  842. "the point")
  843. if v[0] not in '0123456789':
  844. raise ValueError("This float doesn't have a leading "
  845. "digit")
  846. v = float(v)
  847. itype = "float"
  848. elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
  849. v = float(v)
  850. itype = "float"
  851. if itype == "int":
  852. v = int(v, 0)
  853. if neg:
  854. return (0 - v, itype)
  855. return (v, itype)
  856. def bounded_string(self, s):
  857. if len(s) == 0:
  858. return True
  859. if s[-1] != s[0]:
  860. return False
  861. i = -2
  862. backslash = False
  863. while len(s) + i > 0:
  864. if s[i] == "\\":
  865. backslash = not backslash
  866. i -= 1
  867. else:
  868. break
  869. return not backslash
  870. def _load_array_isstrarray(self, a):
  871. a = a[1:-1].strip()
  872. if a != '' and (a[0] == '"' or a[0] == "'"):
  873. return True
  874. return False
  875. def load_array(self, a):
  876. atype = None
  877. retval = []
  878. a = a.strip()
  879. if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
  880. strarray = self._load_array_isstrarray(a)
  881. if not a[1:-1].strip().startswith('{'):
  882. a = a[1:-1].split(',')
  883. else:
  884. # a is an inline object, we must find the matching parenthesis
  885. # to define groups
  886. new_a = []
  887. start_group_index = 1
  888. end_group_index = 2
  889. open_bracket_count = 1 if a[start_group_index] == '{' else 0
  890. in_str = False
  891. while end_group_index < len(a[1:]):
  892. if a[end_group_index] == '"' or a[end_group_index] == "'":
  893. if in_str:
  894. backslash_index = end_group_index - 1
  895. while (backslash_index > -1 and
  896. a[backslash_index] == '\\'):
  897. in_str = not in_str
  898. backslash_index -= 1
  899. in_str = not in_str
  900. if not in_str and a[end_group_index] == '{':
  901. open_bracket_count += 1
  902. if in_str or a[end_group_index] != '}':
  903. end_group_index += 1
  904. continue
  905. elif a[end_group_index] == '}' and open_bracket_count > 1:
  906. open_bracket_count -= 1
  907. end_group_index += 1
  908. continue
  909. # Increase end_group_index by 1 to get the closing bracket
  910. end_group_index += 1
  911. new_a.append(a[start_group_index:end_group_index])
  912. # The next start index is at least after the closing
  913. # bracket, a closing bracket can be followed by a comma
  914. # since we are in an array.
  915. start_group_index = end_group_index + 1
  916. while (start_group_index < len(a[1:]) and
  917. a[start_group_index] != '{'):
  918. start_group_index += 1
  919. end_group_index = start_group_index + 1
  920. a = new_a
  921. b = 0
  922. if strarray:
  923. while b < len(a) - 1:
  924. ab = a[b].strip()
  925. while (not self.bounded_string(ab) or
  926. (len(ab) > 2 and
  927. ab[0] == ab[1] == ab[2] and
  928. ab[-2] != ab[0] and
  929. ab[-3] != ab[0])):
  930. a[b] = a[b] + ',' + a[b + 1]
  931. ab = a[b].strip()
  932. if b < len(a) - 2:
  933. a = a[:b + 1] + a[b + 2:]
  934. else:
  935. a = a[:b + 1]
  936. b += 1
  937. else:
  938. al = list(a[1:-1])
  939. a = []
  940. openarr = 0
  941. j = 0
  942. for i in _range(len(al)):
  943. if al[i] == '[':
  944. openarr += 1
  945. elif al[i] == ']':
  946. openarr -= 1
  947. elif al[i] == ',' and not openarr:
  948. a.append(''.join(al[j:i]))
  949. j = i + 1
  950. a.append(''.join(al[j:]))
  951. for i in _range(len(a)):
  952. a[i] = a[i].strip()
  953. if a[i] != '':
  954. nval, ntype = self.load_value(a[i])
  955. if atype:
  956. if ntype != atype:
  957. raise ValueError("Not a homogeneous array")
  958. else:
  959. atype = ntype
  960. retval.append(nval)
  961. return retval
  962. def preserve_comment(self, line_no, key, comment, beginline):
  963. pass
  964. def embed_comments(self, idx, currentlevel):
  965. pass
  966. class TomlPreserveCommentDecoder(TomlDecoder):
  967. def __init__(self, _dict=dict):
  968. self.saved_comments = {}
  969. super(TomlPreserveCommentDecoder, self).__init__(_dict)
  970. def preserve_comment(self, line_no, key, comment, beginline):
  971. self.saved_comments[line_no] = (key, comment, beginline)
  972. def embed_comments(self, idx, currentlevel):
  973. if idx not in self.saved_comments:
  974. return
  975. key, comment, beginline = self.saved_comments[idx]
  976. currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
  977. self._dict)