# dialects/mysql/reflection.py
# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php

from __future__ import annotations

import re
from typing import Any
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import overload
from typing import Sequence
from typing import Tuple
from typing import TYPE_CHECKING
from typing import Union

from .enumerated import ENUM
from .enumerated import SET
from .types import DATETIME
from .types import TIME
from .types import TIMESTAMP
from ... import types as sqltypes
from ... import util
from ...util.typing import Literal

if TYPE_CHECKING:
    from .base import MySQLDialect
    from .base import MySQLIdentifierPreparer
    from ...engine.interfaces import ReflectedColumn


class ReflectedState:
    """Stores raw information about a SHOW CREATE TABLE statement."""

    charset: Optional[str]

    def __init__(self) -> None:
        self.columns: List[ReflectedColumn] = []
        self.table_options: Dict[str, str] = {}
        self.table_name: Optional[str] = None
        self.keys: List[Dict[str, Any]] = []
        self.fk_constraints: List[Dict[str, Any]] = []
        self.ck_constraints: List[Dict[str, Any]] = []


class MySQLTableDefinitionParser:
    """Parses the results of a SHOW CREATE TABLE statement."""

    def __init__(
        self, dialect: MySQLDialect, preparer: MySQLIdentifierPreparer
    ):
        self.dialect = dialect
        self.preparer = preparer
        self._prep_regexes()

    def parse(
        self, show_create: str, charset: Optional[str]
    ) -> ReflectedState:
        state = ReflectedState()
        state.charset = charset
        for line in re.split(r"\r?\n", show_create):
            if line.startswith("  " + self.preparer.initial_quote):
                self._parse_column(line, state)
            # a regular table options line
            elif line.startswith(") "):
                self._parse_table_options(line, state)
            # an ANSI-mode table options line
            elif line == ")":
                pass
            elif line.startswith("CREATE "):
                self._parse_table_name(line, state)
            elif "PARTITION" in line:
                self._parse_partition_options(line, state)
            # Not present in real reflection, but may be if
            # loading from a file.
            elif not line:
                pass
            else:
                type_, spec = self._parse_constraints(line)
                if type_ is None:
                    util.warn("Unknown schema content: %r" % line)
                elif type_ == "key":
                    state.keys.append(spec)  # type: ignore[arg-type]
                elif type_ == "fk_constraint":
                    state.fk_constraints.append(spec)  # type: ignore[arg-type]
                elif type_ == "ck_constraint":
                    state.ck_constraints.append(spec)  # type: ignore[arg-type]
                else:
                    pass
        return state
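
    # Illustrative sketch (not part of the original module): given typical
    # SHOW CREATE TABLE output such as
    #
    #   CREATE TABLE `t` (
    #     `id` int(11) NOT NULL AUTO_INCREMENT,
    #     PRIMARY KEY (`id`)
    #   ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
    #
    # parse() routes the CREATE line to _parse_table_name(), the column
    # line to _parse_column(), the PRIMARY KEY line to _parse_constraints(),
    # and the closing ") ..." line to _parse_table_options().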

    def _check_view(self, sql: str) -> bool:
        return bool(self._re_is_view.match(sql))

    def _parse_constraints(self, line: str) -> Union[
        Tuple[None, str],
        Tuple[Literal["partition"], str],
        Tuple[
            Literal["ck_constraint", "fk_constraint", "key"], Dict[str, str]
        ],
    ]:
        """Parse a KEY or CONSTRAINT line.

        :param line: A line of SHOW CREATE TABLE output
        """

        # KEY
        m = self._re_key.match(line)
        if m:
            spec = m.groupdict()
            # convert columns into name, length pairs
            # NOTE: we may want to consider SHOW INDEX as the
            # format of indexes in MySQL becomes more complex
            spec["columns"] = self._parse_keyexprs(spec["columns"])
            if spec["version_sql"]:
                m2 = self._re_key_version_sql.match(spec["version_sql"])
                if m2 and m2.groupdict()["parser"]:
                    spec["parser"] = m2.groupdict()["parser"]
            if spec["parser"]:
                spec["parser"] = self.preparer.unformat_identifiers(
                    spec["parser"]
                )[0]
            return "key", spec

        # FOREIGN KEY CONSTRAINT
        m = self._re_fk_constraint.match(line)
        if m:
            spec = m.groupdict()
            spec["table"] = self.preparer.unformat_identifiers(spec["table"])
            spec["local"] = [c[0] for c in self._parse_keyexprs(spec["local"])]
            spec["foreign"] = [
                c[0] for c in self._parse_keyexprs(spec["foreign"])
            ]
            return "fk_constraint", spec

        # CHECK constraint
        m = self._re_ck_constraint.match(line)
        if m:
            spec = m.groupdict()
            return "ck_constraint", spec

        # PARTITION and SUBPARTITION
        m = self._re_partition.match(line)
        if m:
            # Punt!
            return "partition", line

        # No match.
        return (None, line)
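
    # Illustrative sketch (assumed example, not from the original source):
    # a line such as
    #   KEY `ix_name` (`name`(10) DESC) USING BTREE
    # returns ("key", spec) with spec["name"] == "ix_name" and
    # spec["columns"] == [("name", 10, "DESC")]; an unrecognized line falls
    # through to (None, line), which parse() reports with a warning.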

    def _parse_table_name(self, line: str, state: ReflectedState) -> None:
        """Extract the table name.

        :param line: The first line of SHOW CREATE TABLE
        """

        regex, cleanup = self._pr_name
        m = regex.match(line)
        if m:
            state.table_name = cleanup(m.group("name"))

    def _parse_table_options(self, line: str, state: ReflectedState) -> None:
        """Build a dictionary of all reflected table-level options.

        :param line: The final line of SHOW CREATE TABLE output.
        """

        options = {}

        if line and line != ")":
            rest_of_line = line
            for regex, cleanup in self._pr_options:
                m = regex.search(rest_of_line)
                if not m:
                    continue
                directive, value = m.group("directive"), m.group("val")
                if cleanup:
                    value = cleanup(value)
                options[directive.lower()] = value
                rest_of_line = regex.sub("", rest_of_line)

        for nope in ("auto_increment", "data directory", "index directory"):
            options.pop(nope, None)

        for opt, val in options.items():
            state.table_options["%s_%s" % (self.dialect.name, opt)] = val
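
    # Illustrative sketch (assumed example values): a closing line such as
    #   ) ENGINE=InnoDB COMMENT='people'
    # is reflected roughly as
    #   {"mysql_engine": "InnoDB", "mysql_comment": "people"}
    # while AUTO_INCREMENT and the directory options are deliberately
    # dropped.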

    def _parse_partition_options(
        self, line: str, state: ReflectedState
    ) -> None:
        options = {}
        new_line = line[:]

        while new_line.startswith("(") or new_line.startswith(" "):
            new_line = new_line[1:]

        for regex, cleanup in self._pr_options:
            m = regex.search(new_line)
            if not m or "PARTITION" not in regex.pattern:
                continue

            directive = m.group("directive")
            directive = directive.lower()
            is_subpartition = directive == "subpartition"

            if directive == "partition" or is_subpartition:
                new_line = new_line.replace(") */", "")
                new_line = new_line.replace(",", "")
                if is_subpartition and new_line.endswith(")"):
                    new_line = new_line[:-1]
                if self.dialect.name == "mariadb" and new_line.endswith(")"):
                    if (
                        "MAXVALUE" in new_line
                        or "MINVALUE" in new_line
                        or "ENGINE" in new_line
                    ):
                        # final line of MariaDB partition endswith ")"
                        new_line = new_line[:-1]

                defs = "%s_%s_definitions" % (self.dialect.name, directive)
                options[defs] = new_line

            else:
                directive = directive.replace(" ", "_")
                value = m.group("val")
                if cleanup:
                    value = cleanup(value)
                options[directive] = value
                break

        for opt, val in options.items():
            part_def = "%s_partition_definitions" % (self.dialect.name)
            subpart_def = "%s_subpartition_definitions" % (self.dialect.name)
            if opt == part_def or opt == subpart_def:
                # builds a string of definitions
                if opt not in state.table_options:
                    state.table_options[opt] = val
                else:
                    state.table_options[opt] = "%s, %s" % (
                        state.table_options[opt],
                        val,
                    )
            else:
                state.table_options["%s_%s" % (self.dialect.name, opt)] = val
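
    # Illustrative sketch (assumed example): for output containing
    #   /*!50100 PARTITION BY RANGE (id)
    #   (PARTITION p0 VALUES LESS THAN (100) ENGINE = InnoDB) */
    # the parser stores roughly
    #   mysql_partition_by = "RANGE (id)"
    #   mysql_partition_definitions = "PARTITION p0 VALUES LESS THAN (100)..."
    # concatenating multiple PARTITION/SUBPARTITION lines into one
    # comma-separated string.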

    def _parse_column(self, line: str, state: ReflectedState) -> None:
        """Extract column details.

        Falls back to a 'minimal support' variant if full parse fails.

        :param line: Any column-bearing line from SHOW CREATE TABLE
        """

        spec = None
        m = self._re_column.match(line)
        if m:
            spec = m.groupdict()
            spec["full"] = True
        else:
            m = self._re_column_loose.match(line)
            if m:
                spec = m.groupdict()
                spec["full"] = False
        if not spec:
            util.warn("Unknown column definition %r" % line)
            return
        if not spec["full"]:
            util.warn("Incomplete reflection of column definition %r" % line)

        name, type_, args = spec["name"], spec["coltype"], spec["arg"]

        try:
            col_type = self.dialect.ischema_names[type_]
        except KeyError:
            util.warn(
                "Did not recognize type '%s' of column '%s'" % (type_, name)
            )
            col_type = sqltypes.NullType

        # Column type positional arguments, e.g. varchar(32)
        if args is None or args == "":
            type_args = []
        elif args[0] == "'" and args[-1] == "'":
            type_args = self._re_csv_str.findall(args)
        else:
            type_args = [int(v) for v in self._re_csv_int.findall(args)]

        # Column type keyword options
        type_kw = {}

        if issubclass(col_type, (DATETIME, TIME, TIMESTAMP)):
            if type_args:
                type_kw["fsp"] = type_args.pop(0)

        for kw in ("unsigned", "zerofill"):
            if spec.get(kw, False):
                type_kw[kw] = True
        for kw in ("charset", "collate"):
            if spec.get(kw, False):
                type_kw[kw] = spec[kw]

        if issubclass(col_type, (ENUM, SET)):
            type_args = _strip_values(type_args)

            if issubclass(col_type, SET) and "" in type_args:
                type_kw["retrieve_as_bitwise"] = True

        type_instance = col_type(*type_args, **type_kw)

        col_kw: Dict[str, Any] = {}

        # NOT NULL
        col_kw["nullable"] = True
        # this can be "NULL" in the case of TIMESTAMP
        if spec.get("notnull", False) == "NOT NULL":
            col_kw["nullable"] = False
        # For generated columns, the nullability is marked in a different
        # place
        if spec.get("notnull_generated", False) == "NOT NULL":
            col_kw["nullable"] = False

        # AUTO_INCREMENT
        if spec.get("autoincr", False):
            col_kw["autoincrement"] = True
        elif issubclass(col_type, sqltypes.Integer):
            col_kw["autoincrement"] = False

        # DEFAULT
        default = spec.get("default", None)

        if default == "NULL":
            # eliminates the need to deal with this later.
            default = None

        comment = spec.get("comment", None)

        if comment is not None:
            comment = cleanup_text(comment)

        sqltext = spec.get("generated")
        if sqltext is not None:
            computed = dict(sqltext=sqltext)
            persisted = spec.get("persistence")
            if persisted is not None:
                computed["persisted"] = persisted == "STORED"
            col_kw["computed"] = computed

        col_d = dict(
            name=name, type=type_instance, default=default, comment=comment
        )
        col_d.update(col_kw)
        state.columns.append(col_d)  # type: ignore[arg-type]
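
    # Illustrative sketch (assumed example): a column line such as
    #   `email` varchar(64) NOT NULL DEFAULT '' COMMENT 'addr',
    # is appended to state.columns roughly as
    #   {"name": "email", "type": VARCHAR(64), "nullable": False,
    #    "default": "''", "comment": "addr"}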

    def _describe_to_create(
        self,
        table_name: str,
        columns: Sequence[Tuple[str, str, str, str, str, str]],
    ) -> str:
        """Re-format DESCRIBE output as a SHOW CREATE TABLE string.

        DESCRIBE is a much simpler reflection and is sufficient for
        reflecting views for runtime use.  This method formats DDL
        for columns only; keys are omitted.

        :param columns: A sequence of DESCRIBE or SHOW COLUMNS 6-tuples.
          SHOW FULL COLUMNS FROM rows must be rearranged for use with
          this function.
        """

        buffer = []
        for row in columns:
            (name, col_type, nullable, default, extra) = (
                row[i] for i in (0, 1, 2, 4, 5)
            )

            line = [" "]
            line.append(self.preparer.quote_identifier(name))
            line.append(col_type)
            if not nullable:
                line.append("NOT NULL")
            if default:
                if "auto_increment" in default:
                    pass
                elif col_type.startswith("timestamp") and default.startswith(
                    "C"
                ):
                    line.append("DEFAULT")
                    line.append(default)
                elif default == "NULL":
                    line.append("DEFAULT")
                    line.append(default)
                else:
                    line.append("DEFAULT")
                    line.append("'%s'" % default.replace("'", "''"))
            if extra:
                line.append(extra)
            buffer.append(" ".join(line))

        return "".join(
            [
                (
                    "CREATE TABLE %s (\n"
                    % self.preparer.quote_identifier(table_name)
                ),
                ",\n".join(buffer),
                "\n) ",
            ]
        )
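
    # Illustrative sketch (assumed input shape): a DESCRIBE-style row like
    #   ("id", "int(11)", "", "", None, "auto_increment")
    # contributes a line such as
    #   `id` int(11) NOT NULL auto_increment
    # to the synthesized CREATE TABLE text, which can then be handed back
    # to parse() when a view is being reflected.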

    def _parse_keyexprs(
        self, identifiers: str
    ) -> List[Tuple[str, Optional[int], str]]:
        """Unpack '"col"(2),"col" ASC'-ish strings into components."""
        return [
            (colname, int(length) if length else None, modifiers)
            for colname, length, modifiers in self._re_keyexprs.findall(
                identifiers
            )
        ]
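
    # Illustrative sketch (assumed example, backtick quoting):
    #   self._parse_keyexprs("`a`,`b`(16),`c` DESC")
    # returns roughly
    #   [("a", None, ""), ("b", 16, ""), ("c", None, "DESC")]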

    def _prep_regexes(self) -> None:
        """Pre-compile regular expressions."""
        self._pr_options: List[
            Tuple[re.Pattern[Any], Optional[Callable[[str], str]]]
        ] = []

        _final = self.preparer.final_quote

        quotes = dict(
            zip(
                ("iq", "fq", "esc_fq"),
                [
                    re.escape(s)
                    for s in (
                        self.preparer.initial_quote,
                        _final,
                        self.preparer._escape_identifier(_final),
                    )
                ],
            )
        )

        self._pr_name = _pr_compile(
            r"^CREATE (?:\w+ +)?TABLE +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +\($" % quotes,
            self.preparer._unescape_identifier,
        )

        self._re_is_view = _re_compile(r"^CREATE(?! TABLE)(\s.*)?\sVIEW")

        # `col`,`col2`(32),`col3`(15) DESC
        #
        self._re_keyexprs = _re_compile(
            r"(?:"
            r"(?:%(iq)s((?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)"
            r"(?:\((\d+)\))?(?: +(ASC|DESC))?(?=\,|$))+" % quotes
        )

        # 'foo' or 'foo','bar' or 'fo,o','ba''a''r'
        self._re_csv_str = _re_compile(r"\x27(?:\x27\x27|[^\x27])*\x27")

        # 123 or 123,456
        self._re_csv_int = _re_compile(r"\d+")

        # `colname` <type> [type opts]
        #  (NOT NULL | NULL)
        #   DEFAULT ('value' | CURRENT_TIMESTAMP...)
        #   COMMENT 'comment'
        #  COLUMN_FORMAT (FIXED|DYNAMIC|DEFAULT)
        #  STORAGE (DISK|MEMORY)
        self._re_column = _re_compile(
            r"  "
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|"
            r"(?:'(?:''|[^'])*',?)+))\))?"
            r"(?: +(?P<unsigned>UNSIGNED))?"
            r"(?: +(?P<zerofill>ZEROFILL))?"
            r"(?: +CHARACTER SET +(?P<charset>[\w_]+))?"
            r"(?: +COLLATE +(?P<collate>[\w_]+))?"
            r"(?: +(?P<notnull>(?:NOT )?NULL))?"
            r"(?: +DEFAULT +(?P<default>"
            r"(?:NULL|'(?:''|[^'])*'|\(.+?\)|[\-\w\.\(\)]+"
            r"(?: +ON UPDATE [\-\w\.\(\)]+)?)"
            r"))?"
            r"(?: +(?:GENERATED ALWAYS)? ?AS +(?P<generated>\("
            r".*\))? ?(?P<persistence>VIRTUAL|STORED)?"
            r"(?: +(?P<notnull_generated>(?:NOT )?NULL))?"
            r")?"
            r"(?: +(?P<autoincr>AUTO_INCREMENT))?"
            r"(?: +COMMENT +'(?P<comment>(?:''|[^'])*)')?"
            r"(?: +COLUMN_FORMAT +(?P<colfmt>\w+))?"
            r"(?: +STORAGE +(?P<storage>\w+))?"
            r"(?: +(?P<extra>.*))?"
            r",?$" % quotes
        )

        # Fallback, try to parse as little as possible
        self._re_column_loose = _re_compile(
            r"  "
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"(?P<coltype>\w+)"
            r"(?:\((?P<arg>(?:\d+|\d+,\d+|\x27(?:\x27\x27|[^\x27])+\x27))\))?"
            r".*?(?P<notnull>(?:NOT )NULL)?" % quotes
        )

        # (PRIMARY|UNIQUE|FULLTEXT|SPATIAL) INDEX `name` (USING (BTREE|HASH))?
        # (`col` (ASC|DESC)?, `col` (ASC|DESC)?)
        # KEY_BLOCK_SIZE size | WITH PARSER name  /*!50100 WITH PARSER name */
        self._re_key = _re_compile(
            r"  "
            r"(?:(?P<type>\S+) )?KEY"
            r"(?: +%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s)?"
            r"(?: +USING +(?P<using_pre>\S+))?"
            r" +\((?P<columns>.+?)\)"
            r"(?: +USING +(?P<using_post>\S+))?"
            r"(?: +KEY_BLOCK_SIZE *[ =]? *(?P<keyblock>\S+))?"
            r"(?: +WITH PARSER +(?P<parser>\S+))?"
            r"(?: +COMMENT +(?P<comment>(\x27\x27|\x27([^\x27])*?\x27)+))?"
            r"(?: +/\*(?P<version_sql>.+)\*/ *)?"
            r",?$" % quotes
        )

        # https://forums.mysql.com/read.php?20,567102,567111#msg-567111
        # It means if the MySQL version >= \d+, execute what's in the comment
        self._re_key_version_sql = _re_compile(
            r"\!\d+ " r"(?: *WITH PARSER +(?P<parser>\S+) *)?"
        )

        # CONSTRAINT `name` FOREIGN KEY (`local_col`)
        # REFERENCES `remote` (`remote_col`)
        # MATCH FULL | MATCH PARTIAL | MATCH SIMPLE
        # ON DELETE CASCADE ON UPDATE RESTRICT
        #
        # unique constraints come back as KEYs
        kw = quotes.copy()
        kw["on"] = "RESTRICT|CASCADE|SET NULL|NO ACTION|SET DEFAULT"
        self._re_fk_constraint = _re_compile(
            r"  "
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"FOREIGN KEY +"
            r"\((?P<local>[^\)]+?)\) REFERENCES +"
            r"(?P<table>%(iq)s[^%(fq)s]+%(fq)s"
            r"(?:\.%(iq)s[^%(fq)s]+%(fq)s)?) +"
            r"\((?P<foreign>(?:%(iq)s[^%(fq)s]+%(fq)s(?: *, *)?)+)\)"
            r"(?: +(?P<match>MATCH \w+))?"
            r"(?: +ON DELETE (?P<ondelete>%(on)s))?"
            r"(?: +ON UPDATE (?P<onupdate>%(on)s))?" % kw
        )

        # CONSTRAINT `CONSTRAINT_1` CHECK (`x` > 5)
        # testing on MariaDB 10.2 shows that the CHECK constraint
        # is returned on a line by itself, so to match without worrying
        # about parenthesis in the expression we go to the end of the line
        self._re_ck_constraint = _re_compile(
            r"  "
            r"CONSTRAINT +"
            r"%(iq)s(?P<name>(?:%(esc_fq)s|[^%(fq)s])+)%(fq)s +"
            r"CHECK +"
            r"\((?P<sqltext>.+)\),?" % kw
        )

        # PARTITION
        #
        # punt!
        self._re_partition = _re_compile(r"(?:.*)(?:SUB)?PARTITION(?:.*)")

        # Table-level options (COLLATE, ENGINE, etc.)
        # Do the string options first, since they have quoted
        # strings we need to get rid of.
        for option in _options_of_type_string:
            self._add_option_string(option)

        for option in (
            "ENGINE",
            "TYPE",
            "AUTO_INCREMENT",
            "AVG_ROW_LENGTH",
            "CHARACTER SET",
            "DEFAULT CHARSET",
            "CHECKSUM",
            "COLLATE",
            "DELAY_KEY_WRITE",
            "INSERT_METHOD",
            "MAX_ROWS",
            "MIN_ROWS",
            "PACK_KEYS",
            "ROW_FORMAT",
            "KEY_BLOCK_SIZE",
            "STATS_SAMPLE_PAGES",
        ):
            self._add_option_word(option)

        for option in (
            "PARTITION BY",
            "SUBPARTITION BY",
            "PARTITIONS",
            "SUBPARTITIONS",
            "PARTITION",
            "SUBPARTITION",
        ):
            self._add_partition_option_word(option)

        self._add_option_regex("UNION", r"\([^\)]+\)")
        self._add_option_regex("TABLESPACE", r".*? STORAGE DISK")
        self._add_option_regex(
            "RAID_TYPE",
            r"\w+\s+RAID_CHUNKS\s*\=\s*\w+RAID_CHUNKSIZE\s*=\s*\w+",
        )

    _optional_equals = r"(?:\s*(?:=\s*)|\s+)"

    def _add_option_string(self, directive: str) -> None:
        regex = r"(?P<directive>%s)%s" r"'(?P<val>(?:[^']|'')*?)'(?!')" % (
            re.escape(directive),
            self._optional_equals,
        )
        self._pr_options.append(_pr_compile(regex, cleanup_text))

    def _add_option_word(self, directive: str) -> None:
        regex = r"(?P<directive>%s)%s" r"(?P<val>\w+)" % (
            re.escape(directive),
            self._optional_equals,
        )
        self._pr_options.append(_pr_compile(regex))

    def _add_partition_option_word(self, directive: str) -> None:
        if directive == "PARTITION BY" or directive == "SUBPARTITION BY":
            regex = r"(?<!\S)(?P<directive>%s)%s" r"(?P<val>\w+.*)" % (
                re.escape(directive),
                self._optional_equals,
            )
        elif directive == "SUBPARTITIONS" or directive == "PARTITIONS":
            regex = r"(?<!\S)(?P<directive>%s)%s" r"(?P<val>\d+)" % (
                re.escape(directive),
                self._optional_equals,
            )
        else:
            regex = r"(?<!\S)(?P<directive>%s)(?!\S)" % (re.escape(directive),)
        self._pr_options.append(_pr_compile(regex))

    def _add_option_regex(self, directive: str, regex: str) -> None:
        regex = r"(?P<directive>%s)%s" r"(?P<val>%s)" % (
            re.escape(directive),
            self._optional_equals,
            regex,
        )
        self._pr_options.append(_pr_compile(regex))
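
    # Illustrative sketch (assumed example): _add_option_word("ENGINE")
    # registers a pattern matching "ENGINE=InnoDB", "ENGINE = InnoDB" or
    # "ENGINE InnoDB", capturing directive "ENGINE" and val "InnoDB";
    # _add_option_string("COMMENT") additionally runs cleanup_text() on the
    # quoted value it captures.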


_options_of_type_string = (
    "COMMENT",
    "DATA DIRECTORY",
    "INDEX DIRECTORY",
    "PASSWORD",
    "CONNECTION",
)


@overload
def _pr_compile(
    regex: str, cleanup: Callable[[str], str]
) -> Tuple[re.Pattern[Any], Callable[[str], str]]: ...


@overload
def _pr_compile(
    regex: str, cleanup: None = None
) -> Tuple[re.Pattern[Any], None]: ...


def _pr_compile(
    regex: str, cleanup: Optional[Callable[[str], str]] = None
) -> Tuple[re.Pattern[Any], Optional[Callable[[str], str]]]:
    """Prepare a 2-tuple of compiled regex and callable."""
    return (_re_compile(regex), cleanup)


def _re_compile(regex: str) -> re.Pattern[Any]:
    """Compile a string to regex, I and UNICODE."""
    return re.compile(regex, re.I | re.UNICODE)


def _strip_values(values: Sequence[str]) -> List[str]:
    """Strip quotes from reflected values."""
    strip_values: List[str] = []
    for a in values:
        if a[0:1] == '"' or a[0:1] == "'":
            # strip enclosing quotes and unquote interior
            a = a[1:-1].replace(a[0] * 2, a[0])
        strip_values.append(a)
    return strip_values
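

# Illustrative sketch (assumed example): ENUM/SET values reflected as
#   ["'small'", "'med''ium'", "'large'"]
# come back from _strip_values() as
#   ["small", "med'ium", "large"]
# with enclosing quotes removed and doubled interior quotes collapsed.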


def cleanup_text(raw_text: str) -> str:
    if "\\" in raw_text:
        raw_text = re.sub(
            _control_char_regexp,
            lambda s: _control_char_map[s[0]],  # type: ignore[index]
            raw_text,
        )
    return raw_text.replace("''", "'")


_control_char_map = {
    "\\\\": "\\",
    "\\0": "\0",
    "\\a": "\a",
    "\\b": "\b",
    "\\t": "\t",
    "\\n": "\n",
    "\\v": "\v",
    "\\f": "\f",
    "\\r": "\r",
    # '\\e':'\e',
}
_control_char_regexp = re.compile(
    "|".join(re.escape(k) for k in _control_char_map)
)
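

# Illustrative sketch (assumed example): for a reflected COMMENT captured as
#   line1\nline2 with ''quotes''
# (a literal backslash followed by "n", plus doubled single quotes),
# cleanup_text() returns "line1", a real newline, then "line2 with 'quotes'";
# escape sequences become control characters and '' collapses to '.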