array.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. # dialects/postgresql/array.py
  2. # Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of SQLAlchemy and is released under
  6. # the MIT License: https://www.opensource.org/licenses/mit-license.php
  7. from __future__ import annotations
  8. import re
  9. from typing import Any as typing_Any
  10. from typing import Iterable
  11. from typing import Optional
  12. from typing import Sequence
  13. from typing import TYPE_CHECKING
  14. from typing import TypeVar
  15. from typing import Union
  16. from .operators import CONTAINED_BY
  17. from .operators import CONTAINS
  18. from .operators import OVERLAP
  19. from ... import types as sqltypes
  20. from ... import util
  21. from ...sql import expression
  22. from ...sql import operators
  23. from ...sql.visitors import InternalTraversal
  24. if TYPE_CHECKING:
  25. from ...engine.interfaces import Dialect
  26. from ...sql._typing import _ColumnExpressionArgument
  27. from ...sql._typing import _TypeEngineArgument
  28. from ...sql.elements import ColumnElement
  29. from ...sql.elements import Grouping
  30. from ...sql.expression import BindParameter
  31. from ...sql.operators import OperatorType
  32. from ...sql.selectable import _SelectIterable
  33. from ...sql.type_api import _BindProcessorType
  34. from ...sql.type_api import _LiteralProcessorType
  35. from ...sql.type_api import _ResultProcessorType
  36. from ...sql.type_api import TypeEngine
  37. from ...sql.visitors import _TraverseInternalsType
  38. from ...util.typing import Self
  39. _T = TypeVar("_T", bound=typing_Any)
  40. def Any(
  41. other: typing_Any,
  42. arrexpr: _ColumnExpressionArgument[_T],
  43. operator: OperatorType = operators.eq,
  44. ) -> ColumnElement[bool]:
  45. """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.any` method.
  46. See that method for details.
  47. """
  48. return arrexpr.any(other, operator) # type: ignore[no-any-return, union-attr] # noqa: E501
  49. def All(
  50. other: typing_Any,
  51. arrexpr: _ColumnExpressionArgument[_T],
  52. operator: OperatorType = operators.eq,
  53. ) -> ColumnElement[bool]:
  54. """A synonym for the ARRAY-level :meth:`.ARRAY.Comparator.all` method.
  55. See that method for details.
  56. """
  57. return arrexpr.all(other, operator) # type: ignore[no-any-return, union-attr] # noqa: E501
  58. class array(expression.ExpressionClauseList[_T]):
  59. """A PostgreSQL ARRAY literal.
  60. This is used to produce ARRAY literals in SQL expressions, e.g.::
  61. from sqlalchemy.dialects.postgresql import array
  62. from sqlalchemy.dialects import postgresql
  63. from sqlalchemy import select, func
  64. stmt = select(array([1, 2]) + array([3, 4, 5]))
  65. print(stmt.compile(dialect=postgresql.dialect()))
  66. Produces the SQL:
  67. .. sourcecode:: sql
  68. SELECT ARRAY[%(param_1)s, %(param_2)s] ||
  69. ARRAY[%(param_3)s, %(param_4)s, %(param_5)s]) AS anon_1
  70. An instance of :class:`.array` will always have the datatype
  71. :class:`_types.ARRAY`. The "inner" type of the array is inferred from the
  72. values present, unless the :paramref:`_postgresql.array.type_` keyword
  73. argument is passed::
  74. array(["foo", "bar"], type_=CHAR)
  75. When constructing an empty array, the :paramref:`_postgresql.array.type_`
  76. argument is particularly important as PostgreSQL server typically requires
  77. a cast to be rendered for the inner type in order to render an empty array.
  78. SQLAlchemy's compilation for the empty array will produce this cast so
  79. that::
  80. stmt = array([], type_=Integer)
  81. print(stmt.compile(dialect=postgresql.dialect()))
  82. Produces:
  83. .. sourcecode:: sql
  84. ARRAY[]::INTEGER[]
  85. As required by PostgreSQL for empty arrays.
  86. .. versionadded:: 2.0.40 added support to render empty PostgreSQL array
  87. literals with a required cast.
  88. Multidimensional arrays are produced by nesting :class:`.array` constructs.
  89. The dimensionality of the final :class:`_types.ARRAY`
  90. type is calculated by
  91. recursively adding the dimensions of the inner :class:`_types.ARRAY`
  92. type::
  93. stmt = select(
  94. array(
  95. [array([1, 2]), array([3, 4]), array([column("q"), column("x")])]
  96. )
  97. )
  98. print(stmt.compile(dialect=postgresql.dialect()))
  99. Produces:
  100. .. sourcecode:: sql
  101. SELECT ARRAY[
  102. ARRAY[%(param_1)s, %(param_2)s],
  103. ARRAY[%(param_3)s, %(param_4)s],
  104. ARRAY[q, x]
  105. ] AS anon_1
  106. .. versionadded:: 1.3.6 added support for multidimensional array literals
  107. .. seealso::
  108. :class:`_postgresql.ARRAY`
  109. """ # noqa: E501
  110. __visit_name__ = "array"
  111. stringify_dialect = "postgresql"
  112. _traverse_internals: _TraverseInternalsType = [
  113. ("clauses", InternalTraversal.dp_clauseelement_tuple),
  114. ("type", InternalTraversal.dp_type),
  115. ]
  116. def __init__(
  117. self,
  118. clauses: Iterable[_T],
  119. *,
  120. type_: Optional[_TypeEngineArgument[_T]] = None,
  121. **kw: typing_Any,
  122. ):
  123. r"""Construct an ARRAY literal.
  124. :param clauses: iterable, such as a list, containing elements to be
  125. rendered in the array
  126. :param type\_: optional type. If omitted, the type is inferred
  127. from the contents of the array.
  128. """
  129. super().__init__(operators.comma_op, *clauses, **kw)
  130. main_type = (
  131. type_
  132. if type_ is not None
  133. else self.clauses[0].type if self.clauses else sqltypes.NULLTYPE
  134. )
  135. if isinstance(main_type, ARRAY):
  136. self.type = ARRAY(
  137. main_type.item_type,
  138. dimensions=(
  139. main_type.dimensions + 1
  140. if main_type.dimensions is not None
  141. else 2
  142. ),
  143. ) # type: ignore[assignment]
  144. else:
  145. self.type = ARRAY(main_type) # type: ignore[assignment]
  146. @property
  147. def _select_iterable(self) -> _SelectIterable:
  148. return (self,)
  149. def _bind_param(
  150. self,
  151. operator: OperatorType,
  152. obj: typing_Any,
  153. type_: Optional[TypeEngine[_T]] = None,
  154. _assume_scalar: bool = False,
  155. ) -> BindParameter[_T]:
  156. if _assume_scalar or operator is operators.getitem:
  157. return expression.BindParameter(
  158. None,
  159. obj,
  160. _compared_to_operator=operator,
  161. type_=type_,
  162. _compared_to_type=self.type,
  163. unique=True,
  164. )
  165. else:
  166. return array(
  167. [
  168. self._bind_param(
  169. operator, o, _assume_scalar=True, type_=type_
  170. )
  171. for o in obj
  172. ]
  173. ) # type: ignore[return-value]
  174. def self_group(
  175. self, against: Optional[OperatorType] = None
  176. ) -> Union[Self, Grouping[_T]]:
  177. if against in (operators.any_op, operators.all_op, operators.getitem):
  178. return expression.Grouping(self)
  179. else:
  180. return self
  181. class ARRAY(sqltypes.ARRAY[_T]):
  182. """PostgreSQL ARRAY type.
  183. The :class:`_postgresql.ARRAY` type is constructed in the same way
  184. as the core :class:`_types.ARRAY` type; a member type is required, and a
  185. number of dimensions is recommended if the type is to be used for more
  186. than one dimension::
  187. from sqlalchemy.dialects import postgresql
  188. mytable = Table(
  189. "mytable",
  190. metadata,
  191. Column("data", postgresql.ARRAY(Integer, dimensions=2)),
  192. )
  193. The :class:`_postgresql.ARRAY` type provides all operations defined on the
  194. core :class:`_types.ARRAY` type, including support for "dimensions",
  195. indexed access, and simple matching such as
  196. :meth:`.types.ARRAY.Comparator.any` and
  197. :meth:`.types.ARRAY.Comparator.all`. :class:`_postgresql.ARRAY`
  198. class also
  199. provides PostgreSQL-specific methods for containment operations, including
  200. :meth:`.postgresql.ARRAY.Comparator.contains`
  201. :meth:`.postgresql.ARRAY.Comparator.contained_by`, and
  202. :meth:`.postgresql.ARRAY.Comparator.overlap`, e.g.::
  203. mytable.c.data.contains([1, 2])
  204. Indexed access is one-based by default, to match that of PostgreSQL;
  205. for zero-based indexed access, set
  206. :paramref:`_postgresql.ARRAY.zero_indexes`.
  207. Additionally, the :class:`_postgresql.ARRAY`
  208. type does not work directly in
  209. conjunction with the :class:`.ENUM` type. For a workaround, see the
  210. special type at :ref:`postgresql_array_of_enum`.
  211. .. container:: topic
  212. **Detecting Changes in ARRAY columns when using the ORM**
  213. The :class:`_postgresql.ARRAY` type, when used with the SQLAlchemy ORM,
  214. does not detect in-place mutations to the array. In order to detect
  215. these, the :mod:`sqlalchemy.ext.mutable` extension must be used, using
  216. the :class:`.MutableList` class::
  217. from sqlalchemy.dialects.postgresql import ARRAY
  218. from sqlalchemy.ext.mutable import MutableList
  219. class SomeOrmClass(Base):
  220. # ...
  221. data = Column(MutableList.as_mutable(ARRAY(Integer)))
  222. This extension will allow "in-place" changes such to the array
  223. such as ``.append()`` to produce events which will be detected by the
  224. unit of work. Note that changes to elements **inside** the array,
  225. including subarrays that are mutated in place, are **not** detected.
  226. Alternatively, assigning a new array value to an ORM element that
  227. replaces the old one will always trigger a change event.
  228. .. seealso::
  229. :class:`_types.ARRAY` - base array type
  230. :class:`_postgresql.array` - produces a literal array value.
  231. """
  232. def __init__(
  233. self,
  234. item_type: _TypeEngineArgument[_T],
  235. as_tuple: bool = False,
  236. dimensions: Optional[int] = None,
  237. zero_indexes: bool = False,
  238. ):
  239. """Construct an ARRAY.
  240. E.g.::
  241. Column("myarray", ARRAY(Integer))
  242. Arguments are:
  243. :param item_type: The data type of items of this array. Note that
  244. dimensionality is irrelevant here, so multi-dimensional arrays like
  245. ``INTEGER[][]``, are constructed as ``ARRAY(Integer)``, not as
  246. ``ARRAY(ARRAY(Integer))`` or such.
  247. :param as_tuple=False: Specify whether return results
  248. should be converted to tuples from lists. DBAPIs such
  249. as psycopg2 return lists by default. When tuples are
  250. returned, the results are hashable.
  251. :param dimensions: if non-None, the ARRAY will assume a fixed
  252. number of dimensions. This will cause the DDL emitted for this
  253. ARRAY to include the exact number of bracket clauses ``[]``,
  254. and will also optimize the performance of the type overall.
  255. Note that PG arrays are always implicitly "non-dimensioned",
  256. meaning they can store any number of dimensions no matter how
  257. they were declared.
  258. :param zero_indexes=False: when True, index values will be converted
  259. between Python zero-based and PostgreSQL one-based indexes, e.g.
  260. a value of one will be added to all index values before passing
  261. to the database.
  262. """
  263. if isinstance(item_type, ARRAY):
  264. raise ValueError(
  265. "Do not nest ARRAY types; ARRAY(basetype) "
  266. "handles multi-dimensional arrays of basetype"
  267. )
  268. if isinstance(item_type, type):
  269. item_type = item_type()
  270. self.item_type = item_type
  271. self.as_tuple = as_tuple
  272. self.dimensions = dimensions
  273. self.zero_indexes = zero_indexes
  274. class Comparator(sqltypes.ARRAY.Comparator[_T]):
  275. """Define comparison operations for :class:`_types.ARRAY`.
  276. Note that these operations are in addition to those provided
  277. by the base :class:`.types.ARRAY.Comparator` class, including
  278. :meth:`.types.ARRAY.Comparator.any` and
  279. :meth:`.types.ARRAY.Comparator.all`.
  280. """
  281. def contains(
  282. self, other: typing_Any, **kwargs: typing_Any
  283. ) -> ColumnElement[bool]:
  284. """Boolean expression. Test if elements are a superset of the
  285. elements of the argument array expression.
  286. kwargs may be ignored by this operator but are required for API
  287. conformance.
  288. """
  289. return self.operate(CONTAINS, other, result_type=sqltypes.Boolean)
  290. def contained_by(self, other: typing_Any) -> ColumnElement[bool]:
  291. """Boolean expression. Test if elements are a proper subset of the
  292. elements of the argument array expression.
  293. """
  294. return self.operate(
  295. CONTAINED_BY, other, result_type=sqltypes.Boolean
  296. )
  297. def overlap(self, other: typing_Any) -> ColumnElement[bool]:
  298. """Boolean expression. Test if array has elements in common with
  299. an argument array expression.
  300. """
  301. return self.operate(OVERLAP, other, result_type=sqltypes.Boolean)
  302. comparator_factory = Comparator
  303. @util.memoized_property
  304. def _against_native_enum(self) -> bool:
  305. return (
  306. isinstance(self.item_type, sqltypes.Enum)
  307. and self.item_type.native_enum
  308. )
  309. def literal_processor(
  310. self, dialect: Dialect
  311. ) -> Optional[_LiteralProcessorType[_T]]:
  312. item_proc = self.item_type.dialect_impl(dialect).literal_processor(
  313. dialect
  314. )
  315. if item_proc is None:
  316. return None
  317. def to_str(elements: Iterable[typing_Any]) -> str:
  318. return f"ARRAY[{', '.join(elements)}]"
  319. def process(value: Sequence[typing_Any]) -> str:
  320. inner = self._apply_item_processor(
  321. value, item_proc, self.dimensions, to_str
  322. )
  323. return inner
  324. return process
  325. def bind_processor(
  326. self, dialect: Dialect
  327. ) -> Optional[_BindProcessorType[Sequence[typing_Any]]]:
  328. item_proc = self.item_type.dialect_impl(dialect).bind_processor(
  329. dialect
  330. )
  331. def process(
  332. value: Optional[Sequence[typing_Any]],
  333. ) -> Optional[list[typing_Any]]:
  334. if value is None:
  335. return value
  336. else:
  337. return self._apply_item_processor(
  338. value, item_proc, self.dimensions, list
  339. )
  340. return process
  341. def result_processor(
  342. self, dialect: Dialect, coltype: object
  343. ) -> _ResultProcessorType[Sequence[typing_Any]]:
  344. item_proc = self.item_type.dialect_impl(dialect).result_processor(
  345. dialect, coltype
  346. )
  347. def process(
  348. value: Sequence[typing_Any],
  349. ) -> Optional[Sequence[typing_Any]]:
  350. if value is None:
  351. return value
  352. else:
  353. return self._apply_item_processor(
  354. value,
  355. item_proc,
  356. self.dimensions,
  357. tuple if self.as_tuple else list,
  358. )
  359. if self._against_native_enum:
  360. super_rp = process
  361. pattern = re.compile(r"^{(.*)}$")
  362. def handle_raw_string(value: str) -> list[str]:
  363. inner = pattern.match(value).group(1) # type: ignore[union-attr] # noqa: E501
  364. return _split_enum_values(inner)
  365. def process(
  366. value: Sequence[typing_Any],
  367. ) -> Optional[Sequence[typing_Any]]:
  368. if value is None:
  369. return value
  370. # isinstance(value, str) is required to handle
  371. # the case where a TypeDecorator for and Array of Enum is
  372. # used like was required in sa < 1.3.17
  373. return super_rp(
  374. handle_raw_string(value)
  375. if isinstance(value, str)
  376. else value
  377. )
  378. return process
  379. def _split_enum_values(array_string: str) -> list[str]:
  380. if '"' not in array_string:
  381. # no escape char is present so it can just split on the comma
  382. return array_string.split(",") if array_string else []
  383. # handles quoted strings from:
  384. # r'abc,"quoted","also\\\\quoted", "quoted, comma", "esc \" quot", qpr'
  385. # returns
  386. # ['abc', 'quoted', 'also\\quoted', 'quoted, comma', 'esc " quot', 'qpr']
  387. text = array_string.replace(r"\"", "_$ESC_QUOTE$_")
  388. text = text.replace(r"\\", "\\")
  389. result = []
  390. on_quotes = re.split(r'(")', text)
  391. in_quotes = False
  392. for tok in on_quotes:
  393. if tok == '"':
  394. in_quotes = not in_quotes
  395. elif in_quotes:
  396. result.append(tok.replace("_$ESC_QUOTE$_", '"'))
  397. else:
  398. result.extend(re.findall(r"([^\s,]+),?", tok))
  399. return result