# dialects/oracle/vector.py
# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
# <see AUTHORS file>
#
# This module is part of SQLAlchemy and is released under
# the MIT License: https://www.opensource.org/licenses/mit-license.php
# mypy: ignore-errors


from __future__ import annotations

import array
from dataclasses import dataclass
from enum import Enum
from typing import Optional
from typing import Union

import sqlalchemy.types as types
from sqlalchemy.types import Float


class VectorIndexType(Enum):
    """Enum representing different types of VECTOR index structures.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    HNSW = "HNSW"
    """
    The HNSW (Hierarchical Navigable Small World) index type.
    """
    IVF = "IVF"
    """
    The IVF (Inverted File Index) index type
    """


class VectorDistanceType(Enum):
    """Enum representing different types of vector distance metrics.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    EUCLIDEAN = "EUCLIDEAN"
    """Euclidean distance (L2 norm).

    Measures the straight-line distance between two vectors in space.
    """
    DOT = "DOT"
    """Dot product similarity.

    Measures the algebraic similarity between two vectors.
    """
    COSINE = "COSINE"
    """Cosine similarity.

    Measures the cosine of the angle between two vectors.
    """
    MANHATTAN = "MANHATTAN"
    """Manhattan distance (L1 norm).

    Calculates the sum of absolute differences across dimensions.
    """


class VectorStorageFormat(Enum):
    """Enum representing the data format used to store vector components.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    """

    INT8 = "INT8"
    """
    8-bit integer format.
    """
    BINARY = "BINARY"
    """
    Binary format.
    """
    FLOAT32 = "FLOAT32"
    """
    32-bit floating-point format.
    """
    FLOAT64 = "FLOAT64"
    """
    64-bit floating-point format.
    """


class VectorStorageType(Enum):
    """Enum representing the vector type,

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.43

    """

    SPARSE = "SPARSE"
    """
    A Sparse vector is a vector which has zero value for
    most of its dimensions.
    """
    DENSE = "DENSE"
    """
    A Dense vector is a vector where most, if not all, elements
    hold meaningful values.
    """


@dataclass
class VectorIndexConfig:
    """Define the configuration for Oracle VECTOR Index.

    See :ref:`oracle_vector_datatype` for background.

    .. versionadded:: 2.0.41

    :param index_type: Enum value from :class:`.VectorIndexType`
     Specifies the indexing method. For HNSW, this must be
     :attr:`.VectorIndexType.HNSW`.

    :param distance: Enum value from :class:`.VectorDistanceType`
     specifies the metric for calculating distance between VECTORS.

    :param accuracy: interger. Should be in the range 0 to 100
     Specifies the accuracy of the nearest neighbor search during
     query execution.

    :param parallel: integer. Specifies degree of parallelism.

    :param hnsw_neighbors: interger. Should be in the range 0 to
     2048. Specifies the number of nearest neighbors considered
     during the search. The attribute :attr:`.VectorIndexConfig.hnsw_neighbors`
     is HNSW index specific.

    :param hnsw_efconstruction: integer. Should be in the range 0
     to 65535. Controls the trade-off between indexing speed and
     recall quality during index construction. The attribute
     :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
     specific.

    :param ivf_neighbor_partitions: integer. Should be in the range
     0 to 10,000,000. Specifies the number of partitions used to
     divide the dataset. The attribute
     :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
     specific.

    :param ivf_sample_per_partition: integer. Should be between 1
     and ``num_vectors / neighbor partitions``. Specifies the
     number of samples used per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
     specific.

    :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
     to the total number of vectors (results in 1 partition). Specifies
     the minimum number of vectors per partition. The attribute
     :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
     is IVF index specific.

    """

    index_type: VectorIndexType = VectorIndexType.HNSW
    distance: Optional[VectorDistanceType] = None
    accuracy: Optional[int] = None
    hnsw_neighbors: Optional[int] = None
    hnsw_efconstruction: Optional[int] = None
    ivf_neighbor_partitions: Optional[int] = None
    ivf_sample_per_partition: Optional[int] = None
    ivf_min_vectors_per_partition: Optional[int] = None
    parallel: Optional[int] = None

    def __post_init__(self):
        self.index_type = VectorIndexType(self.index_type)
        for field in [
            "hnsw_neighbors",
            "hnsw_efconstruction",
            "ivf_neighbor_partitions",
            "ivf_sample_per_partition",
            "ivf_min_vectors_per_partition",
            "parallel",
            "accuracy",
        ]:
            value = getattr(self, field)
            if value is not None and not isinstance(value, int):
                raise TypeError(
                    f"{field} must be an integer if"
                    f"provided, got {type(value).__name__}"
                )


class SparseVector:
    """
    Lightweight SQLAlchemy-side version of SparseVector.
    This mimics oracledb.SparseVector.

    .. versionadded:: 2.0.43

    """

    def __init__(
        self,
        num_dimensions: int,
        indices: Union[list, array.array],
        values: Union[list, array.array],
    ):
        if not isinstance(indices, array.array) or indices.typecode != "I":
            indices = array.array("I", indices)
        if not isinstance(values, array.array):
            values = array.array("d", values)
        if len(indices) != len(values):
            raise TypeError("indices and values must be of the same length!")

        self.num_dimensions = num_dimensions
        self.indices = indices
        self.values = values

    def __str__(self):
        return (
            f"SparseVector(num_dimensions={self.num_dimensions}, "
            f"size={len(self.indices)}, typecode={self.values.typecode})"
        )


class VECTOR(types.TypeEngine):
    """Oracle VECTOR datatype.

    For complete background on using this type, see
    :ref:`oracle_vector_datatype`.

    .. versionadded:: 2.0.41

    """

    cache_ok = True
    __visit_name__ = "VECTOR"

    _typecode_map = {
        VectorStorageFormat.INT8: "b",  # Signed int
        VectorStorageFormat.BINARY: "B",  # Unsigned int
        VectorStorageFormat.FLOAT32: "f",  # Float
        VectorStorageFormat.FLOAT64: "d",  # Double
    }

    def __init__(self, dim=None, storage_format=None, storage_type=None):
        """Construct a VECTOR.

        :param dim: integer. The dimension of the VECTOR datatype. This
         should be an integer value.

        :param storage_format: VectorStorageFormat. The VECTOR storage
         type format. This should be Enum values form
         :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.

        :param storage_type: VectorStorageType. The Vector storage type. This
         should be Enum values from :class:`.VectorStorageType` SPARSE or
         DENSE.

        """

        if dim is not None and not isinstance(dim, int):
            raise TypeError("dim must be an interger")
        if storage_format is not None and not isinstance(
            storage_format, VectorStorageFormat
        ):
            raise TypeError(
                "storage_format must be an enum of type VectorStorageFormat"
            )
        if storage_type is not None and not isinstance(
            storage_type, VectorStorageType
        ):
            raise TypeError(
                "storage_type must be an enum of type VectorStorageType"
            )

        self.dim = dim
        self.storage_format = storage_format
        self.storage_type = storage_type

    def _cached_bind_processor(self, dialect):
        """
        Converts a Python-side SparseVector instance into an
        oracledb.SparseVectormor a compatible array format before
        binding it to the database.
        """

        def process(value):
            if value is None or isinstance(value, array.array):
                return value

            # Convert list to a array.array
            elif isinstance(value, list):
                typecode = self._array_typecode(self.storage_format)
                value = array.array(typecode, value)
                return value

            # Convert SqlAlchemy SparseVector to oracledb SparseVector object
            elif isinstance(value, SparseVector):
                return dialect.dbapi.SparseVector(
                    value.num_dimensions,
                    value.indices,
                    value.values,
                )

            else:
                raise TypeError(
                    """
                    Invalid input for VECTOR: expected a list, an array.array,
                    or a SparseVector object.
                    """
                )

        return process

    def _cached_result_processor(self, dialect, coltype):
        """
        Converts database-returned values into Python-native representations.
        If the value is an oracledb.SparseVector, it is converted into the
        SQLAlchemy-side SparseVector class.
        If the value is a array.array, it is converted to a plain Python list.

        """

        def process(value):
            if value is None:
                return None

            elif isinstance(value, array.array):
                return list(value)

            # Convert Oracledb SparseVector to SqlAlchemy SparseVector object
            elif isinstance(value, dialect.dbapi.SparseVector):
                return SparseVector(
                    num_dimensions=value.num_dimensions,
                    indices=value.indices,
                    values=value.values,
                )

        return process

    def _array_typecode(self, typecode):
        """
        Map storage format to array typecode.
        """
        return self._typecode_map.get(typecode, "d")

    class comparator_factory(types.TypeEngine.Comparator):
        def l2_distance(self, other):
            return self.op("<->", return_type=Float)(other)

        def inner_product(self, other):
            return self.op("<#>", return_type=Float)(other)

        def cosine_distance(self, other):
            return self.op("<=>", return_type=Float)(other)