Complete API documentation auto-generated from source code docstrings.

VSA Factory

The main entry point for creating VSA models.

holovec.VSA

High-level factory interface for creating VSA models.

This class provides a simple, unified API for creating and using different VSA models. It's the recommended entry point for most users.

Examples:

>>> # Create a MAP model with default settings
>>> model = VSA.create('MAP')
>>>
>>> # Create FHRR with specific dimension and backend
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>>
>>> # Use the model
>>> a, b = model.random(), model.random()
>>> c = model.bind(a, b)
>>> similarity = model.similarity(a, model.unbind(c, b))
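The factory also exposes available_models() and model_info() for runtime discovery; a minimal sketch (output order follows the internal model registry):

>>> VSA.available_models()
['map', 'fhrr', 'hrr', 'bsc', 'bsdc', 'bsdc_seg', 'bsdc-seg', 'ghrr', 'vtb']
>>> VSA.model_info('FHRR')['is_exact_inverse']
True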

Source code in holovec/__init__.py
class VSA:
    """High-level factory interface for creating VSA models.

    This class provides a simple, unified API for creating and using
    different VSA models. It's the recommended entry point for most users.

    Examples:
        >>> # Create a MAP model with default settings
        >>> model = VSA.create('MAP')
        >>>
        >>> # Create FHRR with specific dimension and backend
        >>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
        >>>
        >>> # Use the model
        >>> a, b = model.random(), model.random()
        >>> c = model.bind(a, b)
        >>> similarity = model.similarity(a, model.unbind(c, b))
    """

    # Model registry
    _MODELS = {
        "map": MAPModel,
        "fhrr": FHRRModel,
        "hrr": HRRModel,
        "bsc": BSCModel,
        "bsdc": BSDCModel,
        "bsdc_seg": BSDCSEGModel,
        "bsdc-seg": BSDCSEGModel,  # alias with hyphen to match model_name
        "ghrr": GHRRModel,
        "vtb": VTBModel,
    }

    # Default vector spaces for each model
    _DEFAULT_SPACES = {
        "map": "bipolar",
        "fhrr": "complex",
        "hrr": "real",
        "bsc": "binary",
        "bsdc": "sparse",
        "bsdc_seg": "sparse_segment",
        "bsdc-seg": "sparse_segment",  # alias with hyphen
        "ghrr": "matrix",
        "vtb": "real",
    }

    @classmethod
    def create(
        cls,
        model_type: str,
        dim: int = 10000,
        backend: str | Backend | None = None,
        space: str | None = None,
        seed: int | None = None,
        **kwargs,
    ) -> VSAModel:
        """Create a VSA model with the specified configuration.

        Args:
            model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
            dim: Dimensionality of hypervectors
            backend: Backend name ('numpy', 'torch', 'jax'), a :class:`Backend`
                instance, or None for the default backend
            space: Vector space name or None for model's default
            seed: Random seed for reproducibility
            **kwargs: Additional arguments passed to backend (e.g., device='cuda')

        Returns:
            VSA model instance

        Raises:
            ValueError: If model_type is not recognized

        Examples:
            >>> model = VSA.create('MAP', dim=10000)
            >>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
            >>> model = VSA.create('MAP', space='real')  # Use real-valued MAP
        """
        # Normalize model type
        model_type_lower = model_type.lower()

        if model_type_lower not in cls._MODELS:
            available = list(cls._MODELS.keys())
            raise ValueError(f"Unknown model type '{model_type}'. Available models: {available}")

        # Get model class
        model_class = cls._MODELS[model_type_lower]

        # Create backend
        backend_kwargs = {k: v for k, v in kwargs.items() if k in ["device"]}
        if isinstance(backend, Backend):
            backend_instance = backend
        elif isinstance(backend, str):
            backend_instance = get_backend(backend, **backend_kwargs)
        elif backend is None:
            backend_instance = None
        else:
            raise TypeError("backend must be a backend name (str) or a Backend instance")

        # Determine space type
        if space is None:
            space = cls._DEFAULT_SPACES.get(model_type_lower)

        # Collect space-specific kwargs
        space_kwargs = {}
        if space == "sparse_segment":
            # For BSDC-SEG: default to dim/10 segments (segment_length=10)
            space_kwargs["segments"] = kwargs.get("segments", max(1, dim // 10))

        # Create space if string provided
        if isinstance(space, str):
            space_instance = create_space(
                space, dimension=dim, backend=backend_instance, seed=seed, **space_kwargs
            )
        else:
            space_instance = space

        # Collect model-specific kwargs
        model_kwargs = {}
        if model_type_lower == 'bsdc':
            # BSDC supports binding_mode parameter
            if 'binding_mode' in kwargs:
                model_kwargs['binding_mode'] = kwargs['binding_mode']

        # Create model
        model = model_class(
            dimension=dim, space=space_instance, backend=backend_instance, seed=seed,
            **model_kwargs
        )

        return model

    @classmethod
    def available_models(cls) -> list[str]:
        """Return list of available model names.

        Returns:
            List of model names that can be used with create()
        """
        return list(cls._MODELS.keys())

    @classmethod
    def model_info(cls, model_type: str) -> dict:
        """Get information about a specific model.

        Args:
            model_type: Model name

        Returns:
            Dictionary with model properties

        Example:
            >>> info = VSA.model_info('FHRR')
            >>> print(info['is_exact_inverse'])  # True
        """
        model_type_lower = model_type.lower()
        if model_type_lower not in cls._MODELS:
            raise ValueError(f"Unknown model type '{model_type}'")

        # Create a temporary instance to query properties
        model = cls.create(model_type, dim=100)

        return {
            "name": model.model_name,
            "is_self_inverse": model.is_self_inverse,
            "is_commutative": model.is_commutative,
            "is_exact_inverse": model.is_exact_inverse,
            "default_space": cls._DEFAULT_SPACES.get(model_type_lower),
            "class": model.__class__.__name__,
        }

create(model_type, dim=10000, backend=None, space=None, seed=None, **kwargs) classmethod

Create a VSA model with the specified configuration.

Args:
    model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
    dim: Dimensionality of hypervectors
    backend: Backend name ('numpy', 'torch', 'jax'), a Backend instance, or None for the default backend
    space: Vector space name or None for the model's default
    seed: Random seed for reproducibility
    **kwargs: Additional arguments passed to the backend (e.g., device='cuda')

Returns: VSA model instance

Raises: ValueError: If model_type is not recognized

Examples:

>>> model = VSA.create('MAP', dim=10000)
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>> model = VSA.create('MAP', space='real')  # Use real-valued MAP

Source code in holovec/__init__.py
@classmethod
def create(
    cls,
    model_type: str,
    dim: int = 10000,
    backend: str | Backend | None = None,
    space: str | None = None,
    seed: int | None = None,
    **kwargs,
) -> VSAModel:
    """Create a VSA model with the specified configuration.

    Args:
        model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
        dim: Dimensionality of hypervectors
        backend: Backend name ('numpy', 'torch', 'jax'), a :class:`Backend`
            instance, or None for the default backend
        space: Vector space name or None for model's default
        seed: Random seed for reproducibility
        **kwargs: Additional arguments passed to backend (e.g., device='cuda')

    Returns:
        VSA model instance

    Raises:
        ValueError: If model_type is not recognized

    Examples:
        >>> model = VSA.create('MAP', dim=10000)
        >>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
        >>> model = VSA.create('MAP', space='real')  # Use real-valued MAP
    """
    # Normalize model type
    model_type_lower = model_type.lower()

    if model_type_lower not in cls._MODELS:
        available = list(cls._MODELS.keys())
        raise ValueError(f"Unknown model type '{model_type}'. Available models: {available}")

    # Get model class
    model_class = cls._MODELS[model_type_lower]

    # Create backend
    backend_kwargs = {k: v for k, v in kwargs.items() if k in ["device"]}
    if isinstance(backend, Backend):
        backend_instance = backend
    elif isinstance(backend, str):
        backend_instance = get_backend(backend, **backend_kwargs)
    elif backend is None:
        backend_instance = None
    else:
        raise TypeError("backend must be a backend name (str) or a Backend instance")

    # Determine space type
    if space is None:
        space = cls._DEFAULT_SPACES.get(model_type_lower)

    # Collect space-specific kwargs
    space_kwargs = {}
    if space == "sparse_segment":
        # For BSDC-SEG: default to dim/10 segments (segment_length=10)
        space_kwargs["segments"] = kwargs.get("segments", max(1, dim // 10))

    # Create space if string provided
    if isinstance(space, str):
        space_instance = create_space(
            space, dimension=dim, backend=backend_instance, seed=seed, **space_kwargs
        )
    else:
        space_instance = space

    # Collect model-specific kwargs
    model_kwargs = {}
    if model_type_lower == 'bsdc':
        # BSDC supports binding_mode parameter
        if 'binding_mode' in kwargs:
            model_kwargs['binding_mode'] = kwargs['binding_mode']

    # Create model
    model = model_class(
        dimension=dim, space=space_instance, backend=backend_instance, seed=seed,
        **model_kwargs
    )

    return model

Models

All VSA model implementations.

Base Model

holovec.models.base.VSAModel

Bases: ABC

Abstract base class for VSA models.

A VSA model defines the core operations:

- bind: associate two vectors (creates a dissimilar result)
- unbind: recover one vector given the other and their binding
- bundle: combine multiple vectors (preserves similarity)
- permute: reorder a vector to represent position/sequence

Different models have different algebraic properties (see the sketch below):

- Self-inverse binding: bind(a, b) = unbind(a, b)
- Exact vs. approximate inverse
- Commutativity of binding
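A minimal sketch of these operations on a concrete model (any model created via VSA.create behaves the same at this level; similarity values are approximate):

>>> from holovec import VSA
>>> model = VSA.create('MAP', dim=10000, seed=0)
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)                     # dissimilar to both a and b
>>> model.similarity(a, model.unbind(c, b))  # high: a is recovered
>>> s = model.bundle([a, b])                 # similar to both a and b
>>> p = model.permute(a, k=1)                # position-encoded copy of a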

Source code in holovec/models/base.py
class VSAModel(ABC):
    """Abstract base class for VSA models.

    A VSA model defines the core operations:
    - bind: Associate two vectors (creates dissimilar result)
    - unbind: Recover one vector given the other and their binding
    - bundle: Combine multiple vectors (preserves similarity)
    - permute: Reorder vector to represent position/sequence

    Different models have different algebraic properties:
    - Self-inverse binding: bind(a, b) = unbind(a, b)
    - Exact vs approximate inverse
    - Commutativity of binding
    """

    def __init__(
        self,
        space: VectorSpace,
        backend: Backend | None = None
    ):
        """Initialize VSA model.

        Args:
            space: Vector space defining the representation
            backend: Computational backend (defaults to space's backend)
        """
        self.space = space
        self.backend = backend if backend is not None else space.backend
        self.dimension = space.dimension

    # ===== Core VSA Operations =====

    @abstractmethod
    def bind(self, a: Array, b: Array) -> Array:
        """Bind two vectors to create an association.

        Binding creates a new vector that is dissimilar to both inputs
        but preserves structured similarity (similar inputs → similar bindings).

        Args:
            a: First vector
            b: Second vector

        Returns:
            Bound vector representing the association of a and b
        """
        pass

    @abstractmethod
    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind to recover one vector given the other.

        For self-inverse models: unbind(a, b) = bind(a, b)
        For others: approximately recovers a from bind(a, b) and b

        Args:
            a: Bound vector or first operand
            b: Second operand

        Returns:
            Recovered vector (exact or approximate depending on model)
        """
        pass

    @abstractmethod
    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle (superpose) multiple vectors.

        Bundling combines vectors while preserving similarity to all inputs.
        The result is similar to each input vector.

        Args:
            vectors: Sequence of vectors to bundle

        Returns:
            Bundled vector representing the superposition

        Raises:
            ValueError: If vectors is empty
        """
        pass

    @abstractmethod
    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute vector to represent position or sequence.

        Permutation reorders coordinates and is used to encode position
        or create sequences. It's invertible and preserves similarity.

        Args:
            vec: Vector to permute
            k: Number of positions to shift (default: 1)

        Returns:
            Permuted vector
        """
        pass

    def unpermute(self, vec: Array, k: int = 1) -> Array:
        """Inverse permutation.

        Args:
            vec: Vector to unpermute
            k: Number of positions to shift back (default: 1)

        Returns:
            Unpermuted vector
        """
        return self.permute(vec, -k)

    # ===== Similarity and Cleanup =====

    def similarity(self, a: Array, b: Array) -> float:
        """Compute similarity between two vectors.

        Delegates to the vector space's similarity metric.

        Args:
            a: First vector
            b: Second vector

        Returns:
            Similarity score (space-dependent metric)
        """
        return self.space.similarity(a, b)

    def normalize(self, vec: Array) -> Array:
        """Normalize vector according to space conventions.

        Args:
            vec: Vector to normalize

        Returns:
            Normalized vector
        """
        return self.space.normalize(vec)

    # ===== Vector Generation =====

    def random(self, seed: int | None = None) -> Array:
        """Generate a random vector from the space.

        Args:
            seed: Optional random seed

        Returns:
            Random vector
        """
        return self.space.random(seed=seed)

    def random_sequence(self, n: int, seed: int | None = None) -> list[Array]:
        """Generate n random vectors.

        Args:
            n: Number of vectors to generate
            seed: Optional base seed (each vector gets seed + i)

        Returns:
            List of random vectors
        """
        if seed is not None:
            return [self.random(seed=seed + i) for i in range(n)]
        return [self.random() for _ in range(n)]

    # ===== Compositional Operations =====

    def bind_multiple(self, vectors: Sequence[Array]) -> Array:
        """Bind multiple vectors sequentially.

        For n vectors: bind(bind(bind(v1, v2), v3), ...)

        Args:
            vectors: Sequence of vectors to bind

        Returns:
            Result of sequential binding

        Raises:
            ValueError: If fewer than 2 vectors provided
        """
        if len(vectors) < 2:
            raise ValueError("Need at least 2 vectors to bind")

        result = vectors[0]
        for vec in vectors[1:]:
            result = self.bind(result, vec)
        return result

    # ===== Model Properties =====

    @property
    @abstractmethod
    def is_self_inverse(self) -> bool:
        """Whether binding is self-inverse (bind = unbind)."""
        pass

    @property
    @abstractmethod
    def is_commutative(self) -> bool:
        """Whether binding is commutative (bind(a, b) = bind(b, a))."""
        pass

    @property
    @abstractmethod
    def is_exact_inverse(self) -> bool:
        """Whether unbinding gives exact recovery (no approximation error)."""
        pass

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Return the model name (e.g., 'MAP', 'FHRR', 'HRR')."""
        pass

    def __repr__(self) -> str:
        return (f"{self.__class__.__name__}(dimension={self.dimension}, "
                f"space={self.space.space_name}, backend={self.backend.name})")

bind(a, b) abstractmethod

Bind two vectors to create an association.

Binding creates a new vector that is dissimilar to both inputs but preserves structured similarity (similar inputs → similar bindings).

Args:
    a: First vector
    b: Second vector

Returns: Bound vector representing the association of a and b
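Continuing the sketch above (a hedged illustration; exact values depend on the model and dimension):

>>> c = model.bind(a, b)
>>> model.similarity(a, c)  # near 0: the binding is dissimilar to its inputs
>>> model.similarity(b, c)  # near 0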

Source code in holovec/models/base.py
@abstractmethod
def bind(self, a: Array, b: Array) -> Array:
    """Bind two vectors to create an association.

    Binding creates a new vector that is dissimilar to both inputs
    but preserves structured similarity (similar inputs → similar bindings).

    Args:
        a: First vector
        b: Second vector

    Returns:
        Bound vector representing the association of a and b
    """
    pass

unbind(a, b) abstractmethod

Unbind to recover one vector given the other.

For self-inverse models: unbind(a, b) = bind(a, b)
For others: approximately recovers a from bind(a, b) and b

Args:
    a: Bound vector or first operand
    b: Second operand

Returns: Recovered vector (exact or approximate depending on model)
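Illustrative roundtrip, continuing the sketch above (values are approximate for non-exact models):

>>> c = model.bind(a, b)
>>> model.similarity(a, model.unbind(c, b))  # close to 1.0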

Source code in holovec/models/base.py
@abstractmethod
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind to recover one vector given the other.

    For self-inverse models: unbind(a, b) = bind(a, b)
    For others: approximately recovers a from bind(a, b) and b

    Args:
        a: Bound vector or first operand
        b: Second operand

    Returns:
        Recovered vector (exact or approximate depending on model)
    """
    pass

bundle(vectors) abstractmethod

Bundle (superpose) multiple vectors.

Bundling combines vectors while preserving similarity to all inputs. The result is similar to each input vector.

Args: vectors: Sequence of vectors to bundle

Returns: Bundled vector representing the superposition

Raises: ValueError: If vectors is empty
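Illustrative sketch, continuing from above (the bundled vector remains recognizably similar to each input):

>>> s = model.bundle([a, b, c])
>>> model.similarity(a, s)  # well above chance for each of a, b, c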

Source code in holovec/models/base.py
@abstractmethod
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle (superpose) multiple vectors.

    Bundling combines vectors while preserving similarity to all inputs.
    The result is similar to each input vector.

    Args:
        vectors: Sequence of vectors to bundle

    Returns:
        Bundled vector representing the superposition

    Raises:
        ValueError: If vectors is empty
    """
    pass

permute(vec, k=1) abstractmethod

Permute vector to represent position or sequence.

Permutation reorders coordinates and is used to encode position or create sequences. It's invertible and preserves similarity.

Args:
    vec: Vector to permute
    k: Number of positions to shift (default: 1)

Returns: Permuted vector
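Because permutation is invertible, unpermute with the same k restores the original exactly (continuing the sketch above):

>>> p = model.permute(a, k=3)
>>> model.similarity(a, model.unpermute(p, k=3))  # 1.0: exact roundtrip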

Source code in holovec/models/base.py
@abstractmethod
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute vector to represent position or sequence.

    Permutation reorders coordinates and is used to encode position
    or create sequences. It's invertible and preserves similarity.

    Args:
        vec: Vector to permute
        k: Number of positions to shift (default: 1)

    Returns:
        Permuted vector
    """
    pass

unpermute(vec, k=1)

Inverse permutation.

Args:
    vec: Vector to unpermute
    k: Number of positions to shift back (default: 1)

Returns: Unpermuted vector

Source code in holovec/models/base.py
def unpermute(self, vec: Array, k: int = 1) -> Array:
    """Inverse permutation.

    Args:
        vec: Vector to unpermute
        k: Number of positions to shift back (default: 1)

    Returns:
        Unpermuted vector
    """
    return self.permute(vec, -k)

similarity(a, b)

Compute similarity between two vectors.

Delegates to the vector space's similarity metric.

Args:
    a: First vector
    b: Second vector

Returns: Similarity score (space-dependent metric)

Source code in holovec/models/base.py
def similarity(self, a: Array, b: Array) -> float:
    """Compute similarity between two vectors.

    Delegates to the vector space's similarity metric.

    Args:
        a: First vector
        b: Second vector

    Returns:
        Similarity score (space-dependent metric)
    """
    return self.space.similarity(a, b)

normalize(vec)

Normalize vector according to space conventions.

Args: vec: Vector to normalize

Returns: Normalized vector

Source code in holovec/models/base.py
def normalize(self, vec: Array) -> Array:
    """Normalize vector according to space conventions.

    Args:
        vec: Vector to normalize

    Returns:
        Normalized vector
    """
    return self.space.normalize(vec)

random(seed=None)

Generate a random vector from the space.

Args: seed: Optional random seed

Returns: Random vector

Source code in holovec/models/base.py
def random(self, seed: int | None = None) -> Array:
    """Generate a random vector from the space.

    Args:
        seed: Optional random seed

    Returns:
        Random vector
    """
    return self.space.random(seed=seed)

FHRR

holovec.models.fhrr.FHRRModel

Bases: VSAModel

FHRR (Fourier HRR) model using complex phasors.

Binding: element-wise complex multiplication (phase addition)
Unbinding: element-wise multiplication with the conjugate (phase subtraction)
Bundling: element-wise addition + normalization to unit magnitude
Permutation: circular shift (can also use phase rotation)

Uses ComplexSpace with unit-magnitude phasors.
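A minimal sketch of FHRR's exact unbinding via the factory (similarity value is approximate up to floating-point error):

>>> from holovec import VSA
>>> model = VSA.create('FHRR', dim=512, seed=0)
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> model.similarity(a, model.unbind(model.bind(a, b), b))  # ≈ 1.0: the conjugate gives an exact inverse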

Source code in holovec/models/fhrr.py
class FHRRModel(VSAModel):
    """FHRR (Fourier HRR) model using complex phasors.

    Binding: element-wise complex multiplication (phase addition)
    Unbinding: element-wise multiplication with conjugate (phase subtraction)
    Bundling: element-wise addition + normalization to unit magnitude
    Permutation: circular shift (can also use phase rotation)

    Uses ComplexSpace with unit-magnitude phasors.
    """

    def __init__(
        self,
        dimension: int = 512,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None
    ):
        """Initialize FHRR model.

        Args:
            dimension: Dimensionality of hypervectors
                      (can be smaller than MAP due to better capacity)
            space: Vector space (defaults to ComplexSpace)
            backend: Computational backend
            seed: Random seed for space
        """
        if space is None:
            from ..backends import get_backend
            backend = backend if backend is not None else get_backend()
            space = ComplexSpace(dimension, backend=backend, seed=seed)

        super().__init__(space, backend)

    @property
    def model_name(self) -> str:
        return "FHRR"

    @property
    def is_self_inverse(self) -> bool:
        return False  # Requires conjugate, not same operation

    @property
    def is_commutative(self) -> bool:
        return True  # Complex multiplication is commutative

    @property
    def is_exact_inverse(self) -> bool:
        return True  # Conjugate provides exact inverse

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using element-wise complex multiplication.

        For unit phasors: (a * b)[i] = a[i] * b[i]
        This adds phase angles: ∠(a*b) = ∠a + ∠b

        Args:
            a: First vector (unit phasors)
            b: Second vector (unit phasors)

        Returns:
            Bound vector c = a ⊙ b (element-wise product)
        """
        result = self.backend.multiply(a, b)
        # Normalize to unit magnitude
        return self.normalize(result)

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using element-wise multiplication with conjugate.

        To recover original from c = a ⊙ b:
        unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a

        Args:
            a: Bound vector (or first operand)
            b: Second operand

        Returns:
            Unbound vector (exact recovery)
        """
        b_conj = self.backend.conjugate(b)
        result = self.backend.multiply(a, b_conj)
        return self.normalize(result)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition.

        Sum phasors and normalize back to unit magnitude.
        The result points in the "average" direction of inputs.

        Args:
            vectors: Sequence of vectors to bundle

        Returns:
            Bundled vector (normalized to unit magnitude)

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors (phasors add vectorially)
        result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        # Normalize to unit magnitude
        return self.normalize(result)

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        For FHRR, permutation can be done as:
        1. Circular shift (coordinate permutation)
        2. Phase rotation (multiply by exp(i*2πk/D))

        We use circular shift for consistency with other models.

        Args:
            vec: Vector to permute
            k: Number of positions to shift

        Returns:
            Permuted vector
        """
        return self.backend.roll(vec, shift=k)

    def fractional_power(self, vec: Array, exponent: float) -> Array:
        """Raise phasor to a fractional power.

        For unit phasor z = exp(iθ): z^α = exp(iαθ)
        This is useful for encoding continuous values.

        Args:
            vec: Vector of unit phasors
            exponent: Power to raise to

        Returns:
            Vector with phases scaled by exponent

        Example:
            >>> base = model.random()
            >>> # Encode value 2.5 using fractional power
            >>> encoded = model.fractional_power(base, 2.5)
        """
        # For unit phasors z = exp(iθ), we want: z^α = exp(iαθ)
        # This is exact and avoids branch cuts from complex logarithms.
        #
        # Implementation:
        # 1. Extract phase θ = arg(z)
        # 2. Scale by exponent: αθ
        # 3. Create new phasor: exp(iαθ)

        # Get phase angles using backend operation
        angles = self.backend.angle(vec)

        # Scale angles by exponent
        scaled_angles = self.backend.multiply_scalar(angles, exponent)

        # Create new phasors: exp(i * scaled_angles)
        # exp(iθ) = cos(θ) + i*sin(θ)
        result = self.backend.exp(1j * scaled_angles)

        # Renormalize to unit magnitude (handle numerical errors)
        return self.normalize(result)

    def __repr__(self) -> str:
        return (f"FHRRModel(dimension={self.dimension}, "
                f"space={self.space.space_name}, "
                f"backend={self.backend.name})")

__init__(dimension=512, space=None, backend=None, seed=None)

Initialize FHRR model.

Args:
    dimension: Dimensionality of hypervectors (can be smaller than MAP due to better capacity)
    space: Vector space (defaults to ComplexSpace)
    backend: Computational backend
    seed: Random seed for space

Source code in holovec/models/fhrr.py
def __init__(
    self,
    dimension: int = 512,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None
):
    """Initialize FHRR model.

    Args:
        dimension: Dimensionality of hypervectors
                  (can be smaller than MAP due to better capacity)
        space: Vector space (defaults to ComplexSpace)
        backend: Computational backend
        seed: Random seed for space
    """
    if space is None:
        from ..backends import get_backend
        backend = backend if backend is not None else get_backend()
        space = ComplexSpace(dimension, backend=backend, seed=seed)

    super().__init__(space, backend)

bind(a, b)

Bind using element-wise complex multiplication.

For unit phasors: (a * b)[i] = a[i] * b[i]
This adds phase angles: ∠(a*b) = ∠a + ∠b

Args:
    a: First vector (unit phasors)
    b: Second vector (unit phasors)

Returns: Bound vector c = a ⊙ b (element-wise product)

Source code in holovec/models/fhrr.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using element-wise complex multiplication.

    For unit phasors: (a * b)[i] = a[i] * b[i]
    This adds phase angles: ∠(a*b) = ∠a + ∠b

    Args:
        a: First vector (unit phasors)
        b: Second vector (unit phasors)

    Returns:
        Bound vector c = a ⊙ b (element-wise product)
    """
    result = self.backend.multiply(a, b)
    # Normalize to unit magnitude
    return self.normalize(result)

bundle(vectors)

Bundle using element-wise addition.

Sum phasors and normalize back to unit magnitude. The result points in the "average" direction of inputs.

Args: vectors: Sequence of vectors to bundle

Returns: Bundled vector (normalized to unit magnitude)

Raises: ValueError: If vectors is empty

Source code in holovec/models/fhrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition.

    Sum phasors and normalize back to unit magnitude.
    The result points in the "average" direction of inputs.

    Args:
        vectors: Sequence of vectors to bundle

    Returns:
        Bundled vector (normalized to unit magnitude)

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors (phasors add vectorially)
    result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    # Normalize to unit magnitude
    return self.normalize(result)

fractional_power(vec, exponent)

Raise phasor to a fractional power.

For a unit phasor z = exp(iθ): z^α = exp(iαθ)
This is useful for encoding continuous values.

Args:
    vec: Vector of unit phasors
    exponent: Power to raise to

Returns: Vector with phases scaled by exponent

Example:

>>> base = model.random()
>>> # Encode value 2.5 using fractional power
>>> encoded = model.fractional_power(base, 2.5)
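Nearby exponents yield similar encodings, which is what makes fractional powers useful for continuous values; a hedged sketch continuing the example above:

>>> x1 = model.fractional_power(base, 2.5)
>>> x2 = model.fractional_power(base, 2.6)
>>> model.similarity(x1, x2)  # high: similarity decays smoothly as the exponent gap grows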

Source code in holovec/models/fhrr.py
def fractional_power(self, vec: Array, exponent: float) -> Array:
    """Raise phasor to a fractional power.

    For unit phasor z = exp(iθ): z^α = exp(iαθ)
    This is useful for encoding continuous values.

    Args:
        vec: Vector of unit phasors
        exponent: Power to raise to

    Returns:
        Vector with phases scaled by exponent

    Example:
        >>> base = model.random()
        >>> # Encode value 2.5 using fractional power
        >>> encoded = model.fractional_power(base, 2.5)
    """
    # For unit phasors z = exp(iθ), we want: z^α = exp(iαθ)
    # This is exact and avoids branch cuts from complex logarithms.
    #
    # Implementation:
    # 1. Extract phase θ = arg(z)
    # 2. Scale by exponent: αθ
    # 3. Create new phasor: exp(iαθ)

    # Get phase angles using backend operation
    angles = self.backend.angle(vec)

    # Scale angles by exponent
    scaled_angles = self.backend.multiply_scalar(angles, exponent)

    # Create new phasors: exp(i * scaled_angles)
    # exp(iθ) = cos(θ) + i*sin(θ)
    result = self.backend.exp(1j * scaled_angles)

    # Renormalize to unit magnitude (handle numerical errors)
    return self.normalize(result)

permute(vec, k=1)

Permute using circular shift.

For FHRR, permutation can be done as:

1. Circular shift (coordinate permutation)
2. Phase rotation (multiply by exp(i*2πk/D))

We use circular shift for consistency with other models.

Args:
    vec: Vector to permute
    k: Number of positions to shift

Returns: Permuted vector

Source code in holovec/models/fhrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    For FHRR, permutation can be done as:
    1. Circular shift (coordinate permutation)
    2. Phase rotation (multiply by exp(i*2πk/D))

    We use circular shift for consistency with other models.

    Args:
        vec: Vector to permute
        k: Number of positions to shift

    Returns:
        Permuted vector
    """
    return self.backend.roll(vec, shift=k)

unbind(a, b)

Unbind using element-wise multiplication with conjugate.

To recover the original from c = a ⊙ b:
unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a

Args:
    a: Bound vector (or first operand)
    b: Second operand

Returns: Unbound vector (exact recovery)

Source code in holovec/models/fhrr.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using element-wise multiplication with conjugate.

    To recover original from c = a ⊙ b:
    unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a

    Args:
        a: Bound vector (or first operand)
        b: Second operand

    Returns:
        Unbound vector (exact recovery)
    """
    b_conj = self.backend.conjugate(b)
    result = self.backend.multiply(a, b_conj)
    return self.normalize(result)

GHRR

holovec.models.ghrr.GHRRModel

Bases: VSAModel

GHRR (Generalized Holographic Reduced Representations) model.

Binding: element-wise matrix multiplication (phase addition per matrix)
Unbinding: element-wise multiplication with the conjugate transpose
Bundling: element-wise addition + normalization
Permutation: circular shift (or use non-commutativity instead)

Uses MatrixSpace with m×m unitary matrices.
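A minimal sketch showing both the exact inverse and the non-commutativity (values are approximate; dim=100 is the GHRR default):

>>> from holovec import VSA
>>> model = VSA.create('GHRR', dim=100, seed=0)
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> model.similarity(a, model.unbind(model.bind(a, b), b))  # ≈ 1.0: conjugate transpose inverts exactly
>>> model.test_non_commutativity(a, b)                      # well below 1.0: a ⊗ b ≠ b ⊗ a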

Source code in holovec/models/ghrr.py
class GHRRModel(VSAModel):
    """GHRR (Generalized Holographic Reduced Representations) model.

    Binding: element-wise matrix multiplication (phase addition per matrix)
    Unbinding: element-wise multiplication with conjugate transpose
    Bundling: element-wise addition + normalization
    Permutation: circular shift (or use non-commutativity instead)

    Uses MatrixSpace with m×m unitary matrices.
    """

    def __init__(
        self,
        dimension: int = 100,
        matrix_size: int = 3,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
        diagonality: float | None = None
    ):
        """Initialize GHRR model.

        Args:
            dimension: Number of matrices in hypervector (can be smaller than
                      scalar models due to better capacity)
            matrix_size: Size m of each m×m matrix (default: 3)
                        Larger m → more non-commutative, better for complex structures
                        m=1 recovers FHRR
            space: Vector space (defaults to MatrixSpace)
            backend: Computational backend
            seed: Random seed for space
            diagonality: Control commutativity in [0, 1]
                        None: Random (default)
                        0.0: Maximally non-commutative
                        1.0: Fully commutative (FHRR-like)
        """
        if space is None:
            from ..backends import get_backend
            backend = backend if backend is not None else get_backend()
            space = MatrixSpace(
                dimension,
                matrix_size=matrix_size,
                backend=backend,
                seed=seed,
                diagonality=diagonality,
            )

        super().__init__(space, backend)

        # Store matrix size for easy access
        self.matrix_size = matrix_size if isinstance(space, MatrixSpace) else 1
        self._diagonality = diagonality

    @property
    def model_name(self) -> str:
        return f"GHRR_m{self.matrix_size}"

    @property
    def is_self_inverse(self) -> bool:
        return False  # Requires conjugate transpose

    @property
    def is_commutative(self) -> bool:
        return False  # Matrix multiplication is non-commutative

    @property
    def is_exact_inverse(self) -> bool:
        return True  # Conjugate transpose provides exact inverse

    @property
    def commutativity_degree(self) -> float:
        """Degree of commutativity in [0, 1].

        For GHRR, this depends on the diagonality of Q matrices.
        More diagonal → more commutative.

        Returns:
            0.0 if maximally non-commutative, 1.0 if fully commutative
        """
        if self._diagonality is not None:
            return self._diagonality

        # For random GHRR, larger m tends toward lower diagonality
        # This is approximate based on Yeung et al. Figure 6
        if self.matrix_size == 1:
            return 1.0  # FHRR is commutative
        elif self.matrix_size == 2:
            return 0.7  # Mostly commutative
        elif self.matrix_size == 3:
            return 0.5  # Balanced
        else:
            return 0.3  # Mostly non-commutative

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using element-wise matrix multiplication.

        For matrices at position j: (a ⊗ b)_j = a_j @ b_j

        This is non-commutative: a ⊗ b ≠ b ⊗ a in general.

        Args:
            a: First hypervector (D, m, m)
            b: Second hypervector (D, m, m)

        Returns:
            Bound hypervector c where c_j = a_j @ b_j for all j
        """
        # Element-wise matrix multiplication using matmul broadcast
        # For (D, m, m) @ (D, m, m), this does D separate m×m multiplications
        result = self.backend.matmul(a, b)

        # Normalization not strictly needed for unitary matrices
        # but helps with numerical stability
        return result

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using element-wise multiplication with conjugate transpose.

        To recover original from c = a ⊗ b:
        unbind(c, b) = c_j @ b_j† for all j

        This provides exact recovery: unbind(bind(a, b), b) = a

        Args:
            a: Bound hypervector (or first operand)
            b: Second operand

        Returns:
            Unbound hypervector (exact recovery)
        """
        # Compute b^† (conjugate transpose of each matrix)
        b_conj_t = self.backend.conjugate(self.backend.matrix_transpose(b))

        # Element-wise matrix multiply
        result = self.backend.matmul(a, b_conj_t)

        return result

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition.

        Sum all hypervectors element-wise. Each element is an m×m matrix.

        For GHRR: (a + b)_j = a_j + b_j (matrix addition)

        Args:
            vectors: Sequence of hypervectors to bundle

        Returns:
            Bundled hypervector

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors (element-wise matrix addition)
        result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        # Normalize to project back to unitary matrices
        # This is critical for maintaining quasi-orthogonality (Yeung et al. 2024)
        # Uses polar decomposition via SVD
        result = self.space.normalize(result)

        return result

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        For GHRR, permutation is less critical since non-commutativity
        can encode order. But still useful for some applications.

        Args:
            vec: Hypervector to permute (D, m, m)
            k: Number of positions to shift

        Returns:
            Permuted hypervector
        """
        # Roll along first dimension (shift which matrix is at which position)
        return self.backend.roll(vec, shift=k, axis=0)

    def test_non_commutativity(self, a: Array, b: Array) -> float:
        """Test degree of non-commutativity for two hypervectors.

        Computes: δ(a ⊗ b, b ⊗ a)

        A similarity of 1.0 means commutative, close to 0 means non-commutative.

        Args:
            a: First hypervector
            b: Second hypervector

        Returns:
            Similarity between a⊗b and b⊗a
        """
        ab = self.bind(a, b)
        ba = self.bind(b, a)
        return self.similarity(ab, ba)

    def compute_diagonality(self, vec: Array) -> float:
        """Compute average diagonality of matrices in hypervector.

        Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|

        Args:
            vec: Hypervector (D, m, m)

        Returns:
            Diagonality in [0, 1]
        """
        vec_np = self.backend.to_numpy(vec)

        D = vec_np.shape[0]
        m = vec_np.shape[1]

        total_diag = 0.0
        total_all = 0.0

        for i in range(D):
            matrix = vec_np[i]
            # Diagonal sum
            diag_sum = np.sum(np.abs(np.diag(matrix)))
            # Total sum
            all_sum = np.sum(np.abs(matrix))

            total_diag += diag_sum
            total_all += all_sum

        return total_diag / total_all if total_all > 0 else 0.0

    def __repr__(self) -> str:
        return (f"GHRRModel(dimension={self.dimension}, "
                f"matrix_size={self.matrix_size}, "
                f"space={self.space.space_name}, "
                f"backend={self.backend.name})")

commutativity_degree property

Degree of commutativity in [0, 1].

For GHRR, this depends on the diagonality of Q matrices. More diagonal → more commutative.

Returns: 0.0 if maximally non-commutative, 1.0 if fully commutative

__init__(dimension=100, matrix_size=3, space=None, backend=None, seed=None, diagonality=None)

Initialize GHRR model.

Args:
    dimension: Number of matrices in the hypervector (can be smaller than scalar models due to better capacity)
    matrix_size: Size m of each m×m matrix (default: 3). Larger m → more non-commutative, better for complex structures; m=1 recovers FHRR
    space: Vector space (defaults to MatrixSpace)
    backend: Computational backend
    seed: Random seed for space
    diagonality: Control commutativity in [0, 1]. None: random (default); 0.0: maximally non-commutative; 1.0: fully commutative (FHRR-like)

Source code in holovec/models/ghrr.py
def __init__(
    self,
    dimension: int = 100,
    matrix_size: int = 3,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None,
    diagonality: float | None = None
):
    """Initialize GHRR model.

    Args:
        dimension: Number of matrices in hypervector (can be smaller than
                  scalar models due to better capacity)
        matrix_size: Size m of each m×m matrix (default: 3)
                    Larger m → more non-commutative, better for complex structures
                    m=1 recovers FHRR
        space: Vector space (defaults to MatrixSpace)
        backend: Computational backend
        seed: Random seed for space
        diagonality: Control commutativity in [0, 1]
                    None: Random (default)
                    0.0: Maximally non-commutative
                    1.0: Fully commutative (FHRR-like)
    """
    if space is None:
        from ..backends import get_backend
        backend = backend if backend is not None else get_backend()
        space = MatrixSpace(
            dimension,
            matrix_size=matrix_size,
            backend=backend,
            seed=seed,
            diagonality=diagonality,
        )

    super().__init__(space, backend)

    # Store matrix size for easy access
    self.matrix_size = matrix_size if isinstance(space, MatrixSpace) else 1
    self._diagonality = diagonality

bind(a, b)

Bind using element-wise matrix multiplication.

For matrices at position j: (a ⊗ b)_j = a_j @ b_j

This is non-commutative: a ⊗ b ≠ b ⊗ a in general.

Args:
    a: First hypervector (D, m, m)
    b: Second hypervector (D, m, m)

Returns: Bound hypervector c where c_j = a_j @ b_j for all j

Source code in holovec/models/ghrr.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using element-wise matrix multiplication.

    For matrices at position j: (a ⊗ b)_j = a_j @ b_j

    This is non-commutative: a ⊗ b ≠ b ⊗ a in general.

    Args:
        a: First hypervector (D, m, m)
        b: Second hypervector (D, m, m)

    Returns:
        Bound hypervector c where c_j = a_j @ b_j for all j
    """
    # Element-wise matrix multiplication using matmul broadcast
    # For (D, m, m) @ (D, m, m), this does D separate m×m multiplications
    result = self.backend.matmul(a, b)

    # Normalization not strictly needed for unitary matrices
    # but helps with numerical stability
    return result

bundle(vectors)

Bundle using element-wise addition.

Sum all hypervectors element-wise. Each element is an m×m matrix.

For GHRR: (a + b)_j = a_j + b_j (matrix addition)

Args: vectors: Sequence of hypervectors to bundle

Returns: Bundled hypervector

Raises: ValueError: If vectors is empty

Source code in holovec/models/ghrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition.

    Sum all hypervectors element-wise. Each element is an m×m matrix.

    For GHRR: (a + b)_j = a_j + b_j (matrix addition)

    Args:
        vectors: Sequence of hypervectors to bundle

    Returns:
        Bundled hypervector

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors (element-wise matrix addition)
    result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    # Normalize to project back to unitary matrices
    # This is critical for maintaining quasi-orthogonality (Yeung et al. 2024)
    # Uses polar decomposition via SVD
    result = self.space.normalize(result)

    return result

compute_diagonality(vec)

Compute average diagonality of matrices in hypervector.

Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|

Args: vec: Hypervector (D, m, m)

Returns: Diagonality in [0, 1]
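For example (illustrative only; for random m=3 matrices the value tends to sit mid-range, consistent with the commutativity_degree heuristic above):

>>> model.compute_diagonality(model.random(seed=0))  # roughly 0.5 for random m=3 hypervectors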

Source code in holovec/models/ghrr.py
def compute_diagonality(self, vec: Array) -> float:
    """Compute average diagonality of matrices in hypervector.

    Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|

    Args:
        vec: Hypervector (D, m, m)

    Returns:
        Diagonality in [0, 1]
    """
    vec_np = self.backend.to_numpy(vec)

    D = vec_np.shape[0]
    m = vec_np.shape[1]

    total_diag = 0.0
    total_all = 0.0

    for i in range(D):
        matrix = vec_np[i]
        # Diagonal sum
        diag_sum = np.sum(np.abs(np.diag(matrix)))
        # Total sum
        all_sum = np.sum(np.abs(matrix))

        total_diag += diag_sum
        total_all += all_sum

    return total_diag / total_all if total_all > 0 else 0.0

permute(vec, k=1)

Permute using circular shift.

For GHRR, permutation is less critical since non-commutativity can encode order, but it is still useful for some applications.

Args:
    vec: Hypervector to permute (D, m, m)
    k: Number of positions to shift

Returns: Permuted hypervector

Source code in holovec/models/ghrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    For GHRR, permutation is less critical since non-commutativity
    can encode order. But still useful for some applications.

    Args:
        vec: Hypervector to permute (D, m, m)
        k: Number of positions to shift

    Returns:
        Permuted hypervector
    """
    # Roll along first dimension (shift which matrix is at which position)
    return self.backend.roll(vec, shift=k, axis=0)

test_non_commutativity(a, b)

Test degree of non-commutativity for two hypervectors.

Computes: δ(a ⊗ b, b ⊗ a)

A similarity of 1.0 means commutative, close to 0 means non-commutative.

Args:
    a: First hypervector
    b: Second hypervector

Returns: Similarity between a⊗b and b⊗a

Source code in holovec/models/ghrr.py
def test_non_commutativity(self, a: Array, b: Array) -> float:
    """Test degree of non-commutativity for two hypervectors.

    Computes: δ(a ⊗ b, b ⊗ a)

    A similarity of 1.0 means commutative, close to 0 means non-commutative.

    Args:
        a: First hypervector
        b: Second hypervector

    Returns:
        Similarity between a⊗b and b⊗a
    """
    ab = self.bind(a, b)
    ba = self.bind(b, a)
    return self.similarity(ab, ba)

unbind(a, b)

Unbind using element-wise multiplication with conjugate transpose.

To recover original from c = a ⊗ b: unbind(c, b) = c_j @ b_j† for all j

This provides exact recovery: unbind(bind(a, b), b) = a

Args:
    a: Bound hypervector (or first operand)
    b: Second operand

Returns: Unbound hypervector (exact recovery)

Source code in holovec/models/ghrr.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using element-wise multiplication with conjugate transpose.

    To recover original from c = a ⊗ b:
    unbind(c, b) = c_j @ b_j† for all j

    This provides exact recovery: unbind(bind(a, b), b) = a

    Args:
        a: Bound hypervector (or first operand)
        b: Second operand

    Returns:
        Unbound hypervector (exact recovery)
    """
    # Compute b^† (conjugate transpose of each matrix)
    b_conj_t = self.backend.conjugate(self.backend.matrix_transpose(b))

    # Element-wise matrix multiply
    result = self.backend.matmul(a, b_conj_t)

    return result

MAP

holovec.models.map.MAPModel

Bases: VSAModel

MAP (Multiply-Add-Permute) model.

Binding: element-wise multiplication
Unbinding: element-wise multiplication (self-inverse)
Bundling: element-wise addition + normalization
Permutation: circular shift

Best used with BipolarSpace or RealSpace.
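A minimal sketch of MAP's self-inverse binding (exact on the default bipolar space):

>>> from holovec import VSA
>>> model = VSA.create('MAP', dim=10000, seed=0)
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)
>>> model.similarity(a, model.unbind(c, b))  # 1.0 on bipolar: unbind is just bind again
>>> model.is_self_inverse
True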

Source code in holovec/models/map.py
class MAPModel(VSAModel):
    """MAP (Multiply-Add-Permute) model.

    Binding: element-wise multiplication
    Unbinding: element-wise multiplication (self-inverse)
    Bundling: element-wise addition + normalization
    Permutation: circular shift

    Best used with BipolarSpace or RealSpace.
    """

    def __init__(
        self,
        dimension: int = 10000,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
    ):
        """Initialize MAP model.

        Args:
            dimension: Dimensionality of hypervectors
            space: Vector space (defaults to BipolarSpace)
            backend: Computational backend
            seed: Random seed for space
        """
        if space is None:
            from ..backends import get_backend

            backend = backend if backend is not None else get_backend()
            space = BipolarSpace(dimension, backend=backend, seed=seed)

        super().__init__(space, backend)

        # Pre-compute permutation indices for efficiency
        self._permutation_indices = list(range(self.dimension))

    @property
    def model_name(self) -> str:
        return "MAP"

    @property
    def is_self_inverse(self) -> bool:
        return True

    @property
    def is_commutative(self) -> bool:
        return True

    @property
    def is_exact_inverse(self) -> bool:
        # Exact for bipolar, approximate for continuous
        return self.space.space_name == "bipolar"

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using element-wise multiplication.

        For bipolar: XOR when represented as {0,1}
        For real: Hadamard product

        Args:
            a: First vector
            b: Second vector

        Returns:
            Bound vector c = a ⊙ b
        """
        result = self.backend.multiply(a, b)
        # Normalize to maintain unit norm for continuous spaces
        if self.space.space_name != "bipolar":
            result = self.normalize(result)
        return result

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using element-wise multiplication (self-inverse).

        Since binding is self-inverse: unbind(c, b) = c ⊙ b

        Args:
            a: Bound vector (or first operand)
            b: Second operand

        Returns:
            Unbound vector (exact for bipolar, approximate for continuous)
        """
        # For MAP, binding = unbinding
        return self.bind(a, b)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition.

        For bipolar: majority vote after summing
        For real: sum and normalize

        Args:
            vectors: Sequence of vectors to bundle

        Returns:
            Bundled vector

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors
        result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        # Normalize according to space
        if self.space.space_name == "bipolar":
            # Majority vote: sign of sum
            result = self.backend.sign(result)
            # Handle zeros (shouldn't happen in practice, but be safe)
            # If sum is 0, randomly choose ±1
            zeros_mask = result == 0
            if self.backend.to_numpy(zeros_mask).any():
                # For any zeros, use the first vector's value
                first_vec = vectors[0]
                result = self.backend.where(zeros_mask, first_vec, result)
        else:
            # For continuous spaces, L2 normalize
            result = self.normalize(result)

        return result

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        Shifts vector elements by k positions to the right.
        Negative k shifts left.

        Args:
            vec: Vector to permute
            k: Number of positions to shift

        Returns:
            Permuted vector
        """
        return self.backend.roll(vec, shift=k)

    def __repr__(self) -> str:
        return (
            f"MAPModel(dimension={self.dimension}, "
            f"space={self.space.space_name}, "
            f"backend={self.backend.name})"
        )

__init__(dimension=10000, space=None, backend=None, seed=None)

Initialize MAP model.

Args:
  • dimension: Dimensionality of hypervectors
  • space: Vector space (defaults to BipolarSpace)
  • backend: Computational backend
  • seed: Random seed for space

Source code in holovec/models/map.py
def __init__(
    self,
    dimension: int = 10000,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None,
):
    """Initialize MAP model.

    Args:
        dimension: Dimensionality of hypervectors
        space: Vector space (defaults to BipolarSpace)
        backend: Computational backend
        seed: Random seed for space
    """
    if space is None:
        from ..backends import get_backend

        backend = backend if backend is not None else get_backend()
        space = BipolarSpace(dimension, backend=backend, seed=seed)

    super().__init__(space, backend)

    # Pre-compute permutation indices for efficiency
    self._permutation_indices = list(range(self.dimension))

bind(a, b)

Bind using element-wise multiplication.

For bipolar: XOR when represented as {0, 1}
For real: Hadamard product

Args:
  • a: First vector
  • b: Second vector

Returns: Bound vector c = a ⊙ b

Source code in holovec/models/map.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using element-wise multiplication.

    For bipolar: XOR when represented as {0,1}
    For real: Hadamard product

    Args:
        a: First vector
        b: Second vector

    Returns:
        Bound vector c = a ⊙ b
    """
    result = self.backend.multiply(a, b)
    # Normalize to maintain unit norm for continuous spaces
    if self.space.space_name != "bipolar":
        result = self.normalize(result)
    return result

bundle(vectors)

Bundle using element-wise addition.

For bipolar: majority vote after summing
For real: sum and normalize

Args: vectors: Sequence of vectors to bundle

Returns: Bundled vector

Raises: ValueError: If vectors is empty

Source code in holovec/models/map.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition.

    For bipolar: majority vote after summing
    For real: sum and normalize

    Args:
        vectors: Sequence of vectors to bundle

    Returns:
        Bundled vector

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors
    result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    # Normalize according to space
    if self.space.space_name == "bipolar":
        # Majority vote: sign of sum
        result = self.backend.sign(result)
        # Handle zeros (shouldn't happen in practice, but be safe)
        # If sum is 0, randomly choose ±1
        zeros_mask = result == 0
        if self.backend.to_numpy(zeros_mask).any():
            # For any zeros, use the first vector's value
            first_vec = vectors[0]
            result = self.backend.where(zeros_mask, first_vec, result)
    else:
        # For continuous spaces, L2 normalize
        result = self.normalize(result)

    return result
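
A minimal sketch of the majority vote (not from the source; seeds are arbitrary, and the inequality holds with overwhelming probability at the default dimension):

>>> model = VSA.create('MAP')
>>> vecs = [model.random(seed=i) for i in range(3)]
>>> s = model.bundle(vecs)
>>> # The bundle stays similar to each component but not to unrelated vectors
>>> model.similarity(s, vecs[0]) > model.similarity(s, model.random(seed=99))
True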

permute(vec, k=1)

Permute using circular shift.

Shifts vector elements by k positions to the right. Negative k shifts left.

Args:
  • vec: Vector to permute
  • k: Number of positions to shift

Returns: Permuted vector

Source code in holovec/models/map.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    Shifts vector elements by k positions to the right.
    Negative k shifts left.

    Args:
        vec: Vector to permute
        k: Number of positions to shift

    Returns:
        Permuted vector
    """
    return self.backend.roll(vec, shift=k)

unbind(a, b)

Unbind using element-wise multiplication (self-inverse).

Since binding is self-inverse: unbind(c, b) = c ⊙ b

Args:
  • a: Bound vector (or first operand)
  • b: Second operand

Returns: Unbound vector (exact for bipolar, approximate for continuous)

Source code in holovec/models/map.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using element-wise multiplication (self-inverse).

    Since binding is self-inverse: unbind(c, b) = c ⊙ b

    Args:
        a: Bound vector (or first operand)
        b: Second operand

    Returns:
        Unbound vector (exact for bipolar, approximate for continuous)
    """
    # For MAP, binding = unbinding
    return self.bind(a, b)
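
A self-inverse recovery sketch (not from the source; seeds are arbitrary, and exact recovery is the documented property for the default bipolar space):

>>> model = VSA.create('MAP')
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)
>>> print(f"{model.similarity(a, model.unbind(c, b)):.3f}")  # 1.000 for bipolar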

HRR

holovec.models.hrr.HRRModel

Bases: VSAModel

HRR (Holographic Reduced Representations) model.

Binding: circular convolution (via FFT)
Unbinding: circular correlation (via FFT)
Bundling: element-wise addition + normalization
Permutation: circular shift

Uses RealSpace with Gaussian distribution N(0, 1/D).

Source code in holovec/models/hrr.py
class HRRModel(VSAModel):
    """HRR (Holographic Reduced Representations) model.

    Binding: circular convolution (via FFT)
    Unbinding: circular correlation (via FFT)
    Bundling: element-wise addition + normalization
    Permutation: circular shift

    Uses RealSpace with Gaussian distribution N(0, 1/D).
    """

    def __init__(
        self,
        dimension: int = 10000,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
    ):
        """Initialize HRR model.

        Args:
            dimension: Dimensionality of hypervectors (recommend 1000-10000)
            space: Vector space (defaults to RealSpace)
            backend: Computational backend
            seed: Random seed for space
        """
        if space is None:
            from ..backends import get_backend

            backend = backend if backend is not None else get_backend()
            space = RealSpace(dimension, backend=backend, seed=seed)

        super().__init__(space, backend)

    @property
    def model_name(self) -> str:
        return "HRR"

    @property
    def is_self_inverse(self) -> bool:
        return False  # Requires correlation, not same operation

    @property
    def is_commutative(self) -> bool:
        return True  # Convolution is commutative

    @property
    def is_exact_inverse(self) -> bool:
        return False  # Correlation gives approximate inverse

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using circular convolution.

        Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))

        Args:
            a: First vector
            b: Second vector

        Returns:
            Bound vector c = a ⊛ b (circular convolution)
        """
        # Circular convolution in frequency domain
        result = self.backend.circular_convolve(a, b)

        # Do NOT normalize - preserves magnitude for proper unbinding via
        # circular correlation. Normalization would interfere with the
        # mathematical relationship required for unbind recovery.
        return result

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using circular correlation (approximate inverse of convolution).

        This is the classic HRR unbinding operation that uses circular correlation
        to approximately recover the original vector from a bound pair.

        Args:
            a: Bound vector c = x ⊛ b (result of circular convolution)
            b: Key vector (second operand in binding)

        Returns:
            Approximate recovery of x (original vector), normalized to unit length

        Notes
        -----
        **Mathematical Foundation:**

        HRR binding via circular convolution:
            c = x ⊛ b

        In frequency domain (Fourier):
            C(ω) = X(ω) · B(ω)

        Unbinding via circular correlation:
            x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))

        Where B*(ω) is the complex conjugate of B(ω).

        Substituting C(ω) = X(ω) · B(ω):
            x̂ = IFFT(X(ω) · B(ω) · B*(ω))
              = IFFT(X(ω) · |B(ω)|²)

        For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1),
        this gives x̂ ≈ x.

        **Approximation Quality:**

        Recovery similarity depends on:
        - Dimension D: Higher D → better recovery
        - Noise level: Clean binding → better unbind
        - Bundle size: More items → more interference

        Empirical performance (D=10000):
        - Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
        - After bundling 2 items: similarity ≈ 0.57
        - After bundling 10 items: similarity ≈ 0.30
        - After bundling 100 items: similarity decreases further

        Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular
        correlation provides only approximate recovery. The ~0.71 similarity
        is sufficient for retrieval tasks but requires cleanup/thresholding.

        References
        ----------
        - Plate (1995): "Holographic Reduced Representations"
        - Plate (2003): "Holographic Reduced Representations" (full book)

        Examples
        --------
        >>> model = VSA.create('HRR', dim=10000)
        >>> x = model.random(seed=1)
        >>> b = model.random(seed=2)
        >>> c = model.bind(x, b)
        >>> x_recovered = model.unbind(c, b)
        >>> similarity = model.similarity(x, x_recovered)
        >>> print(f"Recovery similarity: {similarity:.3f}")  # ~0.71
        """
        # Transform to frequency domain
        fa = self.backend.fft(a)
        fb = self.backend.fft(b)

        # Circular correlation: C(ω) * conj(B(ω))
        # This is the classic HRR unbinding operation (Plate, 1995)
        fr = self.backend.multiply(fa, self.backend.conjugate(fb))

        # Transform back to time domain
        time = self.backend.ifft(fr)

        # Take real part (imaginary part should be near zero due to real inputs)
        result = self.backend.real(time)

        # Normalize to unit length for consistent comparison with other vectors
        return self.normalize(result)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition (superposition).

        For HRR, bundling is simple vector addition without normalization.
        This preserves the magnitude relationships needed for proper unbinding.

        Args:
            vectors: Sequence of vectors to bundle

        Returns:
            Bundled vector (unnormalized sum)

        Raises:
            ValueError: If vectors is empty

        Notes:
            Unlike some VSA models, HRR does NOT normalize after bundling.
            Normalization would interfere with the circular correlation unbinding
            operation. The unbind() method handles normalization of its output.
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors (simple superposition, no normalization)
        result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        return result

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        Shifts vector elements by k positions to the right.
        Negative k shifts left.

        Args:
            vec: Vector to permute
            k: Number of positions to shift

        Returns:
            Permuted vector
        """
        return self.backend.roll(vec, shift=k)

    def __repr__(self) -> str:
        return (
            f"HRRModel(dimension={self.dimension}, "
            f"space={self.space.space_name}, "
            f"backend={self.backend.name})"
        )

__init__(dimension=10000, space=None, backend=None, seed=None)

Initialize HRR model.

Args:
  • dimension: Dimensionality of hypervectors (recommend 1000-10000)
  • space: Vector space (defaults to RealSpace)
  • backend: Computational backend
  • seed: Random seed for space

Source code in holovec/models/hrr.py
def __init__(
    self,
    dimension: int = 10000,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None,
):
    """Initialize HRR model.

    Args:
        dimension: Dimensionality of hypervectors (recommend 1000-10000)
        space: Vector space (defaults to RealSpace)
        backend: Computational backend
        seed: Random seed for space
    """
    if space is None:
        from ..backends import get_backend

        backend = backend if backend is not None else get_backend()
        space = RealSpace(dimension, backend=backend, seed=seed)

    super().__init__(space, backend)

bind(a, b)

Bind using circular convolution.

Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))

Args:
  • a: First vector
  • b: Second vector

Returns: Bound vector c = a ⊛ b (circular convolution)

Source code in holovec/models/hrr.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using circular convolution.

    Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))

    Args:
        a: First vector
        b: Second vector

    Returns:
        Bound vector c = a ⊛ b (circular convolution)
    """
    # Circular convolution in frequency domain
    result = self.backend.circular_convolve(a, b)

    # Do NOT normalize - preserves magnitude for proper unbinding via
    # circular correlation. Normalization would interfere with the
    # mathematical relationship required for unbind recovery.
    return result

bundle(vectors)

Bundle using element-wise addition (superposition).

For HRR, bundling is simple vector addition without normalization. This preserves the magnitude relationships needed for proper unbinding.

Args: vectors: Sequence of vectors to bundle

Returns: Bundled vector (unnormalized sum)

Raises: ValueError: If vectors is empty

Notes: Unlike some VSA models, HRR does NOT normalize after bundling. Normalization would interfere with the circular correlation unbinding operation. The unbind() method handles normalization of its output.

Source code in holovec/models/hrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition (superposition).

    For HRR, bundling is simple vector addition without normalization.
    This preserves the magnitude relationships needed for proper unbinding.

    Args:
        vectors: Sequence of vectors to bundle

    Returns:
        Bundled vector (unnormalized sum)

    Raises:
        ValueError: If vectors is empty

    Notes:
        Unlike some VSA models, HRR does NOT normalize after bundling.
        Normalization would interfere with the circular correlation unbinding
        operation. The unbind() method handles normalization of its output.
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors (simple superposition, no normalization)
    result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    return result

permute(vec, k=1)

Permute using circular shift.

Shifts vector elements by k positions to the right. Negative k shifts left.

Args:
  • vec: Vector to permute
  • k: Number of positions to shift

Returns: Permuted vector

Source code in holovec/models/hrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    Shifts vector elements by k positions to the right.
    Negative k shifts left.

    Args:
        vec: Vector to permute
        k: Number of positions to shift

    Returns:
        Permuted vector
    """
    return self.backend.roll(vec, shift=k)
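
A small invertibility sketch (not from the source; the seed and shift amount are arbitrary):

>>> model = VSA.create('HRR')
>>> v = model.random(seed=0)
>>> v_back = model.permute(model.permute(v, k=3), k=-3)  # shift right, then left
>>> print(f"{model.similarity(v, v_back):.3f}")  # 1.000 (circular shift is exactly invertible)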

unbind(a, b)

Unbind using circular correlation (approximate inverse of convolution).

This is the classic HRR unbinding operation that uses circular correlation to approximately recover the original vector from a bound pair.

Args:
  • a: Bound vector c = x ⊛ b (result of circular convolution)
  • b: Key vector (second operand in binding)

Returns: Approximate recovery of x (original vector), normalized to unit length

Notes

Mathematical Foundation:

HRR binding via circular convolution: c = x ⊛ b

In frequency domain (Fourier): C(ω) = X(ω) · B(ω)

Unbinding via circular correlation: x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))

Where B*(ω) is the complex conjugate of B(ω).

Substituting C(ω) = X(ω) · B(ω):
    x̂ = IFFT(X(ω) · B(ω) · B*(ω)) = IFFT(X(ω) · |B(ω)|²)

For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1), this gives x̂ ≈ x.

Approximation Quality:

Recovery similarity depends on:
  • Dimension D: Higher D → better recovery
  • Noise level: Clean binding → better unbind
  • Bundle size: More items → more interference

Empirical performance (D=10000):
  • Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
  • After bundling 2 items: similarity ≈ 0.57
  • After bundling 10 items: similarity ≈ 0.30
  • After bundling 100 items: similarity decreases further

Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular correlation provides only approximate recovery. The ~0.71 similarity is sufficient for retrieval tasks but requires cleanup/thresholding.

References
  • Plate (1995): "Holographic Reduced Representations"
  • Plate (2003): "Holographic Reduced Representations" (full book)

Examples:

>>> model = VSA.create('HRR', dim=10000)
>>> x = model.random(seed=1)
>>> b = model.random(seed=2)
>>> c = model.bind(x, b)
>>> x_recovered = model.unbind(c, b)
>>> similarity = model.similarity(x, x_recovered)
>>> print(f"Recovery similarity: {similarity:.3f}")  # ~0.71

Source code in holovec/models/hrr.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using circular correlation (approximate inverse of convolution).

    This is the classic HRR unbinding operation that uses circular correlation
    to approximately recover the original vector from a bound pair.

    Args:
        a: Bound vector c = x ⊛ b (result of circular convolution)
        b: Key vector (second operand in binding)

    Returns:
        Approximate recovery of x (original vector), normalized to unit length

    Notes
    -----
    **Mathematical Foundation:**

    HRR binding via circular convolution:
        c = x ⊛ b

    In frequency domain (Fourier):
        C(ω) = X(ω) · B(ω)

    Unbinding via circular correlation:
        x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))

    Where B*(ω) is the complex conjugate of B(ω).

    Substituting C(ω) = X(ω) · B(ω):
        x̂ = IFFT(X(ω) · B(ω) · B*(ω))
          = IFFT(X(ω) · |B(ω)|²)

    For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1),
    this gives x̂ ≈ x.

    **Approximation Quality:**

    Recovery similarity depends on:
    - Dimension D: Higher D → better recovery
    - Noise level: Clean binding → better unbind
    - Bundle size: More items → more interference

    Empirical performance (D=10000):
    - Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
    - After bundling 2 items: similarity ≈ 0.57
    - After bundling 10 items: similarity ≈ 0.30
    - After bundling 100 items: similarity decreases further

    Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular
    correlation provides only approximate recovery. The ~0.71 similarity
    is sufficient for retrieval tasks but requires cleanup/thresholding.

    References
    ----------
    - Plate (1995): "Holographic Reduced Representations"
    - Plate (2003): "Holographic Reduced Representations" (full book)

    Examples
    --------
    >>> model = VSA.create('HRR', dim=10000)
    >>> x = model.random(seed=1)
    >>> b = model.random(seed=2)
    >>> c = model.bind(x, b)
    >>> x_recovered = model.unbind(c, b)
    >>> similarity = model.similarity(x, x_recovered)
    >>> print(f"Recovery similarity: {similarity:.3f}")  # ~0.71
    """
    # Transform to frequency domain
    fa = self.backend.fft(a)
    fb = self.backend.fft(b)

    # Circular correlation: C(ω) * conj(B(ω))
    # This is the classic HRR unbinding operation (Plate, 1995)
    fr = self.backend.multiply(fa, self.backend.conjugate(fb))

    # Transform back to time domain
    time = self.backend.ifft(fr)

    # Take real part (imaginary part should be near zero due to real inputs)
    result = self.backend.real(time)

    # Normalize to unit length for consistent comparison with other vectors
    return self.normalize(result)

VTB

holovec.models.vtb.VTBModel

Bases: VSAModel

VTB (Vector-derived Transformation Binding) model.

Binding (MBAT-style): c = Σ_k w_k(a) · roll(b, s_k)
Unbinding (approximate): b̂ = Σ_k w_k(a) · roll(c, -s_k)
Bundling: element-wise addition + normalization
Permutation: circular shift

Uses RealSpace with L2-normalized real-valued vectors.

Source code in holovec/models/vtb.py
class VTBModel(VSAModel):
    """VTB (Vector-derived Transformation Binding) model.

    Binding (MBAT-style): c = Σ_k w_k(a) · roll(b, s_k)
    Unbinding (approximate): b̂ = Σ_k w_k(a) · roll(c, -s_k)
    Bundling: element-wise addition + normalization
    Permutation: circular shift

    Uses RealSpace with L2-normalized real-valued vectors.
    """

    def __init__(
        self,
        dimension: int = 10000,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
        n_bases: int = 4,
        shifts: list[int] | None = None,
        temperature: float = 100.0,
    ):
        """Initialize VTB model.

        Args:
            dimension: Dimensionality of hypervectors
            space: Vector space (defaults to RealSpace)
            backend: Computational backend
            seed: Random seed for space
            n_bases: Number of basis transforms K (must be >= 2)
            shifts: Optional list of K circular shifts; defaults to distinct
                shifts spread across the dimension
            temperature: Softmax temperature τ for the basis weights
        """
        if space is None:
            from ..backends import get_backend
            backend = backend if backend is not None else get_backend()
            space = RealSpace(dimension, backend=backend, seed=seed)

        super().__init__(space, backend)

        # MBAT parameters
        self.n_bases = int(n_bases)
        if self.n_bases < 2:
            raise ValueError("n_bases must be >= 2")
        self.temperature = float(temperature)
        if self.temperature <= 0:
            self.temperature = 1.0

        # Basis transformations: use integer circular shifts as R_k
        if shifts is None:
            # choose distinct small shifts spread across dimension
            step = max(1, self.dimension // (self.n_bases + 1))
            self.shifts = [((i + 1) * step) % self.dimension for i in range(self.n_bases)]
            # ensure non-zero and unique
            self.shifts = [s if s != 0 else 1 for s in self.shifts]
            self.shifts = list(dict.fromkeys(self.shifts))
            while len(self.shifts) < self.n_bases:
                # fill with incremental shifts
                self.shifts.append((self.shifts[-1] + 1) % self.dimension)
        else:
            if len(shifts) != self.n_bases:
                raise ValueError("len(shifts) must equal n_bases")
            self.shifts = [int(s) % self.dimension for s in shifts]

        # Code vectors U_k to produce weights w_k(a) = softmax(τ · <a, U_k>)
        # Stack shape (K, D)
        self._U = self.backend.stack([
            self.backend.normalize(self.backend.random_normal(self.dimension, seed=(seed or 0) + k))
            for k in range(self.n_bases)
        ], axis=0)

    @property
    def model_name(self) -> str:
        return "VTB"

    @property
    def is_self_inverse(self) -> bool:
        return False

    @property
    def is_commutative(self) -> bool:
        return False

    @property
    def is_exact_inverse(self) -> bool:
        return False

    def _weights(self, a: Array) -> Array:
        """Compute softmax weights over bases from vector a.

        w_k(a) = softmax(τ · <a, U_k>)
        Returns shape (K,)
        """
        # scores: (K,)
        scores = []
        for k in range(self.n_bases):
            uk = self._U[k]
            scores.append(self.backend.dot(a, uk))
        scores = self.backend.stack(scores, axis=0)
        # scale by temperature then softmax
        scaled = self.backend.multiply_scalar(scores, self.temperature)
        return self.backend.softmax(scaled, axis=0)

    def _vector_to_circulant(self, vec: Array) -> Array:
        """Convert vector to circulant matrix.

        A circulant matrix is a special matrix where each row is a circular
        shift of the previous row. For vector [a, b, c]:
            [[a, b, c],
             [c, a, b],
             [b, c, a]]

        This construction enables efficient matrix-vector multiplication
        via circular convolution in the frequency domain.

        Args:
            vec: Vector of shape (D,)

        Returns:
            Circulant matrix of shape (D, D)
        """
        vec_np = self.backend.to_numpy(vec)
        D = len(vec_np)

        # Build circulant matrix: each row is a circular shift
        matrix = np.zeros((D, D), dtype=vec_np.dtype)
        for i in range(D):
            matrix[i] = np.roll(vec_np, i)

        return self.backend.from_numpy(matrix)

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using MBAT-style weighted basis transforms.

        c = Σ_k w_k(a) · roll(b, s_k)
        """
        # Derive transform from a to act on b
        w = self._weights(a)  # (K,)
        # accumulate weighted shifts
        parts = []
        for k, shift in enumerate(self.shifts):
            wk = w[k]
            rb = self.backend.roll(b, shift=shift)
            parts.append(self.backend.multiply_scalar(rb, float(self.backend.to_numpy(wk))))
        result = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
        return self.normalize(result)

    def unbind(self, c: Array, b: Array) -> Array:
        """Approximate unbinding using weighted inverse transforms.

        IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b).
        You must pass the FIRST argument of bind (a) as the second argument here.

        For c = bind(a, b):
          - unbind(c, a) → recovers b (correct usage)
          - unbind(c, b) → does NOT recover a

        b̂ = Σ_k w_k(b) · roll(c, -s_k)
        """
        # Use same transform derived from b
        w = self._weights(b)
        parts = []
        for k, shift in enumerate(self.shifts):
            wk = w[k]
            rc = self.backend.roll(c, shift=-shift)
            parts.append(self.backend.multiply_scalar(rc, float(self.backend.to_numpy(wk))))
        num = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
        # Denominator as sum of squared weights (scalar)
        w_np = self.backend.to_numpy(w)
        denom = float((w_np ** 2).sum()) + 1e-8
        result = self.backend.multiply_scalar(num, 1.0 / denom)
        return self.normalize(result)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition.

        Sum all hypervectors element-wise and normalize.

        Args:
            vectors: Sequence of hypervectors to bundle

        Returns:
            Bundled hypervector

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors
        result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        # Normalize to unit length
        return self.normalize(result)

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        Shifts vector elements by k positions. Combined with binding,
        this can encode position in sequences.

        Args:
            vec: Hypervector to permute
            k: Number of positions to shift (default: 1)

        Returns:
            Permuted hypervector
        """
        return self.backend.roll(vec, shift=k)

    def test_non_commutativity(self, a: Array, b: Array) -> float:
        """Test degree of non-commutativity for two hypervectors.

        Computes: similarity(a ⊗ b, b ⊗ a)

        A similarity of 1.0 means commutative, close to 0 means non-commutative.

        Args:
            a: First hypervector
            b: Second hypervector

        Returns:
            Similarity between a⊗b and b⊗a (should be low for VTB)
        """
        ab = self.bind(a, b)
        ba = self.bind(b, a)
        return self.similarity(ab, ba)

    def bind_sequence(self, items: Sequence[Array], use_permute: bool = True) -> Array:
        """Bind a sequence of items with positional encoding.

        Two strategies:
        1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
        2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)

        Args:
            items: Sequence of hypervectors to bind
            use_permute: If True, use permutation strategy; else nested binding

        Returns:
            Sequence hypervector

        Raises:
            ValueError: If items is empty
        """
        if not items:
            raise ValueError("Cannot bind empty sequence")

        items = list(items)

        if use_permute:
            # Strategy 1: Bind each item with permuted position vector
            pos = self.random(seed=42)  # Fixed position vector
            bound_items = []

            for i, item in enumerate(items):
                permuted_pos = self.permute(pos, k=i)
                bound_items.append(self.bind(item, permuted_pos))

            return self.bundle(bound_items)
        else:
            # Strategy 2: Nested binding (naturally non-commutative)
            result = items[0]
            for item in items[1:]:
                result = self.bind(result, item)
            return result

    def __repr__(self) -> str:
        return (f"VTBModel(dimension={self.dimension}, "
                f"space={self.space.space_name}, "
                f"backend={self.backend.name})")

__init__(dimension=10000, space=None, backend=None, seed=None, n_bases=4, shifts=None, temperature=100.0)

Initialize VTB model.

Args:
  • dimension: Dimensionality of hypervectors
  • space: Vector space (defaults to RealSpace)
  • backend: Computational backend
  • seed: Random seed for space
  • n_bases: Number of basis transforms K (must be >= 2)
  • shifts: Optional list of K circular shifts; defaults to distinct shifts spread across the dimension
  • temperature: Softmax temperature τ for the basis weights

Source code in holovec/models/vtb.py
def __init__(
    self,
    dimension: int = 10000,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None,
    n_bases: int = 4,
    shifts: list[int] | None = None,
    temperature: float = 100.0,
):
    """Initialize VTB model.

    Args:
        dimension: Dimensionality of hypervectors
        space: Vector space (defaults to RealSpace)
        backend: Computational backend
        seed: Random seed for space
        n_bases: Number of basis transforms K (must be >= 2)
        shifts: Optional list of K circular shifts; defaults to distinct
            shifts spread across the dimension
        temperature: Softmax temperature τ for the basis weights
    """
    if space is None:
        from ..backends import get_backend
        backend = backend if backend is not None else get_backend()
        space = RealSpace(dimension, backend=backend, seed=seed)

    super().__init__(space, backend)

    # MBAT parameters
    self.n_bases = int(n_bases)
    if self.n_bases < 2:
        raise ValueError("n_bases must be >= 2")
    self.temperature = float(temperature)
    if self.temperature <= 0:
        self.temperature = 1.0

    # Basis transformations: use integer circular shifts as R_k
    if shifts is None:
        # choose distinct small shifts spread across dimension
        step = max(1, self.dimension // (self.n_bases + 1))
        self.shifts = [((i + 1) * step) % self.dimension for i in range(self.n_bases)]
        # ensure non-zero and unique
        self.shifts = [s if s != 0 else 1 for s in self.shifts]
        self.shifts = list(dict.fromkeys(self.shifts))
        while len(self.shifts) < self.n_bases:
            # fill with incremental shifts
            self.shifts.append((self.shifts[-1] + 1) % self.dimension)
    else:
        if len(shifts) != self.n_bases:
            raise ValueError("len(shifts) must equal n_bases")
        self.shifts = [int(s) % self.dimension for s in shifts]

    # Code vectors U_k to produce weights w_k(a) = softmax(τ · <a, U_k>)
    # Stack shape (K, D)
    self._U = self.backend.stack([
        self.backend.normalize(self.backend.random_normal(self.dimension, seed=(seed or 0) + k))
        for k in range(self.n_bases)
    ], axis=0)

bind(a, b)

Bind using MBAT-style weighted basis transforms.

c = Σ_k w_k(a) · roll(b, s_k)

Source code in holovec/models/vtb.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using MBAT-style weighted basis transforms.

    c = Σ_k w_k(a) · roll(b, s_k)
    """
    # Derive transform from a to act on b
    w = self._weights(a)  # (K,)
    # accumulate weighted shifts
    parts = []
    for k, shift in enumerate(self.shifts):
        wk = w[k]
        rb = self.backend.roll(b, shift=shift)
        parts.append(self.backend.multiply_scalar(rb, float(self.backend.to_numpy(wk))))
    result = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
    return self.normalize(result)

bind_sequence(items, use_permute=True)

Bind a sequence of items with positional encoding.

Two strategies:
  1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
  2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)

Args:
  • items: Sequence of hypervectors to bind
  • use_permute: If True, use permutation strategy; else nested binding

Returns: Sequence hypervector

Raises: ValueError: If items is empty

Source code in holovec/models/vtb.py
def bind_sequence(self, items: Sequence[Array], use_permute: bool = True) -> Array:
    """Bind a sequence of items with positional encoding.

    Two strategies:
    1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
    2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)

    Args:
        items: Sequence of hypervectors to bind
        use_permute: If True, use permutation strategy; else nested binding

    Returns:
        Sequence hypervector

    Raises:
        ValueError: If items is empty
    """
    if not items:
        raise ValueError("Cannot bind empty sequence")

    items = list(items)

    if use_permute:
        # Strategy 1: Bind each item with permuted position vector
        pos = self.random(seed=42)  # Fixed position vector
        bound_items = []

        for i, item in enumerate(items):
            permuted_pos = self.permute(pos, k=i)
            bound_items.append(self.bind(item, permuted_pos))

        return self.bundle(bound_items)
    else:
        # Strategy 2: Nested binding (naturally non-commutative)
        result = items[0]
        for item in items[1:]:
            result = self.bind(result, item)
        return result
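
A minimal sketch of both strategies (not from the source; seeds and sequence length are arbitrary):

>>> model = VSA.create('VTB')
>>> items = [model.random(seed=i) for i in range(3)]
>>> seq = model.bind_sequence(items)                        # permutation strategy
>>> nested = model.bind_sequence(items, use_permute=False)  # nested binding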

bundle(vectors)

Bundle using element-wise addition.

Sum all hypervectors element-wise and normalize.

Args: vectors: Sequence of hypervectors to bundle

Returns: Bundled hypervector

Raises: ValueError: If vectors is empty

Source code in holovec/models/vtb.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition.

    Sum all hypervectors element-wise and normalize.

    Args:
        vectors: Sequence of hypervectors to bundle

    Returns:
        Bundled hypervector

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors
    result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    # Normalize to unit length
    return self.normalize(result)

permute(vec, k=1)

Permute using circular shift.

Shifts vector elements by k positions. Combined with binding, this can encode position in sequences.

Args:
  • vec: Hypervector to permute
  • k: Number of positions to shift (default: 1)

Returns: Permuted hypervector

Source code in holovec/models/vtb.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    Shifts vector elements by k positions. Combined with binding,
    this can encode position in sequences.

    Args:
        vec: Hypervector to permute
        k: Number of positions to shift (default: 1)

    Returns:
        Permuted hypervector
    """
    return self.backend.roll(vec, shift=k)

test_non_commutativity(a, b)

Test degree of non-commutativity for two hypervectors.

Computes: similarity(a ⊗ b, b ⊗ a)

A similarity of 1.0 means commutative, close to 0 means non-commutative.

Args:
  • a: First hypervector
  • b: Second hypervector

Returns: Similarity between a⊗b and b⊗a (should be low for VTB)

Source code in holovec/models/vtb.py
def test_non_commutativity(self, a: Array, b: Array) -> float:
    """Test degree of non-commutativity for two hypervectors.

    Computes: similarity(a ⊗ b, b ⊗ a)

    A similarity of 1.0 means commutative, close to 0 means non-commutative.

    Args:
        a: First hypervector
        b: Second hypervector

    Returns:
        Similarity between a⊗b and b⊗a (should be low for VTB)
    """
    ab = self.bind(a, b)
    ba = self.bind(b, a)
    return self.similarity(ab, ba)

unbind(c, b)

Approximate unbinding using weighted inverse transforms.

IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b). You must pass the FIRST argument of bind (a) as the second argument here.

For c = bind(a, b):
  • unbind(c, a) → recovers b (correct usage)
  • unbind(c, b) → does NOT recover a

b̂ = Σ_k w_k(b) · roll(c, -s_k)

Source code in holovec/models/vtb.py
def unbind(self, c: Array, b: Array) -> Array:
    """Approximate unbinding using weighted inverse transforms.

    IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b).
    You must pass the FIRST argument of bind (a) as the second argument here.

    For c = bind(a, b):
      - unbind(c, a) → recovers b (correct usage)
      - unbind(c, b) → does NOT recover a

    b̂ = Σ_k w_k(b) · roll(c, -s_k)
    """
    # Use same transform derived from b
    w = self._weights(b)
    parts = []
    for k, shift in enumerate(self.shifts):
        wk = w[k]
        rc = self.backend.roll(c, shift=-shift)
        parts.append(self.backend.multiply_scalar(rc, float(self.backend.to_numpy(wk))))
    num = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
    # Denominator as sum of squared weights (scalar)
    w_np = self.backend.to_numpy(w)
    denom = float((w_np ** 2).sum()) + 1e-8
    result = self.backend.multiply_scalar(num, 1.0 / denom)
    return self.normalize(result)
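
A sketch of the correct argument order (not from the source; seeds are arbitrary, and the comparison against an unrelated vector is expected to hold at the default dimension):

>>> model = VSA.create('VTB')
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)
>>> b_hat = model.unbind(c, a)  # pass a, the FIRST bind operand, as the key
>>> model.similarity(b, b_hat) > model.similarity(b, model.random(seed=99))
True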

BSC

holovec.models.bsc.BSCModel

Bases: VSAModel

BSC (Binary Spatter Codes) model.

Binding: XOR
Unbinding: XOR (self-inverse)
Bundling: element-wise addition + majority vote
Permutation: circular shift

Uses BinarySpace with values in {0, 1}.

Source code in holovec/models/bsc.py
class BSCModel(VSAModel):
    """BSC (Binary Spatter Codes) model.

    Binding: XOR
    Unbinding: XOR (self-inverse)
    Bundling: element-wise addition + majority vote
    Permutation: circular shift

    Uses BinarySpace with values in {0, 1}.
    """

    def __init__(
        self,
        dimension: int = 10000,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None
    ):
        """Initialize BSC model.

        Args:
            dimension: Dimensionality of hypervectors
            space: Vector space (defaults to BinarySpace)
            backend: Computational backend
            seed: Random seed for space
        """
        if space is None:
            from ..backends import get_backend
            backend = backend if backend is not None else get_backend()
            space = BinarySpace(dimension, backend=backend, seed=seed)

        super().__init__(space, backend)

    @property
    def model_name(self) -> str:
        return "BSC"

    @property
    def is_self_inverse(self) -> bool:
        return True  # XOR is self-inverse

    @property
    def is_commutative(self) -> bool:
        return True  # XOR is commutative

    @property
    def is_exact_inverse(self) -> bool:
        return True  # XOR provides exact inverse

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using XOR.

        For binary vectors: a XOR b
        Property: a XOR b XOR b = a (self-inverse)

        Args:
            a: First vector (binary {0, 1})
            b: Second vector (binary {0, 1})

        Returns:
            Bound vector c = a XOR b
        """
        return self.backend.xor(a, b)

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using XOR (self-inverse).

        Since XOR is self-inverse: unbind(c, b) = c XOR b

        Args:
            a: Bound vector (or first operand)
            b: Second operand

        Returns:
            Unbound vector (exact recovery)
        """
        # For BSC, binding = unbinding (self-inverse)
        return self.bind(a, b)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Bundle using element-wise addition + majority vote.

        Sum all binary vectors element-wise, then threshold at n/2
        where n is the number of vectors.

        Args:
            vectors: Sequence of vectors to bundle

        Returns:
            Bundled vector (binary {0, 1})

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)
        n = len(vectors)

        # Sum all vectors (each element is 0 or 1)
        summed = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        # Majority vote: threshold at n/2
        threshold = n / 2.0
        result = self.backend.threshold(summed, threshold=threshold, above=1.0, below=0.0)

        # Ensure binary dtype
        return result.astype(self.space.dtype) if hasattr(result, 'astype') else result

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        Shifts vector elements by k positions to the right.
        Negative k shifts left.

        Args:
            vec: Vector to permute
            k: Number of positions to shift

        Returns:
            Permuted vector
        """
        return self.backend.roll(vec, shift=k)

    def to_bipolar(self, vec: Array) -> Array:
        """Convert binary {0, 1} to bipolar {-1, +1}.

        Transformation: x → 2x - 1

        Args:
            vec: Binary vector

        Returns:
            Bipolar vector
        """
        return 2 * vec - 1

    def from_bipolar(self, vec: Array) -> Array:
        """Convert bipolar {-1, +1} to binary {0, 1}.

        Transformation: x → (x + 1) / 2

        Args:
            vec: Bipolar vector

        Returns:
            Binary vector
        """
        return (vec + 1) / 2

    def __repr__(self) -> str:
        return (f"BSCModel(dimension={self.dimension}, "
                f"space={self.space.space_name}, "
                f"backend={self.backend.name})")

__init__(dimension=10000, space=None, backend=None, seed=None)

Initialize BSC model.

Args:
  • dimension: Dimensionality of hypervectors
  • space: Vector space (defaults to BinarySpace)
  • backend: Computational backend
  • seed: Random seed for space

Source code in holovec/models/bsc.py
def __init__(
    self,
    dimension: int = 10000,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None
):
    """Initialize BSC model.

    Args:
        dimension: Dimensionality of hypervectors
        space: Vector space (defaults to BinarySpace)
        backend: Computational backend
        seed: Random seed for space
    """
    if space is None:
        from ..backends import get_backend
        backend = backend if backend is not None else get_backend()
        space = BinarySpace(dimension, backend=backend, seed=seed)

    super().__init__(space, backend)

bind(a, b)

Bind using XOR.

For binary vectors: a XOR b
Property: a XOR b XOR b = a (self-inverse)

Args:
  • a: First vector (binary {0, 1})
  • b: Second vector (binary {0, 1})

Returns: Bound vector c = a XOR b

Source code in holovec/models/bsc.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using XOR.

    For binary vectors: a XOR b
    Property: a XOR b XOR b = a (self-inverse)

    Args:
        a: First vector (binary {0, 1})
        b: Second vector (binary {0, 1})

    Returns:
        Bound vector c = a XOR b
    """
    return self.backend.xor(a, b)

bundle(vectors)

Bundle using element-wise addition + majority vote.

Sum all binary vectors element-wise, then threshold at n/2 where n is the number of vectors.

Args: vectors: Sequence of vectors to bundle

Returns: Bundled vector (binary {0, 1})

Raises: ValueError: If vectors is empty

Source code in holovec/models/bsc.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Bundle using element-wise addition + majority vote.

    Sum all binary vectors element-wise, then threshold at n/2
    where n is the number of vectors.

    Args:
        vectors: Sequence of vectors to bundle

    Returns:
        Bundled vector (binary {0, 1})

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)
    n = len(vectors)

    # Sum all vectors (each element is 0 or 1)
    summed = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    # Majority vote: threshold at n/2
    threshold = n / 2.0
    result = self.backend.threshold(summed, threshold=threshold, above=1.0, below=0.0)

    # Ensure binary dtype
    return result.astype(self.space.dtype) if hasattr(result, 'astype') else result

from_bipolar(vec)

Convert bipolar {-1, +1} to binary {0, 1}.

Transformation: x → (x + 1) / 2

Args: vec: Bipolar vector

Returns: Binary vector

Source code in holovec/models/bsc.py
def from_bipolar(self, vec: Array) -> Array:
    """Convert bipolar {-1, +1} to binary {0, 1}.

    Transformation: x → (x + 1) / 2

    Args:
        vec: Bipolar vector

    Returns:
        Binary vector
    """
    return (vec + 1) / 2

permute(vec, k=1)

Permute using circular shift.

Shifts vector elements by k positions to the right. Negative k shifts left.

Args:
  • vec: Vector to permute
  • k: Number of positions to shift

Returns: Permuted vector

Source code in holovec/models/bsc.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    Shifts vector elements by k positions to the right.
    Negative k shifts left.

    Args:
        vec: Vector to permute
        k: Number of positions to shift

    Returns:
        Permuted vector
    """
    return self.backend.roll(vec, shift=k)

to_bipolar(vec)

Convert binary {0, 1} to bipolar {-1, +1}.

Transformation: x → 2x - 1

Args: vec: Binary vector

Returns: Bipolar vector

Source code in holovec/models/bsc.py
def to_bipolar(self, vec: Array) -> Array:
    """Convert binary {0, 1} to bipolar {-1, +1}.

    Transformation: x → 2x - 1

    Args:
        vec: Binary vector

    Returns:
        Bipolar vector
    """
    return 2 * vec - 1
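
A round-trip sketch between the two representations (not from the source; the values follow directly from the two transformation formulas above):

>>> model = VSA.create('BSC')
>>> v = model.random(seed=0)              # binary {0, 1}
>>> bp = model.to_bipolar(v)              # bipolar {-1, +1}
>>> restored = model.from_bipolar(bp)     # back to binary {0, 1}
>>> print(f"{model.similarity(v, restored):.3f}")  # 1.000 (lossless round trip)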

unbind(a, b)

Unbind using XOR (self-inverse).

Since XOR is self-inverse: unbind(c, b) = c XOR b

Args:
  • a: Bound vector (or first operand)
  • b: Second operand

Returns: Unbound vector (exact recovery)

Source code in holovec/models/bsc.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using XOR (self-inverse).

    Since XOR is self-inverse: unbind(c, b) = c XOR b

    Args:
        a: Bound vector (or first operand)
        b: Second operand

    Returns:
        Unbound vector (exact recovery)
    """
    # For BSC, binding = unbinding (self-inverse)
    return self.bind(a, b)
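
A minimal recovery sketch (not from the source; seeds are arbitrary, and exact recovery follows from XOR being its own inverse):

>>> model = VSA.create('BSC')
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)                  # a XOR b
>>> print(f"{model.similarity(a, model.unbind(c, b)):.3f}")  # 1.000 (exact)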

BSDC

holovec.models.bsdc.BSDCModel

Bases: VSAModel

BSDC (Binary Sparse Distributed Codes) model.

Binding: XOR (element-wise, self-inverse) or CDT (context-dependent thinning)
Unbinding: XOR (same as binding) or similarity-based (CDT)
Bundling: Majority voting with sparsity preservation
Permutation: circular shift

Uses SparseSpace with optimal sparsity p = 1/√D.

Binding Modes:
  • 'xor': Traditional XOR binding. Self-inverse, result dissimilar to inputs.
  • 'cdt': Context-Dependent Thinning (Rachkovskij 2001). Preserves both structured similarity (similar inputs → similar outputs) and unstructured similarity (result similar to its components).

Example:

>>> # Default XOR mode
>>> model = BSDCModel(dimension=10000)
>>>
>>> # CDT mode for analogical reasoning
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')

Source code in holovec/models/bsdc.py
class BSDCModel(VSAModel):
    """BSDC (Binary Sparse Distributed Codes) model.

    Binding: XOR (element-wise, self-inverse) or CDT (context-dependent thinning)
    Unbinding: XOR (same as binding) or similarity-based (CDT)
    Bundling: Majority voting with sparsity preservation
    Permutation: circular shift

    Uses SparseSpace with optimal sparsity p = 1/√D.

    Binding Modes:
        - 'xor': Traditional XOR binding. Self-inverse, result dissimilar to inputs.
        - 'cdt': Context-Dependent Thinning (Rachkovskij 2001). Preserves both
          structured similarity (similar inputs → similar outputs) and unstructured
          similarity (result similar to its components).

    Example:
        >>> # Default XOR mode
        >>> model = BSDCModel(dimension=10000)
        >>>
        >>> # CDT mode for analogical reasoning
        >>> model = BSDCModel(dimension=10000, binding_mode='cdt')
    """

    def __init__(
        self,
        dimension: int = 10000,
        sparsity: float | None = None,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
        binding_mode: str = 'xor',
    ):
        """Initialize BSDC model.

        Args:
            dimension: Dimensionality of hypervectors (typically > 1000)
            sparsity: Fraction of 1s (default: 1/√D which is optimal)
            space: Vector space (defaults to SparseSpace with optimal sparsity)
            backend: Computational backend
            seed: Random seed for space
            binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning
        """
        if binding_mode not in ('xor', 'cdt'):
            raise ValueError(f"binding_mode must be 'xor' or 'cdt', got '{binding_mode}'")

        if space is None:
            from ..backends import get_backend
            backend = backend if backend is not None else get_backend()
            space = SparseSpace(dimension, sparsity=sparsity, backend=backend, seed=seed)

        super().__init__(space, backend)

        self.binding_mode = binding_mode
        self._seed = seed

        # Store sparsity for easy access
        if isinstance(space, SparseSpace):
            self.sparsity = space.sparsity
        else:
            # Fallback if using non-sparse space
            import math
            self.sparsity = sparsity if sparsity is not None else 1.0 / math.sqrt(dimension)

        # Pre-generate permutation patterns for CDT
        if binding_mode == 'cdt':
            self._cdt_permutations = self._generate_cdt_permutations()

    @property
    def model_name(self) -> str:
        return "BSDC"

    @property
    def is_self_inverse(self) -> bool:
        return self.binding_mode == 'xor'  # Only XOR is self-inverse

    @property
    def is_commutative(self) -> bool:
        return True  # Both XOR and CDT are commutative

    @property
    def is_exact_inverse(self) -> bool:
        return self.binding_mode == 'xor'  # Only XOR has exact inverse

    def _generate_cdt_permutations(self, n_permutations: int = 20) -> list:
        """Generate fixed permutation patterns for CDT thinning.

        Args:
            n_permutations: Number of permutation patterns to generate

        Returns:
            List of permutation index arrays
        """
        rng = np.random.default_rng(self._seed if self._seed is not None else 42)
        return [rng.permutation(self.dimension) for _ in range(n_permutations)]

    def _compute_thinning_iterations(
        self,
        n_components: int,
        current_density: float,
    ) -> int:
        """Compute K iterations needed to reach target sparsity.

        The CDT algorithm thins a superposition by applying permuted self-conjunction.
        After OR of S components: p(Z) ≈ 1 - (1-p)^S ≈ p*S (for small p)
        We need K iterations to reduce back to target sparsity.

        Args:
            n_components: Number of components in superposition
            current_density: Current density after OR superposition

        Returns:
            Number of thinning iterations K
        """
        import math

        if current_density <= self.sparsity:
            return 0

        # From Rachkovskij 2001:
        # p(Z ∧ Z^~) ≈ p(Z)^2 for random permutations
        # After K iterations with OR of permutations:
        # Expected density ≈ current_density * (density of OR of K permuted copies)
        # We want: current_density * OR_density ≈ target_sparsity

        # Simplified: K ≈ target_sparsity / current_density^2
        K = max(1, int(math.ceil(self.sparsity / (current_density ** 2))))
        return min(K, len(self._cdt_permutations))

    def context_dependent_thinning(
        self,
        components: Sequence[Array],
    ) -> Array:
        """Bind components using context-dependent thinning (CDT).

        Algorithm (Rachkovskij 2001):
            1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
            2. Thin via permuted self-conjunction:
               ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))

        Properties:
            - Preserves unstructured similarity: result is similar to each component
            - Preserves structured similarity: similar inputs → similar outputs
            - Maintains target sparsity automatically

        Args:
            components: Sequence of hypervectors to bind together

        Returns:
            Bound hypervector with preserved similarity to components

        Example:
            >>> model = BSDCModel(dimension=10000, binding_mode='cdt')
            >>> a, b, c = model.random(), model.random(), model.random()
            >>> bound = model.context_dependent_thinning([a, b, c])
            >>> # bound is similar to a, b, and c (unstructured similarity)
        """
        if not components:
            raise ValueError("Cannot bind empty sequence")

        components = list(components)

        if len(components) == 1:
            return components[0].copy() if hasattr(components[0], 'copy') else components[0]

        # Convert to numpy for efficient logical operations
        components_np = [self.backend.to_numpy(c) for c in components]

        # Step 1: Superpose via OR
        z = components_np[0].astype(bool)
        for c in components_np[1:]:
            z = np.logical_or(z, c.astype(bool))

        # Step 2: Compute required thinning iterations
        current_density = float(np.sum(z)) / self.dimension
        K = self._compute_thinning_iterations(len(components), current_density)

        if K == 0:
            # Already at or below target sparsity
            result = z.astype(np.int32)
            return self.backend.from_numpy(result)

        # Step 3: Thin via permuted self-conjunction
        # ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
        permuted_or = np.zeros(self.dimension, dtype=bool)
        for k in range(K):
            perm_idx = k % len(self._cdt_permutations)
            z_permuted = z[self._cdt_permutations[perm_idx]]
            permuted_or = np.logical_or(permuted_or, z_permuted)

        result = np.logical_and(z, permuted_or).astype(np.int32)
        return self.backend.from_numpy(result)

    def bind(self, a: Array, b: Array) -> Array:
        """Bind two hypervectors.

        Behavior depends on binding_mode:
        - 'xor': XOR binding (self-inverse, result dissimilar to inputs)
        - 'cdt': Context-dependent thinning (preserves similarity to inputs)

        For XOR mode:
            - Preserves sparsity on average: p(1-p) + (1-p)p = 2p(1-p)
            - For optimal p = 1/√D, result sparsity ≈ 2/√D

        For CDT mode:
            - Result is similar to both a and b (unstructured similarity)
            - Similar inputs produce similar outputs (structured similarity)

        Args:
            a: First hypervector
            b: Second hypervector

        Returns:
            Bound hypervector
        """
        if self.binding_mode == 'cdt':
            return self.context_dependent_thinning([a, b])
        else:
            # XOR binding (default)
            return self.backend.xor(a, b)

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind to recover value.

        Behavior depends on binding_mode:
        - 'xor': XOR is self-inverse, exact recovery: unbind(bind(a, b), b) = a
        - 'cdt': No inverse exists; returns the bound vector itself since it's
          already similar to the components (use similarity search for retrieval)

        Args:
            a: Bound hypervector (or first operand)
            b: Second operand (key for XOR mode, ignored for CDT mode)

        Returns:
            For XOR: Exact unbound hypervector
            For CDT: The bound vector (use similarity search to find components)
        """
        if self.binding_mode == 'cdt':
            # CDT doesn't have an inverse operation
            # The bound vector is already similar to its components,
            # so return it for similarity-based retrieval
            return a
        else:
            # XOR is self-inverse
            return self.backend.xor(a, b)

    def bundle(self, vectors: Sequence[Array], maintain_sparsity: bool = True) -> Array:
        """Bundle using majority voting.

        For sparse codes, bundling requires careful handling to maintain sparsity:
        1. Sum all vectors element-wise
        2. Apply threshold to get binary result
        3. Optionally re-sparsify to maintain target sparsity

        Args:
            vectors: Sequence of hypervectors to bundle
            maintain_sparsity: If True, enforce target sparsity (default: True)

        Returns:
            Bundled hypervector

        Raises:
            ValueError: If vectors is empty
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")

        vectors = list(vectors)

        # Sum all vectors (counts how many 1s at each position)
        sum_result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

        if maintain_sparsity:
            # Strategy: Take top-k positions with highest counts
            # where k ≈ sparsity * dimension
            sum_np = self.backend.to_numpy(sum_result)
            target_ones = int(self.sparsity * self.dimension)

            # Get indices of top-k values
            if target_ones > 0:
                # Use np.partition for efficiency (O(n) instead of O(n log n))
                threshold_idx = max(0, len(sum_np) - target_ones)
                threshold = np.partition(sum_np, threshold_idx)[threshold_idx]

                # Set positions >= threshold to 1, rest to 0
                result_np = (sum_np >= threshold).astype(np.int32)

                # If we have ties at the threshold, we might have slightly more
                # than target_ones. This is acceptable for maintaining sparsity.
                return self.backend.from_numpy(result_np)
            else:
                # No ones in result (edge case)
                return self.backend.zeros(self.dimension, dtype='int32')
        else:
            # Simple majority voting: threshold at N/2
            threshold = len(vectors) / 2.0
            result = self.backend.threshold(sum_result, threshold=threshold, above=1.0, below=0.0)
            return result.astype('int32')

    def permute(self, vec: Array, k: int = 1) -> Array:
        """Permute using circular shift.

        Shifts vector elements by k positions. For sparse codes,
        this maintains sparsity perfectly.

        Args:
            vec: Hypervector to permute
            k: Number of positions to shift (default: 1)

        Returns:
            Permuted hypervector
        """
        return self.backend.roll(vec, shift=k, axis=0)

    def measure_sparsity(self, vec: Array) -> float:
        """Measure actual sparsity of a vector.

        Args:
            vec: Hypervector to measure

        Returns:
            Fraction of 1s in the vector
        """
        vec_np = self.backend.to_numpy(vec)
        count_ones = np.sum(vec_np)
        return float(count_ones) / len(vec_np)

    def rehash(self, vec: Array) -> Array:
        """Rehash vector to restore optimal sparsity.

        Useful after multiple operations that may have changed sparsity.
        Randomly selects positions to maintain target sparsity while
        preserving as much similarity as possible.

        Args:
            vec: Hypervector to rehash

        Returns:
            Rehashed hypervector with target sparsity
        """
        vec_np = self.backend.to_numpy(vec)
        target_ones = int(self.sparsity * self.dimension)

        # Get current 1 positions
        current_ones = np.where(vec_np == 1)[0]
        current_count = len(current_ones)

        if current_count == target_ones:
            # Already at target sparsity
            return vec
        elif current_count > target_ones:
            # Too many 1s: randomly remove some
            keep_indices = np.random.choice(
                current_ones, size=target_ones, replace=False
            )
            result = np.zeros_like(vec_np)
            result[keep_indices] = 1
        else:
            # Too few 1s: randomly add some
            current_zeros = np.where(vec_np == 0)[0]
            add_count = target_ones - current_count
            add_indices = np.random.choice(
                current_zeros, size=add_count, replace=False
            )
            result = vec_np.copy()
            result[add_indices] = 1

        return self.backend.from_numpy(result.astype(np.int32))

    def encode_sequence(
        self,
        items: Sequence[Array],
        use_ngrams: bool = False,
        n: int = 2
    ) -> Array:
        """Encode sequence of items.

        Two strategies:
        1. Position binding: item_i ⊗ ρⁱ(position)
        2. N-grams: Bundle all n-grams in sequence

        Args:
            items: Sequence of hypervectors
            use_ngrams: If True, use n-gram encoding (default: False)
            n: N-gram size (default: 2 for bigrams)

        Returns:
            Sequence hypervector

        Raises:
            ValueError: If items is empty
        """
        if not items:
            raise ValueError("Cannot encode empty sequence")

        items = list(items)

        if use_ngrams:
            # N-gram encoding
            if len(items) < n:
                # Sequence too short for n-grams, fall back to simple bundle
                return self.bundle(items)

            ngrams = []
            for i in range(len(items) - n + 1):
                # Create n-gram by binding n consecutive items
                ngram = items[i]
                for j in range(1, n):
                    ngram = self.bind(ngram, items[i + j])
                ngrams.append(ngram)

            return self.bundle(ngrams)
        else:
            # Position binding encoding
            pos = self.random(seed=42)  # Fixed position vector
            bound_items = []

            for i, item in enumerate(items):
                permuted_pos = self.permute(pos, k=i)
                bound_items.append(self.bind(item, permuted_pos))

            return self.bundle(bound_items)

    def __repr__(self) -> str:
        return (f"BSDCModel(dimension={self.dimension}, "
                f"sparsity={self.sparsity:.4f}, "
                f"binding_mode='{self.binding_mode}', "
                f"space={self.space.space_name}, "
                f"backend={self.backend.name})")

__init__(dimension=10000, sparsity=None, space=None, backend=None, seed=None, binding_mode='xor')

Initialize BSDC model.

Args:
    dimension: Dimensionality of hypervectors (typically > 1000)
    sparsity: Fraction of 1s (default: 1/√D, which is optimal)
    space: Vector space (defaults to SparseSpace with optimal sparsity)
    backend: Computational backend
    seed: Random seed for space
    binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning

Source code in holovec/models/bsdc.py
def __init__(
    self,
    dimension: int = 10000,
    sparsity: float | None = None,
    space: VectorSpace | None = None,
    backend: Backend | None = None,
    seed: int | None = None,
    binding_mode: str = 'xor',
):
    """Initialize BSDC model.

    Args:
        dimension: Dimensionality of hypervectors (typically > 1000)
        sparsity: Fraction of 1s (default: 1/√D which is optimal)
        space: Vector space (defaults to SparseSpace with optimal sparsity)
        backend: Computational backend
        seed: Random seed for space
        binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning
    """
    if binding_mode not in ('xor', 'cdt'):
        raise ValueError(f"binding_mode must be 'xor' or 'cdt', got '{binding_mode}'")

    if space is None:
        from ..backends import get_backend
        backend = backend if backend is not None else get_backend()
        space = SparseSpace(dimension, sparsity=sparsity, backend=backend, seed=seed)

    super().__init__(space, backend)

    self.binding_mode = binding_mode
    self._seed = seed

    # Store sparsity for easy access
    if isinstance(space, SparseSpace):
        self.sparsity = space.sparsity
    else:
        # Fallback if using non-sparse space
        import math
        self.sparsity = sparsity if sparsity is not None else 1.0 / math.sqrt(dimension)

    # Pre-generate permutation patterns for CDT
    if binding_mode == 'cdt':
        self._cdt_permutations = self._generate_cdt_permutations()

bind(a, b)

Bind two hypervectors.

Behavior depends on binding_mode:
- 'xor': XOR binding (self-inverse, result dissimilar to inputs)
- 'cdt': Context-dependent thinning (preserves similarity to inputs)

For XOR mode:
- Preserves sparsity on average: p(1-p) + (1-p)p = 2p(1-p)
- For optimal p = 1/√D, result sparsity ≈ 2/√D

For CDT mode:
- Result is similar to both a and b (unstructured similarity)
- Similar inputs produce similar outputs (structured similarity)

Args:
    a: First hypervector
    b: Second hypervector

Returns:
    Bound hypervector

Source code in holovec/models/bsdc.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind two hypervectors.

    Behavior depends on binding_mode:
    - 'xor': XOR binding (self-inverse, result dissimilar to inputs)
    - 'cdt': Context-dependent thinning (preserves similarity to inputs)

    For XOR mode:
        - Preserves sparsity on average: p(1-p) + (1-p)p = 2p(1-p)
        - For optimal p = 1/√D, result sparsity ≈ 2/√D

    For CDT mode:
        - Result is similar to both a and b (unstructured similarity)
        - Similar inputs produce similar outputs (structured similarity)

    Args:
        a: First hypervector
        b: Second hypervector

    Returns:
        Bound hypervector
    """
    if self.binding_mode == 'cdt':
        return self.context_dependent_thinning([a, b])
    else:
        # XOR binding (default)
        return self.backend.xor(a, b)
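
A hedged sketch of XOR-mode binding and its expected density (measure_sparsity is documented below; exact values vary run to run):

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000)   # XOR mode; p = 1/sqrt(10000) = 0.01
a, b = model.random(), model.random()
c = model.bind(a, b)
model.measure_sparsity(c)            # roughly 2p(1-p), i.e. about 0.02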

bundle(vectors, maintain_sparsity=True)

Bundle using majority voting.

For sparse codes, bundling requires careful handling to maintain sparsity:
1. Sum all vectors element-wise
2. Apply a threshold to get a binary result
3. Optionally re-sparsify to maintain target sparsity

Args:
    vectors: Sequence of hypervectors to bundle
    maintain_sparsity: If True, enforce target sparsity (default: True)

Returns:
    Bundled hypervector

Raises:
    ValueError: If vectors is empty

Source code in holovec/models/bsdc.py
def bundle(self, vectors: Sequence[Array], maintain_sparsity: bool = True) -> Array:
    """Bundle using majority voting.

    For sparse codes, bundling requires careful handling to maintain sparsity:
    1. Sum all vectors element-wise
    2. Apply threshold to get binary result
    3. Optionally re-sparsify to maintain target sparsity

    Args:
        vectors: Sequence of hypervectors to bundle
        maintain_sparsity: If True, enforce target sparsity (default: True)

    Returns:
        Bundled hypervector

    Raises:
        ValueError: If vectors is empty
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")

    vectors = list(vectors)

    # Sum all vectors (counts how many 1s at each position)
    sum_result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)

    if maintain_sparsity:
        # Strategy: Take top-k positions with highest counts
        # where k ≈ sparsity * dimension
        sum_np = self.backend.to_numpy(sum_result)
        target_ones = int(self.sparsity * self.dimension)

        # Get indices of top-k values
        if target_ones > 0:
            # Use np.partition for efficiency (O(n) instead of O(n log n))
            threshold_idx = max(0, len(sum_np) - target_ones)
            threshold = np.partition(sum_np, threshold_idx)[threshold_idx]

            # Set positions >= threshold to 1, rest to 0
            result_np = (sum_np >= threshold).astype(np.int32)

            # If we have ties at the threshold, we might have slightly more
            # than target_ones. This is acceptable for maintaining sparsity.
            return self.backend.from_numpy(result_np)
        else:
            # No ones in result (edge case)
            return self.backend.zeros(self.dimension, dtype='int32')
    else:
        # Simple majority voting: threshold at N/2
        threshold = len(vectors) / 2.0
        result = self.backend.threshold(sum_result, threshold=threshold, above=1.0, below=0.0)
        return result.astype('int32')
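
A usage sketch of sparsity-preserving bundling (similarity values are illustrative, not exact):

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000)
a, b, c = model.random(), model.random(), model.random()
s = model.bundle([a, b, c])          # top-k thresholding keeps target sparsity
model.measure_sparsity(s)            # near model.sparsity (ties may add a few 1s)
model.similarity(s, a)               # well above similarity to an unrelated vector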

context_dependent_thinning(components)

Bind components using context-dependent thinning (CDT).

Algorithm (Rachkovskij 2001):
1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
2. Thin via permuted self-conjunction: ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))

Properties:
- Preserves unstructured similarity: result is similar to each component
- Preserves structured similarity: similar inputs → similar outputs
- Maintains target sparsity automatically

Args:
    components: Sequence of hypervectors to bind together

Returns:
    Bound hypervector with preserved similarity to components

Example:
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
>>> a, b, c = model.random(), model.random(), model.random()
>>> bound = model.context_dependent_thinning([a, b, c])
>>> # bound is similar to a, b, and c (unstructured similarity)

Source code in holovec/models/bsdc.py
def context_dependent_thinning(
    self,
    components: Sequence[Array],
) -> Array:
    """Bind components using context-dependent thinning (CDT).

    Algorithm (Rachkovskij 2001):
        1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
        2. Thin via permuted self-conjunction:
           ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))

    Properties:
        - Preserves unstructured similarity: result is similar to each component
        - Preserves structured similarity: similar inputs → similar outputs
        - Maintains target sparsity automatically

    Args:
        components: Sequence of hypervectors to bind together

    Returns:
        Bound hypervector with preserved similarity to components

    Example:
        >>> model = BSDCModel(dimension=10000, binding_mode='cdt')
        >>> a, b, c = model.random(), model.random(), model.random()
        >>> bound = model.context_dependent_thinning([a, b, c])
        >>> # bound is similar to a, b, and c (unstructured similarity)
    """
    if not components:
        raise ValueError("Cannot bind empty sequence")

    components = list(components)

    if len(components) == 1:
        return components[0].copy() if hasattr(components[0], 'copy') else components[0]

    # Convert to numpy for efficient logical operations
    components_np = [self.backend.to_numpy(c) for c in components]

    # Step 1: Superpose via OR
    z = components_np[0].astype(bool)
    for c in components_np[1:]:
        z = np.logical_or(z, c.astype(bool))

    # Step 2: Compute required thinning iterations
    current_density = float(np.sum(z)) / self.dimension
    K = self._compute_thinning_iterations(len(components), current_density)

    if K == 0:
        # Already at or below target sparsity
        result = z.astype(np.int32)
        return self.backend.from_numpy(result)

    # Step 3: Thin via permuted self-conjunction
    # ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
    permuted_or = np.zeros(self.dimension, dtype=bool)
    for k in range(K):
        perm_idx = k % len(self._cdt_permutations)
        z_permuted = z[self._cdt_permutations[perm_idx]]
        permuted_or = np.logical_or(permuted_or, z_permuted)

    result = np.logical_and(z, permuted_or).astype(np.int32)
    return self.backend.from_numpy(result)
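
To see the contrast between the two binding modes, a hedged sketch comparing CDT and XOR results (both models share dimension and sparsity, so the vectors are interchangeable):

from holovec.models.bsdc import BSDCModel

cdt = BSDCModel(dimension=10000, binding_mode='cdt')
a, b = cdt.random(), cdt.random()
cdt.similarity(cdt.bind(a, b), a)    # well above chance: result stays similar to a

xor = BSDCModel(dimension=10000)     # default 'xor' mode
xor.similarity(xor.bind(a, b), a)    # near zero: XOR result is dissimilar to inputs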

encode_sequence(items, use_ngrams=False, n=2)

Encode sequence of items.

Two strategies:
1. Position binding: item_i ⊗ ρⁱ(position)
2. N-grams: bundle all n-grams in the sequence

Args:
    items: Sequence of hypervectors
    use_ngrams: If True, use n-gram encoding (default: False)
    n: N-gram size (default: 2 for bigrams)

Returns:
    Sequence hypervector

Raises:
    ValueError: If items is empty

Source code in holovec/models/bsdc.py
def encode_sequence(
    self,
    items: Sequence[Array],
    use_ngrams: bool = False,
    n: int = 2
) -> Array:
    """Encode sequence of items.

    Two strategies:
    1. Position binding: item_i ⊗ ρⁱ(position)
    2. N-grams: Bundle all n-grams in sequence

    Args:
        items: Sequence of hypervectors
        use_ngrams: If True, use n-gram encoding (default: False)
        n: N-gram size (default: 2 for bigrams)

    Returns:
        Sequence hypervector

    Raises:
        ValueError: If items is empty
    """
    if not items:
        raise ValueError("Cannot encode empty sequence")

    items = list(items)

    if use_ngrams:
        # N-gram encoding
        if len(items) < n:
            # Sequence too short for n-grams, fall back to simple bundle
            return self.bundle(items)

        ngrams = []
        for i in range(len(items) - n + 1):
            # Create n-gram by binding n consecutive items
            ngram = items[i]
            for j in range(1, n):
                ngram = self.bind(ngram, items[i + j])
            ngrams.append(ngram)

        return self.bundle(ngrams)
    else:
        # Position binding encoding
        pos = self.random(seed=42)  # Fixed position vector
        bound_items = []

        for i, item in enumerate(items):
            permuted_pos = self.permute(pos, k=i)
            bound_items.append(self.bind(item, permuted_pos))

        return self.bundle(bound_items)
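
A sketch contrasting the two strategies (the order-sensitivity claim is approximate, not guaranteed to a fixed value):

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000)
items = [model.random() for _ in range(5)]
seq_pos = model.encode_sequence(items)                        # position binding (default)
seq_bigram = model.encode_sequence(items, use_ngrams=True)    # bigram encoding
# Position binding is order-sensitive: a reversed sequence should score lower
model.similarity(seq_pos, model.encode_sequence(list(reversed(items))))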

measure_sparsity(vec)

Measure actual sparsity of a vector.

Args:
    vec: Hypervector to measure

Returns:
    Fraction of 1s in the vector

Source code in holovec/models/bsdc.py
def measure_sparsity(self, vec: Array) -> float:
    """Measure actual sparsity of a vector.

    Args:
        vec: Hypervector to measure

    Returns:
        Fraction of 1s in the vector
    """
    vec_np = self.backend.to_numpy(vec)
    count_ones = np.sum(vec_np)
    return float(count_ones) / len(vec_np)

permute(vec, k=1)

Permute using circular shift.

Shifts vector elements by k positions. For sparse codes, this maintains sparsity perfectly.

Args:
    vec: Hypervector to permute
    k: Number of positions to shift (default: 1)

Returns:
    Permuted hypervector

Source code in holovec/models/bsdc.py
def permute(self, vec: Array, k: int = 1) -> Array:
    """Permute using circular shift.

    Shifts vector elements by k positions. For sparse codes,
    this maintains sparsity perfectly.

    Args:
        vec: Hypervector to permute
        k: Number of positions to shift (default: 1)

    Returns:
        Permuted hypervector
    """
    return self.backend.roll(vec, shift=k, axis=0)

rehash(vec)

Rehash vector to restore optimal sparsity.

Useful after multiple operations that may have changed sparsity. Randomly selects positions to maintain target sparsity while preserving as much similarity as possible.

Args:
    vec: Hypervector to rehash

Returns:
    Rehashed hypervector with target sparsity

Source code in holovec/models/bsdc.py
def rehash(self, vec: Array) -> Array:
    """Rehash vector to restore optimal sparsity.

    Useful after multiple operations that may have changed sparsity.
    Randomly selects positions to maintain target sparsity while
    preserving as much similarity as possible.

    Args:
        vec: Hypervector to rehash

    Returns:
        Rehashed hypervector with target sparsity
    """
    vec_np = self.backend.to_numpy(vec)
    target_ones = int(self.sparsity * self.dimension)

    # Get current 1 positions
    current_ones = np.where(vec_np == 1)[0]
    current_count = len(current_ones)

    if current_count == target_ones:
        # Already at target sparsity
        return vec
    elif current_count > target_ones:
        # Too many 1s: randomly remove some
        keep_indices = np.random.choice(
            current_ones, size=target_ones, replace=False
        )
        result = np.zeros_like(vec_np)
        result[keep_indices] = 1
    else:
        # Too few 1s: randomly add some
        current_zeros = np.where(vec_np == 0)[0]
        add_count = target_ones - current_count
        add_indices = np.random.choice(
            current_zeros, size=add_count, replace=False
        )
        result = vec_np.copy()
        result[add_indices] = 1

    return self.backend.from_numpy(result.astype(np.int32))
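
For example, a sketch restoring sparsity after an XOR bind has roughly doubled the density (values are approximate):

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000)
c = model.bind(model.random(), model.random())
model.measure_sparsity(c)            # about 0.02 for p = 0.01
c = model.rehash(c)
model.measure_sparsity(c)            # back to about 0.01, the target sparsity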

unbind(a, b)

Unbind to recover value.

Behavior depends on binding_mode:
- 'xor': XOR is self-inverse, giving exact recovery: unbind(bind(a, b), b) = a
- 'cdt': No inverse exists; returns the bound vector itself, since it is already similar to the components (use similarity search for retrieval)

Args:
    a: Bound hypervector (or first operand)
    b: Second operand (key for XOR mode, ignored for CDT mode)

Returns:
    For XOR: exact unbound hypervector
    For CDT: the bound vector (use similarity search to find components)

Source code in holovec/models/bsdc.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind to recover value.

    Behavior depends on binding_mode:
    - 'xor': XOR is self-inverse, exact recovery: unbind(bind(a, b), b) = a
    - 'cdt': No inverse exists; returns the bound vector itself since it's
      already similar to the components (use similarity search for retrieval)

    Args:
        a: Bound hypervector (or first operand)
        b: Second operand (key for XOR mode, ignored for CDT mode)

    Returns:
        For XOR: Exact unbound hypervector
        For CDT: The bound vector (use similarity search to find components)
    """
    if self.binding_mode == 'cdt':
        # CDT doesn't have an inverse operation
        # The bound vector is already similar to its components,
        # so return it for similarity-based retrieval
        return a
    else:
        # XOR is self-inverse
        return self.backend.xor(a, b)
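
In CDT mode, retrieval therefore works by similarity search over a codebook rather than by algebraic unbinding. A hedged sketch (the codebook and its names are illustrative):

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000, binding_mode='cdt')
codebook = {name: model.random() for name in ('a', 'b', 'c', 'noise')}
bound = model.bind(codebook['a'], codebook['b'])
# Rank entries by similarity; 'a' and 'b' should outrank unrelated entries
ranked = sorted(codebook, key=lambda k: model.similarity(bound, codebook[k]), reverse=True)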

BSDC-SEG

holovec.models.bsdc_seg.BSDCSEGModel

Bases: VSAModel

Segment-sparse binary VSA model (BSDC-SEG).

Binding: XOR (element-wise, self-inverse)
Unbinding: XOR (self-inverse)
Bundling: segment-wise majority (exactly 1 per segment)
Permutation: circular shift

Uses SparseSegmentSpace with S segments (D % S == 0).

Source code in holovec/models/bsdc_seg.py
class BSDCSEGModel(VSAModel):
    """Segment-sparse binary VSA model (BSDC-SEG).

    Binding: XOR (element-wise, self-inverse)
    Unbinding: XOR (self-inverse)
    Bundling: segment-wise majority (exactly 1 per segment)
    Permutation: circular shift

    Uses SparseSegmentSpace with S segments (D % S == 0).
    """

    def __init__(
        self,
        dimension: int,
        segments: int | None = None,
        space: VectorSpace | None = None,
        backend: Backend | None = None,
        seed: int | None = None,
    ):
        if space is None:
            if segments is None:
                raise ValueError("segments is required when space is not provided")
            from ..backends import get_backend

            backend = backend if backend is not None else get_backend()
            space = SparseSegmentSpace(dimension, segments=segments, backend=backend, seed=seed)
        elif not isinstance(space, SparseSegmentSpace):
            raise TypeError(f"space must be SparseSegmentSpace, got {type(space)}")

        super().__init__(space, backend)
        self.segments = space.segments
        self.segment_length = space.segment_length

    @property
    def model_name(self) -> str:
        return "BSDC-SEG"

    @property
    def is_self_inverse(self) -> bool:
        return True

    @property
    def is_commutative(self) -> bool:
        return True

    @property
    def is_exact_inverse(self) -> bool:
        return True

    def bind(self, a: Array, b: Array) -> Array:
        """Bind using XOR (self-inverse)."""
        return self.backend.xor(a, b)

    def unbind(self, a: Array, b: Array) -> Array:
        """Unbind using XOR (self-inverse)."""
        return self.bind(a, b)

    def bundle(self, vectors: Sequence[Array]) -> Array:
        """Segment-wise majority with exactly 1 winner per segment.

        Counts votes per index within each segment and selects the index with
        maximum count (deterministic tie-break: lowest index).
        """
        if not vectors:
            raise ValueError("Cannot bundle empty sequence")
        import numpy as _np

        # Normalize each to a valid segment pattern first
        seg_norm = [self.space.normalize(v) for v in vectors]
        arrs = [_np.array(self.backend.to_numpy(v)) for v in seg_norm]
        # Accumulate counts per segment position
        counts = _np.zeros((self.dimension,), dtype=_np.int32)
        for a in arrs:
            counts += a
        out = _np.zeros_like(counts, dtype=_np.int32)
        L = self.segment_length
        for s in range(self.segments):
            start = s * L
            end = start + L
            seg_counts = counts[start:end]
            idx = int(_np.argmax(seg_counts))  # deterministic tie-breaker
            out[start + idx] = 1
        return self.backend.from_numpy(out)

    def permute(self, vec: Array, k: int = 1) -> Array:
        return self.backend.roll(vec, shift=k, axis=0)

bind(a, b)

Bind using XOR (self-inverse).

Source code in holovec/models/bsdc_seg.py
def bind(self, a: Array, b: Array) -> Array:
    """Bind using XOR (self-inverse)."""
    return self.backend.xor(a, b)

bundle(vectors)

Segment-wise majority with exactly 1 winner per segment.

Counts votes per index within each segment and selects the index with maximum count (deterministic tie-break: lowest index).

Source code in holovec/models/bsdc_seg.py
def bundle(self, vectors: Sequence[Array]) -> Array:
    """Segment-wise majority with exactly 1 winner per segment.

    Counts votes per index within each segment and selects the index with
    maximum count (deterministic tie-break: lowest index).
    """
    if not vectors:
        raise ValueError("Cannot bundle empty sequence")
    import numpy as _np

    # Normalize each to a valid segment pattern first
    seg_norm = [self.space.normalize(v) for v in vectors]
    arrs = [_np.array(self.backend.to_numpy(v)) for v in seg_norm]
    # Accumulate counts per segment position
    counts = _np.zeros((self.dimension,), dtype=_np.int32)
    for a in arrs:
        counts += a
    out = _np.zeros_like(counts, dtype=_np.int32)
    L = self.segment_length
    for s in range(self.segments):
        start = s * L
        end = start + L
        seg_counts = counts[start:end]
        idx = int(_np.argmax(seg_counts))  # deterministic tie-breaker
        out[start + idx] = 1
    return self.backend.from_numpy(out)
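
A sketch verifying the exactly-one-per-segment invariant (assumes the default NumPy-backed backend, so to_numpy is cheap):

import numpy as np

from holovec.models.bsdc_seg import BSDCSEGModel

model = BSDCSEGModel(dimension=10000, segments=100)   # segment length 100
out = model.bundle([model.random() for _ in range(3)])
arr = model.backend.to_numpy(out).reshape(100, 100)
assert (arr.sum(axis=1) == 1).all()                   # one winner per segment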

unbind(a, b)

Unbind using XOR (self-inverse).

Source code in holovec/models/bsdc_seg.py
def unbind(self, a: Array, b: Array) -> Array:
    """Unbind using XOR (self-inverse)."""
    return self.bind(a, b)

Encoders

Scalar Encoders

holovec.encoders.scalar.FractionalPowerEncoder

Bases: ScalarEncoder

Fractional Power Encoding (FPE) for continuous scalars.

Based on Frady et al. (2021) "Computing on Functions Using Randomized Vector Representations". Encodes scalars by exponentiating a random phasor base vector: encode(x) = φ^x.

The inner product between encoded vectors approximates a similarity kernel (sinc for uniform phase distribution). This encoding preserves linearity and enables precise decoding via sinc kernel reconstruction.

Works best with FHRR (complex domain) but also supports HRR (real domain).

References:
    Frady et al. (2021): https://arxiv.org/abs/2109.03429
    Verges et al. (2025): Learning encoding phasors with FPE

Attributes:
    bandwidth: Controls kernel width (lower = wider kernel)
    base_phasor: Random phasor vector φ = [e^(iφ₁), ..., e^(iφₙ)]
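
A brief sketch of the kernel behavior (exact similarity values depend on bandwidth, dimension, and seed):

from holovec import VSA
from holovec.encoders.scalar import FractionalPowerEncoder

model = VSA.create('FHRR', dim=10000)
enc = FractionalPowerEncoder(model, min_val=0.0, max_val=100.0)
model.similarity(enc.encode(25.0), enc.encode(26.0))   # high: nearby scalars
model.similarity(enc.encode(25.0), enc.encode(75.0))   # low: distant scalars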

Source code in holovec/encoders/scalar.py
class FractionalPowerEncoder(ScalarEncoder):
    """
    Fractional Power Encoding (FPE) for continuous scalars.

    Based on Frady et al. (2021) "Computing on Functions Using Randomized
    Vector Representations". Encodes scalars by exponentiating a random
    phasor base vector: encode(x) = φ^x.

    The inner product between encoded vectors approximates a similarity
    kernel (sinc for uniform phase distribution). This encoding preserves
    linearity and enables precise decoding via sinc kernel reconstruction.

    Works best with FHRR (complex domain) but also supports HRR (real domain).

    References:
        Frady et al. (2021): https://arxiv.org/abs/2109.03429
        Verges et al. (2025): Learning encoding phasors with FPE

    Attributes:
        bandwidth: Controls kernel width (lower = wider kernel)
        base_phasor: Random phasor vector φ = [e^(iφ₁), ..., e^(iφₙ)]
    """

    def __init__(
        self,
        model: VSAModel,
        min_val: float,
        max_val: float,
        bandwidth: float = 1.0,
        seed: int | None = None,
        phase_dist: str = "uniform",
        mixture_bandwidths: list[float] | None = None,
        mixture_weights: list[float] | None = None,
    ):
        """
        Initialize FractionalPowerEncoder.

        Parameters
        ----------
        model : VSAModel
            VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for
            exact fractional powers. HRR (real-valued) uses cosine projection.
        min_val : float
            Minimum value of encoding range. Values below this will be clipped.
        max_val : float
            Maximum value of encoding range. Values above this will be clipped.
        bandwidth : float, optional
            Bandwidth parameter β controlling kernel width (default: 1.0).

            **Mathematical Role:**
            - Encoding: z(x) = φ^(β·x_normalized)
            - Kernel: K(x₁, x₂) ≈ sinc(β·π·|x₁ - x₂|) for uniform phase distribution
            - Smaller β → wider kernel → more generalization
            - Larger β → narrower kernel → more discrimination

            **Typical Values:**
            - β = 0.01: Wide kernel, high generalization (classification)
            - β = 1.0: Medium kernel (default)
            - β = 10.0: Narrow kernel, low generalization (regression)

        seed : int or None, optional
            Random seed for generating base phasor (for reproducibility).
            Different seeds produce different random frequency vectors θ.
        phase_dist : str, optional
            Distribution for sampling frequency vector θ (default: 'uniform').

            **Available Distributions:**
            - 'uniform': θⱼ ~ Uniform[-π, π] → sinc kernel (default)
            - 'gaussian': θⱼ ~ N(0, 1) → Gaussian kernel approximation
            - 'laplace': θⱼ ~ Laplace(0, 1) → Exponential kernel, heavy tails
            - 'cauchy': θⱼ ~ Cauchy(0, 1) → Very heavy tails, long-range
            - 'student': θⱼ ~ Student-t(df=3) → Moderate tails, robust

            Different distributions induce different similarity kernels,
            affecting generalization properties.

        mixture_bandwidths : List[float] or None, optional
            List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding.

            **Mixture Encoding:**
            Instead of single bandwidth β, use weighted combination:
                z_mix(x) = Σₖ αₖ · φ^(βₖ·x)

            where αₖ are mixture_weights. This creates multi-scale representation
            combining coarse (small β) and fine (large β) kernels.

            **Example:**
            mixture_bandwidths = [0.01, 0.1, 1.0, 10.0]  # 4 scales
            Creates encoding with both local and global similarity.

        mixture_weights : List[float] or None, optional
            Weights αₖ for each bandwidth in mixture (must sum to 1).

            If None and mixture_bandwidths is provided, uses uniform weights:
                αₖ = 1/K for all k

            Weights can be:
            1. Hand-crafted (domain knowledge)
            2. Learned via `learn_mixture_weights()` (ridge regression)
            3. Uniform (default)

        Raises
        ------
        ValueError
            If phase_dist not in valid set, or if mixture_weights/mixture_bandwidths
            have mismatched lengths.

        Notes
        -----
        **Mathematical Foundation:**

        Fractional Power Encoding maps scalar x to hypervector via:
            z(x) = φ^(β·x_normalized)

        where:
        - φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθₐ)] is base phasor (D dimensions)
        - θⱼ are random frequencies sampled from phase_dist
        - x_normalized ∈ [0, 1] is x mapped to unit interval
        - β is bandwidth parameter

        **Inner Product Kernel:**

        For uniform phase distribution θⱼ ~ Uniform[-π, π]:
            ⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)

        This sinc kernel has important properties:
        - Smooth interpolation between similar values
        - Exact at x₁ = x₂ (similarity = 1)
        - Decreases monotonically with distance
        - Zero-crossings at integer multiples of 1/β

        **Comparison to Random Fourier Features:**

        FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007)
        for kernel approximation:
            k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D

        where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θₐx), sin(θₐx)]

        For complex hypervectors, FPE uses complex exponentials instead:
            φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθₐx)]

        which provides more compact representation and supports exact
        fractional power operations in frequency domain.

        References
        ----------
        - Frady et al. (2021): "Computing on Functions Using Randomized
          Vector Representations" - Original FPE paper
        - Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
        - Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
        - Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"

        Examples
        --------
        >>> # Basic FPE for temperature encoding
        >>> model = VSA.create('FHRR', dim=10000)
        >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
        >>> temp_25 = encoder.encode(25.0)
        >>> temp_26 = encoder.encode(26.0)
        >>> similarity = model.similarity(temp_25, temp_26)  # ≈ 0.95

        >>> # Multi-scale mixture encoding
        >>> encoder_mix = FractionalPowerEncoder(
        ...     model, min_val=0, max_val=100,
        ...     mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
        ...     mixture_weights=[0.4, 0.3, 0.2, 0.1]  # Emphasize coarse scales
        ... )

        >>> # Alternative kernel via phase distribution
        >>> encoder_gauss = FractionalPowerEncoder(
        ...     model, min_val=0, max_val=100,
        ...     phase_dist='gaussian'  # Gaussian kernel instead of sinc
        ... )
        """
        super().__init__(model, min_val, max_val)

        self.bandwidth = bandwidth
        self.seed = seed

        # Distribution controls for frequencies (theta)
        self.phase_dist = (phase_dist or "uniform").lower()
        valid = {"uniform", "gaussian", "laplace", "cauchy", "student"}
        if self.phase_dist not in valid:
            raise ValueError(f"Unsupported phase_dist '{phase_dist}'. Choose from {sorted(valid)}.")

        # Mixture support (optional)
        self.mixture_bandwidths = mixture_bandwidths
        self.mixture_weights = mixture_weights
        if self.mixture_bandwidths is not None:
            if len(self.mixture_bandwidths) == 0:
                raise ValueError("mixture_bandwidths must be non-empty if provided")
            if self.mixture_weights is None:
                self.mixture_weights = [1.0 / len(self.mixture_bandwidths)] * len(self.mixture_bandwidths)
            if len(self.mixture_weights) != len(self.mixture_bandwidths):
                raise ValueError("mixture_weights must match mixture_bandwidths length")
            # Normalize weights
            s = sum(self.mixture_weights)
            if s <= 0:
                raise ValueError("mixture_weights must sum to positive value")
            self.mixture_weights = [w / s for w in self.mixture_weights]

        # Check complex vs real
        self.is_complex = self.model.space.space_name == "complex"

        # Base phases/frequencies θ_j
        # For uniform, we can derive from a random phasor; for others, sample numeric theta
        if self.phase_dist == "uniform":
            # Maintain backward compatibility using base phasor
            self.base_phasor = self._generate_base_phasor(seed)
            # Derive angles from the base phasor
            self.theta = self.backend.angle(self.base_phasor)
        else:
            # Numeric theta sampled in init; store as backend array
            self.theta = self._generate_theta_distribution(self.phase_dist, seed)
            # For complex path we do not need base_phasor; for real path, we’ll compute cos(theta * exponent)
            self.base_phasor = None

    def _generate_base_phasor(self, seed: int | None) -> Array:
        """
        Generate random phasor base vector with uniform phase distribution.

        For uniform phases φᵢ ~ Uniform[-π, π], this induces the sinc kernel:
        K(d) = sinc(πd)

        Args:
            seed: Random seed for reproducibility

        Returns:
            Base phasor vector φ = [e^(iφ₁), e^(iφ₂), ..., e^(iφₙ)]
        """
        # Generate random phasors using backend (fully backend-agnostic)
        if self.is_complex:
            # For complex models (FHRR), generate random phasors directly
            phasor = self.backend.random_phasor(
                shape=self.dimension,
                dtype='complex64',
                seed=seed
            )
        else:
            # For real models (HRR), generate phasors then project to real
            phasor_complex = self.backend.random_phasor(
                shape=self.dimension,
                dtype='complex64',
                seed=seed
            )
            # Project to real via inverse FFT
            phasor_real = self.backend.ifft(phasor_complex).real
            # Normalize to unit norm using backend
            phasor = self.backend.normalize(phasor_real)

        return phasor

    def _generate_theta_distribution(self, phase_dist: str, seed: int | None) -> Array:
        """
        Generate frequency vector θ according to specified distribution.

        Parameters
        ----------
        phase_dist : str
            Distribution name for sampling frequencies.
        seed : int or None
            Random seed for reproducibility.

        Returns
        -------
        Array
            Frequency vector θ of shape (D,) in backend format.

        Notes
        -----
        **Distribution Choices and Induced Kernels:**

        Different frequency distributions induce different similarity kernels
        via the Fourier transform relationship:

        1. **Uniform θⱼ ~ Uniform[-π, π]** (default):
           - Kernel: K(d) = sinc(π·d) = sin(π·d)/(π·d)
           - Properties: Smooth, monotonic decay, oscillatory
           - Best for: General-purpose continuous encoding
           - Zero-crossings at integer distances

        2. **Gaussian θⱼ ~ N(0, 1)**:
           - Kernel: K(d) ≈ exp(-d²/2) (Gaussian RBF kernel)
           - Properties: Smooth, no oscillations, fast decay
           - Best for: Local similarity, smooth interpolation
           - Widely used in kernel methods (SVMs, GPs)

        3. **Laplace θⱼ ~ Laplace(0, 1)**:
           - Kernel: K(d) ∝ exp(-|d|) (Exponential kernel)
           - Properties: Heavy tails, slower than Gaussian decay
           - Best for: Robust similarity, outlier tolerance
           - More forgiving to distant values

        4. **Cauchy θⱼ ~ Cauchy(0, 1)**:
           - Kernel: K(d) ∝ 1/(1 + d²) (Rational quadratic)
           - Properties: Very heavy tails, long-range interactions
           - Best for: Multi-scale similarity, hierarchical data
           - Cauchy kernel is limit of Student-t as df→∞

        5. **Student-t θⱼ ~ Student-t(df=3)**:
           - Kernel: K(d) ∝ (1 + d²/3)^(-2) (generalized Student)
           - Properties: Moderate heavy tails (df=3 chosen empirically)
           - Best for: Robust regression, noisy data
           - Interpolates between Gaussian (df→∞) and Cauchy (df→0)

        **Mathematical Background:**

        The relationship between frequency distribution p(θ) and
        similarity kernel K(d) follows from Bochner's theorem:

        A continuous kernel K(x₁, x₂) = K(x₁ - x₂) is positive definite
        if and only if K(d) is the Fourier transform of a non-negative
        measure (the frequency distribution p(θ)):

            K(d) = ∫ exp(i·θ·d) p(θ) dθ

        For FPE, the inner product is:
            ⟨z(x₁), z(x₂)⟩ / D ≈ 𝔼_θ[exp(i·θ·β·(x₁ - x₂))]
                                = ∫ exp(i·θ·β·d) p(θ) dθ
                                = K(β·d)

        where d = x₁ - x₂ is the distance between scalars.

        **Sampling Methods:**

        - **Uniform, Gaussian, Student-t**: Direct sampling from distribution
        - **Laplace**: Inverse CDF transform from uniform:
            θ = -sign(u) · log(1 - 2|u|)  where u ~ Uniform(-0.5, 0.5)
        - **Cauchy**: Inverse CDF transform:
            θ = tan(π·u)  where u ~ Uniform(-0.5, 0.5)

        **NumPy Usage Justification:**

        Uses local NumPy import because special distributions (Laplace, Cauchy)
        are not available in backend abstraction. Frequencies are converted
        to backend array immediately via `from_numpy()`.

        References
        ----------
        - Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
          Section 3: Relationship between frequency distribution and kernel
        - Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
          Analysis of approximation quality for different kernels
        - Bochner (1932): "Vorlesungen über Fouriersche Integrale"
          Original Bochner's theorem
        - Rasmussen & Williams (2006): "Gaussian Processes for Machine Learning"
          Chapter 4: Covariance functions and kernel design

        Examples
        --------
        >>> # Gaussian kernel for smooth similarity
        >>> model = VSA.create('FHRR', dim=10000)
        >>> enc = FractionalPowerEncoder(model, 0, 100, phase_dist='gaussian')

        >>> # Laplacian (exponential) kernel via Cauchy frequencies
        >>> enc_cauchy = FractionalPowerEncoder(model, 0, 100, phase_dist='cauchy')
        """
        import numpy as _np

        rng = _np.random.default_rng(seed)
        D = self.dimension
        if phase_dist == "gaussian":
            theta_np = rng.normal(0.0, 1.0, size=(D,)).astype(_np.float32)
        elif phase_dist == "laplace":
            # Laplace via inverse transform: scale=1
            u = rng.uniform(-0.5, 0.5, size=(D,)).astype(_np.float32)
            theta_np = (_np.sign(u) * _np.log1p(-2.0 * _np.abs(u))).astype(_np.float32) * -1.0
        elif phase_dist == "cauchy":
            u = rng.uniform(-0.5, 0.5, size=(D,)).astype(_np.float32)
            theta_np = _np.tan(_np.pi * u).astype(_np.float32)
        elif phase_dist == "student":
            theta_np = rng.standard_t(df=3.0, size=(D,)).astype(_np.float32)
        else:
            # Default to uniform angles; match base_phasor angle convention [-π, π]
            theta_np = rng.uniform(-_np.pi, _np.pi, size=(D,)).astype(_np.float32)

        return self.backend.from_numpy(theta_np)

    def encode(self, value: float) -> Array:
        """
        Encode scalar value to hypervector using fractional power.

        Parameters
        ----------
        value : float
            Scalar value to encode. Will be clipped to [min_val, max_val].

        Returns
        -------
        Array
            Encoded hypervector of shape (dimension,) in backend format.

        Notes
        -----
        **Single Bandwidth Encoding:**

        For single bandwidth β, implements:
            z(x) = φ^(β·x_normalized)

        where:
        - x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
        - φ = [e^(iθ₁), ..., e^(iθ_D)] is base phasor with random frequencies θⱼ
        - Result is normalized according to model's space

        Element-wise computation:
            z_j(x) = e^(i·θⱼ·β·x_normalized)  (complex models)
            z_j(x) = cos(θⱼ·β·x_normalized)   (real models)

        **Mixture Encoding:**

        When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses weighted sum:
            z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized)

        where αₖ are mixture_weights (default: uniform αₖ = 1/K).

        **Advantages of Mixture Encoding:**

        1. **Multi-Scale Representation**: Combines coarse (small β) and
           fine (large β) similarity kernels in single hypervector

        2. **Improved Generalization**: Coarse scales provide robustness,
           fine scales provide discrimination

        3. **Learned Weights**: Weights αₖ can be learned via
           `learn_mixture_weights()` to optimize for specific task

        4. **Kernel Combination**: Mixture is equivalent to combining
           multiple kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)

        **Computational Complexity:**

        - Single bandwidth: O(D) operations (element-wise exponential)
        - Mixture with K bandwidths: O(K·D) operations
        - Backend operations (exp, multiply) are vectorized/GPU-accelerated

        **Normalization:**

        Output is normalized using model's normalization scheme:
        - FHRR/HRR: L2 normalization (unit norm)
        - MAP: Element-wise normalization
        - BSC/BSDC: No normalization (binary)

        This ensures hypervectors are in valid space for subsequent
        binding/bundling operations.

        Examples
        --------
        >>> # Basic encoding
        >>> model = VSA.create('FHRR', dim=10000)
        >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
        >>> hv_25 = encoder.encode(25.0)  # Encode temperature 25°C
        >>> hv_26 = encoder.encode(26.0)
        >>> similarity = model.similarity(hv_25, hv_26)
        >>> print(f"Similarity: {similarity:.3f}")  # ≈ 0.950 (close values)

        >>> # Mixture encoding for multi-scale representation
        >>> encoder_mix = FractionalPowerEncoder(
        ...     model, min_val=0, max_val=100,
        ...     mixture_bandwidths=[0.01, 1.0, 100.0]
        ... )
        >>> hv_mix = encoder_mix.encode(25.0)  # Combines 3 scales

        >>> # Effect of bandwidth on similarity
        >>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
        >>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
        >>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
        >>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
        >>> # sim_wide > sim_narrow (wider kernel → more generalization)
        """
        # Normalize value to [0, 1]
        normalized = self.normalize(value)

        # Handle mixture: list of beta_k and weights alpha_k
        betas: list[float]
        alphas: list[float]
        if self.mixture_bandwidths is not None:
            betas = list(self.mixture_bandwidths)
            alphas = list(self.mixture_weights or [])
        else:
            betas = [self.bandwidth]
            alphas = [1.0]

        parts = []
        for alpha, beta in zip(alphas, betas):
            exponent = beta * normalized
            if self.is_complex:
                # Complex: encode as exp(i * theta * exponent)
                theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
                phase = self.backend.multiply_scalar(theta, exponent)
                phasor = self.backend.exp(1j * phase)
                parts.append(self.backend.multiply_scalar(phasor, alpha))
            else:
                # Real: use cosine features directly: cos(theta * exponent)
                theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
                phase = self.backend.multiply_scalar(theta, exponent)
                # cos(phase) = Re(exp(i*phase))
                phasor = self.backend.real(self.backend.exp(1j * phase))
                parts.append(self.backend.multiply_scalar(phasor, alpha))

        if len(parts) == 1:
            encoded = parts[0]
        else:
            encoded = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)

        # Normalize output according to space
        return self.model.normalize(encoded)

    def decode(
        self,
        hypervector: Array,
        resolution: int = 1000,
        max_iterations: int = 100,
        tolerance: float = 1e-6
    ) -> float:
        """
        Decode hypervector back to scalar value using two-stage optimization.

        Parameters
        ----------
        hypervector : Array
            Hypervector to decode (typically a noisy/bundled encoding).
        resolution : int, optional
            Number of grid points for coarse search (default: 1000).
            Higher resolution improves initial guess but increases cost.
        max_iterations : int, optional
            Maximum gradient descent iterations (default: 100).
            Typical convergence: 20-50 iterations.
        tolerance : float, optional
            Convergence tolerance for gradient descent (default: 1e-6).
            Stop when |Δx| < tolerance.

        Returns
        -------
        float
            Decoded scalar value in [min_val, max_val].

        Notes
        -----
        **Decoding Algorithm:**

        Uses two-stage optimization to find value x maximizing similarity:
            x* = argmax_x ⟨encode(x), hypervector⟩

        **Stage 1: Coarse Grid Search** (O(resolution · D))
        - Evaluate similarity at `resolution` uniformly-spaced points
        - Find x₀ with highest similarity
        - Provides good initialization for gradient descent

        **Stage 2: Gradient Descent** (O(max_iterations · D))
        - Starting from x₀, perform gradient ascent:
            x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
        - Gradient computed via finite differences:
            ∇_x ≈ (sim(x + ε) - sim(x)) / ε
        - Step size η_t decays: η_t = η_0 · 0.95^t (prevents oscillation)
        - Clips updates to [0, 1] normalized range

        **Why This Works:**

        For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
        - Similarity function is unimodal (single peak)
        - Peak occurs at x = x_true (encoded value)
        - Gradient descent converges to global maximum

        However, for noisy hypervectors (e.g., bundled encodings):
        - Multiple local maxima may exist
        - Coarse search reduces the chance of converging to a spurious local maximum
        - Wider kernels (small β) → smoother objective → easier optimization

        **Approximation Quality:**

        Decoding accuracy depends on several factors:

        1. **Dimension D**: Higher D → more accurate encoding → better decoding
           - D = 1000: Moderate accuracy (similarity ≈ 0.85)
           - D = 10000: High accuracy (similarity ≈ 0.99)

        2. **Signal-to-Noise Ratio**: Clean encoding vs bundled/noisy
           - Clean: Near-perfect recovery (error < 1%)
           - Bundled (10 items): Good recovery (error ≈ 5-10%)
           - Bundled (100 items): Degraded (error ≈ 20-30%)

        3. **Bandwidth β**: Wider kernels → smoother similarity landscape
           - β = 0.01: Very smooth, easy to optimize
           - β = 10.0: Narrow kernel, may have local maxima

        4. **Mixture Encoding**: Multiple bandwidths complicate landscape
           - May require finer grid search (higher resolution)
           - May need more gradient descent iterations

        **Computational Cost:**

        Total operations: O(resolution · D + max_iterations · D)

        Typical values:
        - resolution = 1000, max_iterations = 100, D = 10000
        - Total: ~1.1M evaluations
        - Runtime: ~0.1-1.0 seconds (CPU), ~0.01-0.1 seconds (GPU)

        For real-time applications, reduce resolution or max_iterations:
        - resolution = 100 (coarser search)
        - max_iterations = 20 (early stopping)

        **Comparison to Other Decoders:**

        - **Codebook Lookup** (LevelEncoder): O(K · D) for K levels
          Faster but discrete, no interpolation

        - **Resonator Network** (cleanup): O(iterations · M · D) for M items
          Better for structured/compositional decoding

        - **FPE Gradient Descent**: O(resolution · D + iterations · D)
          Best for continuous scalar recovery

        References
        ----------
        - Frady et al. (2021): "Computing on Functions Using Randomized
          Vector Representations" - Section on FPE decoding
        - Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent
          methods and convergence analysis

        Examples
        --------
        >>> # Basic decoding
        >>> model = VSA.create('FHRR', dim=10000)
        >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
        >>> hv = encoder.encode(25.0)
        >>> decoded = encoder.decode(hv)
        >>> print(f"Decoded: {decoded:.2f}")  # ≈ 25.00

        >>> # Decoding noisy hypervector (bundled encoding)
        >>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
        >>> decoded_bundle = encoder.decode(hv_bundle)
        >>> print(f"Decoded bundle: {decoded_bundle:.2f}")  # ≈ 25.5

        >>> # Fast decoding (lower resolution/iterations)
        >>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
        """
        # Coarse search: evaluate on grid
        normalized_grid = self.backend.linspace(0, 1, resolution)

        best_similarity = -float('inf')
        best_normalized = 0.5  # Start in middle

        for norm_val_np in self.backend.to_numpy(normalized_grid):
            norm_val = float(norm_val_np)
            encoded = self.encode(self.denormalize(norm_val))
            similarity = float(
                self.backend.to_numpy(
                    self.model.similarity(encoded, hypervector)
                )
            )

            if similarity > best_similarity:
                best_similarity = similarity
                best_normalized = norm_val

        # Fine search: gradient descent around best coarse value
        # For simplicity, use finite differences for gradient
        current = best_normalized
        step_size = 0.01

        for _ in range(max_iterations):
            # Evaluate at current position
            encoded_curr = self.encode(self.denormalize(current))
            sim_curr = float(
                self.backend.to_numpy(
                    self.model.similarity(encoded_curr, hypervector)
                )
            )

            # Evaluate at current + epsilon
            epsilon = 1e-4
            encoded_plus = self.encode(self.denormalize(current + epsilon))
            sim_plus = float(
                self.backend.to_numpy(
                    self.model.similarity(encoded_plus, hypervector)
                )
            )

            # Compute gradient
            gradient = (sim_plus - sim_curr) / epsilon

            # Update (gradient ascent)
            new_current = current + step_size * gradient

            # Clip to [0, 1]
            new_current = max(0.0, min(1.0, new_current))

            # Check convergence
            if abs(new_current - current) < tolerance:
                break

            current = new_current
            step_size *= 0.95  # Decay step size

        # Denormalize and return
        return self.denormalize(current)

    @property
    def is_reversible(self) -> bool:
        """FPE supports approximate decoding."""
        return True

    @property
    def compatible_models(self) -> list[str]:
        """FPE works best with FHRR, also compatible with HRR."""
        return ["FHRR", "HRR"]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"FractionalPowerEncoder("
            f"model={self.model.model_name}, "
            f"range=[{self.min_val}, {self.max_val}], "
            f"bandwidth={self.bandwidth}, "
            f"phase_dist={self.phase_dist}, "
            f"mixture={'yes' if self.mixture_bandwidths else 'no'}, "
            f"dimension={self.dimension})"
        )

    # ====== M2: Learned mixture weights (ridge-style closed form) ======
    def learn_mixture_weights(
        self,
        values: list[float],
        labels: list[int],
        reg: float = 1e-3,
    ) -> list[float]:
        """
        Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple
        ridge-style objective that aligns encoded mixtures to per-class prototypes.

        Approach:
            - Build class prototypes p_c as the mean of current encodings (using current weights)
            - For each sample i, compute per-band encodings E_i = [e_{i1},...,e_{iK}] (shape d×K)
            - Solve (Σ E_i^T E_i + reg I) α = Σ E_i^T p_{y_i}
            - Project α onto simplex (nonnegative, sum=1)

        Args:
            values: list of scalar inputs
            labels: list of integer class labels (same length as values)
            reg: L2 regularization strength (default 1e-3)

        Returns:
            Learned mixture weights (list of floats summing to 1)

        Notes:
            - Requires mixture_bandwidths to be set (K>=2)
            - Uses numpy for solving normal equations; backend remains unchanged
        """
        import numpy as _np

        if self.mixture_bandwidths is None or len(self.mixture_bandwidths) < 2:
            raise ValueError("learn_mixture_weights requires mixture_bandwidths with K >= 2")

        # Prepare classes and group samples
        values = list(values)
        labels = list(labels)
        if len(values) != len(labels):
            raise ValueError("values and labels must have same length")

        classes = sorted(set(labels))
        K = len(self.mixture_bandwidths)
        d = self.dimension

        # Build current encodings to compute class prototypes (using current mixture weights)
        encodings = [self.encode(v) for v in values]
        # Convert to numpy arrays for prototype computation
        enc_np = [_np.array(self.model.backend.to_numpy(e)) for e in encodings]
        # Class prototypes: mean of encodings per class (vector length d)
        prototypes = {}
        for c in classes:
            idxs = [i for i, y in enumerate(labels) if y == c]
            if not idxs:
                continue
            prototypes[c] = _np.mean(_np.stack([enc_np[i] for i in idxs], axis=0), axis=0)

        # Helper to compute per-band encodings matrix E_i (d×K) for a value
        def _per_band_matrix(val: float) -> _np.ndarray:
            norm = self.normalize(val)
            cols = []
            for beta in self.mixture_bandwidths:
                exponent = beta * norm
                theta = self.theta if self.theta is not None else self.model.backend.angle(self.base_phasor)
                phase = self.model.backend.multiply_scalar(theta, exponent)
                if self.is_complex:
                    ph = self.model.backend.exp(1j * phase)
                    col = self.model.backend.to_numpy(ph)
                else:
                    col = self.model.backend.to_numpy(self.model.backend.real(self.model.backend.exp(1j * phase)))
                cols.append(_np.array(col))
            # Stack columns to d×K
            return _np.stack(cols, axis=1)

        # Accumulate normal equations
        A = _np.zeros((K, K), dtype=_np.float64)
        b = _np.zeros((K,), dtype=_np.float64)
        for v, y in zip(values, labels):
            E = _per_band_matrix(v)   # d×K
            p = prototypes[y]         # d
            # E^T E and E^T p
            A += E.T @ E
            b += E.T @ p

        # Regularization
        A += reg * _np.eye(K, dtype=_np.float64)
        # Solve
        try:
            alpha = _np.linalg.solve(A, b)
        except _np.linalg.LinAlgError:
            alpha = _np.linalg.lstsq(A, b, rcond=None)[0]

        # Project to simplex (≥0, sum=1)
        alpha = _np.maximum(alpha, 0.0)
        s = float(_np.sum(alpha))
        if s <= 0:
            alpha = _np.ones_like(alpha) / len(alpha)
        else:
            alpha = alpha / s

        # Update in encoder
        self.mixture_weights = [float(a) for a in alpha.tolist()]
        return self.mixture_weights
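
The frequency/kernel correspondence documented above can be checked numerically. Below is a minimal NumPy-only sketch (independent of holovec) that draws frequencies with the same inverse-CDF transforms used in _generate_theta_distribution and compares the empirical kernel 𝔼[cos(θ·d)] against the closed forms: Cauchy frequencies reproduce exp(-|d|), Laplace frequencies reproduce 1/(1 + d²).

import numpy as np

D = 200_000
rng = np.random.default_rng(0)
u = rng.uniform(-0.5, 0.5, size=D)

# Same inverse-CDF transforms as _generate_theta_distribution
theta_cauchy = np.tan(np.pi * u)                          # θ ~ Cauchy(0, 1)
theta_laplace = -np.sign(u) * np.log1p(-2.0 * np.abs(u))  # θ ~ Laplace(0, 1)

for d in (0.5, 1.0, 2.0):
    # Empirical kernel: K(d) = E[exp(i·θ·d)] = E[cos(θ·d)] for symmetric p(θ)
    k_cauchy = np.mean(np.cos(theta_cauchy * d))
    k_laplace = np.mean(np.cos(theta_laplace * d))
    print(f"d={d}: Cauchy {k_cauchy:+.3f} vs exp(-|d|) {np.exp(-d):+.3f} | "
          f"Laplace {k_laplace:+.3f} vs 1/(1+d²) {1.0 / (1.0 + d*d):+.3f}")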

compatible_models property

FPE works best with FHRR, also compatible with HRR.

is_reversible property

FPE supports approximate decoding.

__init__(model, min_val, max_val, bandwidth=1.0, seed=None, phase_dist='uniform', mixture_bandwidths=None, mixture_weights=None)

Initialize FractionalPowerEncoder.

Parameters:

model (VSAModel, required)
    VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for exact
    fractional powers. HRR (real-valued) uses cosine projection.

min_val (float, required)
    Minimum value of the encoding range. Values below this will be clipped.

max_val (float, required)
    Maximum value of the encoding range. Values above this will be clipped.

bandwidth (float, default: 1.0)
    Bandwidth parameter β controlling kernel width.

    Mathematical Role:
      - Encoding: z(x) = φ^(β·x_normalized)
      - Kernel: K(x₁, x₂) ≈ sinc(β·π·|x₁ - x₂|) for the uniform phase distribution
      - Smaller β → wider kernel → more generalization
      - Larger β → narrower kernel → more discrimination

    Typical Values:
      - β = 0.01: Wide kernel, high generalization (classification)
      - β = 1.0: Medium kernel (default)
      - β = 10.0: Narrow kernel, low generalization (regression)

seed (int or None, default: None)
    Random seed for generating the base phasor (for reproducibility).
    Different seeds produce different random frequency vectors θ.

phase_dist (str, default: 'uniform')
    Distribution for sampling the frequency vector θ.

    Available Distributions:
      - 'uniform': θⱼ ~ Uniform[-π, π] → sinc kernel (default)
      - 'gaussian': θⱼ ~ N(0, 1) → Gaussian kernel approximation
      - 'laplace': θⱼ ~ Laplace(0, 1) → Cauchy kernel 1/(1 + d²), long-range
      - 'cauchy': θⱼ ~ Cauchy(0, 1) → Laplacian kernel exp(-|d|), robust
      - 'student': θⱼ ~ Student-t(df=3) → Matérn-type kernel, moderate tails

    Different distributions induce different similarity kernels,
    affecting generalization properties.

mixture_bandwidths (List[float] or None, default: None)
    List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding.

    Mixture Encoding: instead of a single bandwidth β, use the weighted
    combination z_mix(x) = Σₖ αₖ · φ^(βₖ·x), where αₖ are mixture_weights.
    This creates a multi-scale representation combining coarse (small β)
    and fine (large β) kernels.

    Example: mixture_bandwidths = [0.01, 0.1, 1.0, 10.0] creates a 4-scale
    encoding with both local and global similarity.

mixture_weights (List[float] or None, default: None)
    Weights αₖ for each bandwidth in the mixture (must sum to 1).
    If None and mixture_bandwidths is provided, uses uniform weights
    αₖ = 1/K for all k.

    Weights can be:
      1. Hand-crafted (domain knowledge)
      2. Learned via learn_mixture_weights() (ridge regression)
      3. Uniform (default)

Raises:

ValueError
    If phase_dist is not in the valid set, or if mixture_weights and
    mixture_bandwidths have mismatched lengths.

Notes

Mathematical Foundation:

Fractional Power Encoding maps a scalar x to a hypervector via:
    z(x) = φ^(β·x_normalized)

where:
  - φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθ_D)] is the base phasor (D dimensions)
  - θⱼ are random frequencies sampled from phase_dist
  - x_normalized ∈ [0, 1] is x mapped to the unit interval
  - β is the bandwidth parameter

Inner Product Kernel:

For the uniform phase distribution θⱼ ~ Uniform[-π, π]:
    ⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)

This sinc kernel has important properties:
  - Smooth interpolation between similar values
  - Exact at x₁ = x₂ (similarity = 1)
  - Decays with distance (oscillatory, with a decreasing envelope)
  - Zero-crossings at integer multiples of 1/β

Comparison to Random Fourier Features:

FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007)
for kernel approximation:
    k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D

where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θ_Dx), sin(θ_Dx)].

For complex hypervectors, FPE uses complex exponentials instead:
    φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθ_Dx)]

which provides a more compact representation and supports exact
fractional power operations in the frequency domain.

References
  • Frady et al. (2021): "Computing on Functions Using Randomized Vector Representations" - Original FPE paper
  • Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
  • Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
  • Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"

Examples:

>>> # Basic FPE for temperature encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> temp_25 = encoder.encode(25.0)
>>> temp_26 = encoder.encode(26.0)
>>> similarity = model.similarity(temp_25, temp_26)  # ≈ 0.95
>>> # Multi-scale mixture encoding
>>> encoder_mix = FractionalPowerEncoder(
...     model, min_val=0, max_val=100,
...     mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
...     mixture_weights=[0.4, 0.3, 0.2, 0.1]  # Emphasize coarse scales
... )
>>> # Alternative kernel via phase distribution
>>> encoder_gauss = FractionalPowerEncoder(
...     model, min_val=0, max_val=100,
...     phase_dist='gaussian'  # Gaussian kernel instead of sinc
... )
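
As a quick sanity check on the sinc-kernel claim above, the following self-contained NumPy sketch mirrors the encoder's math rather than calling holovec; note that np.sinc(x) computes sin(π·x)/(π·x), so np.sinc(β·d) equals the sinc(β·π·d) written here.

import numpy as np

D, beta = 10000, 1.0
rng = np.random.default_rng(0)
theta = rng.uniform(-np.pi, np.pi, size=D)  # θⱼ ~ Uniform[-π, π]

def z(x):
    # FPE of a normalized scalar x: z(x) = exp(i·θ·β·x)
    return np.exp(1j * theta * beta * x)

for d in (0.0, 0.5, 1.0, 2.0):
    empirical = np.real(np.vdot(z(0.0), z(d))) / D  # ⟨z(0), z(d)⟩ / D
    predicted = np.sinc(beta * d)                   # sin(πβd)/(πβd)
    print(f"d={d:.1f}: empirical {empirical:+.3f}  predicted {predicted:+.3f}")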
Source code in holovec/encoders/scalar.py
def __init__(
    self,
    model: VSAModel,
    min_val: float,
    max_val: float,
    bandwidth: float = 1.0,
    seed: int | None = None,
    phase_dist: str = "uniform",
    mixture_bandwidths: list[float] | None = None,
    mixture_weights: list[float] | None = None,
):
    """
    Initialize FractionalPowerEncoder.

    Parameters
    ----------
    model : VSAModel
        VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for
        exact fractional powers. HRR (real-valued) uses cosine projection.
    min_val : float
        Minimum value of encoding range. Values below this will be clipped.
    max_val : float
        Maximum value of encoding range. Values above this will be clipped.
    bandwidth : float, optional
        Bandwidth parameter β controlling kernel width (default: 1.0).

        **Mathematical Role:**
        - Encoding: z(x) = φ^(β·x_normalized)
        - Kernel: K(x₁, x₂) ≈ sinc(β·π·|x₁ - x₂|) for uniform phase distribution
        - Smaller β → wider kernel → more generalization
        - Larger β → narrower kernel → more discrimination

        **Typical Values:**
        - β = 0.01: Wide kernel, high generalization (classification)
        - β = 1.0: Medium kernel (default)
        - β = 10.0: Narrow kernel, low generalization (regression)

    seed : int or None, optional
        Random seed for generating base phasor (for reproducibility).
        Different seeds produce different random frequency vectors θ.
    phase_dist : str, optional
        Distribution for sampling frequency vector θ (default: 'uniform').

        **Available Distributions:**
        - 'uniform': θⱼ ~ Uniform[-π, π] → sinc kernel (default)
        - 'gaussian': θⱼ ~ N(0, 1) → Gaussian kernel approximation
        - 'laplace': θⱼ ~ Laplace(0, 1) → Cauchy kernel 1/(1 + d²), long-range
        - 'cauchy': θⱼ ~ Cauchy(0, 1) → Laplacian kernel exp(-|d|), robust
        - 'student': θⱼ ~ Student-t(df=3) → Moderate tails, robust

        Different distributions induce different similarity kernels,
        affecting generalization properties.

    mixture_bandwidths : List[float] or None, optional
        List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding.

        **Mixture Encoding:**
        Instead of single bandwidth β, use weighted combination:
            z_mix(x) = Σₖ αₖ · φ^(βₖ·x)

        where αₖ are mixture_weights. This creates multi-scale representation
        combining coarse (small β) and fine (large β) kernels.

        **Example:**
        mixture_bandwidths = [0.01, 0.1, 1.0, 10.0]  # 4 scales
        Creates encoding with both local and global similarity.

    mixture_weights : List[float] or None, optional
        Weights αₖ for each bandwidth in mixture (must sum to 1).

        If None and mixture_bandwidths is provided, uses uniform weights:
            αₖ = 1/K for all k

        Weights can be:
        1. Hand-crafted (domain knowledge)
        2. Learned via `learn_mixture_weights()` (ridge regression)
        3. Uniform (default)

    Raises
    ------
    ValueError
        If phase_dist not in valid set, or if mixture_weights/mixture_bandwidths
        have mismatched lengths.

    Notes
    -----
    **Mathematical Foundation:**

    Fractional Power Encoding maps scalar x to hypervector via:
        z(x) = φ^(β·x_normalized)

    where:
    - φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθ_D)] is base phasor (D dimensions)
    - θⱼ are random frequencies sampled from phase_dist
    - x_normalized ∈ [0, 1] is x mapped to unit interval
    - β is bandwidth parameter

    **Inner Product Kernel:**

    For uniform phase distribution θⱼ ~ Uniform[-π, π]:
        ⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)

    This sinc kernel has important properties:
    - Smooth interpolation between similar values
    - Exact at x₁ = x₂ (similarity = 1)
    - Decays with distance (oscillatory, with a decreasing envelope)
    - Zero-crossings at integer multiples of 1/β

    **Comparison to Random Fourier Features:**

    FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007)
    for kernel approximation:
        k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D

    where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θ_Dx), sin(θ_Dx)]

    For complex hypervectors, FPE uses complex exponentials instead:
        φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθ_Dx)]

    which provides more compact representation and supports exact
    fractional power operations in frequency domain.

    References
    ----------
    - Frady et al. (2021): "Computing on Functions Using Randomized
      Vector Representations" - Original FPE paper
    - Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
    - Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
    - Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"

    Examples
    --------
    >>> # Basic FPE for temperature encoding
    >>> model = VSA.create('FHRR', dim=10000)
    >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
    >>> temp_25 = encoder.encode(25.0)
    >>> temp_26 = encoder.encode(26.0)
    >>> similarity = model.similarity(temp_25, temp_26)  # ≈ 0.95

    >>> # Multi-scale mixture encoding
    >>> encoder_mix = FractionalPowerEncoder(
    ...     model, min_val=0, max_val=100,
    ...     mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
    ...     mixture_weights=[0.4, 0.3, 0.2, 0.1]  # Emphasize coarse scales
    ... )

    >>> # Alternative kernel via phase distribution
    >>> encoder_gauss = FractionalPowerEncoder(
    ...     model, min_val=0, max_val=100,
    ...     phase_dist='gaussian'  # Gaussian kernel instead of sinc
    ... )
    """
    super().__init__(model, min_val, max_val)

    self.bandwidth = bandwidth
    self.seed = seed

    # Distribution controls for frequencies (theta)
    self.phase_dist = (phase_dist or "uniform").lower()
    valid = {"uniform", "gaussian", "laplace", "cauchy", "student"}
    if self.phase_dist not in valid:
        raise ValueError(f"Unsupported phase_dist '{phase_dist}'. Choose from {sorted(valid)}.")

    # Mixture support (optional)
    self.mixture_bandwidths = mixture_bandwidths
    self.mixture_weights = mixture_weights
    if self.mixture_bandwidths is not None:
        if len(self.mixture_bandwidths) == 0:
            raise ValueError("mixture_bandwidths must be non-empty if provided")
        if self.mixture_weights is None:
            self.mixture_weights = [1.0 / len(self.mixture_bandwidths)] * len(self.mixture_bandwidths)
        if len(self.mixture_weights) != len(self.mixture_bandwidths):
            raise ValueError("mixture_weights must match mixture_bandwidths length")
        # Normalize weights
        s = sum(self.mixture_weights)
        if s <= 0:
            raise ValueError("mixture_weights must sum to positive value")
        self.mixture_weights = [w / s for w in self.mixture_weights]

    # Check complex vs real
    self.is_complex = self.model.space.space_name == "complex"

    # Base phases/frequencies θ_j
    # For uniform, we can derive from a random phasor; for others, sample numeric theta
    if self.phase_dist == "uniform":
        # Maintain backward compatibility using base phasor
        self.base_phasor = self._generate_base_phasor(seed)
        # Derive angles from the base phasor
        self.theta = self.backend.angle(self.base_phasor)
    else:
        # Numeric theta sampled in init; store as backend array
        self.theta = self._generate_theta_distribution(self.phase_dist, seed)
        # For complex path we do not need base_phasor; for real path, we’ll compute cos(theta * exponent)
        self.base_phasor = None

__repr__()

String representation.

Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"FractionalPowerEncoder("
        f"model={self.model.model_name}, "
        f"range=[{self.min_val}, {self.max_val}], "
        f"bandwidth={self.bandwidth}, "
        f"phase_dist={self.phase_dist}, "
        f"mixture={'yes' if self.mixture_bandwidths else 'no'}, "
        f"dimension={self.dimension})"
    )

decode(hypervector, resolution=1000, max_iterations=100, tolerance=1e-06)

Decode hypervector back to scalar value using two-stage optimization.

Parameters:

hypervector (Array, required)
    Hypervector to decode (typically a noisy/bundled encoding).

resolution (int, default: 1000)
    Number of grid points for the coarse search. Higher resolution improves
    the initial guess but increases cost.

max_iterations (int, default: 100)
    Maximum gradient descent iterations. Typical convergence: 20-50 iterations.

tolerance (float, default: 1e-6)
    Convergence tolerance for gradient descent. Stop when |Δx| < tolerance.

Returns:

float
    Decoded scalar value in [min_val, max_val].

Notes

Decoding Algorithm:

Uses two-stage optimization to find the value x maximizing similarity:
    x* = argmax_x ⟨encode(x), hypervector⟩

Stage 1: Coarse Grid Search (O(resolution · D))
  - Evaluate similarity at resolution uniformly-spaced points
  - Find x₀ with the highest similarity
  - Provides a good initialization for gradient descent

Stage 2: Gradient Descent (O(max_iterations · D))
  - Starting from x₀, perform gradient ascent:
      x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
  - Gradient computed via finite differences:
      ∇_x ≈ (sim(x + ε) - sim(x)) / ε
  - Step size η_t decays: η_t = η_0 · 0.95^t (prevents oscillation)
  - Clips updates to the [0, 1] normalized range

Why This Works:

For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
  - The similarity function is unimodal (single peak)
  - The peak occurs at x = x_true (the encoded value)
  - Gradient descent converges to the global maximum

However, for noisy hypervectors (e.g., bundled encodings):
  - Multiple local maxima may exist
  - The coarse search reduces the chance of converging to a spurious local maximum
  - Wider kernels (small β) → smoother objective → easier optimization

Approximation Quality:

Decoding accuracy depends on several factors:

  1. Dimension D: Higher D → more accurate encoding → better decoding
     - D = 1000: Moderate accuracy (similarity ≈ 0.85)
     - D = 10000: High accuracy (similarity ≈ 0.99)

  2. Signal-to-Noise Ratio: Clean encoding vs bundled/noisy
     - Clean: Near-perfect recovery (error < 1%)
     - Bundled (10 items): Good recovery (error ≈ 5-10%)
     - Bundled (100 items): Degraded (error ≈ 20-30%)

  3. Bandwidth β: Wider kernels → smoother similarity landscape
     - β = 0.01: Very smooth, easy to optimize
     - β = 10.0: Narrow kernel, may have local maxima

  4. Mixture Encoding: Multiple bandwidths complicate the landscape
     - May require a finer grid search (higher resolution)
     - May need more gradient descent iterations

Computational Cost:

Total operations: O(resolution · D + max_iterations · D)

Typical values:
  - resolution = 1000, max_iterations = 100, D = 10000
  - Total: ~1.1M evaluations
  - Runtime: ~0.1-1.0 seconds (CPU), ~0.01-0.1 seconds (GPU)

For real-time applications, reduce resolution or max_iterations:
  - resolution = 100 (coarser search)
  - max_iterations = 20 (early stopping)

Comparison to Other Decoders:

  • Codebook Lookup (LevelEncoder): O(K · D) for K levels.
    Faster but discrete, no interpolation.

  • Resonator Network (cleanup): O(iterations · M · D) for M items.
    Better for structured/compositional decoding.

  • FPE Gradient Descent: O(resolution · D + iterations · D).
    Best for continuous scalar recovery.

References
  • Frady et al. (2021): "Computing on Functions Using Randomized Vector Representations" - Section on FPE decoding
  • Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent methods and convergence analysis

Examples:

>>> # Basic decoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv = encoder.encode(25.0)
>>> decoded = encoder.decode(hv)
>>> print(f"Decoded: {decoded:.2f}")  # ≈ 25.00
>>> # Decoding noisy hypervector (bundled encoding)
>>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
>>> decoded_bundle = encoder.decode(hv_bundle)
>>> print(f"Decoded bundle: {decoded_bundle:.2f}")  # ≈ 25.5
>>> # Fast decoding (lower resolution/iterations)
>>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
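
To make the two-stage procedure concrete, here is a hedged, self-contained NumPy sketch of the same coarse-grid-plus-finite-difference scheme, operating directly on normalized values in [0, 1]; it does not call holovec, and the helper names (encode, sim) are illustrative only.

import numpy as np

D, beta = 10000, 1.0
rng = np.random.default_rng(0)
theta = rng.uniform(-np.pi, np.pi, size=D)

def encode(x):
    # FPE of a normalized scalar x ∈ [0, 1]
    return np.exp(1j * theta * beta * x)

def sim(a, b):
    # Normalized real inner product, the similarity being maximized
    return np.real(np.vdot(a, b)) / D

target = encode(0.42)  # stand-in for a (possibly noisy) hypervector

# Stage 1: coarse grid search over 1000 points
grid = np.linspace(0.0, 1.0, 1000)
x = grid[np.argmax([sim(encode(g), target) for g in grid])]

# Stage 2: finite-difference gradient ascent with decaying step size
step, eps = 0.01, 1e-4
for _ in range(100):
    grad = (sim(encode(x + eps), target) - sim(encode(x), target)) / eps
    x_new = min(1.0, max(0.0, x + step * grad))
    if abs(x_new - x) < 1e-6:
        break
    x, step = x_new, step * 0.95

print(f"decoded (normalized) x ≈ {x:.4f}")  # ≈ 0.42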
Source code in holovec/encoders/scalar.py
def decode(
    self,
    hypervector: Array,
    resolution: int = 1000,
    max_iterations: int = 100,
    tolerance: float = 1e-6
) -> float:
    """
    Decode hypervector back to scalar value using two-stage optimization.

    Parameters
    ----------
    hypervector : Array
        Hypervector to decode (typically a noisy/bundled encoding).
    resolution : int, optional
        Number of grid points for coarse search (default: 1000).
        Higher resolution improves initial guess but increases cost.
    max_iterations : int, optional
        Maximum gradient descent iterations (default: 100).
        Typical convergence: 20-50 iterations.
    tolerance : float, optional
        Convergence tolerance for gradient descent (default: 1e-6).
        Stop when |Δx| < tolerance.

    Returns
    -------
    float
        Decoded scalar value in [min_val, max_val].

    Notes
    -----
    **Decoding Algorithm:**

    Uses two-stage optimization to find value x maximizing similarity:
        x* = argmax_x ⟨encode(x), hypervector⟩

    **Stage 1: Coarse Grid Search** (O(resolution · D))
    - Evaluate similarity at `resolution` uniformly-spaced points
    - Find x₀ with highest similarity
    - Provides good initialization for gradient descent

    **Stage 2: Gradient Descent** (O(max_iterations · D))
    - Starting from x₀, perform gradient ascent:
        x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
    - Gradient computed via finite differences:
        ∇_x ≈ (sim(x + ε) - sim(x)) / ε
    - Step size η_t decays: η_t = η_0 · 0.95^t (prevents oscillation)
    - Clips updates to [0, 1] normalized range

    **Why This Works:**

    For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
    - Similarity function is unimodal (single peak)
    - Peak occurs at x = x_true (encoded value)
    - Gradient descent converges to global maximum

    However, for noisy hypervectors (e.g., bundled encodings):
    - Multiple local maxima may exist
    - Coarse search reduces the chance of converging to a spurious local maximum
    - Wider kernels (small β) → smoother objective → easier optimization

    **Approximation Quality:**

    Decoding accuracy depends on several factors:

    1. **Dimension D**: Higher D → more accurate encoding → better decoding
       - D = 1000: Moderate accuracy (similarity ≈ 0.85)
       - D = 10000: High accuracy (similarity ≈ 0.99)

    2. **Signal-to-Noise Ratio**: Clean encoding vs bundled/noisy
       - Clean: Near-perfect recovery (error < 1%)
       - Bundled (10 items): Good recovery (error ≈ 5-10%)
       - Bundled (100 items): Degraded (error ≈ 20-30%)

    3. **Bandwidth β**: Wider kernels → smoother similarity landscape
       - β = 0.01: Very smooth, easy to optimize
       - β = 10.0: Narrow kernel, may have local maxima

    4. **Mixture Encoding**: Multiple bandwidths complicate landscape
       - May require finer grid search (higher resolution)
       - May need more gradient descent iterations

    **Computational Cost:**

    Total operations: O(resolution · D + max_iterations · D)

    Typical values:
    - resolution = 1000, max_iterations = 100, D = 10000
    - Total: ~1.1M evaluations
    - Runtime: ~0.1-1.0 seconds (CPU), ~0.01-0.1 seconds (GPU)

    For real-time applications, reduce resolution or max_iterations:
    - resolution = 100 (coarser search)
    - max_iterations = 20 (early stopping)

    **Comparison to Other Decoders:**

    - **Codebook Lookup** (LevelEncoder): O(K · D) for K levels
      Faster but discrete, no interpolation

    - **Resonator Network** (cleanup): O(iterations · M · D) for M items
      Better for structured/compositional decoding

    - **FPE Gradient Descent**: O(resolution · D + iterations · D)
      Best for continuous scalar recovery

    References
    ----------
    - Frady et al. (2021): "Computing on Functions Using Randomized
      Vector Representations" - Section on FPE decoding
    - Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent
      methods and convergence analysis

    Examples
    --------
    >>> # Basic decoding
    >>> model = VSA.create('FHRR', dim=10000)
    >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
    >>> hv = encoder.encode(25.0)
    >>> decoded = encoder.decode(hv)
    >>> print(f"Decoded: {decoded:.2f}")  # ≈ 25.00

    >>> # Decoding noisy hypervector (bundled encoding)
    >>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
    >>> decoded_bundle = encoder.decode(hv_bundle)
    >>> print(f"Decoded bundle: {decoded_bundle:.2f}")  # ≈ 25.5

    >>> # Fast decoding (lower resolution/iterations)
    >>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
    """
    # Coarse search: evaluate on grid
    normalized_grid = self.backend.linspace(0, 1, resolution)

    best_similarity = -float('inf')
    best_normalized = 0.5  # Start in middle

    for norm_val_np in self.backend.to_numpy(normalized_grid):
        norm_val = float(norm_val_np)
        encoded = self.encode(self.denormalize(norm_val))
        similarity = float(
            self.backend.to_numpy(
                self.model.similarity(encoded, hypervector)
            )
        )

        if similarity > best_similarity:
            best_similarity = similarity
            best_normalized = norm_val

    # Fine search: gradient descent around best coarse value
    # For simplicity, use finite differences for gradient
    current = best_normalized
    step_size = 0.01

    for _ in range(max_iterations):
        # Evaluate at current position
        encoded_curr = self.encode(self.denormalize(current))
        sim_curr = float(
            self.backend.to_numpy(
                self.model.similarity(encoded_curr, hypervector)
            )
        )

        # Evaluate at current + epsilon
        epsilon = 1e-4
        encoded_plus = self.encode(self.denormalize(current + epsilon))
        sim_plus = float(
            self.backend.to_numpy(
                self.model.similarity(encoded_plus, hypervector)
            )
        )

        # Compute gradient
        gradient = (sim_plus - sim_curr) / epsilon

        # Update (gradient ascent)
        new_current = current + step_size * gradient

        # Clip to [0, 1]
        new_current = max(0.0, min(1.0, new_current))

        # Check convergence
        if abs(new_current - current) < tolerance:
            break

        current = new_current
        step_size *= 0.95  # Decay step size

    # Denormalize and return
    return self.denormalize(current)

encode(value)

Encode scalar value to hypervector using fractional power.

Parameters:

value (float, required)
    Scalar value to encode. Will be clipped to [min_val, max_val].

Returns:

Array
    Encoded hypervector of shape (dimension,) in backend format.

Notes

Single Bandwidth Encoding:

For a single bandwidth β, implements:
    z(x) = φ^(β·x_normalized)

where:
  - x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
  - φ = [e^(iθ₁), ..., e^(iθ_D)] is the base phasor with random frequencies θⱼ
  - The result is normalized according to the model's space

Element-wise computation:
    z_j(x) = e^(i·θⱼ·β·x_normalized)  (complex models)
    z_j(x) = cos(θⱼ·β·x_normalized)   (real models)

Mixture Encoding:

When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses the weighted sum:
    z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized)

where αₖ are mixture_weights (default: uniform αₖ = 1/K).

Advantages of Mixture Encoding:

  1. Multi-Scale Representation: Combines coarse (small β) and fine (large β)
     similarity kernels in a single hypervector
  2. Improved Generalization: Coarse scales provide robustness,
     fine scales provide discrimination
  3. Learned Weights: Weights αₖ can be learned via learn_mixture_weights()
     to optimize for a specific task
  4. Kernel Combination: The mixture is equivalent to combining multiple
     kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)

Computational Complexity:

  • Single bandwidth: O(D) operations (element-wise exponential)
  • Mixture with K bandwidths: O(K·D) operations
  • Backend operations (exp, multiply) are vectorized/GPU-accelerated

Normalization:

Output is normalized using the model's normalization scheme:
  - FHRR/HRR: L2 normalization (unit norm)
  - MAP: Element-wise normalization
  - BSC/BSDC: No normalization (binary)

This ensures hypervectors are in a valid space for subsequent
binding/bundling operations.

Examples:

>>> # Basic encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv_25 = encoder.encode(25.0)  # Encode temperature 25°C
>>> hv_26 = encoder.encode(26.0)
>>> similarity = model.similarity(hv_25, hv_26)
>>> print(f"Similarity: {similarity:.3f}")  # ≈ 0.950 (close values)
>>> # Mixture encoding for multi-scale representation
>>> encoder_mix = FractionalPowerEncoder(
...     model, min_val=0, max_val=100,
...     mixture_bandwidths=[0.01, 1.0, 100.0]
... )
>>> hv_mix = encoder_mix.encode(25.0)  # Combines 3 scales
>>> # Effect of bandwidth on similarity
>>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
>>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
>>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
>>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
>>> # sim_wide > sim_narrow (wider kernel → more generalization)
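
The kernel-combination point (K_mix(d) = Σₖ αₖ·K_βₖ(d)) can be seen directly in a small NumPy sketch, using the sinc kernel of the uniform phase distribution (np.sinc(x) = sin(πx)/(πx)); the bandwidths and weights below are illustrative:

import numpy as np

betas = np.array([0.1, 1.0, 10.0])   # coarse, medium, fine scales
alphas = np.ones_like(betas) / 3.0   # uniform mixture weights
d = np.linspace(0.0, 5.0, 6)         # distances between normalized scalars

# K_mix(d) = Σₖ αₖ · sinc(βₖ·d), one row per bandwidth, summed over rows
K_mix = (alphas[:, None] * np.sinc(betas[:, None] * d[None, :])).sum(axis=0)
print(np.round(K_mix, 3))  # broad tail from β=0.1, sharp peak from β=10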
Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
    """
    Encode scalar value to hypervector using fractional power.

    Parameters
    ----------
    value : float
        Scalar value to encode. Will be clipped to [min_val, max_val].

    Returns
    -------
    Array
        Encoded hypervector of shape (dimension,) in backend format.

    Notes
    -----
    **Single Bandwidth Encoding:**

    For single bandwidth β, implements:
        z(x) = φ^(β·x_normalized)

    where:
    - x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
    - φ = [e^(iθ₁), ..., e^(iθ_D)] is the base phasor with random frequencies θⱼ
    - Result is normalized according to model's space

    Element-wise computation:
        z_j(x) = e^(i·θⱼ·β·x_normalized)  (complex models)
        z_j(x) = cos(θⱼ·β·x_normalized)   (real models)

    **Mixture Encoding:**

    When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses weighted sum:
        z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized)

    where αₖ are mixture_weights (default: uniform αₖ = 1/K).

    **Advantages of Mixture Encoding:**

    1. **Multi-Scale Representation**: Combines coarse (small β) and
       fine (large β) similarity kernels in single hypervector

    2. **Improved Generalization**: Coarse scales provide robustness,
       fine scales provide discrimination

    3. **Learned Weights**: Weights αₖ can be learned via
       `learn_mixture_weights()` to optimize for specific task

    4. **Kernel Combination**: Mixture is equivalent to combining
       multiple kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)

    **Computational Complexity:**

    - Single bandwidth: O(D) operations (element-wise exponential)
    - Mixture with K bandwidths: O(K·D) operations
    - Backend operations (exp, multiply) are vectorized/GPU-accelerated

    **Normalization:**

    Output is normalized using model's normalization scheme:
    - FHRR/HRR: L2 normalization (unit norm)
    - MAP: Element-wise normalization
    - BSC/BSDC: No normalization (binary)

    This ensures hypervectors are in valid space for subsequent
    binding/bundling operations.

    Examples
    --------
    >>> # Basic encoding
    >>> model = VSA.create('FHRR', dim=10000)
    >>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
    >>> hv_25 = encoder.encode(25.0)  # Encode temperature 25°C
    >>> hv_26 = encoder.encode(26.0)
    >>> similarity = model.similarity(hv_25, hv_26)
    >>> print(f"Similarity: {similarity:.3f}")  # ≈ 0.950 (close values)

    >>> # Mixture encoding for multi-scale representation
    >>> encoder_mix = FractionalPowerEncoder(
    ...     model, min_val=0, max_val=100,
    ...     mixture_bandwidths=[0.01, 1.0, 100.0]
    ... )
    >>> hv_mix = encoder_mix.encode(25.0)  # Combines 3 scales

    >>> # Effect of bandwidth on similarity
    >>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
    >>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
    >>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
    >>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
    >>> # sim_wide > sim_narrow (wider kernel → more generalization)
    """
    # Normalize value to [0, 1]
    normalized = self.normalize(value)

    # Handle mixture: list of beta_k and weights alpha_k
    betas: list[float]
    alphas: list[float]
    if self.mixture_bandwidths is not None:
        betas = list(self.mixture_bandwidths)
        alphas = list(self.mixture_weights or [])
    else:
        betas = [self.bandwidth]
        alphas = [1.0]

    parts = []
    for alpha, beta in zip(alphas, betas):
        exponent = beta * normalized
        if self.is_complex:
            # Complex: encode as exp(i * theta * exponent)
            theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
            phase = self.backend.multiply_scalar(theta, exponent)
            phasor = self.backend.exp(1j * phase)
            parts.append(self.backend.multiply_scalar(phasor, alpha))
        else:
            # Real: use cosine features directly: cos(theta * exponent)
            theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
            phase = self.backend.multiply_scalar(theta, exponent)
            # cos(phase) = Re(exp(i*phase))
            phasor = self.backend.real(self.backend.exp(1j * phase))
            parts.append(self.backend.multiply_scalar(phasor, alpha))

    if len(parts) == 1:
        encoded = parts[0]
    else:
        encoded = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)

    # Normalize output according to space
    return self.model.normalize(encoded)

learn_mixture_weights(values, labels, reg=0.001)

Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple ridge-style objective that aligns encoded mixtures to per-class prototypes.

Approach:
  - Build class prototypes p_c as the mean of current encodings (using current weights)
  - For each sample i, compute per-band encodings E_i = [e_{i1}, ..., e_{iK}] (shape d×K)
  - Solve (Σ E_iᵀE_i + reg·I) α = Σ E_iᵀ p_{y_i}
  - Project α onto the simplex (nonnegative, sum = 1)

Args:
  - values: list of scalar inputs
  - labels: list of integer class labels (same length as values)
  - reg: L2 regularization strength (default 1e-3)

Returns:
  Learned mixture weights (a list of floats summing to 1)

Notes:
  - Requires mixture_bandwidths to be set (K >= 2)
  - Uses NumPy for solving the normal equations; the backend remains unchanged
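
A minimal usage sketch (hedged: it assumes holovec is importable and that FractionalPowerEncoder lives at the path shown on this page; the toy values and labels are illustrative):

from holovec import VSA
from holovec.encoders.scalar import FractionalPowerEncoder

model = VSA.create('FHRR', dim=2048, seed=0)
enc = FractionalPowerEncoder(model, min_val=0.0, max_val=100.0,
                             mixture_bandwidths=[0.1, 1.0, 10.0])

# Two well-separated classes of scalars
values = [5.0, 7.0, 6.0, 90.0, 92.0, 95.0]
labels = [0, 0, 0, 1, 1, 1]

weights = enc.learn_mixture_weights(values, labels, reg=1e-3)
print(weights)  # nonnegative, sums to 1; also stored as enc.mixture_weights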

Source code in holovec/encoders/scalar.py
def learn_mixture_weights(
    self,
    values: list[float],
    labels: list[int],
    reg: float = 1e-3,
) -> list[float]:
    """
    Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple
    ridge-style objective that aligns encoded mixtures to per-class prototypes.

    Approach:
        - Build class prototypes p_c as the mean of current encodings (using current weights)
        - For each sample i, compute per-band encodings E_i = [e_{i1},...,e_{iK}] (shape d×K)
        - Solve (Σ E_i^T E_i + reg I) α = Σ E_i^T p_{y_i}
        - Project α onto simplex (nonnegative, sum=1)

    Args:
        values: list of scalar inputs
        labels: list of integer class labels (same length as values)
        reg: L2 regularization strength (default 1e-3)

    Returns:
        Learned mixture weights (list of floats summing to 1)

    Notes:
        - Requires mixture_bandwidths to be set (K>=2)
        - Uses numpy for solving normal equations; backend remains unchanged
    """
    import numpy as _np

    if self.mixture_bandwidths is None or len(self.mixture_bandwidths) < 2:
        raise ValueError("learn_mixture_weights requires mixture_bandwidths with K >= 2")

    # Prepare classes and group samples
    values = list(values)
    labels = list(labels)
    if len(values) != len(labels):
        raise ValueError("values and labels must have same length")

    classes = sorted(set(labels))
    K = len(self.mixture_bandwidths)
    d = self.dimension

    # Build current encodings to compute class prototypes (using current mixture weights)
    encodings = [self.encode(v) for v in values]
    # Convert to numpy arrays for prototype computation
    enc_np = [_np.array(self.model.backend.to_numpy(e)) for e in encodings]
    # Class prototypes: mean of encodings per class (vector length d)
    prototypes = {}
    for c in classes:
        idxs = [i for i, y in enumerate(labels) if y == c]
        if not idxs:
            continue
        prototypes[c] = _np.mean(_np.stack([enc_np[i] for i in idxs], axis=0), axis=0)

    # Helper to compute per-band encodings matrix E_i (d×K) for a value
    def _per_band_matrix(val: float) -> _np.ndarray:
        norm = self.normalize(val)
        cols = []
        for beta in self.mixture_bandwidths:
            exponent = beta * norm
            theta = self.theta if self.theta is not None else self.model.backend.angle(self.base_phasor)
            phase = self.model.backend.multiply_scalar(theta, exponent)
            if self.is_complex:
                ph = self.model.backend.exp(1j * phase)
                col = self.model.backend.to_numpy(ph)
            else:
                col = self.model.backend.to_numpy(self.model.backend.real(self.model.backend.exp(1j * phase)))
            cols.append(_np.array(col))
        # Stack columns to d×K
        return _np.stack(cols, axis=1)

    # Accumulate normal equations
    A = _np.zeros((K, K), dtype=_np.float64)
    b = _np.zeros((K,), dtype=_np.float64)
    for v, y in zip(values, labels):
        E = _per_band_matrix(v)   # d×K
        p = prototypes[y]         # d
        # E^T E and E^T p
        A += E.T @ E
        b += E.T @ p

    # Regularization
    A += reg * _np.eye(K, dtype=_np.float64)
    # Solve
    try:
        alpha = _np.linalg.solve(A, b)
    except _np.linalg.LinAlgError:
        alpha = _np.linalg.lstsq(A, b, rcond=None)[0]

    # Project to simplex (≥0, sum=1)
    alpha = _np.maximum(alpha, 0.0)
    s = float(_np.sum(alpha))
    if s <= 0:
        alpha = _np.ones_like(alpha) / len(alpha)
    else:
        alpha = alpha / s

    # Update in encoder
    self.mixture_weights = [float(a) for a in alpha.tolist()]
    return self.mixture_weights
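
A minimal usage sketch for learn_mixture_weights. It assumes a FractionalPowerEncoder (the reversible scalar encoder referenced elsewhere in this module) constructed with a mixture_bandwidths argument; the exact constructor signature is an assumption here, so verify the parameter names against the FractionalPowerEncoder reference.

# Hedged sketch: FractionalPowerEncoder's constructor arguments, including
# `mixture_bandwidths`, are assumed -- check its documentation for the
# real signature.
from holovec import VSA
from holovec.encoders.scalar import FractionalPowerEncoder

model = VSA.create('FHRR', dim=10000, seed=0)
encoder = FractionalPowerEncoder(
    model, min_val=0.0, max_val=1.0,
    mixture_bandwidths=[0.5, 1.0, 2.0],  # K = 3 bands (assumed parameter name)
)

# Toy two-class data: class 0 clusters near 0.2, class 1 near 0.8
values = [0.18, 0.22, 0.25, 0.75, 0.80, 0.82]
labels = [0, 0, 0, 1, 1, 1]

alphas = encoder.learn_mixture_weights(values, labels, reg=1e-3)
# The learned weights are nonnegative, sum to 1, and are stored back
# on the encoder as mixture_weights
assert abs(sum(alphas) - 1.0) < 1e-9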

holovec.encoders.scalar.ThermometerEncoder

Bases: ScalarEncoder

Thermometer encoding for scalar values.

Divides value range into N bins and encodes a value as the bundle of all bins it exceeds. Creates monotonic similarity profile.

Simpler and more robust than FPE, but with coarser granularity. Works with all VSA models.

References: Kanerva (2009): "Hyperdimensional Computing"
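
A short usage sketch built from the signatures documented below. The asserted similarity ordering follows from the overlap of activated bins (nearby values share most bins); exact values depend on the model and dimension.

from holovec import VSA
from holovec.encoders.scalar import ThermometerEncoder

model = VSA.create('MAP', dim=10000, seed=0)
encoder = ThermometerEncoder(model, min_val=0.0, max_val=100.0, n_bins=100, seed=42)

a = encoder.encode(20.0)
b = encoder.encode(25.0)   # shares most activated bins with a
c = encoder.encode(90.0)   # shares far fewer, proportionally

# Monotonic similarity profile: nearby values remain more similar
sim_near = float(model.similarity(a, b))
sim_far = float(model.similarity(a, c))
assert sim_near > sim_far  # holds with high probability at dim=10000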

Source code in holovec/encoders/scalar.py
class ThermometerEncoder(ScalarEncoder):
    """
    Thermometer encoding for scalar values.

    Divides value range into N bins and encodes a value as the bundle
    of all bins it exceeds. Creates monotonic similarity profile.

    Simpler and more robust than FPE, but with coarser granularity.
    Works with all VSA models.

    References:
        Kanerva (2009): "Hyperdimensional Computing"
    """

    def __init__(
        self,
        model: VSAModel,
        min_val: float,
        max_val: float,
        n_bins: int = 100,
        seed: int | None = None
    ):
        """
        Initialize ThermometerEncoder.

        Args:
            model: VSA model (any)
            min_val: Minimum value of encoding range
            max_val: Maximum value of encoding range
            n_bins: Number of bins to divide range into (default 100)
            seed: Random seed for generating bin vectors

        Raises:
            ValueError: If n_bins < 2
        """
        super().__init__(model, min_val, max_val)

        if n_bins < 2:
            raise ValueError(f"n_bins must be >= 2, got {n_bins}")

        self.n_bins = n_bins
        self.seed = seed

        # Generate random vectors for each bin
        self.bin_vectors = [
            model.random(seed=seed + i if seed is not None else None)
            for i in range(n_bins)
        ]

        # Compute bin edges
        self.bin_width = self.range / n_bins

    def encode(self, value: float) -> Array:
        """
        Encode scalar as bundle of all bins it exceeds.

        Args:
            value: Scalar value to encode

        Returns:
            Encoded hypervector (bundle of activated bins)
        """
        # Normalize value
        normalized = self.normalize(value)

        # Determine which bin the value falls into
        bin_index = int(normalized * self.n_bins)
        bin_index = min(bin_index, self.n_bins - 1)  # Handle edge case

        # Bundle all bins from 0 to bin_index (inclusive)
        if bin_index == 0:
            return self.bin_vectors[0]

        activated_bins = self.bin_vectors[:bin_index + 1]
        return self.model.bundle(activated_bins)

    def decode(self, hypervector: Array) -> float:
        """
        Decode is not implemented for ThermometerEncoder.

        Thermometer encoding is not easily reversible without
        storing additional information.

        Raises:
            NotImplementedError: Always raises
        """
        raise NotImplementedError(
            "ThermometerEncoder does not support decoding. "
            "Use FractionalPowerEncoder if decoding is required."
        )

    @property
    def is_reversible(self) -> bool:
        """Thermometer encoding is not reversible."""
        return False

    @property
    def compatible_models(self) -> list[str]:
        """Works with all VSA models."""
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"ThermometerEncoder("
            f"model={self.model.model_name}, "
            f"range=[{self.min_val}, {self.max_val}], "
            f"n_bins={self.n_bins}, "
            f"dimension={self.dimension})"
        )

compatible_models property

Works with all VSA models.

is_reversible property

Thermometer encoding is not reversible.

__init__(model, min_val, max_val, n_bins=100, seed=None)

Initialize ThermometerEncoder.

Args:
- model: VSA model (any)
- min_val: Minimum value of encoding range
- max_val: Maximum value of encoding range
- n_bins: Number of bins to divide range into (default 100)
- seed: Random seed for generating bin vectors

Raises: ValueError: If n_bins < 2

Source code in holovec/encoders/scalar.py
def __init__(
    self,
    model: VSAModel,
    min_val: float,
    max_val: float,
    n_bins: int = 100,
    seed: int | None = None
):
    """
    Initialize ThermometerEncoder.

    Args:
        model: VSA model (any)
        min_val: Minimum value of encoding range
        max_val: Maximum value of encoding range
        n_bins: Number of bins to divide range into (default 100)
        seed: Random seed for generating bin vectors

    Raises:
        ValueError: If n_bins < 2
    """
    super().__init__(model, min_val, max_val)

    if n_bins < 2:
        raise ValueError(f"n_bins must be >= 2, got {n_bins}")

    self.n_bins = n_bins
    self.seed = seed

    # Generate random vectors for each bin
    self.bin_vectors = [
        model.random(seed=seed + i if seed is not None else None)
        for i in range(n_bins)
    ]

    # Compute bin edges
    self.bin_width = self.range / n_bins

__repr__()

String representation.

Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"ThermometerEncoder("
        f"model={self.model.model_name}, "
        f"range=[{self.min_val}, {self.max_val}], "
        f"n_bins={self.n_bins}, "
        f"dimension={self.dimension})"
    )

decode(hypervector)

Decode is not implemented for ThermometerEncoder.

Thermometer encoding is not easily reversible without storing additional information.

Raises: NotImplementedError: Always raises

Source code in holovec/encoders/scalar.py
def decode(self, hypervector: Array) -> float:
    """
    Decode is not implemented for ThermometerEncoder.

    Thermometer encoding is not easily reversible without
    storing additional information.

    Raises:
        NotImplementedError: Always raises
    """
    raise NotImplementedError(
        "ThermometerEncoder does not support decoding. "
        "Use FractionalPowerEncoder if decoding is required."
    )

encode(value)

Encode scalar as bundle of all bins it exceeds.

Args: value: Scalar value to encode

Returns: Encoded hypervector (bundle of activated bins)

Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
    """
    Encode scalar as bundle of all bins it exceeds.

    Args:
        value: Scalar value to encode

    Returns:
        Encoded hypervector (bundle of activated bins)
    """
    # Normalize value
    normalized = self.normalize(value)

    # Determine which bin the value falls into
    bin_index = int(normalized * self.n_bins)
    bin_index = min(bin_index, self.n_bins - 1)  # Handle edge case

    # Bundle all bins from 0 to bin_index (inclusive)
    if bin_index == 0:
        return self.bin_vectors[0]

    activated_bins = self.bin_vectors[:bin_index + 1]
    return self.model.bundle(activated_bins)

holovec.encoders.scalar.LevelEncoder

Bases: ScalarEncoder

Level (codebook) encoding for discrete scalar values.

Maps discrete levels to random orthogonal vectors via lookup table. Fast (O(1) encode/decode) and exact for discrete values.

Best used when you have a small number of discrete values rather than continuous range.

Example:
>>> # Encode weekdays (7 discrete values)
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7)
>>> monday = encoder.encode(0)  # Exact encoding
>>> friday = encoder.encode(4)
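
Because level encoding is reversible, decode recovers the nearest level. A round-trip sketch using the signatures documented below (equality holds up to floating-point rounding in denormalization):

from holovec import VSA
from holovec.encoders.scalar import LevelEncoder

model = VSA.create('FHRR', dim=10000, seed=0)
encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7, seed=1)

hv = encoder.encode(4)                        # Friday
assert abs(encoder.decode(hv) - 4.0) < 1e-9   # snaps back to the level value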

Source code in holovec/encoders/scalar.py
class LevelEncoder(ScalarEncoder):
    """
    Level (codebook) encoding for discrete scalar values.

    Maps discrete levels to random orthogonal vectors via lookup table.
    Fast (O(1) encode/decode) and exact for discrete values.

    Best used when you have a small number of discrete values rather
    than continuous range.

    Example:
        >>> # Encode weekdays (7 discrete values)
        >>> model = VSA.create('FHRR', dim=10000)
        >>> encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7)
        >>> monday = encoder.encode(0)  # Exact encoding
        >>> friday = encoder.encode(4)
    """

    def __init__(
        self,
        model: VSAModel,
        min_val: float,
        max_val: float,
        n_levels: int,
        seed: int | None = None
    ):
        """
        Initialize LevelEncoder.

        Args:
            model: VSA model (any)
            min_val: Minimum value (corresponds to level 0)
            max_val: Maximum value (corresponds to level n_levels-1)
            n_levels: Number of discrete levels
            seed: Random seed for generating level vectors

        Raises:
            ValueError: If n_levels < 2
        """
        super().__init__(model, min_val, max_val)

        if n_levels < 2:
            raise ValueError(f"n_levels must be >= 2, got {n_levels}")

        self.n_levels = n_levels
        self.seed = seed

        # Generate random vector for each level
        self.level_vectors = [
            model.random(seed=seed + i if seed is not None else None)
            for i in range(n_levels)
        ]

        # Compute level width
        self.level_width = self.range / (n_levels - 1)

    def encode(self, value: float) -> Array:
        """
        Encode scalar to nearest level's hypervector.

        Args:
            value: Scalar value to encode

        Returns:
            Hypervector corresponding to nearest level
        """
        # Normalize to [0, 1]
        normalized = self.normalize(value)

        # Map to level index (round to nearest)
        level_index = int(round(normalized * (self.n_levels - 1)))
        level_index = max(0, min(level_index, self.n_levels - 1))

        return self.level_vectors[level_index]

    def decode(self, hypervector: Array) -> float:
        """
        Decode hypervector to nearest level value.

        Args:
            hypervector: Hypervector to decode

        Returns:
            Decoded scalar value (will be one of the discrete levels)
        """
        # Find most similar level vector
        best_similarity = -float('inf')
        best_level = 0

        for level_idx, level_vec in enumerate(self.level_vectors):
            similarity = float(
                self.backend.to_numpy(
                    self.model.similarity(hypervector, level_vec)
                )
            )
            if similarity > best_similarity:
                best_similarity = similarity
                best_level = level_idx

        # Convert level index back to value
        normalized = best_level / (self.n_levels - 1)
        return self.denormalize(normalized)

    @property
    def is_reversible(self) -> bool:
        """Level encoding is reversible (to nearest level)."""
        return True

    @property
    def compatible_models(self) -> list[str]:
        """Works with all VSA models."""
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"LevelEncoder("
            f"model={self.model.model_name}, "
            f"range=[{self.min_val}, {self.max_val}], "
            f"n_levels={self.n_levels}, "
            f"dimension={self.dimension})"
        )

compatible_models property

Works with all VSA models.

is_reversible property

Level encoding is reversible (to nearest level).

__init__(model, min_val, max_val, n_levels, seed=None)

Initialize LevelEncoder.

Args:
- model: VSA model (any)
- min_val: Minimum value (corresponds to level 0)
- max_val: Maximum value (corresponds to level n_levels-1)
- n_levels: Number of discrete levels
- seed: Random seed for generating level vectors

Raises: ValueError: If n_levels < 2

Source code in holovec/encoders/scalar.py
def __init__(
    self,
    model: VSAModel,
    min_val: float,
    max_val: float,
    n_levels: int,
    seed: int | None = None
):
    """
    Initialize LevelEncoder.

    Args:
        model: VSA model (any)
        min_val: Minimum value (corresponds to level 0)
        max_val: Maximum value (corresponds to level n_levels-1)
        n_levels: Number of discrete levels
        seed: Random seed for generating level vectors

    Raises:
        ValueError: If n_levels < 2
    """
    super().__init__(model, min_val, max_val)

    if n_levels < 2:
        raise ValueError(f"n_levels must be >= 2, got {n_levels}")

    self.n_levels = n_levels
    self.seed = seed

    # Generate random vector for each level
    self.level_vectors = [
        model.random(seed=seed + i if seed is not None else None)
        for i in range(n_levels)
    ]

    # Compute level width
    self.level_width = self.range / (n_levels - 1)

__repr__()

String representation.

Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"LevelEncoder("
        f"model={self.model.model_name}, "
        f"range=[{self.min_val}, {self.max_val}], "
        f"n_levels={self.n_levels}, "
        f"dimension={self.dimension})"
    )

decode(hypervector)

Decode hypervector to nearest level value.

Args: hypervector: Hypervector to decode

Returns: Decoded scalar value (will be one of the discrete levels)

Source code in holovec/encoders/scalar.py
def decode(self, hypervector: Array) -> float:
    """
    Decode hypervector to nearest level value.

    Args:
        hypervector: Hypervector to decode

    Returns:
        Decoded scalar value (will be one of the discrete levels)
    """
    # Find most similar level vector
    best_similarity = -float('inf')
    best_level = 0

    for level_idx, level_vec in enumerate(self.level_vectors):
        similarity = float(
            self.backend.to_numpy(
                self.model.similarity(hypervector, level_vec)
            )
        )
        if similarity > best_similarity:
            best_similarity = similarity
            best_level = level_idx

    # Convert level index back to value
    normalized = best_level / (self.n_levels - 1)
    return self.denormalize(normalized)

encode(value)

Encode scalar to nearest level's hypervector.

Args: value: Scalar value to encode

Returns: Hypervector corresponding to nearest level

Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
    """
    Encode scalar to nearest level's hypervector.

    Args:
        value: Scalar value to encode

    Returns:
        Hypervector corresponding to nearest level
    """
    # Normalize to [0, 1]
    normalized = self.normalize(value)

    # Map to level index (round to nearest)
    level_index = int(round(normalized * (self.n_levels - 1)))
    level_index = max(0, min(level_index, self.n_levels - 1))

    return self.level_vectors[level_index]

Sequence Encoders

holovec.encoders.sequence.PositionBindingEncoder

Bases: SequenceEncoder

Position binding encoder for sequences using permutation-based positions.

Based on Plate (2003) "Holographic Reduced Representations" and Schlegel et al. (2021) "A comparison of vector symbolic architectures".

Encodes sequences by applying a position-specific permutation to each element and bundling the results:

encode([A, B, C]) = ρ⁰(A) + ρ¹(B) + ρ²(C)

where ρ is the permutation operation and ρⁱ represents i applications (positions are indexed from 0, matching the implementation below).

This encoding is:
- Order-sensitive: Different positions create different bindings
- Variable-length: Works with any sequence length
- Partial-match capable: Similar sequences have similar encodings

Attributes:
- codebook: Dictionary mapping symbols to hypervectors
- auto_generate: Whether to auto-generate vectors for unknown symbols
- seed_offset: Offset for generating consistent symbol vectors

Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = PositionBindingEncoder(model)
>>>
>>> # Encode a sequence of symbols
>>> seq = ['hello', 'world', '!']
>>> hv = encoder.encode(seq)
>>>
>>> # Similar sequences have high similarity
>>> seq2 = ['hello', 'world']
>>> hv2 = encoder.encode(seq2)
>>> model.similarity(hv, hv2)  # High (shared prefix)
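
A full encode/decode round trip using the signatures documented below. As decode's documentation notes, recovery is approximate, but with dim=10000 and a small codebook it typically succeeds:

from holovec import VSA
from holovec.encoders.sequence import PositionBindingEncoder

model = VSA.create('MAP', dim=10000, seed=0)
encoder = PositionBindingEncoder(model, seed=42)

hv = encoder.encode(['cat', 'sat', 'on', 'mat'])
decoded = encoder.decode(hv, max_positions=6, threshold=0.2)
print(decoded)  # typically ['cat', 'sat', 'on', 'mat'] (approximate)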

Source code in holovec/encoders/sequence.py
class PositionBindingEncoder(SequenceEncoder):
    """
    Position binding encoder for sequences using permutation-based positions.

    Based on Plate (2003) "Holographic Reduced Representations" and
    Schlegel et al. (2021) "A comparison of vector symbolic architectures".

    Encodes sequences by binding each element with a position-specific
    permutation of a base position vector:

        encode([A, B, C]) = bind(A, ρ¹) + bind(B, ρ²) + bind(C, ρ³)

    where ρ is the permutation operation and ρⁱ represents i applications.

    This encoding is:
    - Order-sensitive: Different positions create different bindings
    - Variable-length: Works with any sequence length
    - Partial-match capable: Similar sequences have similar encodings

    Attributes:
        codebook: Dictionary mapping symbols to hypervectors
        auto_generate: Whether to auto-generate vectors for unknown symbols
        seed_offset: Offset for generating consistent symbol vectors

    Example:
        >>> model = VSA.create('MAP', dim=10000)
        >>> encoder = PositionBindingEncoder(model)
        >>>
        >>> # Encode a sequence of symbols
        >>> seq = ['hello', 'world', '!']
        >>> hv = encoder.encode(seq)
        >>>
        >>> # Similar sequences have high similarity
        >>> seq2 = ['hello', 'world']
        >>> hv2 = encoder.encode(seq2)
        >>> model.similarity(hv, hv2)  # High (shared prefix)
    """

    def __init__(
        self,
        model: VSAModel,
        codebook: dict[str, Array] | None = None,
        max_length: int | None = None,
        auto_generate: bool = True,
        seed: int | None = None
    ):
        """
        Initialize position binding encoder.

        Args:
            model: VSA model instance
            codebook: Pre-defined symbol → hypervector mapping (optional)
            max_length: Maximum sequence length (None for unlimited)
            auto_generate: Auto-generate vectors for unknown symbols (default: True)
            seed: Random seed for generating symbol vectors

        Raises:
            ValueError: If model is not compatible
        """
        super().__init__(model, max_length)

        self.codebook = codebook if codebook is not None else {}
        self.auto_generate = auto_generate
        self.seed = seed
        self._next_symbol_seed = 0  # Counter for symbol generation

    def encode(self, sequence: list[str | int]) -> Array:
        """
        Encode sequence using position binding.

        Each element is permuted by its position index and all permuted
        elements are bundled:

            result = Σᵢ permute(elementᵢ, i)

        Args:
            sequence: List of symbols (strings or integers) to encode

        Returns:
            Hypervector representing the sequence

        Raises:
            ValueError: If sequence is empty
            ValueError: If sequence exceeds max_length
            ValueError: If symbol not in codebook and auto_generate=False

        Example:
            >>> encoder.encode(['cat', 'sat', 'on', 'mat'])
        """
        if not sequence:
            raise ValueError("Cannot encode empty sequence")

        if self.max_length is not None and len(sequence) > self.max_length:
            raise ValueError(
                f"Sequence length {len(sequence)} exceeds max_length {self.max_length}"
            )

        # Get or generate hypervectors for each symbol
        symbol_vectors = []
        for symbol in sequence:
            if symbol not in self.codebook:
                if not self.auto_generate:
                    raise ValueError(
                        f"Symbol '{symbol}' not in codebook and auto_generate=False"
                    )
                # Generate new vector for this symbol
                self.codebook[symbol] = self._generate_symbol_vector(symbol)

            symbol_vectors.append(self.codebook[symbol])

        # Bind each symbol with its position and bundle
        position_bound = []
        for i, symbol_vec in enumerate(symbol_vectors):
            # Position encoding: permute by position index
            # permute(vec, i) applies permutation i times
            position_vec = self.model.permute(symbol_vec, k=i)
            position_bound.append(position_vec)

        # Bundle all position-bound vectors
        sequence_hv = self.model.bundle(position_bound)

        return sequence_hv

    def decode(
        self,
        hypervector: Array,
        max_positions: int = 10,
        threshold: float = 0.3
    ) -> list[str]:
        """
        Decode sequence hypervector to recover symbols.

        Uses cleanup memory approach: for each position, unpermute and
        find most similar symbol in codebook.

        Args:
            hypervector: Sequence hypervector to decode
            max_positions: Maximum positions to try decoding (default: 10)
            threshold: Minimum similarity threshold for valid symbols (default: 0.3)

        Returns:
            List of decoded symbols (may be shorter than original)

        Raises:
            RuntimeError: If codebook is empty

        Note:
            Decoding is approximate and works best for sequences shorter
            than max_positions with high SNR.

        Example:
            >>> encoded = encoder.encode(['a', 'b', 'c'])
            >>> decoded = encoder.decode(encoded, max_positions=5)
            >>> decoded  # ['a', 'b', 'c'] (approximate)
        """
        if not self.codebook:
            raise RuntimeError("Cannot decode: codebook is empty")

        # Convert codebook to symbol → vector for faster lookup
        symbols = list(self.codebook.keys())
        vectors = [self.codebook[s] for s in symbols]

        decoded = []

        for pos in range(max_positions):
            # Unpermute by position to recover symbol at this position
            unpermuted = self.model.unpermute(hypervector, k=pos)

            # Find most similar symbol in codebook
            best_similarity = -float('inf')
            best_symbol = None

            for symbol, symbol_vec in zip(symbols, vectors):
                sim = float(self.model.similarity(unpermuted, symbol_vec))
                if sim > best_similarity:
                    best_similarity = sim
                    best_symbol = symbol

            # Only include if above threshold
            if best_similarity >= threshold:
                decoded.append(best_symbol)
            else:
                # No strong match - likely end of sequence
                break

        return decoded

    def _generate_symbol_vector(self, symbol: str | int) -> Array:
        """
        Generate a random hypervector for a new symbol.

        Uses consistent seeding based on symbol to ensure reproducibility.

        Args:
            symbol: Symbol to generate vector for

        Returns:
            Random hypervector for this symbol
        """
        # Create seed from base seed + symbol hash + counter
        if self.seed is not None:
            symbol_seed = self.seed + hash(symbol) % 10000 + self._next_symbol_seed
        else:
            symbol_seed = hash(symbol) % 100000 + self._next_symbol_seed

        self._next_symbol_seed += 1

        return self.model.random(seed=symbol_seed)

    def add_symbol(self, symbol: str | int, vector: Array | None = None):
        """
        Add a symbol to the codebook.

        Args:
            symbol: Symbol to add
            vector: Hypervector to associate (generated if None)

        Example:
            >>> # Pre-define a vector for a special symbol
            >>> special_vec = model.random(seed=42)
            >>> encoder.add_symbol('<START>', special_vec)
        """
        if vector is None:
            vector = self._generate_symbol_vector(symbol)
        self.codebook[symbol] = vector

    def get_codebook_size(self) -> int:
        """
        Get number of symbols in codebook.

        Returns:
            Number of symbols stored
        """
        return len(self.codebook)

    @property
    def is_reversible(self) -> bool:
        """
        PositionBindingEncoder supports approximate decoding.

        Returns:
            True (approximate decoding available)
        """
        return True

    @property
    def compatible_models(self) -> list[str]:
        """
        Works with all VSA models that support permutation.

        Returns:
            List of all model names
        """
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"PositionBindingEncoder("
            f"model={self.model.model_name}, "
            f"codebook_size={len(self.codebook)}, "
            f"max_length={self.max_length}, "
            f"auto_generate={self.auto_generate})"
        )

compatible_models property

Works with all VSA models that support permutation.

Returns: List of all model names

is_reversible property

PositionBindingEncoder supports approximate decoding.

Returns: True (approximate decoding available)

__init__(model, codebook=None, max_length=None, auto_generate=True, seed=None)

Initialize position binding encoder.

Args:
- model: VSA model instance
- codebook: Pre-defined symbol → hypervector mapping (optional)
- max_length: Maximum sequence length (None for unlimited)
- auto_generate: Auto-generate vectors for unknown symbols (default: True)
- seed: Random seed for generating symbol vectors

Raises: ValueError: If model is not compatible

Source code in holovec/encoders/sequence.py
def __init__(
    self,
    model: VSAModel,
    codebook: dict[str, Array] | None = None,
    max_length: int | None = None,
    auto_generate: bool = True,
    seed: int | None = None
):
    """
    Initialize position binding encoder.

    Args:
        model: VSA model instance
        codebook: Pre-defined symbol → hypervector mapping (optional)
        max_length: Maximum sequence length (None for unlimited)
        auto_generate: Auto-generate vectors for unknown symbols (default: True)
        seed: Random seed for generating symbol vectors

    Raises:
        ValueError: If model is not compatible
    """
    super().__init__(model, max_length)

    self.codebook = codebook if codebook is not None else {}
    self.auto_generate = auto_generate
    self.seed = seed
    self._next_symbol_seed = 0  # Counter for symbol generation

__repr__()

String representation.

Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"PositionBindingEncoder("
        f"model={self.model.model_name}, "
        f"codebook_size={len(self.codebook)}, "
        f"max_length={self.max_length}, "
        f"auto_generate={self.auto_generate})"
    )

add_symbol(symbol, vector=None)

Add a symbol to the codebook.

Args:
- symbol: Symbol to add
- vector: Hypervector to associate (generated if None)

Example:
>>> # Pre-define a vector for a special symbol
>>> special_vec = model.random(seed=42)
>>> encoder.add_symbol('<START>', special_vec)

Source code in holovec/encoders/sequence.py
def add_symbol(self, symbol: str | int, vector: Array | None = None):
    """
    Add a symbol to the codebook.

    Args:
        symbol: Symbol to add
        vector: Hypervector to associate (generated if None)

    Example:
        >>> # Pre-define a vector for a special symbol
        >>> special_vec = model.random(seed=42)
        >>> encoder.add_symbol('<START>', special_vec)
    """
    if vector is None:
        vector = self._generate_symbol_vector(symbol)
    self.codebook[symbol] = vector

decode(hypervector, max_positions=10, threshold=0.3)

Decode sequence hypervector to recover symbols.

Uses cleanup memory approach: for each position, unpermute and find most similar symbol in codebook.

Args:
- hypervector: Sequence hypervector to decode
- max_positions: Maximum positions to try decoding (default: 10)
- threshold: Minimum similarity threshold for valid symbols (default: 0.3)

Returns: List of decoded symbols (may be shorter than original)

Raises: RuntimeError: If codebook is empty

Note: Decoding is approximate and works best for sequences shorter than max_positions with high SNR.

Example:
>>> encoded = encoder.encode(['a', 'b', 'c'])
>>> decoded = encoder.decode(encoded, max_positions=5)
>>> decoded  # ['a', 'b', 'c'] (approximate)

Source code in holovec/encoders/sequence.py
def decode(
    self,
    hypervector: Array,
    max_positions: int = 10,
    threshold: float = 0.3
) -> list[str]:
    """
    Decode sequence hypervector to recover symbols.

    Uses cleanup memory approach: for each position, unpermute and
    find most similar symbol in codebook.

    Args:
        hypervector: Sequence hypervector to decode
        max_positions: Maximum positions to try decoding (default: 10)
        threshold: Minimum similarity threshold for valid symbols (default: 0.3)

    Returns:
        List of decoded symbols (may be shorter than original)

    Raises:
        RuntimeError: If codebook is empty

    Note:
        Decoding is approximate and works best for sequences shorter
        than max_positions with high SNR.

    Example:
        >>> encoded = encoder.encode(['a', 'b', 'c'])
        >>> decoded = encoder.decode(encoded, max_positions=5)
        >>> decoded  # ['a', 'b', 'c'] (approximate)
    """
    if not self.codebook:
        raise RuntimeError("Cannot decode: codebook is empty")

    # Convert codebook to symbol → vector for faster lookup
    symbols = list(self.codebook.keys())
    vectors = [self.codebook[s] for s in symbols]

    decoded = []

    for pos in range(max_positions):
        # Unpermute by position to recover symbol at this position
        unpermuted = self.model.unpermute(hypervector, k=pos)

        # Find most similar symbol in codebook
        best_similarity = -float('inf')
        best_symbol = None

        for symbol, symbol_vec in zip(symbols, vectors):
            sim = float(self.model.similarity(unpermuted, symbol_vec))
            if sim > best_similarity:
                best_similarity = sim
                best_symbol = symbol

        # Only include if above threshold
        if best_similarity >= threshold:
            decoded.append(best_symbol)
        else:
            # No strong match - likely end of sequence
            break

    return decoded

encode(sequence)

Encode sequence using position binding.

Each element is permuted by its position index and all permuted elements are bundled:

result = Σᵢ ρⁱ(elementᵢ) = Σᵢ permute(elementᵢ, i)

Args: sequence: List of symbols (strings or integers) to encode

Returns: Hypervector representing the sequence

Raises:
- ValueError: If sequence is empty
- ValueError: If sequence exceeds max_length
- ValueError: If symbol not in codebook and auto_generate=False

Example: >>> encoder.encode(['cat', 'sat', 'on', 'mat'])

Source code in holovec/encoders/sequence.py
def encode(self, sequence: list[str | int]) -> Array:
    """
    Encode sequence using position binding.

    Each element is permuted by its position index and all permuted
    elements are bundled:

        result = Σᵢ permute(elementᵢ, i)

    Args:
        sequence: List of symbols (strings or integers) to encode

    Returns:
        Hypervector representing the sequence

    Raises:
        ValueError: If sequence is empty
        ValueError: If sequence exceeds max_length
        ValueError: If symbol not in codebook and auto_generate=False

    Example:
        >>> encoder.encode(['cat', 'sat', 'on', 'mat'])
    """
    if not sequence:
        raise ValueError("Cannot encode empty sequence")

    if self.max_length is not None and len(sequence) > self.max_length:
        raise ValueError(
            f"Sequence length {len(sequence)} exceeds max_length {self.max_length}"
        )

    # Get or generate hypervectors for each symbol
    symbol_vectors = []
    for symbol in sequence:
        if symbol not in self.codebook:
            if not self.auto_generate:
                raise ValueError(
                    f"Symbol '{symbol}' not in codebook and auto_generate=False"
                )
            # Generate new vector for this symbol
            self.codebook[symbol] = self._generate_symbol_vector(symbol)

        symbol_vectors.append(self.codebook[symbol])

    # Bind each symbol with its position and bundle
    position_bound = []
    for i, symbol_vec in enumerate(symbol_vectors):
        # Position encoding: permute by position index
        # permute(vec, i) applies permutation i times
        position_vec = self.model.permute(symbol_vec, k=i)
        position_bound.append(position_vec)

    # Bundle all position-bound vectors
    sequence_hv = self.model.bundle(position_bound)

    return sequence_hv

get_codebook_size()

Get number of symbols in codebook.

Returns: Number of symbols stored

Source code in holovec/encoders/sequence.py
def get_codebook_size(self) -> int:
    """
    Get number of symbols in codebook.

    Returns:
        Number of symbols stored
    """
    return len(self.codebook)

holovec.encoders.sequence.NGramEncoder

Bases: SequenceEncoder

N-gram encoder for capturing local sequence patterns using sliding windows.

Based on Plate (2003), Rachkovskij (1996), and Kleyko et al. (2023) Section 3.3.4.

Encodes sequences by extracting n-grams (sliding windows of n consecutive symbols) and encoding each n-gram compositionally:

For sequence [A, B, C, D] with n=2, stride=1:
- Extract n-grams: [A,B], [B,C], [C,D]
- Encode each n-gram using position binding
- Combine via bundling or chaining

Two encoding modes:

1. Bundling mode (bag-of-ngrams): encode(seq) = bundle([encode_ngram([A,B]), encode_ngram([B,C]), ...])
   - Order-invariant across n-grams (but preserves order within each n-gram)
   - Good for classification (e.g., text categorization)
   - Similar to bag-of-words but with local context

2. Chaining mode (ordered n-grams): encode(seq) = Σᵢ ρⁱ(encode_ngram(ngramᵢ))
   - Order-sensitive across n-grams
   - Good for sequence matching
   - Enables partial decoding

Attributes:
- n: Size of n-grams (1=unigrams, 2=bigrams, 3=trigrams, etc.)
- stride: Step size between n-grams (1=overlapping, n=non-overlapping)
- mode: 'bundling' or 'chaining'
- ngram_encoder: Internal PositionBindingEncoder for individual n-grams

Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = NGramEncoder(model, n=2, stride=1, mode='bundling')
>>>
>>> # Encode text as bigrams
>>> seq = ['the', 'cat', 'sat', 'on', 'mat']
>>> hv = encoder.encode(seq)  # Bigrams: [the,cat], [cat,sat], [sat,on], [on,mat]
>>>
>>> # Similar text has high similarity
>>> seq2 = ['the', 'cat', 'sat', 'on', 'hat']
>>> hv2 = encoder.encode(seq2)  # Shares 3/4 bigrams
>>> model.similarity(hv, hv2)  # High similarity
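
A chaining-mode sketch showing the decodable variant, using only the signatures documented below. As with PositionBindingEncoder, recovery is approximate and the threshold may need tuning:

from holovec import VSA
from holovec.encoders.sequence import NGramEncoder

model = VSA.create('MAP', dim=10000, seed=0)
encoder = NGramEncoder(model, n=2, stride=1, mode='chaining', seed=42)

hv = encoder.encode(['A', 'B', 'C'])              # n-grams: [A,B], [B,C]
ngrams = encoder.decode(hv, max_ngrams=4, threshold=0.2)
print(ngrams)  # typically [['A', 'B'], ['B', 'C']] (approximate)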

Source code in holovec/encoders/sequence.py
class NGramEncoder(SequenceEncoder):
    """
    N-gram encoder for capturing local sequence patterns using sliding windows.

    Based on Plate (2003), Rachkovskij (1996), and Kleyko et al. (2023) Section 3.3.4.

    Encodes sequences by extracting n-grams (sliding windows of n consecutive symbols)
    and encoding each n-gram compositionally:

        For sequence [A, B, C, D] with n=2, stride=1:
        - Extract n-grams: [A,B], [B,C], [C,D]
        - Encode each n-gram using position binding
        - Combine via bundling or chaining

    Two encoding modes:

    1. **Bundling mode** (bag-of-ngrams):
       encode(seq) = bundle([encode_ngram([A,B]), encode_ngram([B,C]), ...])
       - Order-invariant across n-grams (but preserves within n-gram)
       - Good for classification (e.g., text categorization)
       - Similar to bag-of-words but with local context

    2. **Chaining mode** (ordered n-grams):
       encode(seq) = Σᵢ ρⁱ(encode_ngram(ngramᵢ))
       - Order-sensitive across n-grams
       - Good for sequence matching
       - Enables partial decoding

    Attributes:
        n: Size of n-grams (1=unigrams, 2=bigrams, 3=trigrams, etc.)
        stride: Step size between n-grams (1=overlapping, n=non-overlapping)
        mode: 'bundling' or 'chaining'
        ngram_encoder: Internal PositionBindingEncoder for individual n-grams

    Example:
        >>> model = VSA.create('MAP', dim=10000)
        >>> encoder = NGramEncoder(model, n=2, stride=1, mode='bundling')
        >>>
        >>> # Encode text as bigrams
        >>> seq = ['the', 'cat', 'sat', 'on', 'mat']
        >>> hv = encoder.encode(seq)  # Bigrams: [the,cat], [cat,sat], [sat,on], [on,mat]
        >>>
        >>> # Similar text has high similarity
        >>> seq2 = ['the', 'cat', 'sat', 'on', 'hat']
        >>> hv2 = encoder.encode(seq2)  # Shares 3/4 bigrams
        >>> model.similarity(hv, hv2)  # High similarity
    """

    def __init__(
        self,
        model: VSAModel,
        n: int = 2,
        stride: int = 1,
        mode: str = 'bundling',
        codebook: dict[str, Array] | None = None,
        auto_generate: bool = True,
        seed: int | None = None
    ):
        """
        Initialize n-gram encoder.

        Args:
            model: VSA model instance
            n: Size of n-grams (must be >= 1)
            stride: Step between n-grams (must be >= 1)
            mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
            codebook: Optional pre-defined symbol → hypervector mapping
            auto_generate: Auto-generate vectors for unknown symbols
            seed: Random seed for symbol vector generation

        Raises:
            ValueError: If n < 1, stride < 1, or mode is invalid
        """
        super().__init__(model, max_length=None)

        if n < 1:
            raise ValueError(f"n must be >= 1, got {n}")
        if stride < 1:
            raise ValueError(f"stride must be >= 1, got {stride}")
        if mode not in ['bundling', 'chaining']:
            raise ValueError(f"mode must be 'bundling' or 'chaining', got '{mode}'")

        self.n = n
        self.stride = stride
        self.mode = mode

        # Internal encoder for individual n-grams
        # Each n-gram is encoded as a position-bound sequence
        self.ngram_encoder = PositionBindingEncoder(
            model=model,
            codebook=codebook,
            max_length=n,  # Each n-gram has length n
            auto_generate=auto_generate,
            seed=seed
        )

    def encode(self, sequence: list[str | int]) -> Array:
        """
        Encode sequence using n-gram representation.

        Extracts all n-grams using sliding window with specified stride,
        encodes each n-gram, then combines via bundling or chaining.

        Args:
            sequence: List of symbols to encode

        Returns:
            Hypervector representing the sequence as n-grams

        Raises:
            ValueError: If sequence is too short (length < n)

        Example:
            >>> # Bigrams with stride=1 (overlapping)
            >>> encoder = NGramEncoder(model, n=2, stride=1)
            >>> encoder.encode(['A', 'B', 'C'])  # N-grams: AB, BC
            >>>
            >>> # Trigrams with stride=2 (partial overlap)
            >>> encoder = NGramEncoder(model, n=3, stride=2)
            >>> encoder.encode(['A', 'B', 'C', 'D', 'E'])  # N-grams: ABC, CDE
        """
        if len(sequence) < self.n:
            raise ValueError(
                f"Sequence length {len(sequence)} is less than n={self.n}"
            )

        # Extract all n-grams using sliding window
        ngrams = []
        for i in range(0, len(sequence) - self.n + 1, self.stride):
            ngram = sequence[i:i + self.n]
            ngrams.append(ngram)

        if not ngrams:
            raise ValueError("No n-grams extracted from sequence")

        # Encode each n-gram using position binding
        ngram_hvs = []
        for ngram in ngrams:
            ngram_hv = self.ngram_encoder.encode(ngram)
            ngram_hvs.append(ngram_hv)

        # Combine n-gram hypervectors based on mode
        if self.mode == 'bundling':
            # Bag-of-ngrams: simple bundle (order-invariant)
            sequence_hv = self.model.bundle(ngram_hvs)

        else:  # mode == 'chaining'
            # Ordered n-grams: bind each with position
            position_bound = []
            for i, ngram_hv in enumerate(ngram_hvs):
                # Position encoding: permute by n-gram index
                position_hv = self.model.permute(ngram_hv, k=i)
                position_bound.append(position_hv)

            sequence_hv = self.model.bundle(position_bound)

        return sequence_hv

    def decode(
        self,
        hypervector: Array,
        max_ngrams: int = 10,
        threshold: float = 0.3
    ) -> list[list[str | int]]:
        """
        Decode n-gram hypervector to recover n-grams.

        Only supported for 'chaining' mode. For 'bundling' mode,
        n-grams are order-invariant and cannot be sequentially decoded.

        Args:
            hypervector: Encoded sequence hypervector
            max_ngrams: Maximum number of n-grams to decode
            threshold: Minimum similarity threshold for valid n-grams

        Returns:
            List of decoded n-grams, each as a list of symbols

        Raises:
            NotImplementedError: If mode is 'bundling' (not decodable)
            RuntimeError: If codebook is empty

        Example:
            >>> encoder = NGramEncoder(model, n=2, mode='chaining')
            >>> hv = encoder.encode(['A', 'B', 'C'])
        >>> encoder.decode(hv, max_ngrams=3)  # [['A', 'B'], ['B', 'C']]
        """
        if self.mode != 'chaining':
            raise NotImplementedError(
                f"Decoding only supported for 'chaining' mode, not '{self.mode}'"
            )

        if not self.ngram_encoder.codebook:
            raise RuntimeError("Cannot decode: codebook is empty")

        # For chaining mode, unpermute each position and decode the n-gram
        decoded_ngrams = []

        for pos in range(max_ngrams):
            # Unpermute by position to recover n-gram at this index
            unpermuted = self.model.unpermute(hypervector, k=pos)

            # Decode the n-gram using ngram_encoder
            try:
                ngram_symbols = self.ngram_encoder.decode(
                    unpermuted,
                    max_positions=self.n,
                    threshold=threshold
                )

                # Only include if we got a full n-gram
                if len(ngram_symbols) >= self.n:
                    decoded_ngrams.append(ngram_symbols[:self.n])
                else:
                    # Incomplete n-gram - likely end of sequence
                    break

            except Exception:
                # Decoding failed - likely end of sequence
                break

        return decoded_ngrams

    def get_codebook(self) -> dict[str, Array]:
        """
        Get the internal symbol codebook.

        Returns:
            Dictionary mapping symbols to hypervectors
        """
        return self.ngram_encoder.codebook

    def get_codebook_size(self) -> int:
        """
        Get number of unique symbols in codebook.

        Returns:
            Number of symbols
        """
        return self.ngram_encoder.get_codebook_size()

    @property
    def is_reversible(self) -> bool:
        """
        NGramEncoder supports decoding only in 'chaining' mode.

        Returns:
            True if mode is 'chaining', False if 'bundling'
        """
        return self.mode == 'chaining'

    @property
    def compatible_models(self) -> list[str]:
        """
        Works with all VSA models.

        Returns:
            List of all model names
        """
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"NGramEncoder("
            f"model={self.model.model_name}, "
            f"n={self.n}, "
            f"stride={self.stride}, "
            f"mode='{self.mode}', "
            f"codebook_size={self.get_codebook_size()})"
        )

compatible_models property

Works with all VSA models.

Returns: List of all model names

is_reversible property

NGramEncoder supports decoding only in 'chaining' mode.

Returns: True if mode is 'chaining', False if 'bundling'

__init__(model, n=2, stride=1, mode='bundling', codebook=None, auto_generate=True, seed=None)

Initialize n-gram encoder.

Args:
- model: VSA model instance
- n: Size of n-grams (must be >= 1)
- stride: Step between n-grams (must be >= 1)
- mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
- codebook: Optional pre-defined symbol → hypervector mapping
- auto_generate: Auto-generate vectors for unknown symbols
- seed: Random seed for symbol vector generation

Raises: ValueError: If n < 1, stride < 1, or mode is invalid

Source code in holovec/encoders/sequence.py
def __init__(
    self,
    model: VSAModel,
    n: int = 2,
    stride: int = 1,
    mode: str = 'bundling',
    codebook: dict[str, Array] | None = None,
    auto_generate: bool = True,
    seed: int | None = None
):
    """
    Initialize n-gram encoder.

    Args:
        model: VSA model instance
        n: Size of n-grams (must be >= 1)
        stride: Step between n-grams (must be >= 1)
        mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
        codebook: Optional pre-defined symbol → hypervector mapping
        auto_generate: Auto-generate vectors for unknown symbols
        seed: Random seed for symbol vector generation

    Raises:
        ValueError: If n < 1, stride < 1, or mode is invalid
    """
    super().__init__(model, max_length=None)

    if n < 1:
        raise ValueError(f"n must be >= 1, got {n}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    if mode not in ['bundling', 'chaining']:
        raise ValueError(f"mode must be 'bundling' or 'chaining', got '{mode}'")

    self.n = n
    self.stride = stride
    self.mode = mode

    # Internal encoder for individual n-grams
    # Each n-gram is encoded as a position-bound sequence
    self.ngram_encoder = PositionBindingEncoder(
        model=model,
        codebook=codebook,
        max_length=n,  # Each n-gram has length n
        auto_generate=auto_generate,
        seed=seed
    )

__repr__()

String representation.

Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"NGramEncoder("
        f"model={self.model.model_name}, "
        f"n={self.n}, "
        f"stride={self.stride}, "
        f"mode='{self.mode}', "
        f"codebook_size={self.get_codebook_size()})"
    )

decode(hypervector, max_ngrams=10, threshold=0.3)

Decode n-gram hypervector to recover n-grams.

Only supported for 'chaining' mode. For 'bundling' mode, n-grams are order-invariant and cannot be sequentially decoded.

Args:
- hypervector: Encoded sequence hypervector
- max_ngrams: Maximum number of n-grams to decode
- threshold: Minimum similarity threshold for valid n-grams

Returns: List of decoded n-grams, each as a list of symbols

Raises:
- NotImplementedError: If mode is 'bundling' (not decodable)
- RuntimeError: If codebook is empty

Example:
>>> encoder = NGramEncoder(model, n=2, mode='chaining')
>>> hv = encoder.encode(['A', 'B', 'C'])
>>> encoder.decode(hv, max_ngrams=3)  # [['A', 'B'], ['B', 'C']]

Source code in holovec/encoders/sequence.py
def decode(
    self,
    hypervector: Array,
    max_ngrams: int = 10,
    threshold: float = 0.3
) -> list[list[str | int]]:
    """
    Decode n-gram hypervector to recover n-grams.

    Only supported for 'chaining' mode. For 'bundling' mode,
    n-grams are order-invariant and cannot be sequentially decoded.

    Args:
        hypervector: Encoded sequence hypervector
        max_ngrams: Maximum number of n-grams to decode
        threshold: Minimum similarity threshold for valid n-grams

    Returns:
        List of decoded n-grams, each as a list of symbols

    Raises:
        NotImplementedError: If mode is 'bundling' (not decodable)
        RuntimeError: If codebook is empty

    Example:
        >>> encoder = NGramEncoder(model, n=2, mode='chaining')
        >>> hv = encoder.encode(['A', 'B', 'C'])
        >>> encoder.decode(hv, max_ngrams=3)  # [['A', 'B'], ['B', 'C']]
    """
    if self.mode != 'chaining':
        raise NotImplementedError(
            f"Decoding only supported for 'chaining' mode, not '{self.mode}'"
        )

    if not self.ngram_encoder.codebook:
        raise RuntimeError("Cannot decode: codebook is empty")

    # For chaining mode, unpermute each position and decode the n-gram
    decoded_ngrams = []

    for pos in range(max_ngrams):
        # Unpermute by position to recover n-gram at this index
        unpermuted = self.model.unpermute(hypervector, k=pos)

        # Decode the n-gram using ngram_encoder
        try:
            ngram_symbols = self.ngram_encoder.decode(
                unpermuted,
                max_positions=self.n,
                threshold=threshold
            )

            # Only include if we got a full n-gram
            if len(ngram_symbols) >= self.n:
                decoded_ngrams.append(ngram_symbols[:self.n])
            else:
                # Incomplete n-gram - likely end of sequence
                break

        except Exception:
            # Decoding failed - likely end of sequence
            break

    return decoded_ngrams

encode(sequence)

Encode sequence using n-gram representation.

Extracts all n-grams using sliding window with specified stride, encodes each n-gram, then combines via bundling or chaining.

Args: sequence: List of symbols to encode

Returns: Hypervector representing the sequence as n-grams

Raises: ValueError: If sequence is too short (length < n)

Example:
>>> # Bigrams with stride=1 (overlapping)
>>> encoder = NGramEncoder(model, n=2, stride=1)
>>> encoder.encode(['A', 'B', 'C'])  # N-grams: AB, BC
>>>
>>> # Trigrams with stride=2 (partial overlap)
>>> encoder = NGramEncoder(model, n=3, stride=2)
>>> encoder.encode(['A', 'B', 'C', 'D', 'E'])  # N-grams: ABC, CDE

Source code in holovec/encoders/sequence.py
def encode(self, sequence: list[str | int]) -> Array:
    """
    Encode sequence using n-gram representation.

    Extracts all n-grams using sliding window with specified stride,
    encodes each n-gram, then combines via bundling or chaining.

    Args:
        sequence: List of symbols to encode

    Returns:
        Hypervector representing the sequence as n-grams

    Raises:
        ValueError: If sequence is too short (length < n)

    Example:
        >>> # Bigrams with stride=1 (overlapping)
        >>> encoder = NGramEncoder(model, n=2, stride=1)
        >>> encoder.encode(['A', 'B', 'C'])  # N-grams: AB, BC
        >>>
        >>> # Trigrams with stride=2 (partial overlap)
        >>> encoder = NGramEncoder(model, n=3, stride=2)
        >>> encoder.encode(['A', 'B', 'C', 'D', 'E'])  # N-grams: ABC, CDE
    """
    if len(sequence) < self.n:
        raise ValueError(
            f"Sequence length {len(sequence)} is less than n={self.n}"
        )

    # Extract all n-grams using sliding window
    ngrams = []
    for i in range(0, len(sequence) - self.n + 1, self.stride):
        ngram = sequence[i:i + self.n]
        ngrams.append(ngram)

    if not ngrams:
        raise ValueError("No n-grams extracted from sequence")

    # Encode each n-gram using position binding
    ngram_hvs = []
    for ngram in ngrams:
        ngram_hv = self.ngram_encoder.encode(ngram)
        ngram_hvs.append(ngram_hv)

    # Combine n-gram hypervectors based on mode
    if self.mode == 'bundling':
        # Bag-of-ngrams: simple bundle (order-invariant)
        sequence_hv = self.model.bundle(ngram_hvs)

    else:  # mode == 'chaining'
        # Ordered n-grams: bind each with position
        position_bound = []
        for i, ngram_hv in enumerate(ngram_hvs):
            # Position encoding: permute by n-gram index
            position_hv = self.model.permute(ngram_hv, k=i)
            position_bound.append(position_hv)

        sequence_hv = self.model.bundle(position_bound)

    return sequence_hv
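
The practical difference between the two modes is that 'bundling' yields an order-invariant bag of n-grams, while 'chaining' tags each n-gram with its position. A minimal sketch, under the same import assumption as the earlier example, of how shared n-grams show up as similarity in bundling mode:

from holovec import VSA
from holovec.encoders import NGramEncoder  # assumed re-export

model = VSA.create('MAP', dim=10000, seed=42)
bag = NGramEncoder(model, n=2, mode='bundling')

# Both sequences contain the bigrams AB and BC plus one extra each, so
# their bag-of-ngrams encodings should score well above chance.
hv1 = bag.encode(['A', 'B', 'C', 'D'])  # AB, BC, CD
hv2 = bag.encode(['X', 'A', 'B', 'C'])  # XA, AB, BC
print(model.similarity(hv1, hv2))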

get_codebook()

Get the internal symbol codebook.

Returns:
    Dictionary mapping symbols to hypervectors

Source code in holovec/encoders/sequence.py
def get_codebook(self) -> dict[str, Array]:
    """
    Get the internal symbol codebook.

    Returns:
        Dictionary mapping symbols to hypervectors
    """
    return self.ngram_encoder.codebook

get_codebook_size()

Get number of unique symbols in codebook.

Returns:
    Number of symbols

Source code in holovec/encoders/sequence.py
def get_codebook_size(self) -> int:
    """
    Get number of unique symbols in codebook.

    Returns:
        Number of symbols
    """
    return self.ngram_encoder.get_codebook_size()

holovec.encoders.sequence.TrajectoryEncoder

Bases: SequenceEncoder

Trajectory encoder for continuous sequences (time series, paths, motion).

Based on Frady et al. (2021) "Computing on Functions" and position binding from Plate (2003), encoding trajectories by binding temporal information with spatial positions.

A trajectory is a sequence of positions over time:
- 1D: time series [v₁, v₂, v₃, ...]
- 2D: path [(x₁,y₁), (x₂,y₂), ...]
- 3D: motion [(x₁,y₁,z₁), (x₂,y₂,z₂), ...]

Encoding strategy: for each time step tᵢ with position pᵢ:
1. Encode time: time_hv = scalar_encode(tᵢ)
2. Encode position coords: coord_hvs = [scalar_encode(c) for c in pᵢ]
3. Bind coords to dimensions: pos_hv = Σⱼ bind(Dⱼ, coord_hv_j)
4. Bind time with position: point_hv = bind(time_hv, pos_hv)
5. Permute by index: indexed_hv = permute(point_hv, i)

trajectory_hv = Σᵢ indexed_hv

This creates an encoding that:
- Preserves temporal ordering (via permutation)
- Captures smooth trajectories (via continuous scalar encoding)
- Enables partial matching and interpolation
- Supports multi-dimensional paths

Attributes:
    scalar_encoder: Encoder for continuous values (FPE or Thermometer)
    n_dimensions: Dimensionality of trajectory (1D, 2D, or 3D)
    time_range: (min_time, max_time) for temporal normalization
    dim_vectors: Hypervectors for spatial dimensions (x, y, z)

Example:
    >>> from holovec import VSA
    >>> from holovec.encoders import FractionalPowerEncoder, TrajectoryEncoder
    >>>
    >>> model = VSA.create('FHRR', dim=10000)
    >>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=100)
    >>> encoder = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=2)
    >>>
    >>> # Encode a 2D path
    >>> path = [(10, 20), (15, 25), (20, 30), (25, 35)]
    >>> hv = encoder.encode(path)
    >>>
    >>> # Similar paths have high similarity
    >>> path2 = [(10, 20), (15, 25), (20, 30), (25, 40)]  # Slightly different
    >>> hv2 = encoder.encode(path2)
    >>> model.similarity(hv, hv2)  # High similarity

Source code in holovec/encoders/sequence.py
class TrajectoryEncoder(SequenceEncoder):
    """
    Trajectory encoder for continuous sequences (time series, paths, motion).

    Based on Frady et al. (2021) "Computing on Functions" and position binding
    from Plate (2003), encoding trajectories by binding temporal information
    with spatial positions.

    A trajectory is a sequence of positions over time:
    - 1D: time series [v₁, v₂, v₃, ...]
    - 2D: path [(x₁,y₁), (x₂,y₂), ...]
    - 3D: motion [(x₁,y₁,z₁), (x₂,y₂,z₂), ...]

    Encoding strategy:
        For each time step tᵢ with position pᵢ:
        1. Encode time: time_hv = scalar_encode(tᵢ)
        2. Encode position coords: coord_hvs = [scalar_encode(c) for c in pᵢ]
        3. Bind coords to dimensions: pos_hv = Σⱼ bind(Dⱼ, coord_hv_j)
        4. Bind time with position: point_hv = bind(time_hv, pos_hv)
        5. Permute by index: indexed_hv = permute(point_hv, i)

        trajectory_hv = Σᵢ indexed_hv

    This creates an encoding that:
    - Preserves temporal ordering (via permutation)
    - Captures smooth trajectories (via continuous scalar encoding)
    - Enables partial matching and interpolation
    - Supports multi-dimensional paths

    Attributes:
        scalar_encoder: Encoder for continuous values (FPE or Thermometer)
        n_dimensions: Dimensionality of trajectory (1D, 2D, or 3D)
        time_range: (min_time, max_time) for temporal normalization
        dim_vectors: Hypervectors for spatial dimensions (x, y, z)

    Example:
        >>> from holovec import VSA
        >>> from holovec.encoders import FractionalPowerEncoder, TrajectoryEncoder
        >>>
        >>> model = VSA.create('FHRR', dim=10000)
        >>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=100)
        >>> encoder = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=2)
        >>>
        >>> # Encode a 2D path
        >>> path = [(10, 20), (15, 25), (20, 30), (25, 35)]
        >>> hv = encoder.encode(path)
        >>>
        >>> # Similar paths have high similarity
        >>> path2 = [(10, 20), (15, 25), (20, 30), (25, 40)]  # Slightly different
        >>> hv2 = encoder.encode(path2)
        >>> model.similarity(hv, hv2)  # High similarity
    """

    def __init__(
        self,
        model: VSAModel,
        scalar_encoder: ScalarEncoder,
        n_dimensions: int = 1,
        time_range: tuple[float, float] | None = None,
        seed: int | None = None
    ):
        """
        Initialize trajectory encoder.

        Args:
            model: VSA model instance
            scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended)
            n_dimensions: Trajectory dimensionality (1, 2, or 3)
            time_range: (min, max) time values for normalization (optional)
            seed: Random seed for dimension vector generation

        Raises:
            ValueError: If n_dimensions not in {1, 2, 3}
            TypeError: If scalar_encoder is not a ScalarEncoder
        """
        super().__init__(model, max_length=None)

        if n_dimensions not in {1, 2, 3}:
            raise ValueError(
                f"n_dimensions must be 1, 2, or 3, got {n_dimensions}"
            )

        if not isinstance(scalar_encoder, ScalarEncoder):
            raise TypeError(
                f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
            )

        # Check model compatibility
        if model != scalar_encoder.model:
            raise ValueError(
                "scalar_encoder must use the same VSA model as TrajectoryEncoder"
            )

        self.scalar_encoder = scalar_encoder
        self.n_dimensions = n_dimensions
        self.time_range = time_range
        self.seed = seed

        # Generate dimension hypervectors (for x, y, z coordinates)
        self.dim_vectors: list[Array] = []
        for i in range(n_dimensions):
            dim_seed = (seed + i) if seed is not None else (1000 + i)
            self.dim_vectors.append(model.random(seed=dim_seed))

    def encode(self, trajectory: list[float | tuple[float, ...]]) -> Array:
        """
        Encode a trajectory as a hypervector.

        Each point in the trajectory is encoded with temporal information,
        then all points are combined with position-based permutation.

        Args:
            trajectory: List of points
                - 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
                - 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
                - 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]

        Returns:
            Hypervector representing the trajectory

        Raises:
            ValueError: If trajectory is empty or points have wrong dimensionality

        Example:
            >>> # 1D time series
            >>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
            >>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
            >>>
            >>> # 2D path
            >>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
            >>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])
        """
        if len(trajectory) == 0:
            raise ValueError("Cannot encode empty trajectory")

        # Encode each point with temporal binding
        point_hvs = []

        for i, point in enumerate(trajectory):
            # Normalize point to tuple format
            if self.n_dimensions == 1:
                # 1D: scalar → (scalar,)
                if isinstance(point, int | float):
                    coords = (float(point),)
                else:
                    coords = (float(point[0]),)
            else:
                # 2D/3D: accept tuple, list, or array-like
                try:
                    # Convert to tuple (works for tuple, list, numpy array, etc.)
                    coords = tuple(float(c) for c in point)
                except (TypeError, ValueError):
                    raise ValueError(
                        f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
                    )

            # Validate dimensionality
            if len(coords) != self.n_dimensions:
                raise ValueError(
                    f"Expected {self.n_dimensions}D point, got {len(coords)}D: {coords}"
                )

            # Encode time (index as time if no time_range specified)
            if self.time_range is not None:
                # Normalize time to range
                t = i / len(trajectory)  # t in [0, 1)
                t_scaled = self.time_range[0] + t * (self.time_range[1] - self.time_range[0])
                time_hv = self.scalar_encoder.encode(t_scaled)
            else:
                # Use index directly
                time_hv = self.scalar_encoder.encode(float(i))

            # Encode position (bind each coordinate with its dimension)
            coord_hvs = []
            for j, coord_val in enumerate(coords):
                coord_hv = self.scalar_encoder.encode(coord_val)
                dim_hv = self.dim_vectors[j]
                bound_coord = self.model.bind(dim_hv, coord_hv)
                coord_hvs.append(bound_coord)

            # Bundle coordinates to create position hypervector
            pos_hv = self.model.bundle(coord_hvs)

            # Bind time with position
            point_hv = self.model.bind(time_hv, pos_hv)

            # Apply position-specific permutation (for ordering)
            indexed_hv = self.model.permute(point_hv, k=i)

            point_hvs.append(indexed_hv)

        # Bundle all points
        trajectory_hv = self.model.bundle(point_hvs)

        return trajectory_hv

    def decode(self, hypervector: Array, max_points: int = 10) -> list[tuple[float, ...]]:
        """
        Decode trajectory hypervector to recover approximate points.

        Note: Trajectory decoding is not yet implemented. It requires:
        1. Unpermuting each position
        2. Unbinding time from position
        3. Unbinding each coordinate from dimension vectors
        4. Decoding scalar values
        5. Interpolation for smooth trajectories

        Args:
            hypervector: Encoded trajectory hypervector
            max_points: Maximum points to decode

        Returns:
            List of decoded points (not implemented yet)

        Raises
        ------
        NotImplementedError
            Trajectory decoding requires solving nested binding inverse problem.

        Notes
        -----
        Trajectory decoding is not implemented because it requires multi-level
        unbinding with cascading error accumulation:

        **Mathematical Challenge:**

        The encoding process creates nested bindings:
            trajectory_hv = bundle([
                bind(time(t), bind(dimension(d), scalar(coord[t,d])))
                for all t, d
            ])

        To decode a single point at time t:
        1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
        2. For each dimension d:
           a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
           b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])

        **Why This Is Intractable:**

        - **Two-level unbinding**: Time then dimension (or vice versa)
        - **Error compounding**: Each unbind adds noise
        - **No known time points**: Must search over possible time values
        - **Interpolation complexity**: Smooth trajectory requires dense sampling
        - **Computational cost**:
          * For T time points, D dimensions
          * Requires: T × D × (decode_iterations) evaluations
          * Example: 100 points × 3D × 100 iterations = 30,000 evals

        **Additional Challenges:**

        1. **Order Ambiguity**: Don't know which time point comes first
        2. **Density Unknown**: Don't know temporal sampling rate
        3. **Dimension Count**: Must know dimensionality a priori
        4. **Coordinate Ranges**: Scalar decoder needs value bounds

        **Possible Approaches (Future Work):**

        1. **Constrained Decoding**: If time points are known:
           - Unbind each known time point
           - Decode coordinates independently
           - Complexity: O(T × D × decode_cost)

        2. **Template Matching**: Pre-encode common trajectory patterns
           - Create codebook of canonical trajectories
           - Use cleanup to find nearest match
           - Works for classification, not reconstruction

        3. **Learned Decoder**: Train neural network trajectory_hv → points
           - Requires large training dataset
           - Can learn to handle noise and ambiguity
           - See: Imani et al. (2019) for similar approach

        4. **Iterative Resonator**: Use resonator cleanup at each level
           - Unbind time with resonator cleanup
           - Unbind dimension with resonator cleanup
           - Requires codebooks for both time and coordinates

        **Current Recommendation:**

        Use TrajectoryEncoder for one-way encoding in applications like:
        - Trajectory classification (gesture recognition, motion analysis)
        - Trajectory similarity search (find similar paths)
        - Trajectory clustering (group similar motions)

        For reconstruction, consider storing original trajectories separately
        and using hypervector encoding only for similarity queries.

        References
        ----------
        - Plate (2003): "Holographic Reduced Representations" - Section 4.3
          on error accumulation in multi-level binding
        - Räsänen & Saarinen (2016): "Sequence prediction with sparse
          distributed hyperdimensional coding" - Analysis of temporal binding
        """
        raise NotImplementedError(
            "Trajectory decoding is not implemented due to nested binding complexity. "
            "See docstring for detailed mathematical explanation. "
            "For reconstruction tasks, store original trajectories and use "
            "hypervector encoding for similarity-based retrieval only."
        )

    @property
    def is_reversible(self) -> bool:
        """
        TrajectoryEncoder does not yet support decoding.

        Returns:
            False (decoding not implemented)

        Note:
            Decoding requires multi-level unbinding and interpolation,
            which will be implemented in a future version.
        """
        return False

    @property
    def compatible_models(self) -> list[str]:
        """
        Works with all VSA models.

        Returns:
            List of all model names
        """
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    @property
    def input_type(self) -> str:
        """Input type description."""
        dim_names = {1: "1D time series", 2: "2D path", 3: "3D trajectory"}
        return dim_names[self.n_dimensions]

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"TrajectoryEncoder("
            f"model={self.model.model_name}, "
            f"scalar_encoder={type(self.scalar_encoder).__name__}, "
            f"n_dimensions={self.n_dimensions}, "
            f"time_range={self.time_range})"
        )

compatible_models property

Works with all VSA models.

Returns:
    List of all model names

input_type property

Input type description.

is_reversible property

TrajectoryEncoder does not yet support decoding.

Returns:
    False (decoding not implemented)

Note: Decoding requires multi-level unbinding and interpolation, which will be implemented in a future version.

__init__(model, scalar_encoder, n_dimensions=1, time_range=None, seed=None)

Initialize trajectory encoder.

Args:
    model: VSA model instance
    scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended)
    n_dimensions: Trajectory dimensionality (1, 2, or 3)
    time_range: (min, max) time values for normalization (optional)
    seed: Random seed for dimension vector generation

Raises:
    ValueError: If n_dimensions not in {1, 2, 3}
    TypeError: If scalar_encoder is not a ScalarEncoder

Source code in holovec/encoders/sequence.py
def __init__(
    self,
    model: VSAModel,
    scalar_encoder: ScalarEncoder,
    n_dimensions: int = 1,
    time_range: tuple[float, float] | None = None,
    seed: int | None = None
):
    """
    Initialize trajectory encoder.

    Args:
        model: VSA model instance
        scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended)
        n_dimensions: Trajectory dimensionality (1, 2, or 3)
        time_range: (min, max) time values for normalization (optional)
        seed: Random seed for dimension vector generation

    Raises:
        ValueError: If n_dimensions not in {1, 2, 3}
        TypeError: If scalar_encoder is not a ScalarEncoder
    """
    super().__init__(model, max_length=None)

    if n_dimensions not in {1, 2, 3}:
        raise ValueError(
            f"n_dimensions must be 1, 2, or 3, got {n_dimensions}"
        )

    if not isinstance(scalar_encoder, ScalarEncoder):
        raise TypeError(
            f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
        )

    # Check model compatibility
    if model != scalar_encoder.model:
        raise ValueError(
            "scalar_encoder must use the same VSA model as TrajectoryEncoder"
        )

    self.scalar_encoder = scalar_encoder
    self.n_dimensions = n_dimensions
    self.time_range = time_range
    self.seed = seed

    # Generate dimension hypervectors (for x, y, z coordinates)
    self.dim_vectors: list[Array] = []
    for i in range(n_dimensions):
        dim_seed = (seed + i) if seed is not None else (1000 + i)
        self.dim_vectors.append(model.random(seed=dim_seed))

__repr__()

String representation.

Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"TrajectoryEncoder("
        f"model={self.model.model_name}, "
        f"scalar_encoder={type(self.scalar_encoder).__name__}, "
        f"n_dimensions={self.n_dimensions}, "
        f"time_range={self.time_range})"
    )

decode(hypervector, max_points=10)

Decode trajectory hypervector to recover approximate points.

Note: Trajectory decoding is not yet implemented. It requires:
1. Unpermuting each position
2. Unbinding time from position
3. Unbinding each coordinate from dimension vectors
4. Decoding scalar values
5. Interpolation for smooth trajectories

Args:
    hypervector: Encoded trajectory hypervector
    max_points: Maximum points to decode

Returns:
    List of decoded points (not implemented yet)

Raises:
    NotImplementedError: Trajectory decoding requires solving a nested
    binding inverse problem.

Notes

Trajectory decoding is not implemented because it requires multi-level unbinding with cascading error accumulation:

Mathematical Challenge:

The encoding process creates nested bindings:

    trajectory_hv = bundle([
        bind(time(t), bind(dimension(d), scalar(coord[t,d])))
        for all t, d
    ])

To decode a single point at time t:
1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
2. For each dimension d:
   a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
   b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])

Why This Is Intractable:

  • Two-level unbinding: Time then dimension (or vice versa)
  • Error compounding: Each unbind adds noise
  • No known time points: Must search over possible time values
  • Interpolation complexity: Smooth trajectory requires dense sampling
  • Computational cost: for T time points and D dimensions, decoding
    requires T × D × (decode_iterations) evaluations, e.g. 100 points
    × 3D × 100 iterations = 30,000 evals

Additional Challenges:

  1. Order Ambiguity: Don't know which time point comes first
  2. Density Unknown: Don't know temporal sampling rate
  3. Dimension Count: Must know dimensionality a priori
  4. Coordinate Ranges: Scalar decoder needs value bounds

Possible Approaches (Future Work):

  1. Constrained Decoding: If the time points are known, unbind each
     known time point and decode its coordinates independently
     (sketched after the references below).
     Complexity: O(T × D × decode_cost).

  2. Template Matching: Pre-encode common trajectory patterns into a
     codebook of canonical trajectories and use cleanup to find the
     nearest match. Works for classification, not reconstruction.

  3. Learned Decoder: Train a neural network trajectory_hv → points.
     Requires a large training dataset but can learn to handle noise
     and ambiguity. See Imani et al. (2019) for a similar approach.

  4. Iterative Resonator: Use resonator cleanup at each level: unbind
     time, then unbind dimension, each followed by resonator cleanup.
     Requires codebooks for both time and coordinates.

Current Recommendation:

Use TrajectoryEncoder for one-way encoding in applications like:
- Trajectory classification (gesture recognition, motion analysis)
- Trajectory similarity search (find similar paths)
- Trajectory clustering (group similar motions)

For reconstruction, consider storing original trajectories separately and using hypervector encoding only for similarity queries.

References
  • Plate (2003): "Holographic Reduced Representations" - Section 4.3 on error accumulation in multi-level binding
  • Räsänen & Saarinen (2016): "Sequence prediction with sparse distributed hyperdimensional coding" - Analysis of temporal binding
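
The constrained-decoding idea (approach 1) can be sketched as follows. This is illustrative only, not library functionality: it assumes time_range is None (indices were used as times), that the scalar encoder is reversible and exposes a decode() method (an assumption, not a guaranteed API), and it inherits all of the error accumulation described above.

def decode_with_known_times(encoder, trajectory_hv, n_points):
    # Hypothetical helper, not part of holovec.
    points = []
    for i in range(n_points):
        # Invert the index permutation applied during encoding
        point_hv = encoder.model.unpermute(trajectory_hv, k=i)
        # Unbind the (known) time hypervector; the index was used as time
        time_hv = encoder.scalar_encoder.encode(float(i))
        pos_hv = encoder.model.unbind(point_hv, time_hv)
        coords = []
        for j in range(encoder.n_dimensions):
            # Unbind the dimension vector, then decode the noisy scalar
            coord_hv = encoder.model.unbind(pos_hv, encoder.dim_vectors[j])
            coords.append(encoder.scalar_encoder.decode(coord_hv))  # assumed API
        points.append(tuple(coords))
    return points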
Source code in holovec/encoders/sequence.py
def decode(self, hypervector: Array, max_points: int = 10) -> list[tuple[float, ...]]:
    """
    Decode trajectory hypervector to recover approximate points.

    Note: Trajectory decoding is not yet implemented. It requires:
    1. Unpermuting each position
    2. Unbinding time from position
    3. Unbinding each coordinate from dimension vectors
    4. Decoding scalar values
    5. Interpolation for smooth trajectories

    Args:
        hypervector: Encoded trajectory hypervector
        max_points: Maximum points to decode

    Returns:
        List of decoded points (not implemented yet)

    Raises
    ------
    NotImplementedError
        Trajectory decoding requires solving nested binding inverse problem.

    Notes
    -----
    Trajectory decoding is not implemented because it requires multi-level
    unbinding with cascading error accumulation:

    **Mathematical Challenge:**

    The encoding process creates nested bindings:
        trajectory_hv = bundle([
            bind(time(t), bind(dimension(d), scalar(coord[t,d])))
            for all t, d
        ])

    To decode a single point at time t:
    1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
    2. For each dimension d:
       a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
       b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])

    **Why This Is Intractable:**

    - **Two-level unbinding**: Time then dimension (or vice versa)
    - **Error compounding**: Each unbind adds noise
    - **No known time points**: Must search over possible time values
    - **Interpolation complexity**: Smooth trajectory requires dense sampling
    - **Computational cost**:
      * For T time points, D dimensions
      * Requires: T × D × (decode_iterations) evaluations
      * Example: 100 points × 3D × 100 iterations = 30,000 evals

    **Additional Challenges:**

    1. **Order Ambiguity**: Don't know which time point comes first
    2. **Density Unknown**: Don't know temporal sampling rate
    3. **Dimension Count**: Must know dimensionality a priori
    4. **Coordinate Ranges**: Scalar decoder needs value bounds

    **Possible Approaches (Future Work):**

    1. **Constrained Decoding**: If time points are known:
       - Unbind each known time point
       - Decode coordinates independently
       - Complexity: O(T × D × decode_cost)

    2. **Template Matching**: Pre-encode common trajectory patterns
       - Create codebook of canonical trajectories
       - Use cleanup to find nearest match
       - Works for classification, not reconstruction

    3. **Learned Decoder**: Train neural network trajectory_hv → points
       - Requires large training dataset
       - Can learn to handle noise and ambiguity
       - See: Imani et al. (2019) for similar approach

    4. **Iterative Resonator**: Use resonator cleanup at each level
       - Unbind time with resonator cleanup
       - Unbind dimension with resonator cleanup
       - Requires codebooks for both time and coordinates

    **Current Recommendation:**

    Use TrajectoryEncoder for one-way encoding in applications like:
    - Trajectory classification (gesture recognition, motion analysis)
    - Trajectory similarity search (find similar paths)
    - Trajectory clustering (group similar motions)

    For reconstruction, consider storing original trajectories separately
    and using hypervector encoding only for similarity queries.

    References
    ----------
    - Plate (2003): "Holographic Reduced Representations" - Section 4.3
      on error accumulation in multi-level binding
    - Räsänen & Saarinen (2016): "Sequence prediction with sparse
      distributed hyperdimensional coding" - Analysis of temporal binding
    """
    raise NotImplementedError(
        "Trajectory decoding is not implemented due to nested binding complexity. "
        "See docstring for detailed mathematical explanation. "
        "For reconstruction tasks, store original trajectories and use "
        "hypervector encoding for similarity-based retrieval only."
    )

encode(trajectory)

Encode a trajectory as a hypervector.

Each point in the trajectory is encoded with temporal information, then all points are combined with position-based permutation.

Args:
    trajectory: List of points
        - 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
        - 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
        - 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]

Returns:
    Hypervector representing the trajectory

Raises:
    ValueError: If trajectory is empty or points have wrong dimensionality

Example:
    >>> # 1D time series
    >>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
    >>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
    >>>
    >>> # 2D path
    >>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
    >>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])

Source code in holovec/encoders/sequence.py
def encode(self, trajectory: list[float | tuple[float, ...]]) -> Array:
    """
    Encode a trajectory as a hypervector.

    Each point in the trajectory is encoded with temporal information,
    then all points are combined with position-based permutation.

    Args:
        trajectory: List of points
            - 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
            - 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
            - 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]

    Returns:
        Hypervector representing the trajectory

    Raises:
        ValueError: If trajectory is empty or points have wrong dimensionality

    Example:
        >>> # 1D time series
        >>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
        >>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
        >>>
        >>> # 2D path
        >>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
        >>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])
    """
    if len(trajectory) == 0:
        raise ValueError("Cannot encode empty trajectory")

    # Encode each point with temporal binding
    point_hvs = []

    for i, point in enumerate(trajectory):
        # Normalize point to tuple format
        if self.n_dimensions == 1:
            # 1D: scalar → (scalar,)
            if isinstance(point, int | float):
                coords = (float(point),)
            else:
                coords = (float(point[0]),)
        else:
            # 2D/3D: accept tuple, list, or array-like
            try:
                # Convert to tuple (works for tuple, list, numpy array, etc.)
                coords = tuple(float(c) for c in point)
            except (TypeError, ValueError):
                raise ValueError(
                    f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
                )

        # Validate dimensionality
        if len(coords) != self.n_dimensions:
            raise ValueError(
                f"Expected {self.n_dimensions}D point, got {len(coords)}D: {coords}"
            )

        # Encode time (index as time if no time_range specified)
        if self.time_range is not None:
            # Normalize time to range
            t = i / len(trajectory)  # t in [0, 1)
            t_scaled = self.time_range[0] + t * (self.time_range[1] - self.time_range[0])
            time_hv = self.scalar_encoder.encode(t_scaled)
        else:
            # Use index directly
            time_hv = self.scalar_encoder.encode(float(i))

        # Encode position (bind each coordinate with its dimension)
        coord_hvs = []
        for j, coord_val in enumerate(coords):
            coord_hv = self.scalar_encoder.encode(coord_val)
            dim_hv = self.dim_vectors[j]
            bound_coord = self.model.bind(dim_hv, coord_hv)
            coord_hvs.append(bound_coord)

        # Bundle coordinates to create position hypervector
        pos_hv = self.model.bundle(coord_hvs)

        # Bind time with position
        point_hv = self.model.bind(time_hv, pos_hv)

        # Apply position-specific permutation (for ordering)
        indexed_hv = self.model.permute(point_hv, k=i)

        point_hvs.append(indexed_hv)

    # Bundle all points
    trajectory_hv = self.model.bundle(point_hvs)

    return trajectory_hv
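
A minimal similarity-comparison sketch for 1D series. Note that when time_range is None the same scalar encoder also encodes the integer time indices, so its (min_val, max_val) range must cover both the data values and the indices; the values below are chosen with that in mind.

from holovec import VSA
from holovec.encoders import FractionalPowerEncoder, TrajectoryEncoder

model = VSA.create('FHRR', dim=10000)
scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=10)
enc = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=1)

base = enc.encode([1.0, 2.5, 3.7, 5.2])
noisy = enc.encode([1.0, 2.6, 3.6, 5.2])  # small perturbations
other = enc.encode([9.0, 0.5, 7.7, 2.2])  # unrelated series

# Continuous scalar encoding should make the perturbed series score
# higher than the unrelated one.
print(model.similarity(base, noisy) > model.similarity(base, other))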

Spatial Encoders

holovec.encoders.spatial.ImageEncoder

Bases: Encoder

Image encoder for 2D images (grayscale, RGB, or RGBA).

Encodes images by binding spatial positions (x, y) with pixel values. For color images, each channel is bound to a channel dimension vector before being combined with position information.

Encoding strategy: for each pixel at position (x, y) with value v:
1. Encode position: pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
2. Encode value(s):
   - Grayscale: val_hv = enc(v)
   - RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)), bind(B, enc(b))])
3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
4. Bundle all pixels: image_hv = bundle([all pixel_hvs])

This creates a distributed representation that preserves both spatial structure and pixel values, enabling similarity-based image comparison.
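
Note that this strategy visits every pixel, so encoding cost grows as H×W scalar encodings plus the per-pixel bind/bundle operations. For large images, a common mitigation is to downsample first; the helper below is a hypothetical sketch, not library API:

import numpy as np

def encode_downsampled(encoder, image, factor=4):
    # Naive stride-based downsampling; use a proper resampling routine
    # (e.g., PIL's Image.resize) when quality matters.
    small = np.asarray(image)[::factor, ::factor]
    return encoder.encode(small)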

Parameters:
    model (VSAModel): The VSA model to use for encoding operations. Required.
    scalar_encoder (ScalarEncoder): Encoder for continuous pixel values
    (0-255 typically). Required.
    normalize_pixels (bool, optional): Whether to normalize pixel values to
    [0, 1] before encoding. Default is True.
    seed (int, optional): Random seed for reproducibility. Default is None.

Attributes:
    n_channels (int): Number of channels in the last encoded image (1, 3, or 4).
    image_shape (tuple): Shape (height, width, channels) of the last encoded image.

Examples:

>>> from holovec import VSA
>>> from holovec.encoders import ImageEncoder, ThermometerEncoder
>>> import numpy as np
>>>
>>> model = VSA.create('MAP', dim=10000, seed=42)
>>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
>>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
>>>
>>> # Encode a small grayscale image
>>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
>>> hv = encoder.encode(image)
>>> print(hv.shape)  # (10000,)
>>>
>>> # Encode RGB image
>>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
>>> hv_rgb = encoder.encode(rgb_image)
Source code in holovec/encoders/spatial.py
class ImageEncoder(Encoder):
    """
    Image encoder for 2D images (grayscale, RGB, or RGBA).

    Encodes images by binding spatial positions (x, y) with pixel values.
    For color images, each channel is bound to a channel dimension vector
    before being combined with position information.

    Encoding strategy:
        For each pixel at position (x, y) with value v:
        1. Encode position: pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
        2. Encode value(s):
           - Grayscale: val_hv = enc(v)
           - RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)), bind(B, enc(b))])
        3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
        4. Bundle all pixels: image_hv = bundle([all pixel_hvs])

    This creates a distributed representation that preserves both spatial
    structure and pixel values, enabling similarity-based image comparison.

    Parameters
    ----------
    model : VSAModel
        The VSA model to use for encoding operations.
    scalar_encoder : ScalarEncoder
        Encoder for continuous pixel values (0-255 typically).
    normalize_pixels : bool, optional
        Whether to normalize pixel values to [0, 1] before encoding.
        Default is True.
    seed : int, optional
        Random seed for reproducibility. Default is None.

    Attributes
    ----------
    n_channels : int
        Number of channels in the last encoded image (1, 3, or 4).
    image_shape : tuple
        Shape (height, width, channels) of the last encoded image.

    Examples
    --------
    >>> from holovec import VSA
    >>> from holovec.encoders import ImageEncoder, ThermometerEncoder
    >>> import numpy as np
    >>>
    >>> model = VSA.create('MAP', dim=10000, seed=42)
    >>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
    >>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
    >>>
    >>> # Encode a small grayscale image
    >>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
    >>> hv = encoder.encode(image)
    >>> print(hv.shape)  # (10000,)
    >>>
    >>> # Encode RGB image
    >>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
    >>> hv_rgb = encoder.encode(rgb_image)
    """

    def __init__(
        self,
        model: VSAModel,
        scalar_encoder: ScalarEncoder,
        normalize_pixels: bool = True,
        seed: int | None = None,
    ):
        """Initialize ImageEncoder."""
        # Validate and set scalar_encoder BEFORE calling super().__init__
        # because base class checks compatible_models which references it
        if not isinstance(scalar_encoder, ScalarEncoder):
            raise TypeError(f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}")

        if scalar_encoder.model != model:
            raise ValueError("scalar_encoder must use the same VSA model as the ImageEncoder")

        self.scalar_encoder = scalar_encoder
        self.normalize_pixels = normalize_pixels

        super().__init__(model)

        # Generate dimension vectors for spatial coordinates
        base_seed = seed if seed is not None else 2000
        self.X = model.random(seed=base_seed)  # X dimension
        self.Y = model.random(seed=base_seed + 1)  # Y dimension

        # Generate dimension vectors for color channels (RGB/RGBA)
        self.R = model.random(seed=base_seed + 2)  # Red channel
        self.G = model.random(seed=base_seed + 3)  # Green channel
        self.B = model.random(seed=base_seed + 4)  # Blue channel
        self.A = model.random(seed=base_seed + 5)  # Alpha channel

        # Track last encoded image properties
        self.n_channels: int | None = None
        self.image_shape: tuple[int, ...] | None = None

    def encode(self, image: "Array | numpy.ndarray") -> Array:
        """
        Encode an image into a hypervector.

        Parameters
        ----------
        image : array-like
            Image array with shape (height, width) for grayscale or
            (height, width, channels) for color images.
            Pixel values should be in range [0, 255] for uint8 or
            [0, 1] for float.
            Typically a NumPy array from PIL, OpenCV, or similar libraries.

        Returns
        -------
        Array
            Hypervector encoding of the image.

        Raises
        ------
        ValueError
            If image has invalid shape or number of channels.

        Notes
        -----
        This encoder accepts images as NumPy arrays (the standard format from
        image libraries like PIL, OpenCV, scikit-image) and processes them using
        the configured backend. While input must be NumPy, internal VSA operations
        use the model's backend (NumPy/PyTorch/JAX).
        """
        # Import numpy locally to avoid module-level backend dependency
        # Images from external sources (PIL, OpenCV) are numpy arrays
        import numpy as _np

        # Convert to numpy array if needed (handles lists, tuples, etc.)
        if not isinstance(image, _np.ndarray):
            image = _np.array(image)

        # Validate and normalize image shape
        if image.ndim == 2:
            # Grayscale image
            height, width = image.shape
            n_channels = 1
            # Add channel dimension: (H, W) -> (H, W, 1)
            image = _np.expand_dims(image, axis=-1)
        elif image.ndim == 3:
            height, width, n_channels = image.shape
            if n_channels not in [1, 3, 4]:
                raise ValueError(f"Image must have 1, 3, or 4 channels, got {n_channels}")
        else:
            raise ValueError(f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}")

        # Store image properties
        self.n_channels = n_channels
        self.image_shape = (height, width, n_channels)

        # Normalize pixel values if requested
        if self.normalize_pixels:
            # Check dtype using string representation to avoid dtype dependency
            dtype_str = str(image.dtype)
            if "uint8" in dtype_str:
                image = image.astype(_np.float32) / 255.0
            elif "int" in dtype_str:
                # Other integer types: normalize assuming 0-255 range
                image = image.astype(_np.float32) / 255.0
            # If already float, assume it's in [0, 1]

        # Encode all pixels
        pixel_hvs = []

        for y in range(height):
            for x in range(width):
                # Encode spatial position
                x_hv = self.scalar_encoder.encode(float(x))
                y_hv = self.scalar_encoder.encode(float(y))

                x_bound = self.model.bind(self.X, x_hv)
                y_bound = self.model.bind(self.Y, y_hv)
                pos_hv = self.model.bundle([x_bound, y_bound])

                # Encode pixel value(s)
                if n_channels == 1:
                    # Grayscale: just encode the intensity
                    val_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
                elif n_channels == 3:
                    # RGB: bind each channel to its dimension vector
                    r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
                    g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
                    b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))

                    r_bound = self.model.bind(self.R, r_hv)
                    g_bound = self.model.bind(self.G, g_hv)
                    b_bound = self.model.bind(self.B, b_hv)

                    val_hv = self.model.bundle([r_bound, g_bound, b_bound])
                else:  # n_channels == 4
                    # RGBA: bind each channel including alpha
                    r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
                    g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
                    b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
                    a_hv = self.scalar_encoder.encode(float(image[y, x, 3]))

                    r_bound = self.model.bind(self.R, r_hv)
                    g_bound = self.model.bind(self.G, g_hv)
                    b_bound = self.model.bind(self.B, b_hv)
                    a_bound = self.model.bind(self.A, a_hv)

                    val_hv = self.model.bundle([r_bound, g_bound, b_bound, a_bound])

                # Bind position with value
                pixel_hv = self.model.bind(pos_hv, val_hv)
                pixel_hvs.append(pixel_hv)

        # Bundle all pixels to create image hypervector
        image_hv = self.model.bundle(pixel_hvs)

        return image_hv

    def decode(
        self, hypervector: Array, height: int, width: int, n_channels: int = 1
    ) -> "numpy.ndarray":
        """
        Decode a hypervector to reconstruct an approximate image.

        Note: Image decoding is approximate and requires knowing the target
        image dimensions. Reconstruction quality depends on the scalar encoder's
        decoding capabilities and may require candidate value search.

        Parameters
        ----------
        hypervector : Array
            The hypervector to decode.
        height : int
            Target image height.
        width : int
            Target image width.
        n_channels : int, optional
            Number of channels (1, 3, or 4). Default is 1.

        Returns
        -------
        np.ndarray
            Reconstructed image with shape (height, width) for grayscale
            or (height, width, n_channels) for color.

        Raises
        ------
        NotImplementedError
            Image decoding is computationally intractable without additional constraints.

        Notes
        -----
        Image decoding is not implemented because it requires solving a high-dimensional
        inverse problem that is fundamentally ill-posed:

        **Mathematical Challenge:**

        The encoding process binds pixel values with position vectors:
            image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])

        To decode, we must:
        1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
        2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])

        **Why This Is Intractable:**

        - Unbinding is approximate (except for FHRR with exact inverse)
        - Each unbind operation introduces noise
        - For H×W image: H×W unbind operations compound errors
        - Scalar decoding via optimization (1000 evals × 100 iterations)
        - Total: ~100M evaluations for 100×100 image
        - No gradient available for joint optimization

        **Alternative Approaches:**

        1. **Database Retrieval**: Encode query image, find nearest match in database
           - Complexity: O(N) for N known images
           - Works well for classification/recognition tasks

        2. **Iterative Resonator**: Use resonator cleanup with pixel codebook
           - Requires pre-built codebook of common pixel patterns
           - May reconstruct coarse structure but not fine details

        3. **Neural Decoder**: Train neural network image_hv → image
           - Requires supervised training data
           - Can learn inverse mapping empirically
           - See: Imani et al. (2019) "VoiceHD" for similar approach

        For practical applications, use ImageEncoder for one-way encoding
        (e.g., image→hypervector→classifier) rather than reconstruction.

        References
        ----------
        - Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for
          Efficient Speech Recognition"
        - Plate (2003): "Holographic Reduced Representations" - Chapter 4 on
          approximate unbinding and error accumulation
        """
        raise NotImplementedError(
            "Image decoding is not implemented due to computational intractability. "
            "See docstring for detailed mathematical explanation and alternatives. "
            "For reconstruction tasks, use similarity-based retrieval from a database "
            "of known images, or train a neural decoder network."
        )

    @property
    def is_reversible(self) -> bool:
        """
        Whether the encoder supports decoding.

        Returns
        -------
        bool
            False - image decoding not yet implemented.
        """
        return False

    @property
    def compatible_models(self) -> list[str]:
        """
        List of compatible VSA model names.

        Returns
        -------
        list of str
            All VSA models supported (depends on scalar encoder compatibility).
        """
        return self.scalar_encoder.compatible_models

    @property
    def input_type(self) -> str:
        """
        Description of expected input type.

        Returns
        -------
        str
            Description of input format.
        """
        if self.n_channels is None:
            return "2D array (grayscale) or 3D array (color) with shape (H, W) or (H, W, C)"
        elif self.n_channels == 1:
            return f"Grayscale image ({self.image_shape[0]}x{self.image_shape[1]})"
        elif self.n_channels == 3:
            return f"RGB image ({self.image_shape[0]}x{self.image_shape[1]}x3)"
        else:
            return f"RGBA image ({self.image_shape[0]}x{self.image_shape[1]}x4)"

    def __repr__(self) -> str:
        """Return string representation."""
        return (
            f"ImageEncoder(model={self.model.model_name}, "
            f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
            f"normalize_pixels={self.normalize_pixels})"
        )

compatible_models property

List of compatible VSA model names.

Returns:
    list of str: All VSA models supported (depends on scalar encoder
    compatibility).

input_type property

Description of expected input type.

Returns:
    str: Description of input format.

is_reversible property

Whether the encoder supports decoding.

Returns:
    bool: False - image decoding not yet implemented.

__init__(model, scalar_encoder, normalize_pixels=True, seed=None)

Initialize ImageEncoder.

Source code in holovec/encoders/spatial.py
def __init__(
    self,
    model: VSAModel,
    scalar_encoder: ScalarEncoder,
    normalize_pixels: bool = True,
    seed: int | None = None,
):
    """Initialize ImageEncoder."""
    # Validate and set scalar_encoder BEFORE calling super().__init__
    # because base class checks compatible_models which references it
    if not isinstance(scalar_encoder, ScalarEncoder):
        raise TypeError(f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}")

    if scalar_encoder.model != model:
        raise ValueError("scalar_encoder must use the same VSA model as the ImageEncoder")

    self.scalar_encoder = scalar_encoder
    self.normalize_pixels = normalize_pixels

    super().__init__(model)

    # Generate dimension vectors for spatial coordinates
    base_seed = seed if seed is not None else 2000
    self.X = model.random(seed=base_seed)  # X dimension
    self.Y = model.random(seed=base_seed + 1)  # Y dimension

    # Generate dimension vectors for color channels (RGB/RGBA)
    self.R = model.random(seed=base_seed + 2)  # Red channel
    self.G = model.random(seed=base_seed + 3)  # Green channel
    self.B = model.random(seed=base_seed + 4)  # Blue channel
    self.A = model.random(seed=base_seed + 5)  # Alpha channel

    # Track last encoded image properties
    self.n_channels: int | None = None
    self.image_shape: tuple[int, ...] | None = None

__repr__()

Return string representation.

Source code in holovec/encoders/spatial.py
def __repr__(self) -> str:
    """Return string representation."""
    return (
        f"ImageEncoder(model={self.model.model_name}, "
        f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
        f"normalize_pixels={self.normalize_pixels})"
    )

decode(hypervector, height, width, n_channels=1)

Decode a hypervector to reconstruct an approximate image.

Note: Image decoding is approximate and requires knowing the target image dimensions. Reconstruction quality depends on the scalar encoder's decoding capabilities and may require candidate value search.

Parameters:
    hypervector (Array): The hypervector to decode. Required.
    height (int): Target image height. Required.
    width (int): Target image width. Required.
    n_channels (int, optional): Number of channels (1, 3, or 4). Default is 1.

Returns:
    ndarray: Reconstructed image with shape (height, width) for grayscale
    or (height, width, n_channels) for color.

Raises:
    NotImplementedError: Image decoding is computationally intractable
    without additional constraints.

Notes

Image decoding is not implemented because it requires solving a high-dimensional inverse problem that is fundamentally ill-posed:

Mathematical Challenge:

The encoding process binds pixel values with position vectors:

    image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])

To decode, we must:
1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])

Why This Is Intractable:

  • Unbinding is approximate (except for FHRR with exact inverse)
  • Each unbind operation introduces noise
  • For H×W image: H×W unbind operations compound errors
  • Scalar decoding via optimization (1000 evals × 100 iterations)
  • Total: ~100M evaluations for 100×100 image
  • No gradient available for joint optimization

Alternative Approaches:

  1. Database Retrieval: Encode the query image and find its nearest
     match in a database of known images (see the sketch after the
     references below). Complexity is O(N) for N known images; works
     well for classification/recognition tasks.

  2. Iterative Resonator: Use resonator cleanup with a pixel codebook.
     Requires a pre-built codebook of common pixel patterns; may
     reconstruct coarse structure but not fine details.

  3. Neural Decoder: Train a neural network image_hv → image. Requires
     supervised training data but can learn the inverse mapping
     empirically. See Imani et al. (2019) "VoiceHD" for a similar
     approach.

For practical applications, use ImageEncoder for one-way encoding (e.g., image→hypervector→classifier) rather than reconstruction.

References
  • Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for Efficient Speech Recognition"
  • Plate (2003): "Holographic Reduced Representations" - Chapter 4 on approximate unbinding and error accumulation
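
The database-retrieval alternative (approach 1) is straightforward to sketch. The helper below is illustrative only, not library API, assuming a database of (label, hypervector) pairs built with the same encoder:

def nearest_image(encoder, database, query_image):
    # Encode the query and scan stored hypervectors for the best match.
    query_hv = encoder.encode(query_image)
    best_label, best_sim = None, float("-inf")
    for label, hv in database:
        sim = encoder.model.similarity(query_hv, hv)
        if sim > best_sim:
            best_label, best_sim = label, sim
    return best_label, best_sim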
Source code in holovec/encoders/spatial.py
def decode(
    self, hypervector: Array, height: int, width: int, n_channels: int = 1
) -> "numpy.ndarray":
    """
    Decode a hypervector to reconstruct an approximate image.

    Note: Image decoding is approximate and requires knowing the target
    image dimensions. Reconstruction quality depends on the scalar encoder's
    decoding capabilities and may require candidate value search.

    Parameters
    ----------
    hypervector : Array
        The hypervector to decode.
    height : int
        Target image height.
    width : int
        Target image width.
    n_channels : int, optional
        Number of channels (1, 3, or 4). Default is 1.

    Returns
    -------
    np.ndarray
        Reconstructed image with shape (height, width) for grayscale
        or (height, width, n_channels) for color.

    Raises
    ------
    NotImplementedError
        Image decoding is computationally intractable without additional constraints.

    Notes
    -----
    Image decoding is not implemented because it requires solving a high-dimensional
    inverse problem that is fundamentally ill-posed:

    **Mathematical Challenge:**

    The encoding process binds pixel values with position vectors:
        image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])

    To decode, we must:
    1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
    2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])

    **Why This Is Intractable:**

    - Unbinding is approximate (except for FHRR with exact inverse)
    - Each unbind operation introduces noise
    - For H×W image: H×W unbind operations compound errors
    - Scalar decoding via optimization (1000 evals × 100 iterations)
    - Total: ~100M evaluations for 100×100 image
    - No gradient available for joint optimization

    **Alternative Approaches:**

    1. **Database Retrieval**: Encode query image, find nearest match in database
       - Complexity: O(N) for N known images
       - Works well for classification/recognition tasks

    2. **Iterative Resonator**: Use resonator cleanup with pixel codebook
       - Requires pre-built codebook of common pixel patterns
       - May reconstruct coarse structure but not fine details

    3. **Neural Decoder**: Train neural network image_hv → image
       - Requires supervised training data
       - Can learn inverse mapping empirically
       - See: Imani et al. (2019) "VoiceHD" for similar approach

    For practical applications, use ImageEncoder for one-way encoding
    (e.g., image→hypervector→classifier) rather than reconstruction.

    References
    ----------
    - Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for
      Efficient Speech Recognition"
    - Plate (2003): "Holographic Reduced Representations" - Chapter 4 on
      approximate unbinding and error accumulation
    """
    raise NotImplementedError(
        "Image decoding is not implemented due to computational intractability. "
        "See docstring for detailed mathematical explanation and alternatives. "
        "For reconstruction tasks, use similarity-based retrieval from a database "
        "of known images, or train a neural decoder network."
    )

encode(image)

Encode an image into a hypervector.

Parameters:

  • image (array-like): Image array with shape (height, width) for grayscale or (height, width, channels) for color images. Pixel values should be in range [0, 255] for uint8 or [0, 1] for float. Typically a NumPy array from PIL, OpenCV, or similar libraries. Required.

Returns:

  • Array: Hypervector encoding of the image.

Raises:

  • ValueError: If image has invalid shape or number of channels.

Notes

This encoder accepts images as NumPy arrays (the standard format from image libraries like PIL, OpenCV, scikit-image) and processes them using the configured backend. While input must be NumPy, internal VSA operations use the model's backend (NumPy/PyTorch/JAX).
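
As a small sketch of this shape handling (the arrays are illustrative; the ImageEncoder import path and constructor arguments follow the source caption below and are assumptions):

import numpy as np

from holovec import VSA
from holovec.encoders import FractionalPowerEncoder
from holovec.encoders.spatial import ImageEncoder  # assumed path

model = VSA.create('FHRR', dim=10000)
enc = ImageEncoder(model, scalar_encoder=FractionalPowerEncoder(model, min_val=0, max_val=1))

gray = np.random.randint(0, 256, size=(4, 4), dtype=np.uint8)    # (H, W)
rgb = np.random.randint(0, 256, size=(4, 4, 3), dtype=np.uint8)  # (H, W, C)

hv_gray = enc.encode(gray)  # uint8 pixels rescaled to [0, 1] when normalize_pixels is on
hv_rgb = enc.encode(rgb)    # per-channel values bound to the R/G/B role vectors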

Source code in holovec/encoders/spatial.py
def encode(self, image: "Array | numpy.ndarray") -> Array:
    """
    Encode an image into a hypervector.

    Parameters
    ----------
    image : array-like
        Image array with shape (height, width) for grayscale or
        (height, width, channels) for color images.
        Pixel values should be in range [0, 255] for uint8 or
        [0, 1] for float.
        Typically a NumPy array from PIL, OpenCV, or similar libraries.

    Returns
    -------
    Array
        Hypervector encoding of the image.

    Raises
    ------
    ValueError
        If image has invalid shape or number of channels.

    Notes
    -----
    This encoder accepts images as NumPy arrays (the standard format from
    image libraries like PIL, OpenCV, scikit-image) and processes them using
    the configured backend. While input must be NumPy, internal VSA operations
    use the model's backend (NumPy/PyTorch/JAX).
    """
    # Import numpy locally to avoid module-level backend dependency
    # Images from external sources (PIL, OpenCV) are numpy arrays
    import numpy as _np

    # Convert to numpy array if needed (handles lists, tuples, etc.)
    if not isinstance(image, _np.ndarray):
        image = _np.array(image)

    # Validate and normalize image shape
    if image.ndim == 2:
        # Grayscale image
        height, width = image.shape
        n_channels = 1
        # Add channel dimension: (H, W) -> (H, W, 1)
        image = _np.expand_dims(image, axis=-1)
    elif image.ndim == 3:
        height, width, n_channels = image.shape
        if n_channels not in [1, 3, 4]:
            raise ValueError(f"Image must have 1, 3, or 4 channels, got {n_channels}")
    else:
        raise ValueError(f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}")

    # Store image properties
    self.n_channels = n_channels
    self.image_shape = (height, width, n_channels)

    # Normalize pixel values if requested
    if self.normalize_pixels:
        # Check dtype using string representation to avoid dtype dependency
        dtype_str = str(image.dtype)
        if "uint8" in dtype_str:
            image = image.astype(_np.float32) / 255.0
        elif "int" in dtype_str:
            # Other integer types: normalize assuming 0-255 range
            image = image.astype(_np.float32) / 255.0
        # If already float, assume it's in [0, 1]

    # Encode all pixels
    pixel_hvs = []

    for y in range(height):
        for x in range(width):
            # Encode spatial position
            x_hv = self.scalar_encoder.encode(float(x))
            y_hv = self.scalar_encoder.encode(float(y))

            x_bound = self.model.bind(self.X, x_hv)
            y_bound = self.model.bind(self.Y, y_hv)
            pos_hv = self.model.bundle([x_bound, y_bound])

            # Encode pixel value(s)
            if n_channels == 1:
                # Grayscale: just encode the intensity
                val_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
            elif n_channels == 3:
                # RGB: bind each channel to its dimension vector
                r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
                g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
                b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))

                r_bound = self.model.bind(self.R, r_hv)
                g_bound = self.model.bind(self.G, g_hv)
                b_bound = self.model.bind(self.B, b_hv)

                val_hv = self.model.bundle([r_bound, g_bound, b_bound])
            else:  # n_channels == 4
                # RGBA: bind each channel including alpha
                r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
                g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
                b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
                a_hv = self.scalar_encoder.encode(float(image[y, x, 3]))

                r_bound = self.model.bind(self.R, r_hv)
                g_bound = self.model.bind(self.G, g_hv)
                b_bound = self.model.bind(self.B, b_hv)
                a_bound = self.model.bind(self.A, a_hv)

                val_hv = self.model.bundle([r_bound, g_bound, b_bound, a_bound])

            # Bind position with value
            pixel_hv = self.model.bind(pos_hv, val_hv)
            pixel_hvs.append(pixel_hv)

    # Bundle all pixels to create image hypervector
    image_hv = self.model.bundle(pixel_hvs)

    return image_hv

holovec.encoders.structured.VectorEncoder

Bases: StructuredEncoder

Vector encoder for multi-dimensional numeric data using role-filler binding.

Encodes vectors by binding each dimension with its scalar-encoded value:

encode([v₁, v₂, ..., vₐ]) = Σᵢ bind(Dᵢ, scalar_encode(vᵢ))

where:

  • Dᵢ is a random hypervector for dimension i
  • scalar_encode(vᵢ) encodes the scalar value using FPE/Thermometer/Level
  • bind() creates a role-filler binding
  • Σ bundles all dimension-value pairs

This creates a compositional encoding where:

  • Each dimension has explicit representation (Dᵢ)
  • Similar values in corresponding dimensions → higher similarity
  • Supports partial matching across dimensions
  • Enables approximate decoding via unbinding

Attributes:

  • scalar_encoder: Encoder for individual scalar values
  • n_dimensions: Number of dimensions in input vectors
  • dim_vectors: List of dimension hypervectors (Dᵢ)
  • normalize_input: Whether to normalize input vectors

Example:

>>> from holovec import VSA
>>> from holovec.encoders import FractionalPowerEncoder, VectorEncoder
>>>
>>> model = VSA.create('FHRR', dim=10000)
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1)
>>> encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dimensions=128)
>>>
>>> # Encode a feature vector (list or any backend array)
>>> features = [0.5] * 128  # Can also use numpy/torch/jax arrays
>>> hv = encoder.encode(features)
>>>
>>> # Similar vectors have high similarity
>>> features2 = [0.51] * 128  # Slightly different
>>> hv2 = encoder.encode(features2)
>>> model.similarity(hv, hv2)  # High similarity
>>>
>>> # Decode to recover approximate values
>>> recovered = encoder.decode(hv)
>>> # Verify approximate recovery via similarity
>>> model.similarity(encoder.encode(recovered), hv) > 0.9

Source code in holovec/encoders/structured.py
class VectorEncoder(StructuredEncoder):
    """
    Vector encoder for multi-dimensional numeric data using role-filler binding.

    Encodes vectors by binding each dimension with its scalar-encoded value:

        encode([v₁, v₂, ..., vₐ]) = Σᵢ bind(Dᵢ, scalar_encode(vᵢ))

    where:
    - Dᵢ is a random hypervector for dimension i
    - scalar_encode(vᵢ) encodes the scalar value using FPE/Thermometer/Level
    - bind() creates a role-filler binding
    - Σ bundles all dimension-value pairs

    This creates a compositional encoding where:
    - Each dimension has explicit representation (Dᵢ)
    - Similar values in corresponding dimensions → higher similarity
    - Supports partial matching across dimensions
    - Enables approximate decoding via unbinding

    Attributes:
        scalar_encoder: Encoder for individual scalar values
        n_dimensions: Number of dimensions in input vectors
        dim_vectors: List of dimension hypervectors (Dᵢ)
        normalize_input: Whether to normalize input vectors

    Example:
        >>> from holovec import VSA
        >>> from holovec.encoders import FractionalPowerEncoder, VectorEncoder
        >>>
        >>> model = VSA.create('FHRR', dim=10000)
        >>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1)
        >>> encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dimensions=128)
        >>>
        >>> # Encode a feature vector (list or any backend array)
        >>> features = [0.5] * 128  # Can also use numpy/torch/jax arrays
        >>> hv = encoder.encode(features)
        >>>
        >>> # Similar vectors have high similarity
        >>> features2 = [0.51] * 128  # Slightly different
        >>> hv2 = encoder.encode(features2)
        >>> model.similarity(hv, hv2)  # High similarity
        >>>
        >>> # Decode to recover approximate values
        >>> recovered = encoder.decode(hv)
        >>> # Verify approximate recovery via similarity
        >>> model.similarity(encoder.encode(recovered), hv) > 0.9
    """

    def __init__(
        self,
        model: VSAModel,
        scalar_encoder: ScalarEncoder,
        n_dimensions: int,
        normalize_input: bool = False,
        seed: int | None = None
    ):
        """
        Initialize vector encoder.

        Args:
            model: VSA model instance
            scalar_encoder: Encoder for individual scalar values
            n_dimensions: Number of dimensions in input vectors
            normalize_input: Whether to normalize input vectors to unit length
            seed: Random seed for dimension vector generation

        Raises:
            ValueError: If n_dimensions < 1
            TypeError: If scalar_encoder is not a ScalarEncoder
        """
        super().__init__(model)

        if n_dimensions < 1:
            raise ValueError(f"n_dimensions must be >= 1, got {n_dimensions}")

        if not isinstance(scalar_encoder, ScalarEncoder):
            raise TypeError(
                f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
            )

        # Check model compatibility
        if model != scalar_encoder.model:
            raise ValueError(
                "scalar_encoder must use the same VSA model as VectorEncoder"
            )

        self.scalar_encoder = scalar_encoder
        self.n_dimensions = n_dimensions
        self.normalize_input = normalize_input
        self.seed = seed

        # Generate dimension hypervectors (one per dimension)
        # These are the "roles" in role-filler binding
        self.dim_vectors: list[Array] = []
        for i in range(n_dimensions):
            # Use deterministic seeding for reproducibility
            if seed is not None:
                dim_seed = seed + i
            else:
                dim_seed = i + 1000  # Offset to avoid collision with symbol seeds

            self.dim_vectors.append(model.random(seed=dim_seed))

    def encode(self, vector: Array) -> Array:
        """
        Encode a vector using dimension binding.

        Each element is bound with its corresponding dimension vector:

            result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))

        Args:
            vector: Input vector to encode, shape (n_dimensions,)

        Returns:
            Hypervector representing the vector

        Raises:
            ValueError: If vector shape doesn't match n_dimensions

        Example:
            >>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
            >>> vector = [1.0, 2.0, 3.0]  # Can also be numpy/torch/jax array
            >>> hv = encoder.encode(vector)
        """
        # Convert to backend array if needed
        vector = self.backend.array(vector)

        if vector.shape != (self.n_dimensions,):
            raise ValueError(
                f"Expected vector of shape ({self.n_dimensions},), "
                f"got {vector.shape}"
            )

        # Optional: normalize to unit length
        if self.normalize_input:
            vector = self.backend.normalize(vector)

        # Bind each dimension with its scalar-encoded value
        bound_dims = []
        for i, value in enumerate(vector):
            # Encode scalar value as hypervector
            value_hv = self.scalar_encoder.encode(float(value))

            # Bind dimension role with value filler
            dim_hv = self.dim_vectors[i]
            bound = self.model.bind(dim_hv, value_hv)

            bound_dims.append(bound)

        # Bundle all dimension-value bindings
        vector_hv = self.model.bundle(bound_dims)

        return vector_hv

    def decode(self, hypervector: Array) -> Array:
        """
        Decode vector hypervector to recover approximate values.

        For each dimension i:
        1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
        2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)

        Args:
            hypervector: Vector hypervector to decode, shape (dimension,)

        Returns:
            Decoded vector, shape (n_dimensions,) (backend array type)

        Raises:
            NotImplementedError: If scalar_encoder doesn't support decoding

        Note:
            Decoding is approximate and quality depends on:
            - VSA model (exact vs. approximate binding)
            - Scalar encoder precision
            - Number of dimensions (more dims → more noise)

        Example:
            >>> original = [1.0, 2.0, 3.0]
            >>> encoded = encoder.encode(original)
            >>> decoded = encoder.decode(encoded)
            >>> # Check approximate recovery (using backend operations)
            >>> model.similarity(encoder.encode(decoded), encoded) > 0.9
        """
        if not self.scalar_encoder.is_reversible:
            raise NotImplementedError(
                f"Cannot decode: scalar_encoder {type(self.scalar_encoder).__name__} "
                "does not support decoding"
            )

        decoded_values = []

        for i in range(self.n_dimensions):
            # Unbind dimension to recover value hypervector
            dim_hv = self.dim_vectors[i]
            value_hv = self.model.unbind(hypervector, dim_hv)

            # Decode scalar value
            value = self.scalar_encoder.decode(value_hv)
            decoded_values.append(value)

        return self.backend.array(decoded_values)

    @property
    def is_reversible(self) -> bool:
        """
        VectorEncoder supports approximate decoding if scalar_encoder does.

        Returns:
            True if scalar_encoder supports decoding, False otherwise
        """
        return self.scalar_encoder.is_reversible

    @property
    def compatible_models(self) -> list[str]:
        """
        Works with all VSA models.

        Decoding quality varies:
        - Exact models (FHRR, MAP): High accuracy
        - Approximate models (HRR, BSC): Moderate accuracy

        Returns:
            List of all model names
        """
        return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]

    @property
    def input_type(self) -> str:
        """Input type description."""
        return f"{self.n_dimensions}-dimensional vector"

    def __repr__(self) -> str:
        """String representation."""
        return (
            f"VectorEncoder("
            f"model={self.model.model_name}, "
            f"scalar_encoder={type(self.scalar_encoder).__name__}, "
            f"n_dimensions={self.n_dimensions}, "
            f"normalize_input={self.normalize_input})"
        )

compatible_models property

Works with all VSA models.

Decoding quality varies:

  • Exact models (FHRR, MAP): High accuracy
  • Approximate models (HRR, BSC): Moderate accuracy

Returns:

  • List of all model names

input_type property

Input type description.

is_reversible property

VectorEncoder supports approximate decoding if scalar_encoder does.

Returns: True if scalar_encoder supports decoding, False otherwise

__init__(model, scalar_encoder, n_dimensions, normalize_input=False, seed=None)

Initialize vector encoder.

Args:

  • model: VSA model instance
  • scalar_encoder: Encoder for individual scalar values
  • n_dimensions: Number of dimensions in input vectors
  • normalize_input: Whether to normalize input vectors to unit length
  • seed: Random seed for dimension vector generation

Raises:

  • ValueError: If n_dimensions < 1
  • TypeError: If scalar_encoder is not a ScalarEncoder

Source code in holovec/encoders/structured.py
def __init__(
    self,
    model: VSAModel,
    scalar_encoder: ScalarEncoder,
    n_dimensions: int,
    normalize_input: bool = False,
    seed: int | None = None
):
    """
    Initialize vector encoder.

    Args:
        model: VSA model instance
        scalar_encoder: Encoder for individual scalar values
        n_dimensions: Number of dimensions in input vectors
        normalize_input: Whether to normalize input vectors to unit length
        seed: Random seed for dimension vector generation

    Raises:
        ValueError: If n_dimensions < 1
        TypeError: If scalar_encoder is not a ScalarEncoder
    """
    super().__init__(model)

    if n_dimensions < 1:
        raise ValueError(f"n_dimensions must be >= 1, got {n_dimensions}")

    if not isinstance(scalar_encoder, ScalarEncoder):
        raise TypeError(
            f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
        )

    # Check model compatibility
    if model != scalar_encoder.model:
        raise ValueError(
            "scalar_encoder must use the same VSA model as VectorEncoder"
        )

    self.scalar_encoder = scalar_encoder
    self.n_dimensions = n_dimensions
    self.normalize_input = normalize_input
    self.seed = seed

    # Generate dimension hypervectors (one per dimension)
    # These are the "roles" in role-filler binding
    self.dim_vectors: list[Array] = []
    for i in range(n_dimensions):
        # Use deterministic seeding for reproducibility
        if seed is not None:
            dim_seed = seed + i
        else:
            dim_seed = i + 1000  # Offset to avoid collision with symbol seeds

        self.dim_vectors.append(model.random(seed=dim_seed))

__repr__()

String representation.

Source code in holovec/encoders/structured.py
def __repr__(self) -> str:
    """String representation."""
    return (
        f"VectorEncoder("
        f"model={self.model.model_name}, "
        f"scalar_encoder={type(self.scalar_encoder).__name__}, "
        f"n_dimensions={self.n_dimensions}, "
        f"normalize_input={self.normalize_input})"
    )

decode(hypervector)

Decode vector hypervector to recover approximate values.

For each dimension i:

  1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
  2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)

Args:

  • hypervector: Vector hypervector to decode, shape (dimension,)

Returns:

  • Decoded vector, shape (n_dimensions,) (backend array type)

Raises:

  • NotImplementedError: If scalar_encoder doesn't support decoding

Note: Decoding is approximate and quality depends on:

  • VSA model (exact vs. approximate binding)
  • Scalar encoder precision
  • Number of dimensions (more dims → more noise)

Example:

>>> original = [1.0, 2.0, 3.0]
>>> encoded = encoder.encode(original)
>>> decoded = encoder.decode(encoded)
>>> # Check approximate recovery (using backend operations)
>>> model.similarity(encoder.encode(decoded), encoded) > 0.9

Source code in holovec/encoders/structured.py
def decode(self, hypervector: Array) -> Array:
    """
    Decode vector hypervector to recover approximate values.

    For each dimension i:
    1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
    2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)

    Args:
        hypervector: Vector hypervector to decode, shape (dimension,)

    Returns:
        Decoded vector, shape (n_dimensions,) (backend array type)

    Raises:
        NotImplementedError: If scalar_encoder doesn't support decoding

    Note:
        Decoding is approximate and quality depends on:
        - VSA model (exact vs. approximate binding)
        - Scalar encoder precision
        - Number of dimensions (more dims → more noise)

    Example:
        >>> original = [1.0, 2.0, 3.0]
        >>> encoded = encoder.encode(original)
        >>> decoded = encoder.decode(encoded)
        >>> # Check approximate recovery (using backend operations)
        >>> model.similarity(encoder.encode(decoded), encoded) > 0.9
    """
    if not self.scalar_encoder.is_reversible:
        raise NotImplementedError(
            f"Cannot decode: scalar_encoder {type(self.scalar_encoder).__name__} "
            "does not support decoding"
        )

    decoded_values = []

    for i in range(self.n_dimensions):
        # Unbind dimension to recover value hypervector
        dim_hv = self.dim_vectors[i]
        value_hv = self.model.unbind(hypervector, dim_hv)

        # Decode scalar value
        value = self.scalar_encoder.decode(value_hv)
        decoded_values.append(value)

    return self.backend.array(decoded_values)

encode(vector)

Encode a vector using dimension binding.

Each element is bound with its corresponding dimension vector:

result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))

Args:

  • vector: Input vector to encode, shape (n_dimensions,)

Returns:

  • Hypervector representing the vector

Raises:

  • ValueError: If vector shape doesn't match n_dimensions

Example:

>>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
>>> vector = [1.0, 2.0, 3.0]  # Can also be numpy/torch/jax array
>>> hv = encoder.encode(vector)

Source code in holovec/encoders/structured.py
def encode(self, vector: Array) -> Array:
    """
    Encode a vector using dimension binding.

    Each element is bound with its corresponding dimension vector:

        result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))

    Args:
        vector: Input vector to encode, shape (n_dimensions,)

    Returns:
        Hypervector representing the vector

    Raises:
        ValueError: If vector shape doesn't match n_dimensions

    Example:
        >>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
        >>> vector = [1.0, 2.0, 3.0]  # Can also be numpy/torch/jax array
        >>> hv = encoder.encode(vector)
    """
    # Convert to backend array if needed
    vector = self.backend.array(vector)

    if vector.shape != (self.n_dimensions,):
        raise ValueError(
            f"Expected vector of shape ({self.n_dimensions},), "
            f"got {vector.shape}"
        )

    # Optional: normalize to unit length
    if self.normalize_input:
        vector = self.backend.normalize(vector)

    # Bind each dimension with its scalar-encoded value
    bound_dims = []
    for i, value in enumerate(vector):
        # Encode scalar value as hypervector
        value_hv = self.scalar_encoder.encode(float(value))

        # Bind dimension role with value filler
        dim_hv = self.dim_vectors[i]
        bound = self.model.bind(dim_hv, value_hv)

        bound_dims.append(bound)

    # Bundle all dimension-value bindings
    vector_hv = self.model.bundle(bound_dims)

    return vector_hv

Retrieval

Codebook

holovec.retrieval.codebook.Codebook

Thin wrapper for label→vector mappings with convenience methods.

Keeps insertion order of labels. Vectors are backend arrays.

Source code in holovec/retrieval/codebook.py
class Codebook:
    """Thin wrapper for label→vector mappings with convenience methods.

    Keeps insertion order of labels. Vectors are backend arrays.
    """

    def __init__(self, items: dict[str, Array] | None = None, backend: Backend | None = None):
        self._items: dict[str, Array] = {}
        self._backend: Backend = backend if backend is not None else get_backend("numpy")
        if items:
            self.extend(items)

    # Basic operations
    def add(self, label: str, vector: Array) -> None:
        self._items[label] = vector

    def extend(self, items: dict[str, Array]) -> None:
        for k, v in items.items():
            self.add(k, v)

    @property
    def labels(self) -> list[str]:
        return list(self._items.keys())

    @property
    def size(self) -> int:
        return len(self._items)

    # Dict-like interface
    def __getitem__(self, label: str) -> Array:
        """Get vector by label. Raises KeyError if not found."""
        return self._items[label]

    def __contains__(self, label: str) -> bool:
        """Check if label exists in codebook."""
        return label in self._items

    def __len__(self) -> int:
        """Return number of items in codebook."""
        return len(self._items)

    def __iter__(self):
        """Iterate over labels."""
        return iter(self._items)

    def items(self):
        """Return iterator over (label, vector) pairs."""
        return self._items.items()

    def keys(self):
        """Return iterator over labels."""
        return self._items.keys()

    def values(self):
        """Return iterator over vectors."""
        return self._items.values()

    def get(self, label: str, default: Array | None = None) -> Array | None:
        """Get vector by label, returning default if not found."""
        return self._items.get(label, default)

    def as_list(self) -> list[tuple[str, Array]]:
        return list(self._items.items())

    def as_matrix(self, backend: Backend | None = None) -> tuple[list[str], Array]:
        """Return (labels, matrix) where matrix has shape (L, D)."""
        be = backend or self._backend
        if self.size == 0:
            return [], be.zeros((0,), dtype="float32")
        labels = self.labels
        stacked = be.stack([self._items[lbl] for lbl in labels], axis=0)
        return labels, stacked

    # Persistence (npz)
    def save(self, path: str) -> None:
        labels, mat = self.as_matrix()
        mat_np = self._backend.to_numpy(mat)
        np.savez(path, labels=np.array(labels, dtype=object), matrix=mat_np)

    @classmethod
    def load(cls, path: str, backend: Backend | None = None) -> "Codebook":
        be = backend or get_backend("numpy")
        data = np.load(path, allow_pickle=True)
        labels = [str(x) for x in data["labels"].tolist()]
        mat = data["matrix"]
        items: dict[str, Array] = {}
        for i, lbl in enumerate(labels):
            items[lbl] = be.from_numpy(mat[i])
        return cls(items=items, backend=be)
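
A brief usage sketch of the Codebook API above; the labels and the .npz file name are illustrative.

from holovec import VSA
from holovec.retrieval.codebook import Codebook

model = VSA.create('MAP', dim=1000)
cb = Codebook(backend=model.backend)
cb.add('apple', model.random(seed=1))
cb.add('banana', model.random(seed=2))

assert 'apple' in cb and len(cb) == 2
labels, matrix = cb.as_matrix()  # matrix has shape (2, 1000)

cb.save('fruits.npz')  # persisted via numpy's savez
restored = Codebook.load('fruits.npz', backend=model.backend)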

__contains__(label)

Check if label exists in codebook.

Source code in holovec/retrieval/codebook.py
def __contains__(self, label: str) -> bool:
    """Check if label exists in codebook."""
    return label in self._items

__getitem__(label)

Get vector by label. Raises KeyError if not found.

Source code in holovec/retrieval/codebook.py
def __getitem__(self, label: str) -> Array:
    """Get vector by label. Raises KeyError if not found."""
    return self._items[label]

__iter__()

Iterate over labels.

Source code in holovec/retrieval/codebook.py
def __iter__(self):
    """Iterate over labels."""
    return iter(self._items)

__len__()

Return number of items in codebook.

Source code in holovec/retrieval/codebook.py
def __len__(self) -> int:
    """Return number of items in codebook."""
    return len(self._items)

as_matrix(backend=None)

Return (labels, matrix) where matrix has shape (L, D).

Source code in holovec/retrieval/codebook.py
def as_matrix(self, backend: Backend | None = None) -> tuple[list[str], Array]:
    """Return (labels, matrix) where matrix has shape (L, D)."""
    be = backend or self._backend
    if self.size == 0:
        return [], be.zeros((0,), dtype="float32")
    labels = self.labels
    stacked = be.stack([self._items[lbl] for lbl in labels], axis=0)
    return labels, stacked

get(label, default=None)

Get vector by label, returning default if not found.

Source code in holovec/retrieval/codebook.py
def get(self, label: str, default: Array | None = None) -> Array | None:
    """Get vector by label, returning default if not found."""
    return self._items.get(label, default)

items()

Return iterator over (label, vector) pairs.

Source code in holovec/retrieval/codebook.py
def items(self):
    """Return iterator over (label, vector) pairs."""
    return self._items.items()

keys()

Return iterator over labels.

Source code in holovec/retrieval/codebook.py
def keys(self):
    """Return iterator over labels."""
    return self._items.keys()

values()

Return iterator over vectors.

Source code in holovec/retrieval/codebook.py
def values(self):
    """Return iterator over vectors."""
    return self._items.values()

ItemStore

holovec.retrieval.itemstore.ItemStore

Thin retrieval wrapper around a Codebook and a CleanupStrategy.

Provides nearest-neighbor queries and multi-factor factorization via the configured cleanup strategy.

Source code in holovec/retrieval/itemstore.py
class ItemStore:
    """Thin retrieval wrapper around a Codebook and a CleanupStrategy.

    Provides nearest-neighbor queries and multi-factor factorization via
    the configured cleanup strategy.
    """

    def __init__(
        self,
        model: VSAModel,
        cleanup: CleanupStrategy | None = None,
    ) -> None:
        self.model = model
        self.cleanup: CleanupStrategy = cleanup if cleanup is not None else BruteForceCleanup()
        self.codebook = Codebook(backend=model.backend)

    def fit(self, items: dict[str, Array] | Codebook) -> "ItemStore":
        if isinstance(items, Codebook):
            self.codebook = items
        else:
            self.codebook = Codebook(items, backend=self.model.backend)
        return self

    def add(self, label: str, vector: Array) -> None:
        self.codebook.add(label, vector)

    def extend(self, items: dict[str, Array]) -> None:
        self.codebook.extend(items)

    def query(
        self,
        vec: Array,
        k: int = 1,
        return_similarities: bool = True,
        fast: bool = True,
    ) -> list[tuple[str, float]]:
        """Query top-k nearest items.

        If fast=True, uses a batched matrix routine when possible, otherwise
        falls back to scalar nearest_neighbors.
        """
        if fast and self.codebook.size > 0:
            labels, mat = self.codebook.as_matrix(self.model.backend)
            be = self.model.backend
            # Continuous spaces: cosine-like; ComplexSpace handled specially
            space_name = self.model.space.space_name
            try:
                if space_name.startswith("complex"):
                    # sim = Re(conj(C) @ v) / D
                    v = vec
                    conjC = be.conjugate(mat)
                    dots = be.matmul(conjC, v)  # (L,)
                    sims_arr = be.real(dots)
                    sims_np = be.to_numpy(sims_arr) / float(self.model.dimension)
                else:
                    # cosine: (C v) / (||C_i|| * ||v||)
                    dots = be.matmul(mat, vec)  # (L,)
                    # norms per row
                    # norm(C_i) = sqrt(sum(C_i^2)) → use l2 along axis=1
                    row_norms = be.norm(mat, ord=2, axis=1)
                    v_norm = be.norm(vec, ord=2)
                    denom = be.multiply(row_norms, v_norm)
                    sims_arr = be.divide(dots, denom)
                    sims_np = be.to_numpy(sims_arr)
                # Prepare top-k
                import numpy as _np

                sims_np = sims_np.astype(float)
                if k >= len(labels):
                    order = _np.argsort(-sims_np)
                else:
                    # partial sort then full sort within top-k
                    idx_part = _np.argpartition(-sims_np, kth=k - 1)[:k]
                    order = idx_part[_np.argsort(-sims_np[idx_part])]
                out = [(labels[i], float(sims_np[i])) for i in order[:k]]
                if return_similarities:
                    return out
                else:
                    return [(lbl, 0.0) for lbl, _ in out]
            except Exception:
                # Fallback to scalar path on any backend issues
                pass

        labels, sims = nearest_neighbors(
            vec, self.codebook._items, self.model, k=k, return_similarities=True
        )
        return (
            list(zip(labels, sims or [])) if return_similarities else [(lbl, 0.0) for lbl in labels]
        )

    def factorize(
        self,
        vec: Array,
        n_factors: int,
        **kwargs,
    ) -> tuple[list[str], list[float]]:
        return self.cleanup.factorize(
            vec,
            self.codebook._items,
            self.model,
            n_factors=n_factors,
            **kwargs,
        )

    # Persistence delegates to Codebook
    def save(self, path: str) -> None:
        self.codebook.save(path)

    @classmethod
    def load(
        cls,
        model: VSAModel,
        path: str,
        cleanup: CleanupStrategy | None = None,
    ) -> "ItemStore":
        store = cls(model=model, cleanup=cleanup)
        store.codebook = Codebook.load(path, backend=model.backend)
        return store
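
A minimal retrieval sketch with ItemStore; the labels and seeds are illustrative.

from holovec import VSA
from holovec.retrieval.itemstore import ItemStore

model = VSA.create('FHRR', dim=10000)
store = ItemStore(model).fit({
    'red': model.random(seed=1),
    'green': model.random(seed=2),
    'blue': model.random(seed=3),
})

# Bundle 'green' with noise, then ask for the two nearest items
noisy = model.bundle([store.codebook['green'], model.random(seed=99)])
results = store.query(noisy, k=2)  # [('green', <sim>), (<runner-up>, <sim>)]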

query(vec, k=1, return_similarities=True, fast=True)

Query top-k nearest items.

If fast=True, uses a batched matrix routine when possible, otherwise falls back to scalar nearest_neighbors.

Source code in holovec/retrieval/itemstore.py
def query(
    self,
    vec: Array,
    k: int = 1,
    return_similarities: bool = True,
    fast: bool = True,
) -> list[tuple[str, float]]:
    """Query top-k nearest items.

    If fast=True, uses a batched matrix routine when possible, otherwise
    falls back to scalar nearest_neighbors.
    """
    if fast and self.codebook.size > 0:
        labels, mat = self.codebook.as_matrix(self.model.backend)
        be = self.model.backend
        # Continuous spaces: cosine-like; ComplexSpace handled specially
        space_name = self.model.space.space_name
        try:
            if space_name.startswith("complex"):
                # sim = Re(conj(C) @ v) / D
                v = vec
                conjC = be.conjugate(mat)
                dots = be.matmul(conjC, v)  # (L,)
                sims_arr = be.real(dots)
                sims_np = be.to_numpy(sims_arr) / float(self.model.dimension)
            else:
                # cosine: (C v) / (||C_i|| * ||v||)
                dots = be.matmul(mat, vec)  # (L,)
                # norms per row
                # norm(C_i) = sqrt(sum(C_i^2)) → use l2 along axis=1
                row_norms = be.norm(mat, ord=2, axis=1)
                v_norm = be.norm(vec, ord=2)
                denom = be.multiply(row_norms, v_norm)
                sims_arr = be.divide(dots, denom)
                sims_np = be.to_numpy(sims_arr)
            # Prepare top-k
            import numpy as _np

            sims_np = sims_np.astype(float)
            if k >= len(labels):
                order = _np.argsort(-sims_np)
            else:
                # partial sort then full sort within top-k
                idx_part = _np.argpartition(-sims_np, kth=k - 1)[:k]
                order = idx_part[_np.argsort(-sims_np[idx_part])]
            out = [(labels[i], float(sims_np[i])) for i in order[:k]]
            if return_similarities:
                return out
            else:
                return [(lbl, 0.0) for lbl, _ in out]
        except Exception:
            # Fallback to scalar path on any backend issues
            pass

    labels, sims = nearest_neighbors(
        vec, self.codebook._items, self.model, k=k, return_similarities=True
    )
    return (
        list(zip(labels, sims or [])) if return_similarities else [(lbl, 0.0) for lbl in labels]
    )

AssocStore

holovec.retrieval.assocstore.AssocStore

Lean heteroassociative store: keys → values via aligned codebooks.

Stores two codebooks with aligned label order. Query by a key vector returns the best-matching key label and its corresponding value label/vector.

Source code in holovec/retrieval/assocstore.py
class AssocStore:
    """Lean heteroassociative store: keys → values via aligned codebooks.

    Stores two codebooks with aligned label order. Query by a key vector returns
    the best-matching key label and its corresponding value label/vector.
    """

    def __init__(self, model: VSAModel) -> None:
        self.model = model
        self.keys = Codebook(backend=model.backend)
        self.values = Codebook(backend=model.backend)
        self._label_order: list[str] = []

    def fit(self, key_items: dict[str, Array], value_items: dict[str, Array]) -> "AssocStore":
        # Intersect labels and preserve deterministic order
        labels = [lbl for lbl in key_items.keys() if lbl in value_items]
        self._label_order = labels
        self.keys = Codebook({lbl: key_items[lbl] for lbl in labels}, backend=self.model.backend)
        self.values = Codebook(
            {lbl: value_items[lbl] for lbl in labels}, backend=self.model.backend
        )
        return self

    def add(self, label: str, key_vec: Array, value_vec: Array) -> None:
        self.keys.add(label, key_vec)
        self.values.add(label, value_vec)
        if label not in self._label_order:
            self._label_order.append(label)

    def query_label(self, key_vec: Array, k: int = 1) -> list[tuple[str, float]]:
        labels, sims = nearest_neighbors(
            key_vec, self.keys._items, self.model, k=k, return_similarities=True
        )
        return list(zip(labels, sims or []))

    def query_value(self, key_vec: Array, top: int = 1) -> tuple[str, Array]:
        lbls = self.query_label(key_vec, k=1)
        if not lbls:
            raise ValueError("No items in store")
        lbl = lbls[0][0]
        return lbl, self.values._items[lbl]

    def save(self, keys_path: str, values_path: str) -> None:
        self.keys.save(keys_path)
        self.values.save(values_path)

    @classmethod
    def load(cls, model: VSAModel, keys_path: str, values_path: str) -> "AssocStore":
        st = cls(model)
        st.keys = Codebook.load(keys_path, backend=model.backend)
        st.values = Codebook.load(values_path, backend=model.backend)
        st._label_order = st.keys.labels
        return st
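
A short key→value lookup sketch with AssocStore; the labels and seeds are illustrative.

from holovec import VSA
from holovec.retrieval.assocstore import AssocStore

model = VSA.create('MAP', dim=10000)
keys = {'dog': model.random(seed=1), 'cat': model.random(seed=2)}
values = {'dog': model.random(seed=11), 'cat': model.random(seed=12)}

store = AssocStore(model).fit(keys, values)
label, value_vec = store.query_value(keys['dog'])  # label == 'dog'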

Cleanup

holovec.utils.cleanup.BruteForceCleanup

Bases: CleanupStrategy

Brute-force cleanup via exhaustive codebook search.

This is the baseline cleanup strategy that computes similarity between the query and every codebook entry, returning the best match. Simple and effective, but slow for large codebooks.

Performance:

  • Time complexity: O(n × d) for n items, d dimensions
  • Space complexity: O(1)
  • Best for: Small codebooks (< 1000 items)

Examples:

>>> # Create strategy
>>> cleanup = BruteForceCleanup()
>>>
>>> # Single cleanup
>>> label, sim = cleanup.cleanup(query, codebook, model)
>>> print(f"Found: {label}")
>>>
>>> # Multi-factor factorization
>>> labels, sims = cleanup.factorize(query, codebook, model, n_factors=3)
>>> print(f"Factors: {labels}")

References:

  • Kanerva (2009): Classic cleanup operation

Source code in holovec/utils/cleanup.py
class BruteForceCleanup(CleanupStrategy):
    """Brute-force cleanup via exhaustive codebook search.

    This is the baseline cleanup strategy that computes similarity between
    the query and every codebook entry, returning the best match. Simple
    and effective, but slow for large codebooks.

    Performance:
        - Time complexity: O(n × d) for n items, d dimensions
        - Space complexity: O(1)
        - Best for: Small codebooks (< 1000 items)

    Examples:
        >>> # Create strategy
        >>> cleanup = BruteForceCleanup()
        >>>
        >>> # Single cleanup
        >>> label, sim = cleanup.cleanup(query, codebook, model)
        >>> print(f"Found: {label}")
        >>>
        >>> # Multi-factor factorization
        >>> labels, sims = cleanup.factorize(query, codebook, model, n_factors=3)
        >>> print(f"Factors: {labels}")

    References:
        Kanerva (2009): Classic cleanup operation
    """

    def cleanup(
        self,
        query: Array,
        codebook: dict[str, Array],
        model: VSAModel,
    ) -> tuple[str, float]:
        """Find best match via exhaustive search.

        Computes similarity between query and every codebook entry,
        returning the label with highest similarity.

        Args:
            query: Query hypervector to clean up
            codebook: Dictionary mapping labels to hypervectors
            model: VSA model for similarity computation

        Returns:
            Tuple of (label, similarity) for the best match

        Raises:
            TypeError: If arguments are not correct types
            ValueError: If codebook is empty

        Examples:
            >>> label, sim = cleanup.cleanup(query, codebook, model)
            >>> print(f"Best match: {label} (sim: {sim:.3f})")
        """
        # Type validation
        if query is None:
            raise TypeError("query cannot be None")
        if not isinstance(codebook, dict):
            raise TypeError(f"codebook must be dict, got {type(codebook)}")
        if not isinstance(model, VSAModel):
            raise TypeError(f"model must be VSAModel, got {type(model)}")

        # Value validation
        if len(codebook) == 0:
            raise ValueError("codebook must not be empty")

        # Array shape validation (ensure query is 1-D vector matching model dimension)
        try:
            query_shape = model.backend.shape(query)
            expected_shape = (model.dimension,)
            if query_shape != expected_shape:
                raise ValueError(
                    f"query must have shape {expected_shape}, got {query_shape}. "
                    f"Ensure query is a 1-D hypervector matching model dimension."
                )
        except (AttributeError, TypeError) as e:
            raise TypeError(
                f"query must be a valid array compatible with model backend, got {type(query)}. "
                f"Backend error: {e}"
            )

        # Compute similarities for all entries
        best_label = None
        best_similarity = float('-inf')

        for label, vector in codebook.items():
            similarity = model.similarity(query, vector)
            if similarity > best_similarity:
                best_similarity = similarity
                best_label = label

        return best_label, float(best_similarity)

    def factorize(
        self,
        query: Array,
        codebook: dict[str, Array],
        model: VSAModel,
        n_factors: int = 2,
        max_iterations: int = 20,
        threshold: float = 0.99,
    ) -> tuple[list[str], list[float]]:
        """Factorize via iterative cleanup and unbinding.

        Repeatedly finds the best match, unbinds it from the query,
        and continues until n_factors are extracted or convergence.

        Args:
            query: Composite hypervector to factorize
            codebook: Dictionary mapping labels to hypervectors
            model: VSA model for bind/unbind/similarity operations
            n_factors: Number of factors to extract (default: 2)
            max_iterations: Maximum iterations per factor (default: 20)
            threshold: Convergence threshold for similarity (default: 0.99)

        Returns:
            Tuple of:
                - labels: List of factor labels in extraction order
                - similarities: List of similarities for each factor

        Raises:
            TypeError: If arguments are not correct types
            ValueError: If n_factors < 1 or codebook is empty

        Examples:
            >>> labels, sims = cleanup.factorize(
            ...     query, codebook, model, n_factors=3, threshold=0.95
            ... )
            >>> print(f"Extracted {len(labels)} factors")
        """
        # Type validation
        if query is None:
            raise TypeError("query cannot be None")
        if not isinstance(codebook, dict):
            raise TypeError(f"codebook must be dict, got {type(codebook)}")
        if not isinstance(model, VSAModel):
            raise TypeError(f"model must be VSAModel, got {type(model)}")
        if not isinstance(n_factors, int):
            raise TypeError(f"n_factors must be int, got {type(n_factors)}")
        if not isinstance(max_iterations, int):
            raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
        if not isinstance(threshold, int | float):
            raise TypeError(f"threshold must be numeric, got {type(threshold)}")

        # Value validation
        if n_factors < 1:
            raise ValueError(f"n_factors must be >= 1, got {n_factors}")
        if len(codebook) == 0:
            raise ValueError("codebook must not be empty")
        if max_iterations < 1:
            raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
        if not (0.0 <= threshold <= 1.0):
            raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")

        # Array shape validation (ensure query is 1-D vector matching model dimension)
        try:
            query_shape = model.backend.shape(query)
            expected_shape = (model.dimension,)
            if query_shape != expected_shape:
                raise ValueError(
                    f"query must have shape {expected_shape}, got {query_shape}. "
                    f"Ensure query is a 1-D hypervector matching model dimension."
                )
        except (AttributeError, TypeError) as e:
            raise TypeError(
                f"query must be a valid array compatible with model backend, got {type(query)}. "
                f"Backend error: {e}"
            )

        # Extract factors iteratively
        labels = []
        similarities = []
        current = query

        for _ in range(n_factors):
            # Find best match
            label, similarity = self.cleanup(current, codebook, model)
            labels.append(label)
            similarities.append(similarity)

            # Check convergence
            if similarity >= threshold:
                # High similarity - factor found
                pass

            # Unbind the found factor and continue
            factor_vector = codebook[label]
            current = model.unbind(current, factor_vector)

        return labels, similarities
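
A self-contained cleanup sketch, assuming the API above; the codebook contents are illustrative.

from holovec import VSA
from holovec.utils.cleanup import BruteForceCleanup

model = VSA.create('MAP', dim=10000)
codebook = {f'item{i}': model.random(seed=i) for i in range(5)}

# A noisy copy of item2: bundling with a random vector keeps it the nearest entry
noisy = model.bundle([codebook['item2'], model.random(seed=100)])

cleanup = BruteForceCleanup()
label, sim = cleanup.cleanup(noisy, codebook, model)  # label == 'item2'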

cleanup(query, codebook, model)

Find best match via exhaustive search.

Computes similarity between query and every codebook entry, returning the label with highest similarity.

Args:

  • query: Query hypervector to clean up
  • codebook: Dictionary mapping labels to hypervectors
  • model: VSA model for similarity computation

Returns:

  • Tuple of (label, similarity) for the best match

Raises:

  • TypeError: If arguments are not correct types
  • ValueError: If codebook is empty

Examples:

>>> label, sim = cleanup.cleanup(query, codebook, model)
>>> print(f"Best match: {label} (sim: {sim:.3f})")

Source code in holovec/utils/cleanup.py
def cleanup(
    self,
    query: Array,
    codebook: dict[str, Array],
    model: VSAModel,
) -> tuple[str, float]:
    """Find best match via exhaustive search.

    Computes similarity between query and every codebook entry,
    returning the label with highest similarity.

    Args:
        query: Query hypervector to clean up
        codebook: Dictionary mapping labels to hypervectors
        model: VSA model for similarity computation

    Returns:
        Tuple of (label, similarity) for the best match

    Raises:
        TypeError: If arguments are not correct types
        ValueError: If codebook is empty

    Examples:
        >>> label, sim = cleanup.cleanup(query, codebook, model)
        >>> print(f"Best match: {label} (sim: {sim:.3f})")
    """
    # Type validation
    if query is None:
        raise TypeError("query cannot be None")
    if not isinstance(codebook, dict):
        raise TypeError(f"codebook must be dict, got {type(codebook)}")
    if not isinstance(model, VSAModel):
        raise TypeError(f"model must be VSAModel, got {type(model)}")

    # Value validation
    if len(codebook) == 0:
        raise ValueError("codebook must not be empty")

    # Array shape validation (ensure query is 1-D vector matching model dimension)
    try:
        query_shape = model.backend.shape(query)
        expected_shape = (model.dimension,)
        if query_shape != expected_shape:
            raise ValueError(
                f"query must have shape {expected_shape}, got {query_shape}. "
                f"Ensure query is a 1-D hypervector matching model dimension."
            )
    except (AttributeError, TypeError) as e:
        raise TypeError(
            f"query must be a valid array compatible with model backend, got {type(query)}. "
            f"Backend error: {e}"
        )

    # Compute similarities for all entries
    best_label = None
    best_similarity = float('-inf')

    for label, vector in codebook.items():
        similarity = model.similarity(query, vector)
        if similarity > best_similarity:
            best_similarity = similarity
            best_label = label

    return best_label, float(best_similarity)
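
As a worked illustration, the sketch below builds a small codebook and cleans up an unbind result. It is a minimal sketch, assuming the top-level VSA factory and the model.random()/bind()/unbind() helpers; the labels and dimension are arbitrary.

from holovec import VSA
from holovec.utils.cleanup import BruteForceCleanup

model = VSA.create('FHRR', dim=1024, seed=0)
codebook = {f'item{i}': model.random() for i in range(8)}

# Bind item3 to a random key, then unbind: the result is an estimate
# of item3 that cleanup should snap back to the codebook.
key = model.random()
query = model.unbind(model.bind(key, codebook['item3']), key)

label, sim = BruteForceCleanup().cleanup(query, codebook, model)
print(label, f"{sim:.3f}")  # expected: item3 with similarity near 1.0

Each call costs one similarity computation per codebook entry, so brute-force cleanup is O(|codebook|) per query.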

factorize(query, codebook, model, n_factors=2, max_iterations=20, threshold=0.99)

Factorize via iterative cleanup and unbinding.

Repeatedly finds the best match, unbinds it from the query, and continues until n_factors have been extracted; the returned similarities indicate how confident each extraction was.

Args:
    query: Composite hypervector to factorize
    codebook: Dictionary mapping labels to hypervectors
    model: VSA model for bind/unbind/similarity operations
    n_factors: Number of factors to extract (default: 2)
    max_iterations: Maximum iterations per factor (default: 20)
    threshold: Similarity level treated as a confident match (default: 0.99)

Returns:
    Tuple of:
        - labels: List of factor labels in extraction order
        - similarities: List of similarities for each factor

Raises:
    TypeError: If arguments are not correct types
    ValueError: If n_factors < 1 or codebook is empty

Examples:
    >>> labels, sims = cleanup.factorize(
    ...     query, codebook, model, n_factors=3, threshold=0.95
    ... )
    >>> print(f"Extracted {len(labels)} factors")

Source code in holovec/utils/cleanup.py
def factorize(
    self,
    query: Array,
    codebook: dict[str, Array],
    model: VSAModel,
    n_factors: int = 2,
    max_iterations: int = 20,
    threshold: float = 0.99,
) -> tuple[list[str], list[float]]:
    """Factorize via iterative cleanup and unbinding.

    Repeatedly finds the best match, unbinds it from the query,
    and continues until n_factors have been extracted; the returned
    similarities indicate how confident each extraction was.

    Args:
        query: Composite hypervector to factorize
        codebook: Dictionary mapping labels to hypervectors
        model: VSA model for bind/unbind/similarity operations
        n_factors: Number of factors to extract (default: 2)
        max_iterations: Maximum iterations per factor (default: 20)
        threshold: Similarity level treated as a confident match (default: 0.99)

    Returns:
        Tuple of:
            - labels: List of factor labels in extraction order
            - similarities: List of similarities for each factor

    Raises:
        TypeError: If arguments are not correct types
        ValueError: If n_factors < 1 or codebook is empty

    Examples:
        >>> labels, sims = cleanup.factorize(
        ...     query, codebook, model, n_factors=3, threshold=0.95
        ... )
        >>> print(f"Extracted {len(labels)} factors")
    """
    # Type validation
    if query is None:
        raise TypeError("query cannot be None")
    if not isinstance(codebook, dict):
        raise TypeError(f"codebook must be dict, got {type(codebook)}")
    if not isinstance(model, VSAModel):
        raise TypeError(f"model must be VSAModel, got {type(model)}")
    if not isinstance(n_factors, int):
        raise TypeError(f"n_factors must be int, got {type(n_factors)}")
    if not isinstance(max_iterations, int):
        raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
    if not isinstance(threshold, int | float):
        raise TypeError(f"threshold must be numeric, got {type(threshold)}")

    # Value validation
    if n_factors < 1:
        raise ValueError(f"n_factors must be >= 1, got {n_factors}")
    if len(codebook) == 0:
        raise ValueError("codebook must not be empty")
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
    if not (0.0 <= threshold <= 1.0):
        raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")

    # Array shape validation (ensure query is 1-D vector matching model dimension)
    try:
        query_shape = model.backend.shape(query)
        expected_shape = (model.dimension,)
        if query_shape != expected_shape:
            raise ValueError(
                f"query must have shape {expected_shape}, got {query_shape}. "
                f"Ensure query is a 1-D hypervector matching model dimension."
            )
    except (AttributeError, TypeError) as e:
        raise TypeError(
            f"query must be a valid array compatible with model backend, got {type(query)}. "
            f"Backend error: {e}"
        )

    # Extract factors iteratively
    labels = []
    similarities = []
    current = query

    for _ in range(n_factors):
        # Find best match
        label, similarity = self.cleanup(current, codebook, model)
        labels.append(label)
        similarities.append(similarity)

        # Note: threshold does not stop extraction early; every factor is
        # extracted unconditionally, and a returned similarity below
        # threshold flags an unreliable match.

        # Unbind the found factor and continue
        factor_vector = codebook[label]
        current = model.unbind(current, factor_vector)

    return labels, similarities
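
Calling convention, under the same assumptions as the sketch above. One caveat follows directly from the algorithm: the first cleanup compares the raw composite against the codebook, and for purely multiplicative bindings the composite is quasi-orthogonal to every individual factor, so sequential extraction can be unreliable in that setting; always check the returned similarities (this is one motivation for ResonatorCleanup below).

from holovec import VSA
from holovec.utils.cleanup import BruteForceCleanup

model = VSA.create('MAP', dim=10000, seed=1)
codebook = {name: model.random() for name in ('shape', 'color', 'size')}
composite = model.bind(codebook['shape'], codebook['color'])

labels, sims = BruteForceCleanup().factorize(composite, codebook, model, n_factors=2)
for lbl, s in zip(labels, sims):
    # A low similarity flags an unreliable extraction
    print(lbl, f"{s:.2f}", 'ok' if s >= 0.3 else 'low confidence')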

holovec.utils.cleanup.ResonatorCleanup

Bases: CleanupStrategy

Resonator network cleanup via iterative refinement.

Implements the resonator network algorithm from Kymn et al. (2024), which uses iterative attention mechanisms to refine factor estimates. Achieves 10-100x speedup over brute-force for multi-factor unbinding.

Algorithm:
    1. Initialize estimates for all factors
    2. For each iteration:
        a. Unbind other factors to isolate target
        b. Cleanup against codebook
        c. Update estimate
    3. Repeat until convergence or max_iterations

Performance:
    - Convergence: Typically 5-15 iterations
    - Speedup: 10-100x over brute-force
    - Best for: Multi-factor compositions (3+ factors)

Examples:
    >>> # Create resonator cleanup
    >>> cleanup = ResonatorCleanup()
    >>>
    >>> # Single cleanup (same as brute-force)
    >>> label, sim = cleanup.cleanup(query, codebook, model)
    >>>
    >>> # Multi-factor with resonator (much faster)
    >>> labels, sims = cleanup.factorize(
    ...     query, codebook, model, n_factors=5, threshold=0.99
    ... )
    >>> print(f"Converged with {len(labels)} factors")

Attributes: None (stateless)

References:
    Kymn et al. (2024): Attention Mechanisms in VSAs
        - Section 3: Resonator Networks
        - Algorithm 1: Iterative factorization

Source code in holovec/utils/cleanup.py
class ResonatorCleanup(CleanupStrategy):
    """Resonator network cleanup via iterative refinement.

    Implements the resonator network algorithm from Kymn et al. (2024),
    which uses iterative attention mechanisms to refine factor estimates.
    Achieves 10-100x speedup over brute-force for multi-factor unbinding.

    Algorithm:
        1. Initialize estimates for all factors
        2. For each iteration:
            a. Unbind other factors to isolate target
            b. Cleanup against codebook
            c. Update estimate
        3. Repeat until convergence or max_iterations

    Performance:
        - Convergence: Typically 5-15 iterations
        - Speedup: 10-100x over brute-force
        - Best for: Multi-factor compositions (3+ factors)

    Examples:
        >>> # Create resonator cleanup
        >>> cleanup = ResonatorCleanup()
        >>>
        >>> # Single cleanup (same as brute-force)
        >>> label, sim = cleanup.cleanup(query, codebook, model)
        >>>
        >>> # Multi-factor with resonator (much faster)
        >>> labels, sims = cleanup.factorize(
        ...     query, codebook, model, n_factors=5, threshold=0.99
        ... )
        >>> print(f"Converged with {len(labels)} factors")

    Attributes:
        None (stateless)

    References:
        Kymn et al. (2024): Attention Mechanisms in VSAs
            - Section 3: Resonator Networks
            - Algorithm 1: Iterative factorization
    """

    def cleanup(
        self,
        query: Array,
        codebook: dict[str, Array],
        model: VSAModel,
    ) -> tuple[str, float]:
        """Find best match via exhaustive search.

        For single-factor cleanup, resonator networks reduce to brute-force
        search. Use factorize() for multi-factor speedup.

        Args:
            query: Query hypervector to clean up
            codebook: Dictionary mapping labels to hypervectors
            model: VSA model for similarity computation

        Returns:
            Tuple of (label, similarity) for the best match

        Raises:
            TypeError: If arguments are not correct types
            ValueError: If codebook is empty

        Examples:
            >>> label, sim = cleanup.cleanup(query, codebook, model)
        """
        # For single cleanup, resonator = brute-force
        # Use the brute-force implementation
        brute_force = BruteForceCleanup()
        return brute_force.cleanup(query, codebook, model)

    def factorize(
        self,
        query: Array,
        codebook: dict[str, Array],
        model: VSAModel,
        n_factors: int = 2,
        max_iterations: int = 20,
        threshold: float = 0.99,
        # Refinements
        temperature: float = 20.0,
        top_k: int = 1,
        patience: int = 3,
        min_delta: float = 1e-4,
        mode: str = 'hard',
    ) -> tuple[list[str], list[float]]:
        """Factorize via resonator network iteration.

        Uses iterative attention to refine factor estimates simultaneously,
        achieving much faster convergence than sequential unbinding.

        Algorithm (from Kymn et al. 2024):
            1. Initialize: estimates cycled deterministically from codebook, one per factor
            2. Repeat for max_iterations:
                a. For each factor i:
                    - Unbind all other estimates from query
                    - Cleanup result against codebook
                    - Update estimate[i]
                b. Check convergence (all similarities >= threshold)
            3. Return final estimates and similarities

        Args:
            query: Composite hypervector to factorize
            codebook: Dictionary mapping labels to hypervectors
            model: VSA model for bind/unbind/similarity operations
            n_factors: Number of factors to extract (default: 2)
            max_iterations: Maximum iterations (default: 20)
            threshold: Convergence threshold for similarity (default: 0.99)
            temperature: Softmax temperature for soft updates; larger is flatter (default: 20.0)
            top_k: Number of top candidates bundled in soft updates (default: 1)
            patience: Iterations without avg-similarity improvement before early stop (default: 3)
            min_delta: Minimum avg-similarity gain that resets patience (default: 1e-4)
            mode: Update rule, 'hard' (top-1) or 'soft' (softmax-weighted bundle) (default: 'hard')

        Returns:
            Tuple of:
                - labels: List of factor labels
                - similarities: List of similarities for each factor

        Raises:
            TypeError: If arguments are not correct types
            ValueError: If n_factors < 1 or codebook is empty

        Examples:
            >>> # Fast multi-factor unbinding
            >>> labels, sims = cleanup.factorize(
            ...     query, codebook, model, n_factors=5
            ... )
            >>> print(f"Factors: {labels}")
            >>> print(f"Avg similarity: {sum(sims)/len(sims):.3f}")
        """
        # Type validation
        if query is None:
            raise TypeError("query cannot be None")
        if not isinstance(codebook, dict):
            raise TypeError(f"codebook must be dict, got {type(codebook)}")
        if not isinstance(model, VSAModel):
            raise TypeError(f"model must be VSAModel, got {type(model)}")
        if not isinstance(n_factors, int):
            raise TypeError(f"n_factors must be int, got {type(n_factors)}")
        if not isinstance(max_iterations, int):
            raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
        if not isinstance(threshold, int | float):
            raise TypeError(f"threshold must be numeric, got {type(threshold)}")

        # Value validation
        if n_factors < 1:
            raise ValueError(f"n_factors must be >= 1, got {n_factors}")
        if len(codebook) == 0:
            raise ValueError("codebook must not be empty")
        if max_iterations < 1:
            raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
        if not (0.0 <= threshold <= 1.0):
            raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")

        # Array shape validation (ensure query is 1-D vector matching model dimension)
        try:
            query_shape = model.backend.shape(query)
            expected_shape = (model.dimension,)
            if query_shape != expected_shape:
                raise ValueError(
                    f"query must have shape {expected_shape}, got {query_shape}. "
                    f"Ensure query is a 1-D hypervector matching model dimension."
                )
        except (AttributeError, TypeError) as e:
            raise TypeError(
                f"query must be a valid array compatible with model backend, got {type(query)}. "
                f"Backend error: {e}"
            )

        # Initialize estimates with deterministic codebook entries (cycle)
        codebook_labels = list(codebook.keys())
        estimates = []
        estimate_labels = []

        for i in range(n_factors):
            # Use modulo to cycle through codebook if n_factors > codebook size
            label = codebook_labels[i % len(codebook_labels)]
            estimates.append(codebook[label])
            estimate_labels.append(label)

        # Iterative refinement with optional early stopping
        best_avg = -1.0
        no_improve = 0
        for iteration in range(max_iterations):
            converged = True

            for i in range(n_factors):
                # Unbind all OTHER estimates from query to isolate factor i
                isolated = query
                for j in range(n_factors):
                    if j != i:
                        isolated = model.unbind(isolated, estimates[j])

                # Compute similarities to entire codebook
                sims: list[tuple[str, float]] = []
                for lbl, vec in codebook.items():
                    sims.append((lbl, float(model.similarity(isolated, vec))))
                # Sort by similarity desc
                sims.sort(key=lambda t: t[1], reverse=True)

                # Hard vs soft update
                use_soft = (mode == 'soft') or (top_k > 1)
                if not use_soft:
                    label, similarity = sims[0]
                    estimates[i] = codebook[label]
                    estimate_labels[i] = label
                else:
                    # Take top-K and softmax-weight them
                    k = min(max(2, top_k), len(sims))
                    top = sims[:k]
                    import numpy as _np
                    vals = _np.array([s for _, s in top], dtype=_np.float64)
                    # temperature > 0; larger → flatter
                    logits = vals * float(temperature)
                    logits = logits - logits.max()
                    w = _np.exp(logits)
                    w = w / (w.sum() + 1e-12)
                    # Bundle weighted
                    parts = []
                    for (lbl, _s), wt in zip(top, w.tolist()):
                        parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
                    estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
                    # Label: top-1 for reporting
                    estimate_labels[i] = top[0][0]
                    similarity = float(top[0][1])

                # Check convergence for this factor
                if similarity < threshold:
                    converged = False

            # Global early stopping on plateau
            # Compute avg isolated similarity across factors
            curr_sims = []
            for i in range(n_factors):
                isolated = query
                for j in range(n_factors):
                    if j != i:
                        isolated = model.unbind(isolated, estimates[j])
                curr_sims.append(float(model.similarity(isolated, estimates[i])))
            avg_sim = sum(curr_sims) / max(1, len(curr_sims))

            if avg_sim > best_avg + min_delta:
                best_avg = avg_sim
                no_improve = 0
            else:
                no_improve += 1

            if converged or no_improve >= patience:
                break

        # Compute final similarities (as in original API)
        similarities: list[float] = []
        for i in range(n_factors):
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])
            similarity = model.similarity(isolated, estimates[i])
            similarities.append(float(similarity))

        return estimate_labels, similarities

    def factorize_verbose(
        self,
        query: Array,
        codebook: dict[str, Array],
        model: VSAModel,
        n_factors: int = 2,
        max_iterations: int = 20,
        threshold: float = 0.99,
        temperature: float = 20.0,
        top_k: int = 1,
        patience: int = 3,
        min_delta: float = 1e-4,
        mode: str = 'hard',
    ) -> tuple[list[str], list[float], list[float]]:
        """Like factorize(), but also returns avg-similarity history per iteration."""
        # Lightweight wrapper: capture avg similarity after each iteration
        # Re-implement loop to record history.
        # Initialize estimates
        codebook_labels = list(codebook.keys())
        estimates = []
        estimate_labels = []
        for i in range(n_factors):
            label = codebook_labels[i % len(codebook_labels)]
            estimates.append(codebook[label])
            estimate_labels.append(label)

        history: list[float] = []
        best_avg = -1.0
        no_improve = 0
        for _iter in range(max_iterations):
            converged = True
            for i in range(n_factors):
                isolated = query
                for j in range(n_factors):
                    if j != i:
                        isolated = model.unbind(isolated, estimates[j])
                sims = [(lbl, float(model.similarity(isolated, vec))) for lbl, vec in codebook.items()]
                sims.sort(key=lambda t: t[1], reverse=True)
                use_soft = (mode == 'soft') or (top_k > 1)
                if not use_soft:
                    label, similarity = sims[0]
                    estimates[i] = codebook[label]
                    estimate_labels[i] = label
                else:
                    k = min(max(2, top_k), len(sims))
                    top = sims[:k]
                    import numpy as _np
                    vals = _np.array([s for _, s in top], dtype=_np.float64)
                    logits = vals * float(temperature)
                    logits = logits - logits.max()
                    w = _np.exp(logits)
                    w = w / (w.sum() + 1e-12)
                    parts = []
                    for (lbl, _s), wt in zip(top, w.tolist()):
                        parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
                    estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
                    estimate_labels[i] = top[0][0]
                    similarity = float(top[0][1])
                if similarity < threshold:
                    converged = False

            # record avg similarity across factors
            curr_sims = []
            for i in range(n_factors):
                isolated = query
                for j in range(n_factors):
                    if j != i:
                        isolated = model.unbind(isolated, estimates[j])
                curr_sims.append(float(model.similarity(isolated, estimates[i])))
            avg_sim = sum(curr_sims) / max(1, len(curr_sims))
            history.append(avg_sim)

            if avg_sim > best_avg + min_delta:
                best_avg = avg_sim
                no_improve = 0
            else:
                no_improve += 1
            if converged or no_improve >= patience:
                break

        # Final similarities
        final_sims = []
        for i in range(n_factors):
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])
            final_sims.append(float(model.similarity(isolated, estimates[i])))
        return estimate_labels, final_sims, history

cleanup(query, codebook, model)

Find best match via exhaustive search.

For single-factor cleanup, resonator networks reduce to brute-force search. Use factorize() for multi-factor speedup.

Args:
    query: Query hypervector to clean up
    codebook: Dictionary mapping labels to hypervectors
    model: VSA model for similarity computation

Returns: Tuple of (label, similarity) for the best match

Raises:
    TypeError: If arguments are not correct types
    ValueError: If codebook is empty

Examples:
    >>> label, sim = cleanup.cleanup(query, codebook, model)

Source code in holovec/utils/cleanup.py
def cleanup(
    self,
    query: Array,
    codebook: dict[str, Array],
    model: VSAModel,
) -> tuple[str, float]:
    """Find best match via exhaustive search.

    For single-factor cleanup, resonator networks reduce to brute-force
    search. Use factorize() for multi-factor speedup.

    Args:
        query: Query hypervector to clean up
        codebook: Dictionary mapping labels to hypervectors
        model: VSA model for similarity computation

    Returns:
        Tuple of (label, similarity) for the best match

    Raises:
        TypeError: If arguments are not correct types
        ValueError: If codebook is empty

    Examples:
        >>> label, sim = cleanup.cleanup(query, codebook, model)
    """
    # For single cleanup, resonator = brute-force
    # Use the brute-force implementation
    brute_force = BruteForceCleanup()
    return brute_force.cleanup(query, codebook, model)

factorize(query, codebook, model, n_factors=2, max_iterations=20, threshold=0.99, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard')

Factorize via resonator network iteration.

Uses iterative attention to refine factor estimates simultaneously, achieving much faster convergence than sequential unbinding.

Algorithm (from Kymn et al. 2024):
    1. Initialize: estimates cycled deterministically from codebook, one per factor
    2. Repeat for max_iterations:
        a. For each factor i:
            - Unbind all other estimates from query
            - Cleanup result against codebook
            - Update estimate[i]
        b. Check convergence (all similarities >= threshold)
    3. Return final estimates and similarities

Args:
    query: Composite hypervector to factorize
    codebook: Dictionary mapping labels to hypervectors
    model: VSA model for bind/unbind/similarity operations
    n_factors: Number of factors to extract (default: 2)
    max_iterations: Maximum iterations (default: 20)
    threshold: Convergence threshold for similarity (default: 0.99)
    temperature: Softmax temperature for soft updates; larger is flatter (default: 20.0)
    top_k: Number of top candidates bundled in soft updates (default: 1)
    patience: Iterations without avg-similarity improvement before early stop (default: 3)
    min_delta: Minimum avg-similarity gain that resets patience (default: 1e-4)
    mode: Update rule, 'hard' (top-1) or 'soft' (softmax-weighted bundle) (default: 'hard')

Returns:
    Tuple of:
        - labels: List of factor labels
        - similarities: List of similarities for each factor

Raises:
    TypeError: If arguments are not correct types
    ValueError: If n_factors < 1 or codebook is empty

Examples:
    >>> # Fast multi-factor unbinding
    >>> labels, sims = cleanup.factorize(
    ...     query, codebook, model, n_factors=5
    ... )
    >>> print(f"Factors: {labels}")
    >>> print(f"Avg similarity: {sum(sims)/len(sims):.3f}")

Source code in holovec/utils/cleanup.py
def factorize(
    self,
    query: Array,
    codebook: dict[str, Array],
    model: VSAModel,
    n_factors: int = 2,
    max_iterations: int = 20,
    threshold: float = 0.99,
    # Refinements
    temperature: float = 20.0,
    top_k: int = 1,
    patience: int = 3,
    min_delta: float = 1e-4,
    mode: str = 'hard',
) -> tuple[list[str], list[float]]:
    """Factorize via resonator network iteration.

    Uses iterative attention to refine factor estimates simultaneously,
    achieving much faster convergence than sequential unbinding.

    Algorithm (from Kymn et al. 2024):
        1. Initialize: estimates cycled deterministically from codebook, one per factor
        2. Repeat for max_iterations:
            a. For each factor i:
                - Unbind all other estimates from query
                - Cleanup result against codebook
                - Update estimate[i]
            b. Check convergence (all similarities >= threshold)
        3. Return final estimates and similarities

    Args:
        query: Composite hypervector to factorize
        codebook: Dictionary mapping labels to hypervectors
        model: VSA model for bind/unbind/similarity operations
        n_factors: Number of factors to extract (default: 2)
        max_iterations: Maximum iterations (default: 20)
        threshold: Convergence threshold for similarity (default: 0.99)
        temperature: Softmax temperature for soft updates; larger is flatter (default: 20.0)
        top_k: Number of top candidates bundled in soft updates (default: 1)
        patience: Iterations without avg-similarity improvement before early stop (default: 3)
        min_delta: Minimum avg-similarity gain that resets patience (default: 1e-4)
        mode: Update rule, 'hard' (top-1) or 'soft' (softmax-weighted bundle) (default: 'hard')

    Returns:
        Tuple of:
            - labels: List of factor labels
            - similarities: List of similarities for each factor

    Raises:
        TypeError: If arguments are not correct types
        ValueError: If n_factors < 1 or codebook is empty

    Examples:
        >>> # Fast multi-factor unbinding
        >>> labels, sims = cleanup.factorize(
        ...     query, codebook, model, n_factors=5
        ... )
        >>> print(f"Factors: {labels}")
        >>> print(f"Avg similarity: {sum(sims)/len(sims):.3f}")
    """
    # Type validation
    if query is None:
        raise TypeError("query cannot be None")
    if not isinstance(codebook, dict):
        raise TypeError(f"codebook must be dict, got {type(codebook)}")
    if not isinstance(model, VSAModel):
        raise TypeError(f"model must be VSAModel, got {type(model)}")
    if not isinstance(n_factors, int):
        raise TypeError(f"n_factors must be int, got {type(n_factors)}")
    if not isinstance(max_iterations, int):
        raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
    if not isinstance(threshold, int | float):
        raise TypeError(f"threshold must be numeric, got {type(threshold)}")

    # Value validation
    if n_factors < 1:
        raise ValueError(f"n_factors must be >= 1, got {n_factors}")
    if len(codebook) == 0:
        raise ValueError("codebook must not be empty")
    if max_iterations < 1:
        raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
    if not (0.0 <= threshold <= 1.0):
        raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")

    # Array shape validation (ensure query is 1-D vector matching model dimension)
    try:
        query_shape = model.backend.shape(query)
        expected_shape = (model.dimension,)
        if query_shape != expected_shape:
            raise ValueError(
                f"query must have shape {expected_shape}, got {query_shape}. "
                f"Ensure query is a 1-D hypervector matching model dimension."
            )
    except (AttributeError, TypeError) as e:
        raise TypeError(
            f"query must be a valid array compatible with model backend, got {type(query)}. "
            f"Backend error: {e}"
        )

    # Initialize estimates with deterministic codebook entries (cycle)
    codebook_labels = list(codebook.keys())
    estimates = []
    estimate_labels = []

    for i in range(n_factors):
        # Use modulo to cycle through codebook if n_factors > codebook size
        label = codebook_labels[i % len(codebook_labels)]
        estimates.append(codebook[label])
        estimate_labels.append(label)

    # Iterative refinement with optional early stopping
    best_avg = -1.0
    no_improve = 0
    for iteration in range(max_iterations):
        converged = True

        for i in range(n_factors):
            # Unbind all OTHER estimates from query to isolate factor i
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])

            # Compute similarities to entire codebook
            sims: list[tuple[str, float]] = []
            for lbl, vec in codebook.items():
                sims.append((lbl, float(model.similarity(isolated, vec))))
            # Sort by similarity desc
            sims.sort(key=lambda t: t[1], reverse=True)

            # Hard vs soft update
            use_soft = (mode == 'soft') or (top_k > 1)
            if not use_soft:
                label, similarity = sims[0]
                estimates[i] = codebook[label]
                estimate_labels[i] = label
            else:
                # Take top-K and softmax-weight them
                k = min(max(2, top_k), len(sims))
                top = sims[:k]
                import numpy as _np
                vals = _np.array([s for _, s in top], dtype=_np.float64)
                # temperature > 0; larger → flatter
                logits = vals * float(temperature)
                logits = logits - logits.max()
                w = _np.exp(logits)
                w = w / (w.sum() + 1e-12)
                # Bundle weighted
                parts = []
                for (lbl, _s), wt in zip(top, w.tolist()):
                    parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
                estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
                # Label: top-1 for reporting
                estimate_labels[i] = top[0][0]
                similarity = float(top[0][1])

            # Check convergence for this factor
            if similarity < threshold:
                converged = False

        # Global early stopping on plateau
        # Compute avg isolated similarity across factors
        curr_sims = []
        for i in range(n_factors):
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])
            curr_sims.append(float(model.similarity(isolated, estimates[i])))
        avg_sim = sum(curr_sims) / max(1, len(curr_sims))

        if avg_sim > best_avg + min_delta:
            best_avg = avg_sim
            no_improve = 0
        else:
            no_improve += 1

        if converged or no_improve >= patience:
            break

    # Compute final similarities (as in original API)
    similarities: list[float] = []
    for i in range(n_factors):
        isolated = query
        for j in range(n_factors):
            if j != i:
                isolated = model.unbind(isolated, estimates[j])
        similarity = model.similarity(isolated, estimates[i])
        similarities.append(float(similarity))

    return estimate_labels, similarities
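
A usage sketch under the same assumptions as the earlier examples. For a two-factor MAP binding over a small codebook the resonator typically locks onto both factors within a few iterations; the second call shows the soft update path driven by mode, top_k and temperature. Because the estimates are initialized by cycling the codebook rather than from a superposition, convergence is probabilistic, so generous max_iterations/patience values are used here.

from holovec import VSA
from holovec.utils.cleanup import ResonatorCleanup

model = VSA.create('MAP', dim=10000, seed=7)
codebook = {f'f{i}': model.random() for i in range(8)}
composite = model.bind(codebook['f2'], codebook['f5'])

res = ResonatorCleanup()
labels, sims = res.factorize(
    composite, codebook, model, n_factors=2,
    max_iterations=30, patience=10,
)
print(sorted(labels), [f"{s:.2f}" for s in sims])  # typically ['f2', 'f5'] with sims near 1.0

# Soft updates: bundle the softmax-weighted top-3 candidates per step
labels_soft, sims_soft = res.factorize(
    composite, codebook, model, n_factors=2,
    mode='soft', top_k=3, temperature=10.0,
)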

factorize_verbose(query, codebook, model, n_factors=2, max_iterations=20, threshold=0.99, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard')

Like factorize(), but also returns avg-similarity history per iteration.

Source code in holovec/utils/cleanup.py
def factorize_verbose(
    self,
    query: Array,
    codebook: dict[str, Array],
    model: VSAModel,
    n_factors: int = 2,
    max_iterations: int = 20,
    threshold: float = 0.99,
    temperature: float = 20.0,
    top_k: int = 1,
    patience: int = 3,
    min_delta: float = 1e-4,
    mode: str = 'hard',
) -> tuple[list[str], list[float], list[float]]:
    """Like factorize(), but also returns avg-similarity history per iteration."""
    # Lightweight wrapper: capture avg similarity after each iteration
    # Re-implement loop to record history.
    # Initialize estimates
    codebook_labels = list(codebook.keys())
    estimates = []
    estimate_labels = []
    for i in range(n_factors):
        label = codebook_labels[i % len(codebook_labels)]
        estimates.append(codebook[label])
        estimate_labels.append(label)

    history: list[float] = []
    best_avg = -1.0
    no_improve = 0
    for _iter in range(max_iterations):
        converged = True
        for i in range(n_factors):
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])
            sims = [(lbl, float(model.similarity(isolated, vec))) for lbl, vec in codebook.items()]
            sims.sort(key=lambda t: t[1], reverse=True)
            use_soft = (mode == 'soft') or (top_k > 1)
            if not use_soft:
                label, similarity = sims[0]
                estimates[i] = codebook[label]
                estimate_labels[i] = label
            else:
                k = min(max(2, top_k), len(sims))
                top = sims[:k]
                import numpy as _np
                vals = _np.array([s for _, s in top], dtype=_np.float64)
                logits = vals * float(temperature)
                logits = logits - logits.max()
                w = _np.exp(logits)
                w = w / (w.sum() + 1e-12)
                parts = []
                for (lbl, _s), wt in zip(top, w.tolist()):
                    parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
                estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
                estimate_labels[i] = top[0][0]
                similarity = float(top[0][1])
            if similarity < threshold:
                converged = False

        # record avg similarity across factors
        curr_sims = []
        for i in range(n_factors):
            isolated = query
            for j in range(n_factors):
                if j != i:
                    isolated = model.unbind(isolated, estimates[j])
            curr_sims.append(float(model.similarity(isolated, estimates[i])))
        avg_sim = sum(curr_sims) / max(1, len(curr_sims))
        history.append(avg_sim)

        if avg_sim > best_avg + min_delta:
            best_avg = avg_sim
            no_improve = 0
        else:
            no_improve += 1
        if converged or no_improve >= patience:
            break

    # Final similarities
    final_sims = []
    for i in range(n_factors):
        isolated = query
        for j in range(n_factors):
            if j != i:
                isolated = model.unbind(isolated, estimates[j])
        final_sims.append(float(model.similarity(isolated, estimates[i])))
    return estimate_labels, final_sims, history
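
Continuing the sketch above, factorize_verbose exposes the per-iteration average similarity, which is useful for inspecting convergence or tuning patience and min_delta:

labels, sims, history = res.factorize_verbose(
    composite, codebook, model, n_factors=2, max_iterations=30, patience=10,
)
print(f"{len(history)} iterations")
print([f"{h:.2f}" for h in history])  # avg similarity per iteration, rising toward 1.0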

Backends

holovec.backends.base.Backend

Bases: ABC

Abstract base class for computational backends.

All backends must implement these operations to support VSA computations across different frameworks (NumPy, PyTorch, JAX).

Source code in holovec/backends/base.py
class Backend(ABC):
    """Abstract base class for computational backends.

    All backends must implement these operations to support VSA computations
    across different frameworks (NumPy, PyTorch, JAX).
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the backend name (e.g., 'numpy', 'torch', 'jax')."""
        pass

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the backend is available in the current environment."""
        pass

    # ===== Capability Probes =====

    def supports_complex(self) -> bool:
        """Check if backend supports complex number operations.

        Complex operations are required for FHRR (Fourier HRR) and other
        frequency-domain VSA models.

        Returns:
            True if backend can handle complex dtypes (complex64, complex128)
        """
        return True  # Default: assume support (override in backend if needed)

    def supports_sparse(self) -> bool:
        """Check if backend supports sparse array operations.

        Sparse operations are beneficial for BSC (Binary Spatter Codes) and
        BSDC (Binary Sparse Distributed Codes) which have high sparsity.

        Returns:
            True if backend has native sparse array support
        """
        return False  # Default: no sparse support (override if available)

    def supports_gpu(self) -> bool:
        """Check if backend has GPU acceleration support.

        GPU support enables significant speedups for large-scale operations
        and is critical for production deployments.

        Returns:
            True if backend can utilize GPU hardware
        """
        return False  # Default: CPU only (override for PyTorch/JAX)

    def supports_jit(self) -> bool:
        """Check if backend supports Just-In-Time (JIT) compilation.

        JIT compilation can provide 10-100x speedups for certain operations
        by compiling Python code to optimized machine code.

        Returns:
            True if backend has JIT compilation (e.g., JAX, Numba)
        """
        return False  # Default: no JIT (override for JAX)

    def supports_device(self, device: str) -> bool:
        """Check if backend supports a specific device.

        Args:
            device: Device identifier (e.g., 'cpu', 'cuda', 'cuda:0', 'mps')

        Returns:
            True if the specified device is available

        Examples:
            >>> backend.supports_device('cpu')  # Always True
            >>> backend.supports_device('cuda')  # True if CUDA GPU available
            >>> backend.supports_device('mps')  # True if Apple Metal available
        """
        # Default: only CPU supported
        return device.lower() in ('cpu', 'cpu:0')

    # ===== Array Creation =====

    @abstractmethod
    def zeros(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
        """Create an array of zeros with the given shape and dtype."""
        pass

    @abstractmethod
    def ones(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
        """Create an array of ones with the given shape and dtype."""
        pass

    @abstractmethod
    def random_normal(
        self,
        shape: int | tuple[int, ...],
        mean: float = 0.0,
        std: float = 1.0,
        dtype: str = 'float32',
        seed: int | None = None
    ) -> Array:
        """Create an array of random values from a normal distribution."""
        pass

    @abstractmethod
    def random_uniform(
        self,
        shape: int | tuple[int, ...],
        low: float = 0.0,
        high: float = 1.0,
        dtype: str = 'float32',
        seed: int | None = None
    ) -> Array:
        """Create an array of random values from a uniform distribution."""
        pass

    @abstractmethod
    def random_binary(
        self,
        shape: int | tuple[int, ...],
        p: float = 0.5,
        dtype: str = 'int32',
        seed: int | None = None
    ) -> Array:
        """Create a binary array with probability p of being 1."""
        pass

    @abstractmethod
    def random_bipolar(
        self,
        shape: int | tuple[int, ...],
        p: float = 0.5,
        dtype: str = 'float32',
        seed: int | None = None
    ) -> Array:
        """Create a bipolar array {-1, +1} with probability p of being +1."""
        pass

    @abstractmethod
    def random_phasor(
        self,
        shape: int | tuple[int, ...],
        dtype: str = 'complex64',
        seed: int | None = None
    ) -> Array:
        """Create an array of random unit phasors (complex numbers with magnitude 1)."""
        pass

    @abstractmethod
    def array(self, data: Any, dtype: str | None = None) -> Array:
        """Create an array from Python data (list, tuple, etc.)."""
        pass

    # ===== Element-wise Operations =====

    @abstractmethod
    def multiply(self, a: Array, b: Array) -> Array:
        """Element-wise multiplication."""
        pass

    @abstractmethod
    def add(self, a: Array, b: Array) -> Array:
        """Element-wise addition."""
        pass

    @abstractmethod
    def subtract(self, a: Array, b: Array) -> Array:
        """Element-wise subtraction."""
        pass

    @abstractmethod
    def divide(self, a: Array, b: Array) -> Array:
        """Element-wise division."""
        pass

    @abstractmethod
    def xor(self, a: Array, b: Array) -> Array:
        """Element-wise XOR (for binary/bipolar)."""
        pass

    @abstractmethod
    def conjugate(self, a: Array) -> Array:
        """Complex conjugate (for complex arrays)."""
        pass

    @abstractmethod
    def exp(self, a: Array) -> Array:
        """Element-wise exponential: e^a.

        Args:
            a: Input array

        Returns:
            Array with exp applied element-wise
        """
        pass

    @abstractmethod
    def log(self, a: Array) -> Array:
        """Element-wise natural logarithm: ln(a).

        Args:
            a: Input array (must be positive)

        Returns:
            Array with log applied element-wise
        """
        pass

    # ===== Additional Element-wise Utilities =====

    @abstractmethod
    def power(self, a: Array, exponent: float) -> Array:
        """Element-wise power: a**exponent."""
        pass

    @abstractmethod
    def angle(self, a: Array) -> Array:
        """Element-wise phase/angle for complex arrays (radians)."""
        pass

    @abstractmethod
    def real(self, a: Array) -> Array:
        """Element-wise real part of (possibly complex) array."""
        pass

    @abstractmethod
    def imag(self, a: Array) -> Array:
        """Element-wise imaginary part of (possibly complex) array."""
        pass

    @abstractmethod
    def multiply_scalar(self, a: Array, scalar: float) -> Array:
        """Multiply array by a Python scalar."""
        pass

    @abstractmethod
    def linspace(self, start: float, stop: float, num: int) -> Array:
        """Create linearly spaced array of length num in [start, stop]."""
        pass

    # ===== Reductions =====

    @abstractmethod
    def sum(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
        """Sum along an axis."""
        pass

    @abstractmethod
    def mean(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
        """Mean along an axis."""
        pass

    @abstractmethod
    def norm(self, a: Array, ord: int | str = 2, axis: int | None = None) -> Array:
        """Compute the norm of an array."""
        pass

    @abstractmethod
    def dot(self, a: Array, b: Array) -> Array:
        """Dot product of two vectors."""
        pass

    @abstractmethod
    def max(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
        """Maximum value along an axis.

        Args:
            a: Input array
            axis: Axis along which to compute max (None for global max)
            keepdims: Whether to keep dimensions

        Returns:
            Maximum value(s)
        """
        pass

    @abstractmethod
    def min(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
        """Minimum value along an axis.

        Args:
            a: Input array
            axis: Axis along which to compute min (None for global min)
            keepdims: Whether to keep dimensions

        Returns:
            Minimum value(s)
        """
        pass

    @abstractmethod
    def argmax(self, a: Array, axis: int | None = None) -> Array:
        """Index of maximum value along an axis.

        Args:
            a: Input array
            axis: Axis along which to find argmax (None for global argmax)

        Returns:
            Index/indices of maximum value(s)
        """
        pass

    @abstractmethod
    def argmin(self, a: Array, axis: int | None = None) -> Array:
        """Index of minimum value along an axis.

        Args:
            a: Input array
            axis: Axis along which to find argmin (None for global argmin)

        Returns:
            Index/indices of minimum value(s)
        """
        pass

    # ===== Normalization =====

    @abstractmethod
    def normalize(self, a: Array, ord: int | str = 2, axis: int | None = None, eps: float = 1e-12) -> Array:
        """Normalize an array to unit norm."""
        pass

    @abstractmethod
    def softmax(self, a: Array, axis: int = -1) -> Array:
        """Softmax function with numerical stability.

        Computes: softmax(x_i) = exp(x_i - max(x)) / Σ exp(x_j - max(x))

        The max subtraction provides numerical stability by preventing overflow
        in the exponential function.

        Args:
            a: Input array
            axis: Axis along which to compute softmax

        Returns:
            Array with softmax applied along specified axis

        References:
            - Bricken & Pehlevan (2022): "Attention Approximates Sparse Distributed Memory"
            - Furlong & Eliasmith (2023): "Fractional binding in VSAs"
        """
        pass

    # ===== FFT Operations =====

    @abstractmethod
    def fft(self, a: Array) -> Array:
        """1D Fast Fourier Transform."""
        pass

    @abstractmethod
    def ifft(self, a: Array) -> Array:
        """1D Inverse Fast Fourier Transform."""
        pass

    # ===== Circular Operations =====

    @abstractmethod
    def circular_convolve(self, a: Array, b: Array) -> Array:
        """Circular convolution of two vectors."""
        pass

    @abstractmethod
    def circular_correlate(self, a: Array, b: Array) -> Array:
        """Circular correlation of two vectors."""
        pass

    # ===== Permutations =====

    @abstractmethod
    def permute(self, a: Array, indices: Array) -> Array:
        """Permute array elements according to indices."""
        pass

    @abstractmethod
    def roll(self, a: Array, shift: int, axis: int | None = None) -> Array:
        """Roll array elements along an axis."""
        pass

    # ===== Similarity Measures =====

    @abstractmethod
    def cosine_similarity(self, a: Array, b: Array) -> float:
        """Compute cosine similarity between two vectors."""
        pass

    @abstractmethod
    def hamming_distance(self, a: Array, b: Array) -> float:
        """Compute Hamming distance between two binary/bipolar vectors."""
        pass

    @abstractmethod
    def euclidean_distance(self, a: Array, b: Array) -> float:
        """Compute Euclidean distance between two vectors."""
        pass

    # ===== Utilities =====

    @abstractmethod
    def shape(self, a: Array) -> tuple[int, ...]:
        """Return the shape of an array."""
        pass

    @abstractmethod
    def dtype(self, a: Array) -> str:
        """Return the dtype of an array as a string."""
        pass

    @abstractmethod
    def to_numpy(self, a: Array) -> Any:
        """Convert array to NumPy array (for compatibility)."""
        pass

    @abstractmethod
    def from_numpy(self, a: Any) -> Array:
        """Create backend array from NumPy array."""
        pass

    @abstractmethod
    def clip(self, a: Array, min_val: float, max_val: float) -> Array:
        """Clip array values to [min_val, max_val]."""
        pass

    @abstractmethod
    def abs(self, a: Array) -> Array:
        """Element-wise absolute value."""
        pass

    @abstractmethod
    def sign(self, a: Array) -> Array:
        """Element-wise sign."""
        pass

    @abstractmethod
    def threshold(self, a: Array, threshold: float, above: float = 1.0, below: float = 0.0) -> Array:
        """Threshold array values."""
        pass

    @abstractmethod
    def where(self, condition: Array, x: Array, y: Array) -> Array:
        """Select elements from x or y depending on boolean condition."""
        pass

    @abstractmethod
    def stack(self, arrays: Sequence[Array], axis: int = 0) -> Array:
        """Stack arrays along a new axis."""
        pass

    @abstractmethod
    def concatenate(self, arrays: Sequence[Array], axis: int = 0) -> Array:
        """Concatenate arrays along an existing axis."""
        pass

    # ===== Matrix Operations (for GHRR, VTB) =====

    @abstractmethod
    def matmul(self, a: Array, b: Array) -> Array:
        """Matrix multiplication (or batched matrix multiplication).

        Args:
            a: Matrix or batch of matrices
            b: Matrix or batch of matrices

        Returns:
            Matrix product
        """
        pass

    @abstractmethod
    def matrix_transpose(self, a: Array) -> Array:
        """Transpose last two dimensions of array.

        For 2D: standard transpose
        For 3D+: transpose last two dimensions (batch transpose)

        Args:
            a: Array with at least 2 dimensions

        Returns:
            Transposed array
        """
        pass

    @abstractmethod
    def matrix_trace(self, a: Array) -> Array:
        """Compute trace of matrix or batch of matrices.

        For 2D array: returns scalar
        For 3D+ array: returns trace of each matrix in batch

        Args:
            a: Matrix or batch of matrices (last 2 dims are matrix)

        Returns:
            Scalar or array of traces
        """
        pass

    @abstractmethod
    def svd(self, a: Array, full_matrices: bool = True) -> tuple[Array, Array, Array]:
        """Compute Singular Value Decomposition (SVD).

        Decomposes matrix A as A = U @ diag(S) @ Vh, where:
        - U: left singular vectors (unitary)
        - S: singular values (non-negative, sorted descending)
        - Vh: conjugate transpose of right singular vectors (unitary)

        For batched matrices (3D+), computes SVD for each matrix in batch.

        Args:
            a: Matrix or batch of matrices (shape [..., m, n])
            full_matrices: If True, U and Vh have shapes [..., m, m] and [..., n, n].
                          If False, shapes are [..., m, k] and [..., k, n] where k=min(m,n).

        Returns:
            Tuple of (U, S, Vh) arrays

        Examples:
            >>> A = backend.random_normal((3, 3))
            >>> U, S, Vh = backend.svd(A)
            >>> # Verify: A ≈ U @ diag(S) @ Vh
        """
        pass

    @abstractmethod
    def reshape(self, a: Array, shape: tuple[int, ...]) -> Array:
        """Reshape array to new shape.

        Args:
            a: Array to reshape
            shape: Target shape

        Returns:
            Reshaped array
        """
        pass
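
To make the contract concrete, here is a fragmentary, hypothetical NumPy-flavoured subclass showing the shape of an implementation. Only a handful of the required methods appear, so the class cannot be instantiated until every remaining abstractmethod is also provided; the library ships its own complete backends.

import numpy as np

from holovec.backends.base import Backend

class MiniNumpyBackend(Backend):
    """Illustrative fragment only; not a usable backend."""

    @property
    def name(self) -> str:
        return 'numpy'

    def is_available(self) -> bool:
        return True  # this sketch assumes NumPy is installed

    def zeros(self, shape, dtype='float32'):
        return np.zeros(shape, dtype=dtype)

    def multiply(self, a, b):
        return a * b  # element-wise product

    def circular_convolve(self, a, b):
        # Convolution theorem: pointwise product in the Fourier domain
        return np.real(np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)))

    def cosine_similarity(self, a, b) -> float:
        denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-12
        return float(np.dot(a, b) / denom)

    def softmax(self, a, axis=-1):
        # Max-subtraction for numerical stability, as documented above
        shifted = a - np.max(a, axis=axis, keepdims=True)
        e = np.exp(shifted)
        return e / np.sum(e, axis=axis, keepdims=True)

    # ...every remaining @abstractmethod must be implemented as well.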

zeros(shape, dtype='float32') abstractmethod

Create an array of zeros with the given shape and dtype.

Source code in holovec/backends/base.py
@abstractmethod
def zeros(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
    """Create an array of zeros with the given shape and dtype."""
    pass

ones(shape, dtype='float32') abstractmethod

Create an array of ones with the given shape and dtype.

Source code in holovec/backends/base.py
@abstractmethod
def ones(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
    """Create an array of ones with the given shape and dtype."""
    pass

random_normal(shape, mean=0.0, std=1.0, dtype='float32', seed=None) abstractmethod

Create an array of random values from a normal distribution.

Source code in holovec/backends/base.py
@abstractmethod
def random_normal(
    self,
    shape: int | tuple[int, ...],
    mean: float = 0.0,
    std: float = 1.0,
    dtype: str = 'float32',
    seed: int | None = None
) -> Array:
    """Create an array of random values from a normal distribution."""
    pass

add(a, b) abstractmethod

Element-wise addition.

Source code in holovec/backends/base.py
@abstractmethod
def add(self, a: Array, b: Array) -> Array:
    """Element-wise addition."""
    pass

multiply(a, b) abstractmethod

Element-wise multiplication.

Source code in holovec/backends/base.py
@abstractmethod
def multiply(self, a: Array, b: Array) -> Array:
    """Element-wise multiplication."""
    pass

dot(a, b) abstractmethod

Dot product of two vectors.

Source code in holovec/backends/base.py
@abstractmethod
def dot(self, a: Array, b: Array) -> Array:
    """Dot product of two vectors."""
    pass

fft(a) abstractmethod

1D Fast Fourier Transform.

Source code in holovec/backends/base.py
@abstractmethod
def fft(self, a: Array) -> Array:
    """1D Fast Fourier Transform."""
    pass

ifft(a) abstractmethod

1D Inverse Fast Fourier Transform.

Source code in holovec/backends/base.py
@abstractmethod
def ifft(self, a: Array) -> Array:
    """1D Inverse Fast Fourier Transform."""
    pass

norm(a, ord=2, axis=None) abstractmethod

Compute the norm of an array.

Source code in holovec/backends/base.py
@abstractmethod
def norm(self, a: Array, ord: int | str = 2, axis: int | None = None) -> Array:
    """Compute the norm of an array."""
    pass

normalize(a, ord=2, axis=None, eps=1e-12) abstractmethod

Normalize an array to unit norm.

Source code in holovec/backends/base.py
@abstractmethod
def normalize(self, a: Array, ord: int | str = 2, axis: int | None = None, eps: float = 1e-12) -> Array:
    """Normalize an array to unit norm."""
    pass

to_numpy(a) abstractmethod

Convert array to NumPy array (for compatibility).

Source code in holovec/backends/base.py
@abstractmethod
def to_numpy(self, a: Array) -> Any:
    """Convert array to NumPy array (for compatibility)."""
    pass

from_numpy(a) abstractmethod

Create backend array from NumPy array.

Source code in holovec/backends/base.py
@abstractmethod
def from_numpy(self, a: Any) -> Array:
    """Create backend array from NumPy array."""
    pass

supports_gpu()

Check if backend has GPU acceleration support.

GPU support enables significant speedups for large-scale operations and is critical for production deployments.

Returns: True if backend can utilize GPU hardware

Source code in holovec/backends/base.py
def supports_gpu(self) -> bool:
    """Check if backend has GPU acceleration support.

    GPU support enables significant speedups for large-scale operations
    and is critical for production deployments.

    Returns:
        True if backend can utilize GPU hardware
    """
    return False  # Default: CPU only (override for PyTorch/JAX)

supports_complex()

Check if backend supports complex number operations.

Complex operations are required for FHRR (Fourier HRR) and other frequency-domain VSA models.

Returns: True if backend can handle complex dtypes (complex64, complex128)

Source code in holovec/backends/base.py
def supports_complex(self) -> bool:
    """Check if backend supports complex number operations.

    Complex operations are required for FHRR (Fourier HRR) and other
    frequency-domain VSA models.

    Returns:
        True if backend can handle complex dtypes (complex64, complex128)
    """
    return True  # Default: assume support (override in backend if needed)

supports_sparse()

Check if backend supports sparse array operations.

Sparse operations are beneficial for BSC (Binary Spatter Codes) and BSDC (Binary Sparse Distributed Codes) which have high sparsity.

Returns: True if backend has native sparse array support

Source code in holovec/backends/base.py
def supports_sparse(self) -> bool:
    """Check if backend supports sparse array operations.

    Sparse operations are beneficial for BSC (Binary Spatter Codes) and
    BSDC (Binary Sparse Distributed Codes) which have high sparsity.

    Returns:
        True if backend has native sparse array support
    """
    return False  # Default: no sparse support (override if available)
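
A hedged sketch of how these probes compose in practice; pick_model and pick_device are hypothetical helpers, standing in for however your code obtains and configures a Backend instance:

def pick_model(backend) -> str:
    """Choose a model name the given backend can run well (illustrative)."""
    if backend.supports_complex():
        return 'FHRR'  # frequency-domain model; needs complex dtypes
    if backend.supports_sparse():
        return 'BSDC'  # sparse codes benefit from native sparse arrays
    return 'MAP'       # dense bipolar model runs on any backend

def pick_device(backend) -> str:
    # Prefer CUDA when the backend reports GPU support and the device exists
    if backend.supports_gpu() and backend.supports_device('cuda'):
        return 'cuda'
    return 'cpu'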