Complete API documentation auto-generated from source code docstrings.
VSA Factory
The main entry point for creating VSA models.
holovec.VSA
High-level factory interface for creating VSA models.
This class provides a simple, unified API for creating and using different VSA models. It's the recommended entry point for most users.
Examples:

>>> # Create a MAP model with default settings
>>> model = VSA.create('MAP')
>>>
>>> # Create FHRR with specific dimension and backend
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>>
>>> # Use the model
>>> a, b = model.random(), model.random()
>>> c = model.bind(a, b)
>>> similarity = model.similarity(a, model.unbind(c, b))
Source code in holovec/__init__.py
class VSA:
"""High-level factory interface for creating VSA models.
This class provides a simple, unified API for creating and using
different VSA models. It's the recommended entry point for most users.
Examples:
>>> # Create a MAP model with default settings
>>> model = VSA.create('MAP')
>>>
>>> # Create FHRR with specific dimension and backend
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>>
>>> # Use the model
>>> a, b = model.random(), model.random()
>>> c = model.bind(a, b)
>>> similarity = model.similarity(a, model.unbind(c, b))
"""
# Model registry
_MODELS: dict[str, Callable[..., VSAModel]] = {
"map": MAPModel,
"fhrr": FHRRModel,
"hrr": HRRModel,
"bsc": BSCModel,
"bsdc": BSDCModel,
"bsdc_seg": BSDCSEGModel,
"bsdc-seg": BSDCSEGModel, # alias with hyphen to match model_name
"ghrr": GHRRModel,
"vtb": VTBModel,
}
# Default vector spaces for each model
_DEFAULT_SPACES: dict[str, str] = {
"map": "bipolar",
"fhrr": "complex",
"hrr": "real",
"bsc": "binary",
"bsdc": "sparse",
"bsdc_seg": "sparse_segment",
"bsdc-seg": "sparse_segment", # alias with hyphen
"ghrr": "matrix",
"vtb": "real",
}
_FACTORY_SPACE_KWARGS: dict[str, frozenset[str]] = {
"bsdc": frozenset({"sparsity"}),
"bsdc_seg": frozenset({"segments"}),
"bsdc-seg": frozenset({"segments"}),
"ghrr": frozenset({"matrix_size", "diagonality"}),
}
_FACTORY_MODEL_KWARGS: dict[str, frozenset[str]] = {
"bsdc": frozenset({"binding_mode"}),
"ghrr": frozenset({"matrix_size", "diagonality"}),
"vtb": frozenset({"n_bases", "shifts", "temperature"}),
}
_FACTORY_BACKEND_KWARGS: dict[str, frozenset[str]] = {
"torch": frozenset({"device"}),
"pytorch": frozenset({"device"}),
}
@classmethod
def create(
cls,
model_type: str,
dim: int = 10000,
backend: str | Backend | None = None,
space: str | None = None,
seed: int | None = None,
**kwargs: object,
) -> VSAModel:
"""Create a VSA model with the specified configuration.
Args:
model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
dim: Dimensionality of hypervectors
backend: Backend name ('numpy', 'torch', 'jax'), a :class:`Backend`
instance, or None for the default backend
space: Vector space name or None for model's default
seed: Random seed for reproducibility
**kwargs: Additional arguments passed to backend (e.g., device='cuda')
Returns:
VSA model instance
Raises:
ValueError: If model_type is not recognized
Examples:
>>> model = VSA.create('MAP', dim=10000)
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>> model = VSA.create('MAP', space='real') # Use real-valued MAP
"""
# Normalize model type
model_type_lower = model_type.lower()
if model_type_lower not in cls._MODELS:
available = list(cls._MODELS.keys())
raise ValueError(f"Unknown model type '{model_type}'. Available models: {available}")
# Get model class
model_class = cls._MODELS[model_type_lower]
allowed_kwargs = set(cls._FACTORY_SPACE_KWARGS.get(model_type_lower, frozenset()))
allowed_kwargs.update(cls._FACTORY_MODEL_KWARGS.get(model_type_lower, frozenset()))
if isinstance(backend, str):
allowed_kwargs.update(cls._FACTORY_BACKEND_KWARGS.get(backend.lower(), frozenset()))
unsupported = sorted(set(kwargs) - allowed_kwargs)
if unsupported:
accepted = sorted(allowed_kwargs)
raise TypeError(
f"Unsupported keyword argument(s) for model '{model_type}': {unsupported}. "
f"Accepted keyword argument(s): {accepted}"
)
# Create backend
backend_kwargs = {k: v for k, v in kwargs.items() if k in ["device"]}
if isinstance(backend, Backend):
backend_instance = backend
elif isinstance(backend, str):
backend_instance = get_backend(backend, **backend_kwargs)
elif backend is None:
backend_instance = None
else:
raise TypeError("backend must be a backend name (str) or a Backend instance")
# Determine space type
if space is None:
space = cls._DEFAULT_SPACES.get(model_type_lower)
# Collect space-specific kwargs
space_kwargs = dict.fromkeys(cls._FACTORY_SPACE_KWARGS.get(model_type_lower, frozenset()))
space_kwargs = {key: kwargs[key] for key in space_kwargs if key in kwargs}
if model_type_lower in {"bsdc_seg", "bsdc-seg"} and "segments" not in space_kwargs:
# For BSDC-SEG: default to dim/10 segments (segment_length=10)
space_kwargs["segments"] = max(1, dim // 10)
if space is not None and not isinstance(space, str) and space_kwargs:
raise TypeError(
f"Explicit space instances cannot be combined with space construction "
f"kwargs: {sorted(space_kwargs)}"
)
# Create space if string provided
if isinstance(space, str):
space_instance: VectorSpace = create_space(
space, dimension=dim, backend=backend_instance, seed=seed, **space_kwargs
)
elif space is not None:
space_instance = space
else:
raise ValueError("space could not be resolved to a valid vector space")
# Collect model-specific kwargs
model_kwargs = {
key: kwargs[key]
for key in cls._FACTORY_MODEL_KWARGS.get(model_type_lower, frozenset())
if key in kwargs
}
# Create model
model = model_class(
dimension=dim,
space=space_instance,
backend=backend_instance,
seed=seed,
**model_kwargs,
)
return model
@classmethod
def available_models(cls) -> list[str]:
"""Return list of available model names.
Returns:
List of model names that can be used with create()
"""
return list(cls._MODELS.keys())
@classmethod
def model_info(cls, model_type: str) -> dict:
"""Get information about a specific model.
Args:
model_type: Model name
Returns:
Dictionary with model properties
Example:
>>> info = VSA.model_info('FHRR')
>>> print(info['is_exact_inverse']) # True
"""
model_type_lower = model_type.lower()
if model_type_lower not in cls._MODELS:
raise ValueError(f"Unknown model type '{model_type}'")
# Create a temporary instance to query properties
model = cls.create(model_type, dim=100)
return {
"name": model.model_name,
"is_self_inverse": model.is_self_inverse,
"is_commutative": model.is_commutative,
"is_exact_inverse": model.is_exact_inverse,
"default_space": cls._DEFAULT_SPACES.get(model_type_lower),
"class": model.__class__.__name__,
}
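Taken together, these factory methods support a complete session. A minimal sketch, using only the names documented on this page (outputs in comments are indicative):

>>> from holovec import VSA
>>> VSA.available_models()          # registry keys from _MODELS above
>>> VSA.model_info('FHRR')          # includes 'is_exact_inverse': True
>>> model = VSA.create('FHRR', dim=512, seed=42)
>>> a, b = model.random(), model.random()
>>> c = model.bind(a, b)
>>> model.similarity(a, model.unbind(c, b))   # ~1.0: FHRR unbinding is exact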
create(model_type, dim=10000, backend=None, space=None, seed=None, **kwargs)
classmethod
Create a VSA model with the specified configuration.
Args:
model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
dim: Dimensionality of hypervectors
backend: Backend name ('numpy', 'torch', 'jax'), a :class:`Backend`
instance, or None for the default backend
space: Vector space name or None for model's default
seed: Random seed for reproducibility
**kwargs: Additional arguments passed to backend (e.g., device='cuda')
Returns: VSA model instance
Raises: ValueError: If model_type is not recognized
Examples:

>>> model = VSA.create('MAP', dim=10000)
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>> model = VSA.create('MAP', space='real')  # Use real-valued MAP
Source code in holovec/__init__.py
@classmethod
def create(
cls,
model_type: str,
dim: int = 10000,
backend: str | Backend | None = None,
space: str | None = None,
seed: int | None = None,
**kwargs: object,
) -> VSAModel:
"""Create a VSA model with the specified configuration.
Args:
model_type: Model name ('MAP', 'FHRR', 'HRR', 'BSC', etc.)
dim: Dimensionality of hypervectors
backend: Backend name ('numpy', 'torch', 'jax'), a :class:`Backend`
instance, or None for the default backend
space: Vector space name or None for model's default
seed: Random seed for reproducibility
**kwargs: Additional arguments passed to backend (e.g., device='cuda')
Returns:
VSA model instance
Raises:
ValueError: If model_type is not recognized
Examples:
>>> model = VSA.create('MAP', dim=10000)
>>> model = VSA.create('FHRR', dim=512, backend='torch', device='cuda')
>>> model = VSA.create('MAP', space='real') # Use real-valued MAP
"""
# Normalize model type
model_type_lower = model_type.lower()
if model_type_lower not in cls._MODELS:
available = list(cls._MODELS.keys())
raise ValueError(f"Unknown model type '{model_type}'. Available models: {available}")
# Get model class
model_class = cls._MODELS[model_type_lower]
allowed_kwargs = set(cls._FACTORY_SPACE_KWARGS.get(model_type_lower, frozenset()))
allowed_kwargs.update(cls._FACTORY_MODEL_KWARGS.get(model_type_lower, frozenset()))
if isinstance(backend, str):
allowed_kwargs.update(cls._FACTORY_BACKEND_KWARGS.get(backend.lower(), frozenset()))
unsupported = sorted(set(kwargs) - allowed_kwargs)
if unsupported:
accepted = sorted(allowed_kwargs)
raise TypeError(
f"Unsupported keyword argument(s) for model '{model_type}': {unsupported}. "
f"Accepted keyword argument(s): {accepted}"
)
# Create backend
backend_kwargs = {k: v for k, v in kwargs.items() if k in ["device"]}
if isinstance(backend, Backend):
backend_instance = backend
elif isinstance(backend, str):
backend_instance = get_backend(backend, **backend_kwargs)
elif backend is None:
backend_instance = None
else:
raise TypeError("backend must be a backend name (str) or a Backend instance")
# Determine space type
if space is None:
space = cls._DEFAULT_SPACES.get(model_type_lower)
# Collect space-specific kwargs
space_kwargs = dict.fromkeys(cls._FACTORY_SPACE_KWARGS.get(model_type_lower, frozenset()))
space_kwargs = {key: kwargs[key] for key in space_kwargs if key in kwargs}
if model_type_lower in {"bsdc_seg", "bsdc-seg"} and "segments" not in space_kwargs:
# For BSDC-SEG: default to dim/10 segments (segment_length=10)
space_kwargs["segments"] = max(1, dim // 10)
if space is not None and not isinstance(space, str) and space_kwargs:
raise TypeError(
f"Explicit space instances cannot be combined with space construction "
f"kwargs: {sorted(space_kwargs)}"
)
# Create space if string provided
if isinstance(space, str):
space_instance: VectorSpace = create_space(
space, dimension=dim, backend=backend_instance, seed=seed, **space_kwargs
)
elif space is not None:
space_instance = space
else:
raise ValueError("space could not be resolved to a valid vector space")
# Collect model-specific kwargs
model_kwargs = {
key: kwargs[key]
for key in cls._FACTORY_MODEL_KWARGS.get(model_type_lower, frozenset())
if key in kwargs
}
# Create model
model = model_class(
dimension=dim,
space=space_instance,
backend=backend_instance,
seed=seed,
**model_kwargs,
)
return model
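The keyword tables above (_FACTORY_SPACE_KWARGS, _FACTORY_MODEL_KWARGS) translate into calls like the following sketch; the numeric values are illustrative, not tuned recommendations:

>>> from holovec import VSA
>>> bsdc = VSA.create('BSDC', dim=10000, sparsity=0.02)   # space kwarg
>>> ghrr = VSA.create('GHRR', dim=100, matrix_size=3)     # model kwarg
>>> seg = VSA.create('BSDC-SEG', dim=10000)               # 'segments' defaults to dim // 10
>>> VSA.create('MAP', dim=1000, sparsity=0.02)            # raises TypeError naming accepted kwargs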
Backend Discovery
holovec.backend_info()
Get information about available backends.
Returns: Dictionary with backend availability and capabilities
Example:

>>> info = backend_info()
>>> print(info['available_backends'])
['numpy', 'torch', 'jax']
Source code in holovec/__init__.py
def backend_info() -> dict:
"""Get information about available backends.
Returns:
Dictionary with backend availability and capabilities
Example:
>>> info = backend_info()
>>> print(info['available_backends'])
['numpy', 'torch', 'jax']
"""
return backends.backend_info()
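A common pattern is to probe backend_info() before choosing a backend. A minimal sketch, assuming the 'available_backends' key shown above and a CUDA-capable device for the torch branch:

>>> from holovec import VSA, backend_info
>>> available = backend_info()['available_backends']
>>> if 'torch' in available:
...     model = VSA.create('MAP', dim=10000, backend='torch', device='cuda')
... else:
...     model = VSA.create('MAP', dim=10000)   # default backend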
Models
All VSA model implementations.
Base Model
holovec.models.base.VSAModel
Bases: ABC
Abstract base class for VSA models.
A VSA model defines the core operations:

- bind: Associate two vectors (creates dissimilar result)
- unbind: Recover one vector given the other and their binding
- bundle: Combine multiple vectors (preserves similarity)
- permute: Reorder vector to represent position/sequence
Different models have different algebraic properties:

- Self-inverse binding: bind(a, b) = unbind(a, b)
- Exact vs. approximate inverse
- Commutativity of binding
Source code in holovec/models/base.py
class VSAModel(ABC):
"""Abstract base class for VSA models.
A VSA model defines the core operations:
- bind: Associate two vectors (creates dissimilar result)
- unbind: Recover one vector given the other and their binding
- bundle: Combine multiple vectors (preserves similarity)
- permute: Reorder vector to represent position/sequence
Different models have different algebraic properties:
- Self-inverse binding: bind(a, b) = unbind(a, b)
- Exact vs approximate inverse
- Commutativity of binding
"""
def __init__(
self,
space: VectorSpace,
backend: Backend | None = None
):
"""Initialize VSA model.
Args:
space: Vector space defining the representation
backend: Computational backend (defaults to space's backend)
"""
self.space = space
self.backend = backend if backend is not None else space.backend
self.dimension = space.dimension
# ===== Core VSA Operations =====
@abstractmethod
def bind(self, a: Array, b: Array) -> Array:
"""Bind two vectors to create an association.
Binding creates a new vector that is dissimilar to both inputs
but preserves structured similarity (similar inputs → similar bindings).
Args:
a: First vector
b: Second vector
Returns:
Bound vector representing the association of a and b
"""
pass
@abstractmethod
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind to recover one vector given the other.
For self-inverse models: unbind(a, b) = bind(a, b)
For others: approximately recovers a from bind(a, b) and b
Args:
a: Bound vector or first operand
b: Second operand
Returns:
Recovered vector (exact or approximate depending on model)
"""
pass
@abstractmethod
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle (superpose) multiple vectors.
Bundling combines vectors while preserving similarity to all inputs.
The result is similar to each input vector.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector representing the superposition
Raises:
ValueError: If vectors is empty
"""
pass
@abstractmethod
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute vector to represent position or sequence.
Permutation reorders coordinates and is used to encode position
or create sequences. It's invertible and preserves similarity.
Args:
vec: Vector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted vector
"""
pass
def unpermute(self, vec: Array, k: int = 1) -> Array:
"""Inverse permutation.
Args:
vec: Vector to unpermute
k: Number of positions to shift back (default: 1)
Returns:
Unpermuted vector
"""
return self.permute(vec, -k)
# ===== Similarity and Cleanup =====
def similarity(self, a: Array, b: Array) -> float:
"""Compute similarity between two vectors.
Delegates to the vector space's similarity metric.
Args:
a: First vector
b: Second vector
Returns:
Similarity score (space-dependent metric)
"""
return self.space.similarity(a, b)
def normalize(self, vec: Array) -> Array:
"""Normalize vector according to space conventions.
Args:
vec: Vector to normalize
Returns:
Normalized vector
"""
return self.space.normalize(vec)
# ===== Vector Generation =====
def random(self, seed: int | None = None) -> Array:
"""Generate a random vector from the space.
Args:
seed: Optional random seed
Returns:
Random vector
"""
return self.space.random(seed=seed)
def random_sequence(self, n: int, seed: int | None = None) -> list[Array]:
"""Generate n random vectors.
Args:
n: Number of vectors to generate
seed: Optional base seed (each vector gets seed + i)
Returns:
List of random vectors
"""
if seed is not None:
return [self.random(seed=seed + i) for i in range(n)]
return [self.random() for _ in range(n)]
# ===== Compositional Operations =====
def bind_multiple(self, vectors: Sequence[Array]) -> Array:
"""Bind multiple vectors sequentially.
For n vectors: bind(bind(bind(v1, v2), v3), ...)
Args:
vectors: Sequence of vectors to bind
Returns:
Result of sequential binding
Raises:
ValueError: If fewer than 2 vectors provided
"""
if len(vectors) < 2:
raise ValueError("Need at least 2 vectors to bind")
result = vectors[0]
for vec in vectors[1:]:
result = self.bind(result, vec)
return result
# ===== Model Properties =====
@property
@abstractmethod
def is_self_inverse(self) -> bool:
"""Whether binding is self-inverse (bind = unbind)."""
pass
@property
@abstractmethod
def is_commutative(self) -> bool:
"""Whether binding is commutative (bind(a, b) = bind(b, a))."""
pass
@property
@abstractmethod
def is_exact_inverse(self) -> bool:
"""Whether unbinding gives exact recovery (no approximation error)."""
pass
@property
@abstractmethod
def model_name(self) -> str:
"""Return the model name (e.g., 'MAP', 'FHRR', 'HRR')."""
pass
def __repr__(self) -> str:
return (f"{self.__class__.__name__}(dimension={self.dimension}, "
f"space={self.space.space_name}, backend={self.backend.name})")
bind(a, b)
abstractmethod
Bind two vectors to create an association.
Binding creates a new vector that is dissimilar to both inputs but preserves structured similarity (similar inputs → similar bindings).
Args: a: First vector b: Second vector
Returns: Bound vector representing the association of a and b
Source code in holovec/models/base.py
@abstractmethod
def bind(self, a: Array, b: Array) -> Array:
"""Bind two vectors to create an association.
Binding creates a new vector that is dissimilar to both inputs
but preserves structured similarity (similar inputs → similar bindings).
Args:
a: First vector
b: Second vector
Returns:
Bound vector representing the association of a and b
"""
pass
unbind(a, b)
abstractmethod
Unbind to recover one vector given the other.
For self-inverse models: unbind(a, b) = bind(a, b)
For others: approximately recovers a from bind(a, b) and b
Args: a: Bound vector or first operand b: Second operand
Returns: Recovered vector (exact or approximate depending on model)
Source code in holovec/models/base.py
@abstractmethod
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind to recover one vector given the other.
For self-inverse models: unbind(a, b) = bind(a, b)
For others: approximately recovers a from bind(a, b) and b
Args:
a: Bound vector or first operand
b: Second operand
Returns:
Recovered vector (exact or approximate depending on model)
"""
pass
bundle(vectors)
abstractmethod
Bundle (superpose) multiple vectors.
Bundling combines vectors while preserving similarity to all inputs. The result is similar to each input vector.
Args: vectors: Sequence of vectors to bundle
Returns: Bundled vector representing the superposition
Raises: ValueError: If vectors is empty
Source code in holovec/models/base.py
@abstractmethod
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle (superpose) multiple vectors.
Bundling combines vectors while preserving similarity to all inputs.
The result is similar to each input vector.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector representing the superposition
Raises:
ValueError: If vectors is empty
"""
pass
permute(vec, k=1)
abstractmethod
Permute vector to represent position or sequence.
Permutation reorders coordinates and is used to encode position or create sequences. It's invertible and preserves similarity.
Args: vec: Vector to permute k: Number of positions to shift (default: 1)
Returns: Permuted vector
Source code in holovec/models/base.py
@abstractmethod
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute vector to represent position or sequence.
Permutation reorders coordinates and is used to encode position
or create sequences. It's invertible and preserves similarity.
Args:
vec: Vector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted vector
"""
pass
unpermute(vec, k=1)
Inverse permutation.
Args: vec: Vector to unpermute k: Number of positions to shift back (default: 1)
Returns: Unpermuted vector
Source code in holovec/models/base.py
def unpermute(self, vec: Array, k: int = 1) -> Array:
"""Inverse permutation.
Args:
vec: Vector to unpermute
k: Number of positions to shift back (default: 1)
Returns:
Unpermuted vector
"""
return self.permute(vec, -k)
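Permutation plus unpermute is the usual way to encode and query position. The following sketch is an illustrative pattern, not a library API:

>>> from holovec import VSA
>>> model = VSA.create('MAP', dim=10000, seed=0)
>>> items = model.random_sequence(3, seed=10)
>>> seq = model.bundle([model.permute(v, k=i) for i, v in enumerate(items)])
>>> probe = model.unpermute(seq, k=2)
>>> [round(model.similarity(probe, v), 2) for v in items]   # highest for items[2]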
similarity(a, b)
Compute similarity between two vectors.
Delegates to the vector space's similarity metric.
Args: a: First vector b: Second vector
Returns: Similarity score (space-dependent metric)
Source code in holovec/models/base.py
def similarity(self, a: Array, b: Array) -> float:
"""Compute similarity between two vectors.
Delegates to the vector space's similarity metric.
Args:
a: First vector
b: Second vector
Returns:
Similarity score (space-dependent metric)
"""
return self.space.similarity(a, b)
normalize(vec)
Normalize vector according to space conventions.
Args: vec: Vector to normalize
Returns: Normalized vector
Source code in holovec/models/base.py
def normalize(self, vec: Array) -> Array:
"""Normalize vector according to space conventions.
Args:
vec: Vector to normalize
Returns:
Normalized vector
"""
return self.space.normalize(vec)
random(seed=None)
Generate a random vector from the space.
Args: seed: Optional random seed
Returns: Random vector
Source code in holovec/models/base.py
def random(self, seed: int | None = None) -> Array:
"""Generate a random vector from the space.
Args:
seed: Optional random seed
Returns:
Random vector
"""
return self.space.random(seed=seed)
FHRR
holovec.models.fhrr.FHRRModel
Bases: VSAModel
FHRR (Fourier HRR) model using complex phasors.
Binding: element-wise complex multiplication (phase addition)
Unbinding: element-wise multiplication with conjugate (phase subtraction)
Bundling: element-wise addition + normalization to unit magnitude
Permutation: circular shift (can also use phase rotation)
Uses ComplexSpace with unit-magnitude phasors.
Source code in holovec/models/fhrr.py
class FHRRModel(VSAModel):
"""FHRR (Fourier HRR) model using complex phasors.
Binding: element-wise complex multiplication (phase addition)
Unbinding: element-wise multiplication with conjugate (phase subtraction)
Bundling: element-wise addition + normalization to unit magnitude
Permutation: circular shift (can also use phase rotation)
Uses ComplexSpace with unit-magnitude phasors.
"""
def __init__(
self,
dimension: int = 512,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None
):
"""Initialize FHRR model.
Args:
dimension: Dimensionality of hypervectors
(can be smaller than MAP due to better capacity)
space: Vector space (defaults to ComplexSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = ComplexSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
@property
def model_name(self) -> str:
return "FHRR"
@property
def is_self_inverse(self) -> bool:
return False # Requires conjugate, not same operation
@property
def is_commutative(self) -> bool:
return True # Complex multiplication is commutative
@property
def is_exact_inverse(self) -> bool:
return True # Conjugate provides exact inverse
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise complex multiplication.
For unit phasors: (a * b)[i] = a[i] * b[i]
This adds phase angles: ∠(a*b) = ∠a + ∠b
Args:
a: First vector (unit phasors)
b: Second vector (unit phasors)
Returns:
Bound vector c = a ⊙ b (element-wise product)
"""
result = self.backend.multiply(a, b)
# Normalize to unit magnitude
return self.normalize(result)
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication with conjugate.
To recover original from c = a ⊙ b:
unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact recovery)
"""
b_conj = self.backend.conjugate(b)
result = self.backend.multiply(a, b_conj)
return self.normalize(result)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum phasors and normalize back to unit magnitude.
The result points in the "average" direction of inputs.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (normalized to unit magnitude)
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (phasors add vectorially)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to unit magnitude
return self.normalize(result)
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
For FHRR, permutation can be done as:
1. Circular shift (coordinate permutation)
2. Phase rotation (multiply by exp(i*2πk/D))
We use circular shift for consistency with other models.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
def fractional_power(self, vec: Array, exponent: float) -> Array:
"""Raise phasor to a fractional power.
For unit phasor z = exp(iθ): z^α = exp(iαθ)
This is useful for encoding continuous values.
Args:
vec: Vector of unit phasors
exponent: Power to raise to
Returns:
Vector with phases scaled by exponent
Example:
>>> base = model.random()
>>> # Encode value 2.5 using fractional power
>>> encoded = model.fractional_power(base, 2.5)
"""
# For unit phasors z = exp(iθ), we want: z^α = exp(iαθ)
# This is exact and avoids branch cuts from complex logarithms.
#
# Implementation:
# 1. Extract phase θ = arg(z)
# 2. Scale by exponent: αθ
# 3. Create new phasor: exp(iαθ)
# Get phase angles using backend operation
angles = self.backend.angle(vec)
# Scale angles by exponent
scaled_angles = self.backend.multiply_scalar(angles, exponent)
# Create new phasors: exp(i * scaled_angles)
# exp(iθ) = cos(θ) + i*sin(θ)
result = self.backend.exp(1j * scaled_angles)
# Renormalize to unit magnitude (handle numerical errors)
return self.normalize(result)
def __repr__(self) -> str:
return (f"FHRRModel(dimension={self.dimension}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})")
__init__(dimension=512, space=None, backend=None, seed=None)
Initialize FHRR model.
Args:
dimension: Dimensionality of hypervectors (can be smaller than MAP due to better capacity)
space: Vector space (defaults to ComplexSpace)
backend: Computational backend
seed: Random seed for space
Source code in holovec/models/fhrr.py
def __init__(
self,
dimension: int = 512,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None
):
"""Initialize FHRR model.
Args:
dimension: Dimensionality of hypervectors
(can be smaller than MAP due to better capacity)
space: Vector space (defaults to ComplexSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = ComplexSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
bind(a, b)
Bind using element-wise complex multiplication.
For unit phasors: (a * b)[i] = a[i] * b[i]
This adds phase angles: ∠(a*b) = ∠a + ∠b
Args: a: First vector (unit phasors) b: Second vector (unit phasors)
Returns: Bound vector c = a ⊙ b (element-wise product)
Source code in holovec/models/fhrr.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise complex multiplication.
For unit phasors: (a * b)[i] = a[i] * b[i]
This adds phase angles: ∠(a*b) = ∠a + ∠b
Args:
a: First vector (unit phasors)
b: Second vector (unit phasors)
Returns:
Bound vector c = a ⊙ b (element-wise product)
"""
result = self.backend.multiply(a, b)
# Normalize to unit magnitude
return self.normalize(result)
bundle(vectors)
Bundle using element-wise addition.
Sum phasors and normalize back to unit magnitude. The result points in the "average" direction of inputs.
Args: vectors: Sequence of vectors to bundle
Returns: Bundled vector (normalized to unit magnitude)
Raises: ValueError: If vectors is empty
Source code in holovec/models/fhrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum phasors and normalize back to unit magnitude.
The result points in the "average" direction of inputs.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (normalized to unit magnitude)
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (phasors add vectorially)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to unit magnitude
return self.normalize(result)
fractional_power(vec, exponent)
Raise phasor to a fractional power.
For unit phasor z = exp(iθ): z^α = exp(iαθ)
This is useful for encoding continuous values.
Args: vec: Vector of unit phasors exponent: Power to raise to
Returns: Vector with phases scaled by exponent
Example:

>>> base = model.random()
>>> # Encode value 2.5 using fractional power
>>> encoded = model.fractional_power(base, 2.5)
Source code in holovec/models/fhrr.py
def fractional_power(self, vec: Array, exponent: float) -> Array:
"""Raise phasor to a fractional power.
For unit phasor z = exp(iθ): z^α = exp(iαθ)
This is useful for encoding continuous values.
Args:
vec: Vector of unit phasors
exponent: Power to raise to
Returns:
Vector with phases scaled by exponent
Example:
>>> base = model.random()
>>> # Encode value 2.5 using fractional power
>>> encoded = model.fractional_power(base, 2.5)
"""
# For unit phasors z = exp(iθ), we want: z^α = exp(iαθ)
# This is exact and avoids branch cuts from complex logarithms.
#
# Implementation:
# 1. Extract phase θ = arg(z)
# 2. Scale by exponent: αθ
# 3. Create new phasor: exp(iαθ)
# Get phase angles using backend operation
angles = self.backend.angle(vec)
# Scale angles by exponent
scaled_angles = self.backend.multiply_scalar(angles, exponent)
# Create new phasors: exp(i * scaled_angles)
# exp(iθ) = cos(θ) + i*sin(θ)
result = self.backend.exp(1j * scaled_angles)
# Renormalize to unit magnitude (handle numerical errors)
return self.normalize(result)
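Fractional powers give a smooth similarity kernel over continuous values: nearby exponents yield similar phasors, distant ones decorrelate. A minimal sketch (values illustrative):

>>> from holovec import VSA
>>> model = VSA.create('FHRR', dim=1024, seed=0)
>>> base = model.random(seed=1)
>>> enc = lambda x: model.fractional_power(base, x)
>>> model.similarity(enc(2.5), enc(2.5))   # 1.0
>>> model.similarity(enc(2.5), enc(2.6))   # high: nearby values stay similar
>>> model.similarity(enc(2.5), enc(7.0))   # low: distant values decorrelate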
permute(vec, k=1)
Permute using circular shift.
For FHRR, permutation can be done as:

1. Circular shift (coordinate permutation)
2. Phase rotation (multiply by exp(i*2πk/D))
We use circular shift for consistency with other models.
Args: vec: Vector to permute k: Number of positions to shift
Returns: Permuted vector
Source code in holovec/models/fhrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
For FHRR, permutation can be done as:
1. Circular shift (coordinate permutation)
2. Phase rotation (multiply by exp(i*2πk/D))
We use circular shift for consistency with other models.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
unbind(a, b)
Unbind using element-wise multiplication with conjugate.
To recover original from c = a ⊙ b: unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a
Args: a: Bound vector (or first operand) b: Second operand
Returns: Unbound vector (exact recovery)
Source code in holovec/models/fhrr.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication with conjugate.
To recover original from c = a ⊙ b:
unbind(c, b) = c ⊙ b* = (a ⊙ b) ⊙ b* = a ⊙ (b ⊙ b*) = a ⊙ 1 = a
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact recovery)
"""
b_conj = self.backend.conjugate(b)
result = self.backend.multiply(a, b_conj)
return self.normalize(result)
GHRR
holovec.models.ghrr.GHRRModel
Bases: VSAModel
GHRR (Generalized Holographic Reduced Representations) model.
Binding: element-wise matrix multiplication (phase addition per matrix)
Unbinding: element-wise multiplication with conjugate transpose
Bundling: element-wise addition + normalization
Permutation: circular shift (or use non-commutativity instead)
Uses MatrixSpace with m×m unitary matrices.
Source code in holovec/models/ghrr.py
class GHRRModel(VSAModel):
"""GHRR (Generalized Holographic Reduced Representations) model.
Binding: element-wise matrix multiplication (phase addition per matrix)
Unbinding: element-wise multiplication with conjugate transpose
Bundling: element-wise addition + normalization
Permutation: circular shift (or use non-commutativity instead)
Uses MatrixSpace with m×m unitary matrices.
"""
def __init__(
self,
dimension: int = 100,
matrix_size: int = 3,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
diagonality: float | None = None
):
"""Initialize GHRR model.
Args:
dimension: Number of matrices in hypervector (can be smaller than
scalar models due to better capacity)
matrix_size: Size m of each m×m matrix (default: 3)
Larger m → more non-commutative, better for complex structures
m=1 recovers FHRR
space: Vector space (defaults to MatrixSpace)
backend: Computational backend
seed: Random seed for space
diagonality: Control commutativity in [0, 1]
None: Random (default)
0.0: Maximally non-commutative
1.0: Fully commutative (FHRR-like)
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = MatrixSpace(
dimension,
matrix_size=matrix_size,
backend=backend,
seed=seed,
diagonality=diagonality,
)
super().__init__(space, backend)
# Store matrix size for easy access
self.matrix_size = matrix_size if isinstance(space, MatrixSpace) else 1
self._diagonality = diagonality
@property
def model_name(self) -> str:
return f"GHRR_m{self.matrix_size}"
@property
def is_self_inverse(self) -> bool:
return False # Requires conjugate transpose
@property
def is_commutative(self) -> bool:
return False # Matrix multiplication is non-commutative
@property
def is_exact_inverse(self) -> bool:
return True # Conjugate transpose provides exact inverse
@property
def commutativity_degree(self) -> float:
"""Degree of commutativity in [0, 1].
For GHRR, this depends on the diagonality of Q matrices.
More diagonal → more commutative.
Returns:
0.0 if maximally non-commutative, 1.0 if fully commutative
"""
if self._diagonality is not None:
return self._diagonality
# For random GHRR, larger m tends toward lower diagonality
# This is approximate based on Yeung et al. Figure 6
if self.matrix_size == 1:
return 1.0 # FHRR is commutative
elif self.matrix_size == 2:
return 0.7 # Mostly commutative
elif self.matrix_size == 3:
return 0.5 # Balanced
else:
return 0.3 # Mostly non-commutative
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise matrix multiplication.
For matrices at position j: (a ⊗ b)_j = a_j @ b_j
This is non-commutative: a ⊗ b ≠ b ⊗ a in general.
Args:
a: First hypervector (D, m, m)
b: Second hypervector (D, m, m)
Returns:
Bound hypervector c where c_j = a_j @ b_j for all j
"""
# Element-wise matrix multiplication using matmul broadcast
# For (D, m, m) @ (D, m, m), this does D separate m×m multiplications
result = self.backend.matmul(a, b)
# Normalization not strictly needed for unitary matrices
# but helps with numerical stability
return result
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication with conjugate transpose.
To recover original from c = a ⊗ b:
unbind(c, b) = c_j @ b_j† for all j
This provides exact recovery: unbind(bind(a, b), b) = a
Args:
a: Bound hypervector (or first operand)
b: Second operand
Returns:
Unbound hypervector (exact recovery)
"""
# Compute b^† (conjugate transpose of each matrix)
b_conj_t = self.backend.conjugate(self.backend.matrix_transpose(b))
# Element-wise matrix multiply
result = self.backend.matmul(a, b_conj_t)
return result
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum all hypervectors element-wise. Each element is an m×m matrix.
For GHRR: (a + b)_j = a_j + b_j (matrix addition)
Args:
vectors: Sequence of hypervectors to bundle
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (element-wise matrix addition)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to project back to unitary matrices
# This is critical for maintaining quasi-orthogonality (Yeung et al. 2024)
# Uses polar decomposition via SVD
result = self.space.normalize(result)
return result
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
For GHRR, permutation is less critical since non-commutativity
can encode order. But still useful for some applications.
Args:
vec: Hypervector to permute (D, m, m)
k: Number of positions to shift
Returns:
Permuted hypervector
"""
# Roll along first dimension (shift which matrix is at which position)
return self.backend.roll(vec, shift=k, axis=0)
def test_non_commutativity(self, a: Array, b: Array) -> float:
"""Test degree of non-commutativity for two hypervectors.
Computes: δ(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args:
a: First hypervector
b: Second hypervector
Returns:
Similarity between a⊗b and b⊗a
"""
ab = self.bind(a, b)
ba = self.bind(b, a)
return self.similarity(ab, ba)
def compute_diagonality(self, vec: Array) -> float:
"""Compute average diagonality of matrices in hypervector.
Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|
Args:
vec: Hypervector (D, m, m)
Returns:
Diagonality in [0, 1]
"""
abs_vec = self.backend.abs(vec)
total_diag = self.backend.sum(self.backend.matrix_trace(abs_vec))
total_all = self.backend.sum(abs_vec)
total_all_value = float(self.backend.to_numpy(total_all))
if total_all_value == 0.0:
return 0.0
return float(self.backend.to_numpy(total_diag)) / total_all_value
def __repr__(self) -> str:
return (f"GHRRModel(dimension={self.dimension}, "
f"matrix_size={self.matrix_size}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})")
commutativity_degree
property
Degree of commutativity in [0, 1].
For GHRR, this depends on the diagonality of Q matrices. More diagonal → more commutative.
Returns: 0.0 if maximally non-commutative, 1.0 if fully commutative
__init__(dimension=100, matrix_size=3, space=None, backend=None, seed=None, diagonality=None)
Initialize GHRR model.
Args:
dimension: Number of matrices in hypervector (can be smaller than scalar models due to better capacity)
matrix_size: Size m of each m×m matrix (default: 3). Larger m → more non-commutative, better for complex structures; m=1 recovers FHRR
space: Vector space (defaults to MatrixSpace)
backend: Computational backend
seed: Random seed for space
diagonality: Control commutativity in [0, 1]. None: random (default); 0.0: maximally non-commutative; 1.0: fully commutative (FHRR-like)
Source code in holovec/models/ghrr.py
def __init__(
self,
dimension: int = 100,
matrix_size: int = 3,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
diagonality: float | None = None
):
"""Initialize GHRR model.
Args:
dimension: Number of matrices in hypervector (can be smaller than
scalar models due to better capacity)
matrix_size: Size m of each m×m matrix (default: 3)
Larger m → more non-commutative, better for complex structures
m=1 recovers FHRR
space: Vector space (defaults to MatrixSpace)
backend: Computational backend
seed: Random seed for space
diagonality: Control commutativity in [0, 1]
None: Random (default)
0.0: Maximally non-commutative
1.0: Fully commutative (FHRR-like)
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = MatrixSpace(
dimension,
matrix_size=matrix_size,
backend=backend,
seed=seed,
diagonality=diagonality,
)
super().__init__(space, backend)
# Store matrix size for easy access
self.matrix_size = matrix_size if isinstance(space, MatrixSpace) else 1
self._diagonality = diagonality
bind(a, b)
Bind using element-wise matrix multiplication.
For matrices at position j: (a ⊗ b)_j = a_j @ b_j
This is non-commutative: a ⊗ b ≠ b ⊗ a in general.
Args: a: First hypervector (D, m, m) b: Second hypervector (D, m, m)
Returns: Bound hypervector c where c_j = a_j @ b_j for all j
Source code in holovec/models/ghrr.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise matrix multiplication.
For matrices at position j: (a ⊗ b)_j = a_j @ b_j
This is non-commutative: a ⊗ b ≠ b ⊗ a in general.
Args:
a: First hypervector (D, m, m)
b: Second hypervector (D, m, m)
Returns:
Bound hypervector c where c_j = a_j @ b_j for all j
"""
# Element-wise matrix multiplication using matmul broadcast
# For (D, m, m) @ (D, m, m), this does D separate m×m multiplications
result = self.backend.matmul(a, b)
# Normalization not strictly needed for unitary matrices
# but helps with numerical stability
return result
bundle(vectors)
Bundle using element-wise addition.
Sum all hypervectors element-wise. Each element is an m×m matrix.
For GHRR: (a + b)_j = a_j + b_j (matrix addition)
Args: vectors: Sequence of hypervectors to bundle
Returns: Bundled hypervector
Raises: ValueError: If vectors is empty
Source code in holovec/models/ghrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum all hypervectors element-wise. Each element is an m×m matrix.
For GHRR: (a + b)_j = a_j + b_j (matrix addition)
Args:
vectors: Sequence of hypervectors to bundle
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (element-wise matrix addition)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to project back to unitary matrices
# This is critical for maintaining quasi-orthogonality (Yeung et al. 2024)
# Uses polar decomposition via SVD
result = self.space.normalize(result)
return result
compute_diagonality(vec)
Compute average diagonality of matrices in hypervector.
Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|
Args: vec: Hypervector (D, m, m)
Returns: Diagonality in [0, 1]
Source code in holovec/models/ghrr.py
def compute_diagonality(self, vec: Array) -> float:
"""Compute average diagonality of matrices in hypervector.
Diagonality metric: Σ|Q_jj| / ΣΣ|Q_jk|
Args:
vec: Hypervector (D, m, m)
Returns:
Diagonality in [0, 1]
"""
abs_vec = self.backend.abs(vec)
total_diag = self.backend.sum(self.backend.matrix_trace(abs_vec))
total_all = self.backend.sum(abs_vec)
total_all_value = float(self.backend.to_numpy(total_all))
if total_all_value == 0.0:
return 0.0
return float(self.backend.to_numpy(total_diag)) / total_all_value
permute(vec, k=1)
Permute using circular shift.
For GHRR, permutation is less critical since non-commutativity can encode order. But still useful for some applications.
Args: vec: Hypervector to permute (D, m, m) k: Number of positions to shift
Returns: Permuted hypervector
Source code in holovec/models/ghrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
For GHRR, permutation is less critical since non-commutativity
can encode order. But still useful for some applications.
Args:
vec: Hypervector to permute (D, m, m)
k: Number of positions to shift
Returns:
Permuted hypervector
"""
# Roll along first dimension (shift which matrix is at which position)
return self.backend.roll(vec, shift=k, axis=0)
test_non_commutativity(a, b)
Test degree of non-commutativity for two hypervectors.
Computes: δ(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args: a: First hypervector b: Second hypervector
Returns: Similarity between a⊗b and b⊗a
Source code in holovec/models/ghrr.py
def test_non_commutativity(self, a: Array, b: Array) -> float:
"""Test degree of non-commutativity for two hypervectors.
Computes: δ(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args:
a: First hypervector
b: Second hypervector
Returns:
Similarity between a⊗b and b⊗a
"""
ab = self.bind(a, b)
ba = self.bind(b, a)
return self.similarity(ab, ba)
unbind(a, b)
Unbind using element-wise multiplication with conjugate transpose.
To recover original from c = a ⊗ b: unbind(c, b) = c_j @ b_j† for all j
This provides exact recovery: unbind(bind(a, b), b) = a
Args: a: Bound hypervector (or first operand) b: Second operand
Returns: Unbound hypervector (exact recovery)
Source code in holovec/models/ghrr.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication with conjugate transpose.
To recover original from c = a ⊗ b:
unbind(c, b) = c_j @ b_j† for all j
This provides exact recovery: unbind(bind(a, b), b) = a
Args:
a: Bound hypervector (or first operand)
b: Second operand
Returns:
Unbound hypervector (exact recovery)
"""
# Compute b^† (conjugate transpose of each matrix)
b_conj_t = self.backend.conjugate(self.backend.matrix_transpose(b))
# Element-wise matrix multiply
result = self.backend.matmul(a, b_conj_t)
return result
MAP
holovec.models.map.MAPModel
Bases: VSAModel
MAP (Multiply-Add-Permute) model.
Binding: element-wise multiplication
Unbinding: element-wise multiplication (self-inverse)
Bundling: element-wise addition + normalization
Permutation: circular shift
Best used with BipolarSpace or RealSpace.
Source code in holovec/models/map.py
class MAPModel(VSAModel):
"""MAP (Multiply-Add-Permute) model.
Binding: element-wise multiplication
Unbinding: element-wise multiplication (self-inverse)
Bundling: element-wise addition + normalization
Permutation: circular shift
Best used with BipolarSpace or RealSpace.
"""
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
):
"""Initialize MAP model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to BipolarSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = BipolarSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
# Pre-compute permutation indices for efficiency
self._permutation_indices = list(range(self.dimension))
@property
def model_name(self) -> str:
return "MAP"
@property
def is_self_inverse(self) -> bool:
return True
@property
def is_commutative(self) -> bool:
return True
@property
def is_exact_inverse(self) -> bool:
# Exact for bipolar, approximate for continuous
return self.space.space_name == "bipolar"
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise multiplication.
For bipolar: XOR when represented as {0,1}
For real: Hadamard product
Args:
a: First vector
b: Second vector
Returns:
Bound vector c = a ⊙ b
"""
result = self.backend.multiply(a, b)
# Normalize to maintain unit norm for continuous spaces
if self.space.space_name != "bipolar":
result = self.normalize(result)
return result
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication (self-inverse).
Since binding is self-inverse: unbind(c, b) = c ⊙ b
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact for bipolar, approximate for continuous)
"""
# For MAP, binding = unbinding
return self.bind(a, b)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
For bipolar: majority vote after summing
For real: sum and normalize
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize according to space
if self.space.space_name == "bipolar":
# Majority vote: sign of sum
result = self.backend.sign(result)
# Handle zeros (shouldn't happen in practice, but be safe)
# If sum is 0, randomly choose ±1
zeros_mask = result == 0
if self.backend.to_numpy(zeros_mask).any():
# For any zeros, use the first vector's value
first_vec = vectors[0]
result = self.backend.where(zeros_mask, first_vec, result)
else:
# For continuous spaces, L2 normalize
result = self.normalize(result)
return result
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
def __repr__(self) -> str:
return (
f"MAPModel(dimension={self.dimension}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})"
)
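A minimal sketch of MAP's self-inverse binding and majority-vote bundling on the default bipolar space:

>>> from holovec import VSA
>>> model = VSA.create('MAP', dim=10000, seed=0)
>>> a, b = model.random(seed=1), model.random(seed=2)
>>> c = model.bind(a, b)
>>> model.similarity(a, model.unbind(c, b))   # 1.0: x ⊙ x = +1 element-wise
>>> votes = model.bundle([a, a, b])           # sign-based majority vote
>>> model.similarity(a, votes) > model.similarity(b, votes)
True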
__init__(dimension=10000, space=None, backend=None, seed=None)
Initialize MAP model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to BipolarSpace)
backend: Computational backend
seed: Random seed for space
Source code in holovec/models/map.py
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
):
"""Initialize MAP model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to BipolarSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = BipolarSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
# Pre-compute permutation indices for efficiency
self._permutation_indices = list(range(self.dimension))
bind(a, b)
Bind using element-wise multiplication.
For bipolar: XOR when represented as {0,1}
For real: Hadamard product
Args: a: First vector b: Second vector
Returns: Bound vector c = a ⊙ b
Source code in holovec/models/map.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using element-wise multiplication.
For bipolar: XOR when represented as {0,1}
For real: Hadamard product
Args:
a: First vector
b: Second vector
Returns:
Bound vector c = a ⊙ b
"""
result = self.backend.multiply(a, b)
# Normalize to maintain unit norm for continuous spaces
if self.space.space_name != "bipolar":
result = self.normalize(result)
return result
bundle(vectors)
Bundle using element-wise addition.
For bipolar: majority vote after summing
For real: sum and normalize
Args: vectors: Sequence of vectors to bundle
Returns: Bundled vector
Raises: ValueError: If vectors is empty
Source code in holovec/models/map.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
For bipolar: majority vote after summing
For real: sum and normalize
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize according to space
if self.space.space_name == "bipolar":
# Majority vote: sign of sum
result = self.backend.sign(result)
# Handle zeros (shouldn't happen in practice, but be safe)
# If sum is 0, randomly choose ±1
zeros_mask = result == 0
if self.backend.to_numpy(zeros_mask).any():
# For any zeros, use the first vector's value
first_vec = vectors[0]
result = self.backend.where(zeros_mask, first_vec, result)
else:
# For continuous spaces, L2 normalize
result = self.normalize(result)
return result
permute(vec, k=1)
Permute using circular shift.
Shifts vector elements by k positions to the right. Negative k shifts left.
Args: vec: Vector to permute k: Number of positions to shift
Returns: Permuted vector
Source code in holovec/models/map.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
unbind(a, b)
Unbind using element-wise multiplication (self-inverse).
Since binding is self-inverse: unbind(c, b) = c ⊙ b
Args: a: Bound vector (or first operand) b: Second operand
Returns: Unbound vector (exact for bipolar, approximate for continuous)
Source code in holovec/models/map.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using element-wise multiplication (self-inverse).
Since binding is self-inverse: unbind(c, b) = c ⊙ b
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact for bipolar, approximate for continuous)
"""
# For MAP, binding = unbinding
return self.bind(a, b)
HRR
holovec.models.hrr.HRRModel
Bases: VSAModel
HRR (Holographic Reduced Representations) model.
Binding: circular convolution (via FFT)
Unbinding: circular correlation (via FFT)
Bundling: element-wise addition (unnormalized; see bundle())
Permutation: circular shift
Uses RealSpace with Gaussian distribution N(0, 1/D).
Source code in holovec/models/hrr.py
class HRRModel(VSAModel):
"""HRR (Holographic Reduced Representations) model.
Binding: circular convolution (via FFT)
Unbinding: circular correlation (via FFT)
Bundling: element-wise addition (unnormalized; see bundle())
Permutation: circular shift
Uses RealSpace with Gaussian distribution N(0, 1/D).
"""
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
):
"""Initialize HRR model.
Args:
dimension: Dimensionality of hypervectors (recommend 1000-10000)
space: Vector space (defaults to RealSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = RealSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
@property
def model_name(self) -> str:
return "HRR"
@property
def is_self_inverse(self) -> bool:
return False # Requires correlation, not same operation
@property
def is_commutative(self) -> bool:
return True # Convolution is commutative
@property
def is_exact_inverse(self) -> bool:
return False # Correlation gives approximate inverse
def bind(self, a: Array, b: Array) -> Array:
"""Bind using circular convolution.
Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))
Args:
a: First vector
b: Second vector
Returns:
Bound vector c = a ⊛ b (circular convolution)
"""
# Circular convolution in frequency domain
result = self.backend.circular_convolve(a, b)
# Do NOT normalize - preserves magnitude for proper unbinding via
# circular correlation. Normalization would interfere with the
# mathematical relationship required for unbind recovery.
return result
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using circular correlation (approximate inverse of convolution).
This is the classic HRR unbinding operation that uses circular correlation
to approximately recover the original vector from a bound pair.
Args:
a: Bound vector c = x ⊛ b (result of circular convolution)
b: Key vector (second operand in binding)
Returns:
Approximate recovery of x (original vector), normalized to unit length
Notes
-----
**Mathematical Foundation:**
HRR binding via circular convolution:
c = x ⊛ b
In frequency domain (Fourier):
C(ω) = X(ω) · B(ω)
Unbinding via circular correlation:
x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))
Where B*(ω) is the complex conjugate of B(ω).
Substituting C(ω) = X(ω) · B(ω):
x̂ = IFFT(X(ω) · B(ω) · B*(ω))
= IFFT(X(ω) · |B(ω)|²)
For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1),
this gives x̂ ≈ x.
**Approximation Quality:**
Recovery similarity depends on:
- Dimension D: Higher D → better recovery
- Noise level: Clean binding → better unbind
- Bundle size: More items → more interference
Empirical performance (D=10000):
- Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
- After bundling 2 items: similarity ≈ 0.57
- After bundling 10 items: similarity ≈ 0.30
- After bundling 100 items: similarity decreases further
Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular
correlation provides only approximate recovery. The ~0.71 similarity
is sufficient for retrieval tasks but requires cleanup/thresholding.
References
----------
- Plate (1995): "Holographic Reduced Representations"
- Plate (2003): "Holographic Reduced Representations" (full book)
Examples
--------
>>> model = VSA.create('HRR', dim=10000)
>>> x = model.random(seed=1)
>>> b = model.random(seed=2)
>>> c = model.bind(x, b)
>>> x_recovered = model.unbind(c, b)
>>> similarity = model.similarity(x, x_recovered)
>>> print(f"Recovery similarity: {similarity:.3f}") # ~0.71
"""
# Transform to frequency domain
fa = self.backend.fft(a)
fb = self.backend.fft(b)
# Circular correlation: C(ω) * conj(B(ω))
# This is the classic HRR unbinding operation (Plate, 1995)
fr = self.backend.multiply(fa, self.backend.conjugate(fb))
# Transform back to time domain
time = self.backend.ifft(fr)
# Take real part (imaginary part should be near zero due to real inputs)
result = self.backend.real(time)
# Normalize to unit length for consistent comparison with other vectors
return self.normalize(result)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition (superposition).
For HRR, bundling is simple vector addition without normalization.
This preserves the magnitude relationships needed for proper unbinding.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (unnormalized sum)
Raises:
ValueError: If vectors is empty
Notes:
Unlike some VSA models, HRR does NOT normalize after bundling.
Normalization would interfere with the circular correlation unbinding
operation. The unbind() method handles normalization of its output.
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (simple superposition, no normalization)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
return result
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
def __repr__(self) -> str:
return (
f"HRRModel(dimension={self.dimension}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})"
)
__init__(dimension=10000, space=None, backend=None, seed=None)
Initialize HRR model.
Args:
- dimension: Dimensionality of hypervectors (recommend 1000-10000)
- space: Vector space (defaults to RealSpace)
- backend: Computational backend
- seed: Random seed for space
Source code in holovec/models/hrr.py
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
):
"""Initialize HRR model.
Args:
dimension: Dimensionality of hypervectors (recommend 1000-10000)
space: Vector space (defaults to RealSpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = RealSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
bind(a, b)
Bind using circular convolution.
Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))
Args:
- a: First vector
- b: Second vector
Returns: Bound vector c = a ⊛ b (circular convolution)
Source code in holovec/models/hrr.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using circular convolution.
Implemented via FFT: conv(a, b) = IFFT(FFT(a) * FFT(b))
Args:
a: First vector
b: Second vector
Returns:
Bound vector c = a ⊛ b (circular convolution)
"""
# Circular convolution in frequency domain
result = self.backend.circular_convolve(a, b)
# Do NOT normalize - preserves magnitude for proper unbinding via
# circular correlation. Normalization would interfere with the
# mathematical relationship required for unbind recovery.
return result
bundle(vectors)
Bundle using element-wise addition (superposition).
For HRR, bundling is simple vector addition without normalization. This preserves the magnitude relationships needed for proper unbinding.
Args: vectors: Sequence of vectors to bundle
Returns: Bundled vector (unnormalized sum)
Raises: ValueError: If vectors is empty
Notes: Unlike some VSA models, HRR does NOT normalize after bundling. Normalization would interfere with the circular correlation unbinding operation. The unbind() method handles normalization of its output.
Source code in holovec/models/hrr.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition (superposition).
For HRR, bundling is simple vector addition without normalization.
This preserves the magnitude relationships needed for proper unbinding.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (unnormalized sum)
Raises:
ValueError: If vectors is empty
Notes:
Unlike some VSA models, HRR does NOT normalize after bundling.
Normalization would interfere with the circular correlation unbinding
operation. The unbind() method handles normalization of its output.
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (simple superposition, no normalization)
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
return result
permute(vec, k=1)
Permute using circular shift.
Shifts vector elements by k positions to the right. Negative k shifts left.
Args:
- vec: Vector to permute
- k: Number of positions to shift
Returns: Permuted vector
Source code in holovec/models/hrr.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
unbind(a, b)
Unbind using circular correlation (approximate inverse of convolution).
This is the classic HRR unbinding operation that uses circular correlation to approximately recover the original vector from a bound pair.
Args:
- a: Bound vector c = x ⊛ b (result of circular convolution)
- b: Key vector (second operand in binding)
Returns: Approximate recovery of x (original vector), normalized to unit length
Notes
Mathematical Foundation:
HRR binding via circular convolution: c = x ⊛ b
In frequency domain (Fourier): C(ω) = X(ω) · B(ω)
Unbinding via circular correlation: x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))
Where B*(ω) is the complex conjugate of B(ω).
Substituting C(ω) = X(ω) · B(ω): x̂ = IFFT(X(ω) · B(ω) · B*(ω)) = IFFT(X(ω) · |B(ω)|²)
For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1), this gives x̂ ≈ x.
Approximation Quality:
Recovery similarity depends on:
- Dimension D: Higher D → better recovery
- Noise level: Clean binding → better unbind
- Bundle size: More items → more interference
Empirical performance (D=10000):
- Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
- After bundling 2 items: similarity ≈ 0.57
- After bundling 10 items: similarity ≈ 0.30
- After bundling 100 items: similarity decreases further
Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular correlation provides only approximate recovery. The ~0.71 similarity is sufficient for retrieval tasks but requires cleanup/thresholding.
References
- Plate (1995): "Holographic Reduced Representations"
- Plate (2003): "Holographic Reduced Representations" (full book)
Examples:
>>> model = VSA.create('HRR', dim=10000)
>>> x = model.random(seed=1)
>>> b = model.random(seed=2)
>>> c = model.bind(x, b)
>>> x_recovered = model.unbind(c, b)
>>> similarity = model.similarity(x, x_recovered)
>>> print(f"Recovery similarity: {similarity:.3f}") # ~0.71
Source code in holovec/models/hrr.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using circular correlation (approximate inverse of convolution).
This is the classic HRR unbinding operation that uses circular correlation
to approximately recover the original vector from a bound pair.
Args:
a: Bound vector c = x ⊛ b (result of circular convolution)
b: Key vector (second operand in binding)
Returns:
Approximate recovery of x (original vector), normalized to unit length
Notes
-----
**Mathematical Foundation:**
HRR binding via circular convolution:
c = x ⊛ b
In frequency domain (Fourier):
C(ω) = X(ω) · B(ω)
Unbinding via circular correlation:
x̂ = c ⋆ b = IFFT(C(ω) · B*(ω))
Where B*(ω) is the complex conjugate of B(ω).
Substituting C(ω) = X(ω) · B(ω):
x̂ = IFFT(X(ω) · B(ω) · B*(ω))
= IFFT(X(ω) · |B(ω)|²)
For random vectors with approximately uniform power spectrum (|B(ω)|² ≈ 1),
this gives x̂ ≈ x.
**Approximation Quality:**
Recovery similarity depends on:
- Dimension D: Higher D → better recovery
- Noise level: Clean binding → better unbind
- Bundle size: More items → more interference
Empirical performance (D=10000):
- Clean unbind: similarity ≈ 0.70-0.72 (approximate inverse)
- After bundling 2 items: similarity ≈ 0.57
- After bundling 10 items: similarity ≈ 0.30
- After bundling 100 items: similarity decreases further
Note: Unlike FHRR which achieves exact (1.0) recovery, HRR's circular
correlation provides only approximate recovery. The ~0.71 similarity
is sufficient for retrieval tasks but requires cleanup/thresholding.
References
----------
- Plate (1995): "Holographic Reduced Representations"
- Plate (2003): "Holographic Reduced Representations" (full book)
Examples
--------
>>> model = VSA.create('HRR', dim=10000)
>>> x = model.random(seed=1)
>>> b = model.random(seed=2)
>>> c = model.bind(x, b)
>>> x_recovered = model.unbind(c, b)
>>> similarity = model.similarity(x, x_recovered)
>>> print(f"Recovery similarity: {similarity:.3f}") # ~0.71
"""
# Transform to frequency domain
fa = self.backend.fft(a)
fb = self.backend.fft(b)
# Circular correlation: C(ω) * conj(B(ω))
# This is the classic HRR unbinding operation (Plate, 1995)
fr = self.backend.multiply(fa, self.backend.conjugate(fb))
# Transform back to time domain
time = self.backend.ifft(fr)
# Take real part (imaginary part should be near zero due to real inputs)
result = self.backend.real(time)
# Normalize to unit length for consistent comparison with other vectors
return self.normalize(result)
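The identity above is easy to verify outside the library with plain NumPy. The following is a minimal standalone sketch, not holovec API; the ≈0.7 figure matches the empirical numbers quoted in the docstring, but the exact value varies with the random draw and dimension.

import numpy as np

rng = np.random.default_rng(0)
D = 10000
x = rng.normal(0.0, 1.0 / np.sqrt(D), D)  # random real vector, ~unit norm
b = rng.normal(0.0, 1.0 / np.sqrt(D), D)

# Bind: circular convolution via FFT
c = np.real(np.fft.ifft(np.fft.fft(x) * np.fft.fft(b)))

# Unbind: circular correlation = multiply by the conjugate spectrum
x_hat = np.real(np.fft.ifft(np.fft.fft(c) * np.conj(np.fft.fft(b))))

cos = x @ x_hat / (np.linalg.norm(x) * np.linalg.norm(x_hat))
print(f"recovery similarity: {cos:.3f}")  # typically ~0.7, as documented above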
VTB
holovec.models.vtb.VTBModel
Bases: VSAModel
VTB (Vector-derived Transformation Binding) model.
Binding (MBAT-style): c = Σ_k w_k(a) · roll(b, s_k)
Unbinding (approximate): b̂ = Σ_k w_k(a) · roll(c, -s_k)
Bundling: element-wise addition + normalization
Permutation: circular shift
Uses RealSpace with L2-normalized real-valued vectors.
Source code in holovec/models/vtb.py
class VTBModel(VSAModel):
"""VTB (Vector-derived Transformation Binding) model.
Binding (MBAT-style): c = Σ_k w_k(a) · roll(b, s_k)
Unbinding (approximate): b̂ = Σ_k w_k(a) · roll(c, -s_k)
Bundling: element-wise addition + normalization
Permutation: circular shift
Uses RealSpace with L2-normalized real-valued vectors.
"""
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
n_bases: int = 4,
shifts: list[int] | None = None,
temperature: float = 100.0,
):
"""Initialize VTB model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to RealSpace)
backend: Computational backend
        seed: Random seed for space
        n_bases: Number of basis transforms K (must be >= 2)
        shifts: Optional list of K circular shifts; generated if None
        temperature: Softmax temperature τ for the basis weights
    """
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = RealSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
# MBAT parameters
self.n_bases = int(n_bases)
if self.n_bases < 2:
raise ValueError("n_bases must be >= 2")
self.temperature = float(temperature)
if self.temperature <= 0:
self.temperature = 1.0
# Basis transformations: use integer circular shifts as R_k
if shifts is None:
# choose distinct small shifts spread across dimension
step = max(1, self.dimension // (self.n_bases + 1))
self.shifts = [((i + 1) * step) % self.dimension for i in range(self.n_bases)]
# ensure non-zero and unique
self.shifts = [s if s != 0 else 1 for s in self.shifts]
self.shifts = list(dict.fromkeys(self.shifts))
while len(self.shifts) < self.n_bases:
# fill with incremental shifts
self.shifts.append((self.shifts[-1] + 1) % self.dimension)
else:
if len(shifts) != self.n_bases:
raise ValueError("len(shifts) must equal n_bases")
self.shifts = [int(s) % self.dimension for s in shifts]
# Code vectors U_k to produce weights w_k(a) = softmax(τ · <a, U_k>)
# Stack shape (K, D)
self._U = self.backend.stack([
self.backend.normalize(self.backend.random_normal(self.dimension, seed=(seed or 0) + k))
for k in range(self.n_bases)
], axis=0)
@property
def model_name(self) -> str:
return "VTB"
@property
def is_self_inverse(self) -> bool:
return False
@property
def is_commutative(self) -> bool:
return False
@property
def is_exact_inverse(self) -> bool:
return False
def _weights(self, a: Array) -> Array:
"""Compute softmax weights over bases from vector a.
w_k(a) = softmax(τ · <a, U_k>)
Returns shape (K,)
"""
# scores: (K,)
scores = []
for k in range(self.n_bases):
uk = self._U[k]
scores.append(self.backend.dot(a, uk))
scores = self.backend.stack(scores, axis=0)
# scale by temperature then softmax
scaled = self.backend.multiply_scalar(scores, self.temperature)
return self.backend.softmax(scaled, axis=0)
def bind(self, a: Array, b: Array) -> Array:
"""Bind using MBAT-style weighted basis transforms.
c = Σ_k w_k(a) · roll(b, s_k)
"""
# Derive transform from a to act on b
w = self._weights(a) # (K,)
# accumulate weighted shifts
parts = []
for k, shift in enumerate(self.shifts):
wk = w[k]
rb = self.backend.roll(b, shift=shift)
parts.append(self.backend.multiply_scalar(rb, float(self.backend.to_numpy(wk))))
result = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
return self.normalize(result)
def unbind(self, c: Array, b: Array) -> Array:
"""Approximate unbinding using weighted inverse transforms.
IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b).
You must pass the FIRST argument of bind (a) as the second argument here.
For c = bind(a, b):
- unbind(c, a) → recovers b (correct usage)
- unbind(c, b) → does NOT recover a
b̂ = Σ_k w_k(key) · roll(c, -s_k), where key is bind's first argument (passed as this method's second parameter)
"""
# Re-derive the transform weights from the key (bind's first argument)
w = self._weights(b)
parts = []
for k, shift in enumerate(self.shifts):
wk = w[k]
rc = self.backend.roll(c, shift=-shift)
parts.append(self.backend.multiply_scalar(rc, float(self.backend.to_numpy(wk))))
num = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
# Denominator as sum of squared weights (scalar)
w_np = self.backend.to_numpy(w)
denom = float((w_np ** 2).sum()) + 1e-8
result = self.backend.multiply_scalar(num, 1.0 / denom)
return self.normalize(result)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum all hypervectors element-wise and normalize.
Args:
vectors: Sequence of hypervectors to bundle
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to unit length
return self.normalize(result)
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions. Combined with binding,
this can encode position in sequences.
Args:
vec: Hypervector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted hypervector
"""
return self.backend.roll(vec, shift=k)
def test_non_commutativity(self, a: Array, b: Array) -> float:
"""Test degree of non-commutativity for two hypervectors.
Computes: similarity(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args:
a: First hypervector
b: Second hypervector
Returns:
Similarity between a⊗b and b⊗a (should be low for VTB)
"""
ab = self.bind(a, b)
ba = self.bind(b, a)
return self.similarity(ab, ba)
def bind_sequence(self, items: Sequence[Array], use_permute: bool = True) -> Array:
"""Bind a sequence of items with positional encoding.
Two strategies:
1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)
Args:
items: Sequence of hypervectors to bind
use_permute: If True, use permutation strategy; else nested binding
Returns:
Sequence hypervector
Raises:
ValueError: If items is empty
"""
if not items:
raise ValueError("Cannot bind empty sequence")
items = list(items)
if use_permute:
# Strategy 1: Bind each item with permuted position vector
pos = self.random(seed=42) # Fixed position vector
bound_items = []
for i, item in enumerate(items):
permuted_pos = self.permute(pos, k=i)
bound_items.append(self.bind(item, permuted_pos))
return self.bundle(bound_items)
else:
# Strategy 2: Nested binding (naturally non-commutative)
result = items[0]
for item in items[1:]:
result = self.bind(result, item)
return result
def __repr__(self) -> str:
return (f"VTBModel(dimension={self.dimension}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})")
__init__(dimension=10000, space=None, backend=None, seed=None, n_bases=4, shifts=None, temperature=100.0)
Initialize VTB model.
Args:
- dimension: Dimensionality of hypervectors
- space: Vector space (defaults to RealSpace)
- backend: Computational backend
- seed: Random seed for space
- n_bases: Number of basis transforms K (must be >= 2)
- shifts: Optional list of K circular shifts; generated if None
- temperature: Softmax temperature τ for the basis weights
Source code in holovec/models/vtb.py
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
n_bases: int = 4,
shifts: list[int] | None = None,
temperature: float = 100.0,
):
"""Initialize VTB model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to RealSpace)
backend: Computational backend
        seed: Random seed for space
        n_bases: Number of basis transforms K (must be >= 2)
        shifts: Optional list of K circular shifts; generated if None
        temperature: Softmax temperature τ for the basis weights
    """
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = RealSpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
# MBAT parameters
self.n_bases = int(n_bases)
if self.n_bases < 2:
raise ValueError("n_bases must be >= 2")
self.temperature = float(temperature)
if self.temperature <= 0:
self.temperature = 1.0
# Basis transformations: use integer circular shifts as R_k
if shifts is None:
# choose distinct small shifts spread across dimension
step = max(1, self.dimension // (self.n_bases + 1))
self.shifts = [((i + 1) * step) % self.dimension for i in range(self.n_bases)]
# ensure non-zero and unique
self.shifts = [s if s != 0 else 1 for s in self.shifts]
self.shifts = list(dict.fromkeys(self.shifts))
while len(self.shifts) < self.n_bases:
# fill with incremental shifts
self.shifts.append((self.shifts[-1] + 1) % self.dimension)
else:
if len(shifts) != self.n_bases:
raise ValueError("len(shifts) must equal n_bases")
self.shifts = [int(s) % self.dimension for s in shifts]
# Code vectors U_k to produce weights w_k(a) = softmax(τ · <a, U_k>)
# Stack shape (K, D)
self._U = self.backend.stack([
self.backend.normalize(self.backend.random_normal(self.dimension, seed=(seed or 0) + k))
for k in range(self.n_bases)
], axis=0)
bind(a, b)
Bind using MBAT-style weighted basis transforms.
c = Σ_k w_k(a) · roll(b, s_k)
Source code in holovec/models/vtb.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using MBAT-style weighted basis transforms.
c = Σ_k w_k(a) · roll(b, s_k)
"""
# Derive transform from a to act on b
w = self._weights(a) # (K,)
# accumulate weighted shifts
parts = []
for k, shift in enumerate(self.shifts):
wk = w[k]
rb = self.backend.roll(b, shift=shift)
parts.append(self.backend.multiply_scalar(rb, float(self.backend.to_numpy(wk))))
result = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
return self.normalize(result)
bind_sequence(items, use_permute=True)
Bind a sequence of items with positional encoding.
Two strategies:
1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)
Args:
- items: Sequence of hypervectors to bind
- use_permute: If True, use permutation strategy; else nested binding
Returns: Sequence hypervector
Raises: ValueError: If items is empty
Source code in holovec/models/vtb.py
def bind_sequence(self, items: Sequence[Array], use_permute: bool = True) -> Array:
"""Bind a sequence of items with positional encoding.
Two strategies:
1. With permutation: c = a₁ ⊗ ρ⁰(pos) + a₂ ⊗ ρ¹(pos) + ...
2. Without permutation: c = (...((a₁ ⊗ a₂) ⊗ a₃)...) (nested binding)
Args:
items: Sequence of hypervectors to bind
use_permute: If True, use permutation strategy; else nested binding
Returns:
Sequence hypervector
Raises:
ValueError: If items is empty
"""
if not items:
raise ValueError("Cannot bind empty sequence")
items = list(items)
if use_permute:
# Strategy 1: Bind each item with permuted position vector
pos = self.random(seed=42) # Fixed position vector
bound_items = []
for i, item in enumerate(items):
permuted_pos = self.permute(pos, k=i)
bound_items.append(self.bind(item, permuted_pos))
return self.bundle(bound_items)
else:
# Strategy 2: Nested binding (naturally non-commutative)
result = items[0]
for item in items[1:]:
result = self.bind(result, item)
return result
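A brief usage sketch of the two strategies (assumes the VSA factory shown at the top of this reference; similarity values are indicative only):

from holovec import VSA

model = VSA.create('VTB', dim=10000, seed=0)
items = [model.random(seed=s) for s in (1, 2, 3)]

seq_pos = model.bind_sequence(items)                        # positional encoding
seq_nested = model.bind_sequence(items, use_permute=False)  # nested binding

# Nested binding is order-sensitive because VTB is non-commutative
rev = model.bind_sequence(items[::-1], use_permute=False)
print(model.similarity(seq_nested, rev))  # expected to be low for a reversed sequence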
bundle(vectors)
Bundle using element-wise addition.
Sum all hypervectors element-wise and normalize.
Args: vectors: Sequence of hypervectors to bundle
Returns: Bundled hypervector
Raises: ValueError: If vectors is empty
Source code in holovec/models/vtb.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition.
Sum all hypervectors element-wise and normalize.
Args:
vectors: Sequence of hypervectors to bundle
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors
result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Normalize to unit length
return self.normalize(result)
permute(vec, k=1)
Permute using circular shift.
Shifts vector elements by k positions. Combined with binding, this can encode position in sequences.
Args:
- vec: Hypervector to permute
- k: Number of positions to shift (default: 1)
Returns: Permuted hypervector
Source code in holovec/models/vtb.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions. Combined with binding,
this can encode position in sequences.
Args:
vec: Hypervector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted hypervector
"""
return self.backend.roll(vec, shift=k)
test_non_commutativity(a, b)
Test degree of non-commutativity for two hypervectors.
Computes: similarity(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args:
- a: First hypervector
- b: Second hypervector
Returns: Similarity between a⊗b and b⊗a (should be low for VTB)
Source code in holovec/models/vtb.py
def test_non_commutativity(self, a: Array, b: Array) -> float:
"""Test degree of non-commutativity for two hypervectors.
Computes: similarity(a ⊗ b, b ⊗ a)
A similarity of 1.0 means commutative, close to 0 means non-commutative.
Args:
a: First hypervector
b: Second hypervector
Returns:
Similarity between a⊗b and b⊗a (should be low for VTB)
"""
ab = self.bind(a, b)
ba = self.bind(b, a)
return self.similarity(ab, ba)
unbind(c, b)
Approximate unbinding using weighted inverse transforms.
IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b). You must pass the FIRST argument of bind (a) as the second argument here.
For c = bind(a, b):
- unbind(c, a) → recovers b (correct usage)
- unbind(c, b) → does NOT recover a
b̂ = Σ_k w_k(key) · roll(c, -s_k), where key is bind's first argument (passed as this method's second parameter)
Source code in holovec/models/vtb.py
def unbind(self, c: Array, b: Array) -> Array:
"""Approximate unbinding using weighted inverse transforms.
IMPORTANT: Due to non-commutativity, this recovers b from c = bind(a, b).
You must pass the FIRST argument of bind (a) as the second argument here.
For c = bind(a, b):
- unbind(c, a) → recovers b (correct usage)
- unbind(c, b) → does NOT recover a
b̂ = Σ_k w_k(key) · roll(c, -s_k), where key is bind's first argument (passed as this method's second parameter)
"""
# Re-derive the transform weights from the key (bind's first argument)
w = self._weights(b)
parts = []
for k, shift in enumerate(self.shifts):
wk = w[k]
rc = self.backend.roll(c, shift=-shift)
parts.append(self.backend.multiply_scalar(rc, float(self.backend.to_numpy(wk))))
num = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
# Denominator as sum of squared weights (scalar)
w_np = self.backend.to_numpy(w)
denom = float((w_np ** 2).sum()) + 1e-8
result = self.backend.multiply_scalar(num, 1.0 / denom)
return self.normalize(result)
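Because the argument order is easy to get wrong, a short usage sketch may help (assumes the VSA factory from the top of this reference; the printed values are indicative, not guaranteed):

from holovec import VSA

model = VSA.create('VTB', dim=10000, seed=0)
a, b = model.random(seed=1), model.random(seed=2)
c = model.bind(a, b)

# Correct: weights are re-derived from bind's FIRST argument
print(model.similarity(b, model.unbind(c, a)))  # high: recovers b
# Incorrect order: does NOT recover a
print(model.similarity(a, model.unbind(c, b)))  # near chance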
BSC
holovec.models.bsc.BSCModel
Bases: VSAModel
BSC (Binary Spatter Codes) model.
Binding: XOR
Unbinding: XOR (self-inverse)
Bundling: element-wise addition + majority vote
Permutation: circular shift
Uses BinarySpace with values in {0, 1}.
Source code in holovec/models/bsc.py
class BSCModel(VSAModel):
"""BSC (Binary Spatter Codes) model.
Binding: XOR
Unbinding: XOR (self-inverse)
Bundling: element-wise addition + majority vote
Permutation: circular shift
Uses BinarySpace with values in {0, 1}.
"""
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None
):
"""Initialize BSC model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to BinarySpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = BinarySpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
@property
def model_name(self) -> str:
return "BSC"
@property
def is_self_inverse(self) -> bool:
return True # XOR is self-inverse
@property
def is_commutative(self) -> bool:
return True # XOR is commutative
@property
def is_exact_inverse(self) -> bool:
return True # XOR provides exact inverse
def bind(self, a: Array, b: Array) -> Array:
"""Bind using XOR.
For binary vectors: a XOR b
Property: a XOR b XOR b = a (self-inverse)
Args:
a: First vector (binary {0, 1})
b: Second vector (binary {0, 1})
Returns:
Bound vector c = a XOR b
"""
return self.backend.xor(a, b)
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using XOR (self-inverse).
Since XOR is self-inverse: unbind(c, b) = c XOR b
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact recovery)
"""
# For BSC, binding = unbinding (self-inverse)
return self.bind(a, b)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition + majority vote.
Sum all binary vectors element-wise, then threshold at n/2
where n is the number of vectors.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (binary {0, 1})
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
n = len(vectors)
# Sum all vectors (each element is 0 or 1)
summed = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Majority vote: threshold at n/2
threshold = n / 2.0
result = self.backend.threshold(summed, threshold=threshold, above=1.0, below=0.0)
# Ensure binary dtype
return result.astype(self.space.dtype) if hasattr(result, 'astype') else result
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
def to_bipolar(self, vec: Array) -> Array:
"""Convert binary {0, 1} to bipolar {-1, +1}.
Transformation: x → 2x - 1
Args:
vec: Binary vector
Returns:
Bipolar vector
"""
return 2 * vec - 1
def from_bipolar(self, vec: Array) -> Array:
"""Convert bipolar {-1, +1} to binary {0, 1}.
Transformation: x → (x + 1) / 2
Args:
vec: Bipolar vector
Returns:
Binary vector
"""
return (vec + 1) / 2
def __repr__(self) -> str:
return (f"BSCModel(dimension={self.dimension}, "
f"space={self.space.space_name}, "
f"backend={self.backend.name})")
__init__(dimension=10000, space=None, backend=None, seed=None)
Initialize BSC model.
Args:
- dimension: Dimensionality of hypervectors
- space: Vector space (defaults to BinarySpace)
- backend: Computational backend
- seed: Random seed for space
Source code in holovec/models/bsc.py
def __init__(
self,
dimension: int = 10000,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None
):
"""Initialize BSC model.
Args:
dimension: Dimensionality of hypervectors
space: Vector space (defaults to BinarySpace)
backend: Computational backend
seed: Random seed for space
"""
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = BinarySpace(dimension, backend=backend, seed=seed)
super().__init__(space, backend)
bind(a, b)
Bind using XOR.
For binary vectors: a XOR b
Property: a XOR b XOR b = a (self-inverse)
Args:
- a: First vector (binary {0, 1})
- b: Second vector (binary {0, 1})
Returns: Bound vector c = a XOR b
Source code in holovec/models/bsc.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using XOR.
For binary vectors: a XOR b
Property: a XOR b XOR b = a (self-inverse)
Args:
a: First vector (binary {0, 1})
b: Second vector (binary {0, 1})
Returns:
Bound vector c = a XOR b
"""
return self.backend.xor(a, b)
bundle(vectors)
Bundle using element-wise addition + majority vote.
Sum all binary vectors element-wise, then threshold at n/2 where n is the number of vectors.
Args: vectors: Sequence of vectors to bundle
Returns: Bundled vector (binary {0, 1})
Raises: ValueError: If vectors is empty
Source code in holovec/models/bsc.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Bundle using element-wise addition + majority vote.
Sum all binary vectors element-wise, then threshold at n/2
where n is the number of vectors.
Args:
vectors: Sequence of vectors to bundle
Returns:
Bundled vector (binary {0, 1})
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
n = len(vectors)
# Sum all vectors (each element is 0 or 1)
summed = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
# Majority vote: threshold at n/2
threshold = n / 2.0
result = self.backend.threshold(summed, threshold=threshold, above=1.0, below=0.0)
# Ensure binary dtype
return result.astype(self.space.dtype) if hasattr(result, 'astype') else result
from_bipolar(vec)
Convert bipolar {-1, +1} to binary {0, 1}.
Transformation: x → (x + 1) / 2
Args: vec: Bipolar vector
Returns: Binary vector
Source code in holovec/models/bsc.py
def from_bipolar(self, vec: Array) -> Array:
"""Convert bipolar {-1, +1} to binary {0, 1}.
Transformation: x → (x + 1) / 2
Args:
vec: Bipolar vector
Returns:
Binary vector
"""
return (vec + 1) / 2
permute(vec, k=1)
Permute using circular shift.
Shifts vector elements by k positions to the right. Negative k shifts left.
Args:
- vec: Vector to permute
- k: Number of positions to shift
Returns: Permuted vector
Source code in holovec/models/bsc.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions to the right.
Negative k shifts left.
Args:
vec: Vector to permute
k: Number of positions to shift
Returns:
Permuted vector
"""
return self.backend.roll(vec, shift=k)
to_bipolar(vec)
Convert binary {0, 1} to bipolar {-1, +1}.
Transformation: x → 2x - 1
Args: vec: Binary vector
Returns: Bipolar vector
Source code in holovec/models/bsc.py
def to_bipolar(self, vec: Array) -> Array:
"""Convert binary {0, 1} to bipolar {-1, +1}.
Transformation: x → 2x - 1
Args:
vec: Binary vector
Returns:
Bipolar vector
"""
return 2 * vec - 1
unbind(a, b)
Unbind using XOR (self-inverse).
Since XOR is self-inverse: unbind(c, b) = c XOR b
Args:
- a: Bound vector (or first operand)
- b: Second operand
Returns: Unbound vector (exact recovery)
Source code in holovec/models/bsc.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using XOR (self-inverse).
Since XOR is self-inverse: unbind(c, b) = c XOR b
Args:
a: Bound vector (or first operand)
b: Second operand
Returns:
Unbound vector (exact recovery)
"""
# For BSC, binding = unbinding (self-inverse)
return self.bind(a, b)
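Because XOR is exactly self-inverse, BSC recovery is exact rather than approximate. A brief usage sketch (assumes the VSA factory; the exact-recovery similarity should be 1.0 by construction, while the bundling similarity is only indicative):

from holovec import VSA

model = VSA.create('BSC', dim=10000, seed=0)
a, b = model.random(seed=1), model.random(seed=2)

c = model.bind(a, b)                            # a XOR b
print(model.similarity(a, model.unbind(c, b)))  # expected 1.0: exact self-inverse

# Majority-vote bundling keeps components retrievable by similarity
items = [model.random(seed=s) for s in range(3, 8)]
bundled = model.bundle(items)
print(model.similarity(items[0], bundled))      # clearly above an unrelated pair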
BSDC
holovec.models.bsdc.BSDCModel
Bases: VSAModel
BSDC (Binary Sparse Distributed Codes) model.
Binding: XOR (element-wise, self-inverse) or CDT (context-dependent thinning)
Unbinding: XOR (same as binding) or similarity-based (CDT)
Bundling: Majority voting with sparsity preservation
Permutation: circular shift
Uses SparseSpace with optimal sparsity p = 1/√D.
Binding Modes:
- 'xor': Traditional XOR binding. Self-inverse, result dissimilar to inputs.
- 'cdt': Context-Dependent Thinning (Rachkovskij 2001). Preserves both structured similarity (similar inputs → similar outputs) and unstructured similarity (result similar to its components).
Example:
>>> # Default XOR mode
>>> model = BSDCModel(dimension=10000)
>>>
>>> # CDT mode for analogical reasoning
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
Source code in holovec/models/bsdc.py
class BSDCModel(VSAModel):
"""BSDC (Binary Sparse Distributed Codes) model.
Binding: XOR (element-wise, self-inverse) or CDT (context-dependent thinning)
Unbinding: XOR (same as binding) or similarity-based (CDT)
Bundling: Majority voting with sparsity preservation
Permutation: circular shift
Uses SparseSpace with optimal sparsity p = 1/√D.
Binding Modes:
- 'xor': Traditional XOR binding. Self-inverse, result dissimilar to inputs.
- 'cdt': Context-Dependent Thinning (Rachkovskij 2001). Preserves both
structured similarity (similar inputs → similar outputs) and unstructured
similarity (result similar to its components).
Example:
>>> # Default XOR mode
>>> model = BSDCModel(dimension=10000)
>>>
>>> # CDT mode for analogical reasoning
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
"""
def __init__(
self,
dimension: int = 10000,
sparsity: float | None = None,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
binding_mode: str = 'xor',
):
"""Initialize BSDC model.
Args:
dimension: Dimensionality of hypervectors (typically > 1000)
sparsity: Fraction of 1s (default: 1/√D which is optimal)
space: Vector space (defaults to SparseSpace with optimal sparsity)
backend: Computational backend
seed: Random seed for space
binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning
"""
if binding_mode not in ('xor', 'cdt'):
raise ValueError(f"binding_mode must be 'xor' or 'cdt', got '{binding_mode}'")
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = SparseSpace(dimension, sparsity=sparsity, backend=backend, seed=seed)
super().__init__(space, backend)
self.binding_mode = binding_mode
self._seed = seed
# Store sparsity for easy access
if isinstance(space, SparseSpace):
self.sparsity = space.sparsity
else:
# Fallback if using non-sparse space
import math
self.sparsity = sparsity if sparsity is not None else 1.0 / math.sqrt(dimension)
# Pre-generate permutation patterns for CDT
if binding_mode == 'cdt':
self._cdt_permutations = self._generate_cdt_permutations()
@property
def model_name(self) -> str:
return "BSDC"
@property
def is_self_inverse(self) -> bool:
return self.binding_mode == 'xor' # Only XOR is self-inverse
@property
def is_commutative(self) -> bool:
return True # Both XOR and CDT are commutative
@property
def is_exact_inverse(self) -> bool:
return self.binding_mode == 'xor' # Only XOR has exact inverse
def _generate_cdt_permutations(self, n_permutations: int = 20) -> list:
"""Generate fixed permutation patterns for CDT thinning.
Args:
n_permutations: Number of permutation patterns to generate
Returns:
List of permutation index arrays
"""
rng = np.random.default_rng(self._seed if self._seed is not None else 42)
return [
self.backend.array(rng.permutation(self.dimension).tolist(), dtype='int64')
for _ in range(n_permutations)
]
def _compute_thinning_iterations(
self,
n_components: int,
current_density: float,
) -> int:
"""Compute K iterations needed to reach target sparsity.
The CDT algorithm thins a superposition by applying permuted self-conjunction.
After OR of S components: p(Z) ≈ 1 - (1-p)^S ≈ p*S (for small p)
We need K iterations to reduce back to target sparsity.
Args:
n_components: Number of components in superposition
current_density: Current density after OR superposition
Returns:
Number of thinning iterations K
"""
import math
if current_density <= self.sparsity:
return 0
# From Rachkovskij 2001:
# p(Z ∧ Z^~) ≈ p(Z)^2 for random permutations
# After K iterations with OR of permutations:
# Expected density ≈ current_density * (density of OR of K permuted copies)
# We want: current_density * OR_density ≈ target_sparsity
# Simplified: K ≈ target_sparsity / current_density^2
K = max(1, int(math.ceil(self.sparsity / (current_density ** 2))))
return min(K, len(self._cdt_permutations))
def context_dependent_thinning(
self,
components: Sequence[Array],
) -> Array:
"""Bind components using context-dependent thinning (CDT).
Algorithm (Rachkovskij 2001):
1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
2. Thin via permuted self-conjunction:
⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
Properties:
- Preserves unstructured similarity: result is similar to each component
- Preserves structured similarity: similar inputs → similar outputs
- Maintains target sparsity automatically
Args:
components: Sequence of hypervectors to bind together
Returns:
Bound hypervector with preserved similarity to components
Example:
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
>>> a, b, c = model.random(), model.random(), model.random()
>>> bound = model.context_dependent_thinning([a, b, c])
>>> # bound is similar to a, b, and c (unstructured similarity)
"""
if not components:
raise ValueError("Cannot bind empty sequence")
components = list(components)
if len(components) == 1:
return components[0].copy() if hasattr(components[0], 'copy') else components[0]
# Step 1: Superpose via OR
superposed = self.backend.sum(self.backend.stack(components, axis=0), axis=0)
z = self.backend.astype(
self.backend.threshold(superposed, threshold=0.5, above=1, below=0),
'int32',
)
# Step 2: Compute required thinning iterations
current_density = float(self.backend.to_numpy(self.backend.sum(z))) / self.dimension
K = self._compute_thinning_iterations(len(components), current_density)
if K == 0:
# Already at or below target sparsity
return z
# Step 3: Thin via permuted self-conjunction
# ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
permuted = [
self.backend.permute(z, self._cdt_permutations[k % len(self._cdt_permutations)])
for k in range(K)
]
permuted_sum = self.backend.sum(self.backend.stack(permuted, axis=0), axis=0)
permuted_or = self.backend.astype(
self.backend.threshold(permuted_sum, threshold=0.5, above=1, below=0),
'int32',
)
result = self.backend.add(z, permuted_or)
return self.backend.astype(
self.backend.threshold(result, threshold=1.5, above=1, below=0),
'int32',
)
def bind(self, a: Array, b: Array) -> Array:
"""Bind two hypervectors.
Behavior depends on binding_mode:
- 'xor': XOR binding (self-inverse, result dissimilar to inputs)
- 'cdt': Context-dependent thinning (preserves similarity to inputs)
For XOR mode:
    - Keeps the result sparse: density p(1-p) + (1-p)p = 2p(1-p), about double the input
- For optimal p = 1/√D, result sparsity ≈ 2/√D
For CDT mode:
- Result is similar to both a and b (unstructured similarity)
- Similar inputs produce similar outputs (structured similarity)
Args:
a: First hypervector
b: Second hypervector
Returns:
Bound hypervector
"""
if self.binding_mode == 'cdt':
return self.context_dependent_thinning([a, b])
else:
# XOR binding (default)
return self.backend.xor(a, b)
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind to recover value.
Behavior depends on binding_mode:
- 'xor': XOR is self-inverse, exact recovery: unbind(bind(a, b), b) = a
- 'cdt': No inverse exists; returns the bound vector itself since it's
already similar to the components (use similarity search for retrieval)
Args:
a: Bound hypervector (or first operand)
b: Second operand (key for XOR mode, ignored for CDT mode)
Returns:
For XOR: Exact unbound hypervector
For CDT: The bound vector (use similarity search to find components)
"""
if self.binding_mode == 'cdt':
# CDT doesn't have an inverse operation
# The bound vector is already similar to its components,
# so return it for similarity-based retrieval
return a
else:
# XOR is self-inverse
return self.backend.xor(a, b)
def bundle(self, vectors: Sequence[Array], maintain_sparsity: bool = True) -> Array:
"""Bundle using majority voting.
For sparse codes, bundling requires careful handling to maintain sparsity:
1. Sum all vectors element-wise
2. Apply threshold to get binary result
3. Optionally re-sparsify to maintain target sparsity
Args:
vectors: Sequence of hypervectors to bundle
maintain_sparsity: If True, enforce target sparsity (default: True)
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (counts how many 1s at each position)
sum_result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
if maintain_sparsity:
# Strategy: Take top-k positions with highest counts
# where k ≈ sparsity * dimension
sum_np = self.backend.to_numpy(sum_result)
target_ones = int(self.sparsity * self.dimension)
# Get indices of top-k values
if target_ones > 0:
# Use argpartition for efficiency (O(n) instead of O(n log n))
threshold_idx = max(0, len(sum_np) - target_ones)
threshold = np.partition(sum_np, threshold_idx)[threshold_idx]
# Set positions >= threshold to 1, rest to 0
result_np = (sum_np >= threshold).astype(np.int32)
# If we have ties at the threshold, we might have slightly more
# than target_ones. This is acceptable for maintaining sparsity.
return self.backend.from_numpy(result_np)
else:
# No ones in result (edge case)
return self.backend.zeros(self.dimension, dtype='int32')
else:
# Simple majority voting: threshold at N/2
threshold = len(vectors) / 2.0
result = self.backend.threshold(sum_result, threshold=threshold, above=1, below=0)
return self.backend.astype(result, 'int32')
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions. For sparse codes,
this maintains sparsity perfectly.
Args:
vec: Hypervector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted hypervector
"""
return self.backend.roll(vec, shift=k, axis=0)
def measure_sparsity(self, vec: Array) -> float:
"""Measure actual sparsity of a vector.
Args:
vec: Hypervector to measure
Returns:
Fraction of 1s in the vector
"""
vec_np = self.backend.to_numpy(vec)
count_ones = np.sum(vec_np)
return float(count_ones) / len(vec_np)
def rehash(self, vec: Array) -> Array:
"""Rehash vector to restore optimal sparsity.
Useful after multiple operations that may have changed sparsity.
Randomly selects positions to maintain target sparsity while
preserving as much similarity as possible.
Args:
vec: Hypervector to rehash
Returns:
Rehashed hypervector with target sparsity
"""
vec_np = self.backend.to_numpy(vec)
target_ones = int(self.sparsity * self.dimension)
# Get current 1 positions
current_ones = np.where(vec_np == 1)[0]
current_count = len(current_ones)
if current_count == target_ones:
# Already at target sparsity
return vec
elif current_count > target_ones:
# Too many 1s: randomly remove some
keep_indices = np.random.choice(
current_ones, size=target_ones, replace=False
)
result = np.zeros_like(vec_np)
result[keep_indices] = 1
else:
# Too few 1s: randomly add some
current_zeros = np.where(vec_np == 0)[0]
add_count = target_ones - current_count
add_indices = np.random.choice(
current_zeros, size=add_count, replace=False
)
result = vec_np.copy()
result[add_indices] = 1
return self.backend.from_numpy(result.astype(np.int32))
def encode_sequence(
self,
items: Sequence[Array],
use_ngrams: bool = False,
n: int = 2
) -> Array:
"""Encode sequence of items.
Two strategies:
1. Position binding: item_i ⊗ ρⁱ(position)
2. N-grams: Bundle all n-grams in sequence
Args:
items: Sequence of hypervectors
use_ngrams: If True, use n-gram encoding (default: False)
n: N-gram size (default: 2 for bigrams)
Returns:
Sequence hypervector
Raises:
ValueError: If items is empty
"""
if not items:
raise ValueError("Cannot encode empty sequence")
items = list(items)
if use_ngrams:
# N-gram encoding
if len(items) < n:
# Sequence too short for n-grams, fall back to simple bundle
return self.bundle(items)
ngrams = []
for i in range(len(items) - n + 1):
# Create n-gram by binding n consecutive items
ngram = items[i]
for j in range(1, n):
ngram = self.bind(ngram, items[i + j])
ngrams.append(ngram)
return self.bundle(ngrams)
else:
# Position binding encoding
pos = self.random(seed=42) # Fixed position vector
bound_items = []
for i, item in enumerate(items):
permuted_pos = self.permute(pos, k=i)
bound_items.append(self.bind(item, permuted_pos))
return self.bundle(bound_items)
def __repr__(self) -> str:
return (f"BSDCModel(dimension={self.dimension}, "
f"sparsity={self.sparsity:.4f}, "
f"binding_mode='{self.binding_mode}', "
f"space={self.space.space_name}, "
f"backend={self.backend.name})")
__init__(dimension=10000, sparsity=None, space=None, backend=None, seed=None, binding_mode='xor')
Initialize BSDC model.
Args:
- dimension: Dimensionality of hypervectors (typically > 1000)
- sparsity: Fraction of 1s (default: 1/√D which is optimal)
- space: Vector space (defaults to SparseSpace with optimal sparsity)
- backend: Computational backend
- seed: Random seed for space
- binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning
Source code in holovec/models/bsdc.py
def __init__(
self,
dimension: int = 10000,
sparsity: float | None = None,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
binding_mode: str = 'xor',
):
"""Initialize BSDC model.
Args:
dimension: Dimensionality of hypervectors (typically > 1000)
sparsity: Fraction of 1s (default: 1/√D which is optimal)
space: Vector space (defaults to SparseSpace with optimal sparsity)
backend: Computational backend
seed: Random seed for space
binding_mode: 'xor' (default) or 'cdt' for context-dependent thinning
"""
if binding_mode not in ('xor', 'cdt'):
raise ValueError(f"binding_mode must be 'xor' or 'cdt', got '{binding_mode}'")
if space is None:
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = SparseSpace(dimension, sparsity=sparsity, backend=backend, seed=seed)
super().__init__(space, backend)
self.binding_mode = binding_mode
self._seed = seed
# Store sparsity for easy access
if isinstance(space, SparseSpace):
self.sparsity = space.sparsity
else:
# Fallback if using non-sparse space
import math
self.sparsity = sparsity if sparsity is not None else 1.0 / math.sqrt(dimension)
# Pre-generate permutation patterns for CDT
if binding_mode == 'cdt':
self._cdt_permutations = self._generate_cdt_permutations()
bind(a, b)
Bind two hypervectors.
Behavior depends on binding_mode:
- 'xor': XOR binding (self-inverse, result dissimilar to inputs)
- 'cdt': Context-dependent thinning (preserves similarity to inputs)
For XOR mode:
- Keeps the result sparse: density p(1-p) + (1-p)p = 2p(1-p), about double the input
- For optimal p = 1/√D, result sparsity ≈ 2/√D
For CDT mode:
- Result is similar to both a and b (unstructured similarity)
- Similar inputs produce similar outputs (structured similarity)
Args:
- a: First hypervector
- b: Second hypervector
Returns: Bound hypervector
Source code in holovec/models/bsdc.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind two hypervectors.
Behavior depends on binding_mode:
- 'xor': XOR binding (self-inverse, result dissimilar to inputs)
- 'cdt': Context-dependent thinning (preserves similarity to inputs)
For XOR mode:
    - Keeps the result sparse: density p(1-p) + (1-p)p = 2p(1-p), about double the input
- For optimal p = 1/√D, result sparsity ≈ 2/√D
For CDT mode:
- Result is similar to both a and b (unstructured similarity)
- Similar inputs produce similar outputs (structured similarity)
Args:
a: First hypervector
b: Second hypervector
Returns:
Bound hypervector
"""
if self.binding_mode == 'cdt':
return self.context_dependent_thinning([a, b])
else:
# XOR binding (default)
return self.backend.xor(a, b)
bundle(vectors, maintain_sparsity=True)
Bundle using majority voting.
For sparse codes, bundling requires careful handling to maintain sparsity:
1. Sum all vectors element-wise
2. Apply threshold to get binary result
3. Optionally re-sparsify to maintain target sparsity
Args:
- vectors: Sequence of hypervectors to bundle
- maintain_sparsity: If True, enforce target sparsity (default: True)
Returns: Bundled hypervector
Raises: ValueError: If vectors is empty
Source code in holovec/models/bsdc.py
def bundle(self, vectors: Sequence[Array], maintain_sparsity: bool = True) -> Array:
"""Bundle using majority voting.
For sparse codes, bundling requires careful handling to maintain sparsity:
1. Sum all vectors element-wise
2. Apply threshold to get binary result
3. Optionally re-sparsify to maintain target sparsity
Args:
vectors: Sequence of hypervectors to bundle
maintain_sparsity: If True, enforce target sparsity (default: True)
Returns:
Bundled hypervector
Raises:
ValueError: If vectors is empty
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
vectors = list(vectors)
# Sum all vectors (counts how many 1s at each position)
sum_result = self.backend.sum(self.backend.stack(vectors, axis=0), axis=0)
if maintain_sparsity:
# Strategy: Take top-k positions with highest counts
# where k ≈ sparsity * dimension
sum_np = self.backend.to_numpy(sum_result)
target_ones = int(self.sparsity * self.dimension)
# Get indices of top-k values
if target_ones > 0:
# Use argpartition for efficiency (O(n) instead of O(n log n))
threshold_idx = max(0, len(sum_np) - target_ones)
threshold = np.partition(sum_np, threshold_idx)[threshold_idx]
# Set positions >= threshold to 1, rest to 0
result_np = (sum_np >= threshold).astype(np.int32)
# If we have ties at the threshold, we might have slightly more
# than target_ones. This is acceptable for maintaining sparsity.
return self.backend.from_numpy(result_np)
else:
# No ones in result (edge case)
return self.backend.zeros(self.dimension, dtype='int32')
else:
# Simple majority voting: threshold at N/2
threshold = len(vectors) / 2.0
result = self.backend.threshold(sum_result, threshold=threshold, above=1, below=0)
return self.backend.astype(result, 'int32')
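The difference between the two branches is easy to observe with measure_sparsity (documented further below). A sketch, assuming the default optimal sparsity p = 1/√D ≈ 0.01 at D = 10000; note that with very sparse inputs the top-k threshold tie-breaking can overshoot the target, while a strict n/2 majority leaves almost no 1s:

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000, seed=0)
items = [model.random(seed=s) for s in range(5)]

topk = model.bundle(items)                               # top-k re-sparsification
majority = model.bundle(items, maintain_sparsity=False)  # strict majority at n/2
print(model.measure_sparsity(topk))      # kept sparse; threshold ties may overshoot
print(model.measure_sparsity(majority))  # ≈ 0: few positions reach 3-of-5 ones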
context_dependent_thinning(components)
Bind components using context-dependent thinning (CDT).
Algorithm (Rachkovskij 2001):
1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
2. Thin via permuted self-conjunction: ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
Properties:
- Preserves unstructured similarity: result is similar to each component
- Preserves structured similarity: similar inputs → similar outputs
- Maintains target sparsity automatically
Args: components: Sequence of hypervectors to bind together
Returns: Bound hypervector with preserved similarity to components
Example:
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
>>> a, b, c = model.random(), model.random(), model.random()
>>> bound = model.context_dependent_thinning([a, b, c])
>>> # bound is similar to a, b, and c (unstructured similarity)
Source code in holovec/models/bsdc.py
def context_dependent_thinning(
self,
components: Sequence[Array],
) -> Array:
"""Bind components using context-dependent thinning (CDT).
Algorithm (Rachkovskij 2001):
1. Superpose components via OR: Z = X₁ ∨ X₂ ∨ ... ∨ Xₛ
2. Thin via permuted self-conjunction:
⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
Properties:
- Preserves unstructured similarity: result is similar to each component
- Preserves structured similarity: similar inputs → similar outputs
- Maintains target sparsity automatically
Args:
components: Sequence of hypervectors to bind together
Returns:
Bound hypervector with preserved similarity to components
Example:
>>> model = BSDCModel(dimension=10000, binding_mode='cdt')
>>> a, b, c = model.random(), model.random(), model.random()
>>> bound = model.context_dependent_thinning([a, b, c])
>>> # bound is similar to a, b, and c (unstructured similarity)
"""
if not components:
raise ValueError("Cannot bind empty sequence")
components = list(components)
if len(components) == 1:
return components[0].copy() if hasattr(components[0], 'copy') else components[0]
# Step 1: Superpose via OR
superposed = self.backend.sum(self.backend.stack(components, axis=0), axis=0)
z = self.backend.astype(
self.backend.threshold(superposed, threshold=0.5, above=1, below=0),
'int32',
)
# Step 2: Compute required thinning iterations
current_density = float(self.backend.to_numpy(self.backend.sum(z))) / self.dimension
K = self._compute_thinning_iterations(len(components), current_density)
if K == 0:
# Already at or below target sparsity
return z
# Step 3: Thin via permuted self-conjunction
# ⟨Z⟩ = Z ∧ (Z^~(1) ∨ Z^~(2) ∨ ... ∨ Z^~(K))
permuted = [
self.backend.permute(z, self._cdt_permutations[k % len(self._cdt_permutations)])
for k in range(K)
]
permuted_sum = self.backend.sum(self.backend.stack(permuted, axis=0), axis=0)
permuted_or = self.backend.astype(
self.backend.threshold(permuted_sum, threshold=0.5, above=1, below=0),
'int32',
)
result = self.backend.add(z, permuted_or)
return self.backend.astype(
self.backend.threshold(result, threshold=1.5, above=1, below=0),
'int32',
)
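The practical difference between the two binding modes is whether the bound vector stays similar to its inputs. A sketch (similarity values are indicative and depend on dimension and sparsity):

from holovec.models.bsdc import BSDCModel

xor_model = BSDCModel(dimension=10000, seed=0, binding_mode='xor')
cdt_model = BSDCModel(dimension=10000, seed=0, binding_mode='cdt')

a, b = cdt_model.random(seed=1), cdt_model.random(seed=2)
print(xor_model.similarity(a, xor_model.bind(a, b)))  # near chance: XOR hides inputs
print(cdt_model.similarity(a, cdt_model.bind(a, b)))  # clearly higher: CDT preserves it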
encode_sequence(items, use_ngrams=False, n=2)
Encode sequence of items.
Two strategies:
1. Position binding: item_i ⊗ ρⁱ(position)
2. N-grams: Bundle all n-grams in sequence
Args:
- items: Sequence of hypervectors
- use_ngrams: If True, use n-gram encoding (default: False)
- n: N-gram size (default: 2 for bigrams)
Returns: Sequence hypervector
Raises: ValueError: If items is empty
Source code in holovec/models/bsdc.py
def encode_sequence(
self,
items: Sequence[Array],
use_ngrams: bool = False,
n: int = 2
) -> Array:
"""Encode sequence of items.
Two strategies:
1. Position binding: item_i ⊗ ρⁱ(position)
2. N-grams: Bundle all n-grams in sequence
Args:
items: Sequence of hypervectors
use_ngrams: If True, use n-gram encoding (default: False)
n: N-gram size (default: 2 for bigrams)
Returns:
Sequence hypervector
Raises:
ValueError: If items is empty
"""
if not items:
raise ValueError("Cannot encode empty sequence")
items = list(items)
if use_ngrams:
# N-gram encoding
if len(items) < n:
# Sequence too short for n-grams, fall back to simple bundle
return self.bundle(items)
ngrams = []
for i in range(len(items) - n + 1):
# Create n-gram by binding n consecutive items
ngram = items[i]
for j in range(1, n):
ngram = self.bind(ngram, items[i + j])
ngrams.append(ngram)
return self.bundle(ngrams)
else:
# Position binding encoding
pos = self.random(seed=42) # Fixed position vector
bound_items = []
for i, item in enumerate(items):
permuted_pos = self.permute(pos, k=i)
bound_items.append(self.bind(item, permuted_pos))
return self.bundle(bound_items)
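A brief usage sketch of both strategies. Note that because XOR is commutative, the bigram encoding of a reversed sequence contains the same bigram set, so only position binding distinguishes order:

from holovec.models.bsdc import BSDCModel

model = BSDCModel(dimension=10000, seed=0)
tokens = [model.random(seed=s) for s in (1, 2, 3)]

by_position = model.encode_sequence(tokens)                  # item_i ⊗ ρⁱ(pos)
by_bigrams = model.encode_sequence(tokens, use_ngrams=True)  # bundle of bigrams

# Position binding is order-sensitive; a reversed sequence looks different
print(model.similarity(by_position, model.encode_sequence(tokens[::-1])))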
measure_sparsity(vec)
Measure actual sparsity of a vector.
Args: vec: Hypervector to measure
Returns: Fraction of 1s in the vector
Source code in holovec/models/bsdc.py
def measure_sparsity(self, vec: Array) -> float:
"""Measure actual sparsity of a vector.
Args:
vec: Hypervector to measure
Returns:
Fraction of 1s in the vector
"""
vec_np = self.backend.to_numpy(vec)
count_ones = np.sum(vec_np)
return float(count_ones) / len(vec_np)
permute(vec, k=1)
Permute using circular shift.
Shifts vector elements by k positions. For sparse codes, this maintains sparsity perfectly.
Args:
- vec: Hypervector to permute
- k: Number of positions to shift (default: 1)
Returns: Permuted hypervector
Source code in holovec/models/bsdc.py
def permute(self, vec: Array, k: int = 1) -> Array:
"""Permute using circular shift.
Shifts vector elements by k positions. For sparse codes,
this maintains sparsity perfectly.
Args:
vec: Hypervector to permute
k: Number of positions to shift (default: 1)
Returns:
Permuted hypervector
"""
return self.backend.roll(vec, shift=k, axis=0)
rehash(vec)
Rehash vector to restore optimal sparsity.
Useful after multiple operations that may have changed sparsity. Randomly selects positions to maintain target sparsity while preserving as much similarity as possible.
Args: vec: Hypervector to rehash
Returns: Rehashed hypervector with target sparsity
Source code in holovec/models/bsdc.py
def rehash(self, vec: Array) -> Array:
"""Rehash vector to restore optimal sparsity.
Useful after multiple operations that may have changed sparsity.
Randomly selects positions to maintain target sparsity while
preserving as much similarity as possible.
Args:
vec: Hypervector to rehash
Returns:
Rehashed hypervector with target sparsity
"""
vec_np = self.backend.to_numpy(vec)
target_ones = int(self.sparsity * self.dimension)
# Get current 1 positions
current_ones = np.where(vec_np == 1)[0]
current_count = len(current_ones)
if current_count == target_ones:
# Already at target sparsity
return vec
elif current_count > target_ones:
# Too many 1s: randomly remove some
keep_indices = np.random.choice(
current_ones, size=target_ones, replace=False
)
result = np.zeros_like(vec_np)
result[keep_indices] = 1
else:
# Too few 1s: randomly add some
current_zeros = np.where(vec_np == 0)[0]
add_count = target_ones - current_count
add_indices = np.random.choice(
current_zeros, size=add_count, replace=False
)
result = vec_np.copy()
result[add_indices] = 1
return self.backend.from_numpy(result.astype(np.int32))
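A hedged sketch of `measure_sparsity` and `rehash` together. XOR binding of two sparse vectors tends to raise the fraction of 1s above the target, so rehashing afterwards restores it (assumes the model exposes its target sparsity as `model.sparsity`, as the source above uses):

```python
from holovec import VSA

model = VSA.create('BSDC', dim=10000, seed=0)
a, b = model.random(), model.random()

bound = model.bind(a, b)                 # XOR can drift away from target sparsity
print(model.measure_sparsity(bound))     # typically above the target

restored = model.rehash(bound)           # restore target sparsity
print(model.measure_sparsity(restored))  # ≈ model.sparsity
```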
unbind(a, b)
Unbind to recover value.
Behavior depends on binding_mode:
- 'xor': XOR is self-inverse, exact recovery: unbind(bind(a, b), b) = a
- 'cdt': No inverse exists; returns the bound vector itself since it's already similar to the components (use similarity search for retrieval)
Args:
a: Bound hypervector (or first operand)
b: Second operand (key for XOR mode, ignored for CDT mode)
Returns:
For XOR: Exact unbound hypervector
For CDT: The bound vector (use similarity search to find components)
Source code in holovec/models/bsdc.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind to recover value.
Behavior depends on binding_mode:
- 'xor': XOR is self-inverse, exact recovery: unbind(bind(a, b), b) = a
- 'cdt': No inverse exists; returns the bound vector itself since it's
already similar to the components (use similarity search for retrieval)
Args:
a: Bound hypervector (or first operand)
b: Second operand (key for XOR mode, ignored for CDT mode)
Returns:
For XOR: Exact unbound hypervector
For CDT: The bound vector (use similarity search to find components)
"""
if self.binding_mode == 'cdt':
# CDT doesn't have an inverse operation
# The bound vector is already similar to its components,
# so return it for similarity-based retrieval
return a
else:
# XOR is self-inverse
return self.backend.xor(a, b)
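A minimal sketch of the XOR path, assuming `binding_mode` is forwarded through the factory as listed in the model-kwargs registry earlier in this reference:

```python
from holovec import VSA

model = VSA.create('BSDC', dim=10000, binding_mode='xor', seed=0)
key, value = model.random(), model.random()
bound = model.bind(key, value)

# XOR is self-inverse: unbind(bind(key, value), key) recovers value exactly.
recovered = model.unbind(bound, key)
print(model.similarity(recovered, value))   # expected ≈ 1.0

# With binding_mode='cdt' there is no inverse; unbind returns the bound
# vector, which stays similar to its components for similarity search.
```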
BSDC-SEG
holovec.models.bsdc_seg.BSDCSEGModel
Bases: VSAModel
Segment-sparse binary VSA model (BSDC-SEG).
- Binding: XOR (element-wise, self-inverse)
- Unbinding: XOR (self-inverse)
- Bundling: segment-wise majority (exactly 1 per segment)
- Permutation: circular shift
Uses SparseSegmentSpace with S segments (D % S == 0).
Source code in holovec/models/bsdc_seg.py
class BSDCSEGModel(VSAModel):
"""Segment-sparse binary VSA model (BSDC-SEG).
Binding: XOR (element-wise, self-inverse)
Unbinding: XOR (self-inverse)
Bundling: segment-wise majority (exactly 1 per segment)
Permutation: circular shift
Uses SparseSegmentSpace with S segments (D % S == 0).
"""
def __init__(
self,
dimension: int,
segments: int | None = None,
space: VectorSpace | None = None,
backend: Backend | None = None,
seed: int | None = None,
):
if space is None:
if segments is None:
raise ValueError("segments is required when space is not provided")
from ..backends import get_backend
backend = backend if backend is not None else get_backend()
space = SparseSegmentSpace(dimension, segments=segments, backend=backend, seed=seed)
elif not isinstance(space, SparseSegmentSpace):
raise TypeError(f"space must be SparseSegmentSpace, got {type(space)}")
super().__init__(space, backend)
self.segments = space.segments
self.segment_length = space.segment_length
@property
def model_name(self) -> str:
return "BSDC-SEG"
@property
def is_self_inverse(self) -> bool:
return True
@property
def is_commutative(self) -> bool:
return True
@property
def is_exact_inverse(self) -> bool:
return True
def bind(self, a: Array, b: Array) -> Array:
"""Bind using XOR (self-inverse)."""
return self.backend.xor(a, b)
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using XOR (self-inverse)."""
return self.bind(a, b)
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Segment-wise majority with exactly 1 winner per segment.
Counts votes per index within each segment and selects the index with
maximum count (deterministic tie-break: lowest index).
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
# Normalize each to a valid segment pattern first
seg_norm = [self.space.normalize(v) for v in vectors]
counts = self.backend.sum(self.backend.stack(seg_norm, axis=0), axis=0)
return self.space.normalize(counts)
def permute(self, vec: Array, k: int = 1) -> Array:
return self.backend.roll(vec, shift=k, axis=0)
bind(a, b)
Bind using XOR (self-inverse).
Source code in holovec/models/bsdc_seg.py
def bind(self, a: Array, b: Array) -> Array:
"""Bind using XOR (self-inverse)."""
return self.backend.xor(a, b)
bundle(vectors)
Segment-wise majority with exactly 1 winner per segment.
Counts votes per index within each segment and selects the index with maximum count (deterministic tie-break: lowest index).
Source code in holovec/models/bsdc_seg.py
def bundle(self, vectors: Sequence[Array]) -> Array:
"""Segment-wise majority with exactly 1 winner per segment.
Counts votes per index within each segment and selects the index with
maximum count (deterministic tie-break: lowest index).
"""
if not vectors:
raise ValueError("Cannot bundle empty sequence")
# Normalize each to a valid segment pattern first
seg_norm = [self.space.normalize(v) for v in vectors]
counts = self.backend.sum(self.backend.stack(seg_norm, axis=0), axis=0)
return self.space.normalize(counts)
unbind(a, b)
Unbind using XOR (self-inverse).
Source code in holovec/models/bsdc_seg.py
def unbind(self, a: Array, b: Array) -> Array:
"""Unbind using XOR (self-inverse)."""
return self.bind(a, b)
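A minimal roundtrip sketch for BSDC-SEG, assuming the factory forwards `segments` as listed in the space-kwargs registry earlier (the dimension must satisfy D % S == 0):

```python
from holovec import VSA

model = VSA.create('BSDC-SEG', dim=10000, segments=100, seed=0)
key, value = model.random(), model.random()

bound = model.bind(key, value)                            # XOR
print(model.similarity(model.unbind(bound, key), value))  # exact: ≈ 1.0

# Segment-wise majority keeps exactly one active index per segment,
# so a bundle remains similar to each of its components.
xs = [model.random() for _ in range(5)]
bundle = model.bundle(xs)
for x in xs:
    print(model.similarity(bundle, x))
```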
Encoders
Scalar Encoders
holovec.encoders.scalar.FractionalPowerEncoder
Bases: ScalarEncoder
Fractional Power Encoding (FPE) for continuous scalars.
Based on Frady et al. (2021) "Computing on Functions Using Randomized Vector Representations". Encodes scalars by exponentiating a random phasor base vector: encode(x) = φ^x.
The inner product between encoded vectors approximates a similarity kernel (sinc for uniform phase distribution). This encoding preserves linearity and enables precise decoding via sinc kernel reconstruction.
Works best with FHRR (complex domain) but also supports HRR (real domain).
References:
- Frady et al. (2021): https://arxiv.org/abs/2109.03429
- Verges et al. (2025): Learning encoding phasors with FPE
Attributes:
- bandwidth: Controls kernel width (lower = wider kernel)
- base_phasor: Random phasor vector φ = [e^(iφ₁), ..., e^(iφₙ)]
Source code in holovec/encoders/scalar.py
class FractionalPowerEncoder(ScalarEncoder):
"""
Fractional Power Encoding (FPE) for continuous scalars.
Based on Frady et al. (2021) "Computing on Functions Using Randomized
Vector Representations". Encodes scalars by exponentiating a random
phasor base vector: encode(x) = φ^x.
The inner product between encoded vectors approximates a similarity
kernel (sinc for uniform phase distribution). This encoding preserves
linearity and enables precise decoding via sinc kernel reconstruction.
Works best with FHRR (complex domain) but also supports HRR (real domain).
References:
Frady et al. (2021): https://arxiv.org/abs/2109.03429
Verges et al. (2025): Learning encoding phasors with FPE
Attributes:
bandwidth: Controls kernel width (lower = wider kernel)
base_phasor: Random phasor vector φ = [e^(iφ₁), ..., e^(iφₙ)]
"""
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
bandwidth: float = 1.0,
seed: int | None = None,
phase_dist: str = "uniform",
mixture_bandwidths: list[float] | None = None,
mixture_weights: list[float] | None = None,
):
"""
Initialize FractionalPowerEncoder.
Parameters
----------
model : VSAModel
VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for
exact fractional powers. HRR (real-valued) uses cosine projection.
min_val : float
Minimum value of encoding range. Values below this will be clipped.
max_val : float
Maximum value of encoding range. Values above this will be clipped.
bandwidth : float, optional
Bandwidth parameter β controlling kernel width (default: 1.0).
**Mathematical Role:**
- Encoding: z(x) = φ^(β·x_normalized)
- Kernel: K(x₁, x₂) ≈ sinc(β·π·|x₁ - x₂|) for uniform phase distribution
- Smaller β → wider kernel → more generalization
- Larger β → narrower kernel → more discrimination
**Typical Values:**
- β = 0.01: Wide kernel, high generalization (classification)
- β = 1.0: Medium kernel (default)
- β = 10.0: Narrow kernel, low generalization (regression)
seed : int or None, optional
Random seed for generating base phasor (for reproducibility).
Different seeds produce different random frequency vectors θ.
phase_dist : str, optional
Distribution for sampling frequency vector θ (default: 'uniform').
**Available Distributions:**
- 'uniform': θⱼ ~ Uniform[-π, π] → sinc kernel (default)
- 'gaussian': θⱼ ~ N(0, 1) → Gaussian kernel approximation
- 'laplace': θⱼ ~ Laplace(0, 1) → Exponential kernel, heavy tails
- 'cauchy': θⱼ ~ Cauchy(0, 1) → Very heavy tails, long-range
- 'student': θⱼ ~ Student-t(df=3) → Moderate tails, robust
Different distributions induce different similarity kernels,
affecting generalization properties.
mixture_bandwidths : List[float] or None, optional
List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding.
**Mixture Encoding:**
Instead of single bandwidth β, use weighted combination:
z_mix(x) = Σₖ αₖ · φ^(βₖ·x)
where αₖ are mixture_weights. This creates multi-scale representation
combining coarse (small β) and fine (large β) kernels.
**Example:**
mixture_bandwidths = [0.01, 0.1, 1.0, 10.0] # 4 scales
Creates encoding with both local and global similarity.
mixture_weights : List[float] or None, optional
Weights αₖ for each bandwidth in mixture (must sum to 1).
If None and mixture_bandwidths is provided, uses uniform weights:
αₖ = 1/K for all k
Weights can be:
1. Hand-crafted (domain knowledge)
2. Learned via `learn_mixture_weights()` (ridge regression)
3. Uniform (default)
Raises
------
ValueError
If phase_dist not in valid set, or if mixture_weights/mixture_bandwidths
have mismatched lengths.
Notes
-----
**Mathematical Foundation:**
Fractional Power Encoding maps scalar x to hypervector via:
z(x) = φ^(β·x_normalized)
where:
- φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθₐ)] is base phasor (D dimensions)
- θⱼ are random frequencies sampled from phase_dist
- x_normalized ∈ [0, 1] is x mapped to unit interval
- β is bandwidth parameter
**Inner Product Kernel:**
For uniform phase distribution θⱼ ~ Uniform[-π, π]:
⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)
This sinc kernel has important properties:
- Smooth interpolation between similar values
- Exact at x₁ = x₂ (similarity = 1)
- Decreases monotonically with distance
- Zero-crossings at integer multiples of 1/β
**Comparison to Random Fourier Features:**
FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007)
for kernel approximation:
k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D
where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θₐx), sin(θₐx)]
For complex hypervectors, FPE uses complex exponentials instead:
φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθₐx)]
which provides more compact representation and supports exact
fractional power operations in frequency domain.
References
----------
- Frady et al. (2021): "Computing on Functions Using Randomized
Vector Representations" - Original FPE paper
- Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
- Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
- Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"
Examples
--------
>>> # Basic FPE for temperature encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> temp_25 = encoder.encode(25.0)
>>> temp_26 = encoder.encode(26.0)
>>> similarity = model.similarity(temp_25, temp_26) # ≈ 0.95
>>> # Multi-scale mixture encoding
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
... mixture_weights=[0.4, 0.3, 0.2, 0.1] # Emphasize coarse scales
... )
>>> # Alternative kernel via phase distribution
>>> encoder_gauss = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... phase_dist='gaussian' # Gaussian kernel instead of sinc
... )
"""
super().__init__(model, min_val, max_val)
self.bandwidth = bandwidth
self.seed = seed
# Distribution controls for frequencies (theta)
self.phase_dist = (phase_dist or "uniform").lower()
valid = {"uniform", "gaussian", "laplace", "cauchy", "student"}
if self.phase_dist not in valid:
raise ValueError(f"Unsupported phase_dist '{phase_dist}'. Choose from {sorted(valid)}.")
# Mixture support (optional)
self.mixture_bandwidths = mixture_bandwidths
self.mixture_weights = mixture_weights
if self.mixture_bandwidths is not None:
if len(self.mixture_bandwidths) == 0:
raise ValueError("mixture_bandwidths must be non-empty if provided")
if self.mixture_weights is None:
self.mixture_weights = [1.0 / len(self.mixture_bandwidths)] * len(self.mixture_bandwidths)
if len(self.mixture_weights) != len(self.mixture_bandwidths):
raise ValueError("mixture_weights must match mixture_bandwidths length")
# Normalize weights
s = sum(self.mixture_weights)
if s <= 0:
raise ValueError("mixture_weights must sum to positive value")
self.mixture_weights = [w / s for w in self.mixture_weights]
# Check complex vs real
self.is_complex = self.model.space.space_name == "complex"
# Base phases/frequencies θ_j
# For uniform, we can derive from a random phasor; for others, sample numeric theta
if self.phase_dist == "uniform":
# Maintain backward compatibility using base phasor
self.base_phasor = self._generate_base_phasor(seed)
# Derive angles from the base phasor
self.theta = self.backend.angle(self.base_phasor)
else:
# Numeric theta sampled in init; store as backend array
self.theta = self._generate_theta_distribution(self.phase_dist, seed)
# For complex path we do not need base_phasor; for real path, we’ll compute cos(theta * exponent)
self.base_phasor = None
def _generate_base_phasor(self, seed: int | None) -> Array:
"""
Generate random phasor base vector with uniform phase distribution.
For uniform phases φᵢ ~ Uniform[-π, π], this induces the sinc kernel:
K(d) = sinc(πd)
Args:
seed: Random seed for reproducibility
Returns:
Base phasor vector φ = [e^(iφ₁), e^(iφ₂), ..., e^(iφₙ)]
"""
# Generate random phasors using backend (fully backend-agnostic)
if self.is_complex:
# For complex models (FHRR), generate random phasors directly
phasor = self.backend.random_phasor(
shape=self.dimension,
dtype='complex64',
seed=seed
)
else:
# For real models (HRR), generate phasors then project to real
phasor_complex = self.backend.random_phasor(
shape=self.dimension,
dtype='complex64',
seed=seed
)
# Project to real via inverse FFT
phasor_real = self.backend.ifft(phasor_complex).real
# Normalize to unit norm using backend
phasor = self.backend.normalize(phasor_real)
return phasor
def _generate_theta_distribution(self, phase_dist: str, seed: int | None) -> Array:
"""
Generate frequency vector θ according to specified distribution.
Parameters
----------
phase_dist : str
Distribution name for sampling frequencies.
seed : int or None
Random seed for reproducibility.
Returns
-------
Array
Frequency vector θ of shape (D,) in backend format.
Notes
-----
**Distribution Choices and Induced Kernels:**
Different frequency distributions induce different similarity kernels
via the Fourier transform relationship:
1. **Uniform θⱼ ~ Uniform[-π, π]** (default):
- Kernel: K(d) = sinc(π·d) = sin(π·d)/(π·d)
- Properties: Smooth, monotonic decay, oscillatory
- Best for: General-purpose continuous encoding
- Zero-crossings at integer distances
2. **Gaussian θⱼ ~ N(0, 1)**:
- Kernel: K(d) ≈ exp(-d²/2) (Gaussian RBF kernel)
- Properties: Smooth, no oscillations, fast decay
- Best for: Local similarity, smooth interpolation
- Widely used in kernel methods (SVMs, GPs)
3. **Laplace θⱼ ~ Laplace(0, 1)**:
- Kernel: K(d) ∝ exp(-|d|) (Exponential kernel)
- Properties: Heavy tails, slower than Gaussian decay
- Best for: Robust similarity, outlier tolerance
- More forgiving to distant values
4. **Cauchy θⱼ ~ Cauchy(0, 1)**:
- Kernel: K(d) ∝ 1/(1 + d²) (Rational quadratic)
- Properties: Very heavy tails, long-range interactions
- Best for: Multi-scale similarity, hierarchical data
- Cauchy kernel is limit of Student-t as df→∞
5. **Student-t θⱼ ~ Student-t(df=3)**:
- Kernel: K(d) ∝ (1 + d²/3)^(-2) (generalized Student)
- Properties: Moderate heavy tails (df=3 chosen empirically)
- Best for: Robust regression, noisy data
- Interpolates between Gaussian (df→∞) and Cauchy (df→0)
**Mathematical Background:**
The relationship between frequency distribution p(θ) and
similarity kernel K(d) follows from Bochner's theorem:
A continuous kernel K(x₁, x₂) = K(x₁ - x₂) is positive definite
if and only if K(d) is the Fourier transform of a non-negative
measure (the frequency distribution p(θ)):
K(d) = ∫ exp(i·θ·d) p(θ) dθ
For FPE, the inner product is:
⟨z(x₁), z(x₂)⟩ / D ≈ 𝔼_θ[exp(i·θ·β·(x₁ - x₂))]
= ∫ exp(i·θ·β·d) p(θ) dθ
= K(β·d)
where d = x₁ - x₂ is the distance between scalars.
**Sampling Methods:**
- **Uniform, Gaussian, Student-t**: Direct sampling from distribution
- **Laplace**: Inverse CDF transform from uniform:
θ = -sign(u) · log(1 - 2|u|) where u ~ Uniform(-0.5, 0.5)
- **Cauchy**: Inverse CDF transform:
θ = tan(π·u) where u ~ Uniform(-0.5, 0.5)
**NumPy Usage Justification:**
Uses local NumPy import because special distributions (Laplace, Cauchy)
are not available in backend abstraction. Frequencies are converted
to backend array immediately via `from_numpy()`.
References
----------
- Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
Section 3: Relationship between frequency distribution and kernel
- Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
Analysis of approximation quality for different kernels
- Bochner (1932): "Vorlesungen über Fouriersche Integrale"
Original Bochner's theorem
- Rasmussen & Williams (2006): "Gaussian Processes for Machine Learning"
Chapter 4: Covariance functions and kernel design
Examples
--------
>>> # Gaussian kernel for smooth similarity
>>> model = VSA.create('FHRR', dim=10000)
>>> enc = FractionalPowerEncoder(model, 0, 100, phase_dist='gaussian')
>>> # Cauchy kernel for long-range similarity
>>> enc_cauchy = FractionalPowerEncoder(model, 0, 100, phase_dist='cauchy')
"""
import numpy as _np
rng = _np.random.default_rng(seed)
D = self.dimension
if phase_dist == "gaussian":
theta_np = rng.normal(0.0, 1.0, size=(D,)).astype(_np.float32)
elif phase_dist == "laplace":
# Laplace via inverse transform: scale=1
u = rng.uniform(-0.5, 0.5, size=(D,)).astype(_np.float32)
theta_np = (_np.sign(u) * _np.log1p(-2.0 * _np.abs(u))).astype(_np.float32) * -1.0
elif phase_dist == "cauchy":
u = rng.uniform(-0.5, 0.5, size=(D,)).astype(_np.float32)
theta_np = _np.tan(_np.pi * u).astype(_np.float32)
elif phase_dist == "student":
theta_np = rng.standard_t(df=3.0, size=(D,)).astype(_np.float32)
else:
# Default to uniform angles; match base_phasor angle convention [-π, π]
theta_np = rng.uniform(-_np.pi, _np.pi, size=(D,)).astype(_np.float32)
return self.backend.from_numpy(theta_np)
def encode(self, value: float) -> Array:
"""
Encode scalar value to hypervector using fractional power.
Parameters
----------
value : float
Scalar value to encode. Will be clipped to [min_val, max_val].
Returns
-------
Array
Encoded hypervector of shape (dimension,) in backend format.
Notes
-----
**Single Bandwidth Encoding:**
For single bandwidth β, implements:
z(x) = φ^(β·x_normalized)
where:
- x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
- φ = [e^(iθ₁), ..., e^(iθₐ)] is base phasor with random frequencies θⱼ
- Result is normalized according to model's space
Element-wise computation:
z_j(x) = e^(i·θⱼ·β·x_normalized) (complex models)
z_j(x) = cos(θⱼ·β·x_normalized) (real models)
**Mixture Encoding:**
When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses weighted sum:
z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized)
where αₖ are mixture_weights (default: uniform αₖ = 1/K).
**Advantages of Mixture Encoding:**
1. **Multi-Scale Representation**: Combines coarse (small β) and
fine (large β) similarity kernels in single hypervector
2. **Improved Generalization**: Coarse scales provide robustness,
fine scales provide discrimination
3. **Learned Weights**: Weights αₖ can be learned via
`learn_mixture_weights()` to optimize for specific task
4. **Kernel Combination**: Mixture is equivalent to combining
multiple kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)
**Computational Complexity:**
- Single bandwidth: O(D) operations (element-wise exponential)
- Mixture with K bandwidths: O(K·D) operations
- Backend operations (exp, multiply) are vectorized/GPU-accelerated
**Normalization:**
Output is normalized using model's normalization scheme:
- FHRR/HRR: L2 normalization (unit norm)
- MAP: Element-wise normalization
- BSC/BSDC: No normalization (binary)
This ensures hypervectors are in valid space for subsequent
binding/bundling operations.
Examples
--------
>>> # Basic encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv_25 = encoder.encode(25.0) # Encode temperature 25°C
>>> hv_26 = encoder.encode(26.0)
>>> similarity = model.similarity(hv_25, hv_26)
>>> print(f"Similarity: {similarity:.3f}") # ≈ 0.950 (close values)
>>> # Mixture encoding for multi-scale representation
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 1.0, 100.0]
... )
>>> hv_mix = encoder_mix.encode(25.0) # Combines 3 scales
>>> # Effect of bandwidth on similarity
>>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
>>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
>>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
>>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
>>> # sim_wide > sim_narrow (wider kernel → more generalization)
"""
# Normalize value to [0, 1]
normalized = self.normalize(value)
# Handle mixture: list of beta_k and weights alpha_k
betas: list[float]
alphas: list[float]
if self.mixture_bandwidths is not None:
betas = list(self.mixture_bandwidths)
alphas = list(self.mixture_weights or [])
else:
betas = [self.bandwidth]
alphas = [1.0]
parts = []
for alpha, beta in zip(alphas, betas, strict=True):
exponent = beta * normalized
if self.is_complex:
# Complex: encode as exp(i * theta * exponent)
theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
phase = self.backend.multiply_scalar(theta, exponent)
phasor = self.backend.exp(1j * phase)
parts.append(self.backend.multiply_scalar(phasor, alpha))
else:
# Real: use cosine features directly: cos(theta * exponent)
theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
phase = self.backend.multiply_scalar(theta, exponent)
# cos(phase) = Re(exp(i*phase))
phasor = self.backend.real(self.backend.exp(1j * phase))
parts.append(self.backend.multiply_scalar(phasor, alpha))
if len(parts) == 1:
encoded = parts[0]
else:
encoded = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
# Normalize output according to space
return self.model.normalize(encoded)
def decode(
self,
hypervector: Array,
resolution: int = 1000,
max_iterations: int = 100,
tolerance: float = 1e-6
) -> float:
"""
Decode hypervector back to scalar value using two-stage optimization.
Parameters
----------
hypervector : Array
Hypervector to decode (typically a noisy/bundled encoding).
resolution : int, optional
Number of grid points for coarse search (default: 1000).
Higher resolution improves initial guess but increases cost.
max_iterations : int, optional
Maximum gradient descent iterations (default: 100).
Typical convergence: 20-50 iterations.
tolerance : float, optional
Convergence tolerance for gradient descent (default: 1e-6).
Stop when |Δx| < tolerance.
Returns
-------
float
Decoded scalar value in [min_val, max_val].
Notes
-----
**Decoding Algorithm:**
Uses two-stage optimization to find value x maximizing similarity:
x* = argmax_x ⟨encode(x), hypervector⟩
**Stage 1: Coarse Grid Search** (O(resolution · D))
- Evaluate similarity at `resolution` uniformly-spaced points
- Find x₀ with highest similarity
- Provides good initialization for gradient descent
**Stage 2: Gradient Descent** (O(max_iterations · D))
- Starting from x₀, perform gradient ascent:
x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
- Gradient computed via finite differences:
∇_x ≈ (sim(x + ε) - sim(x)) / ε
- Step size η_t decays: η_t = η_0 · 0.95^t (prevents oscillation)
- Clips updates to [0, 1] normalized range
**Why This Works:**
For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
- Similarity function is unimodal (single peak)
- Peak occurs at x = x_true (encoded value)
- Gradient descent converges to global maximum
However, for noisy hypervectors (e.g., bundled encodings):
- Multiple local maxima may exist
- Coarse search reduces chance of local minimum trap
- Wider kernels (small β) → smoother objective → easier optimization
**Approximation Quality:**
Decoding accuracy depends on several factors:
1. **Dimension D**: Higher D → more accurate encoding → better decoding
- D = 1000: Moderate accuracy (similarity ≈ 0.85)
- D = 10000: High accuracy (similarity ≈ 0.99)
2. **Signal-to-Noise Ratio**: Clean encoding vs bundled/noisy
- Clean: Near-perfect recovery (error < 1%)
- Bundled (10 items): Good recovery (error ≈ 5-10%)
- Bundled (100 items): Degraded (error ≈ 20-30%)
3. **Bandwidth β**: Wider kernels → smoother similarity landscape
- β = 0.01: Very smooth, easy to optimize
- β = 10.0: Narrow kernel, may have local maxima
4. **Mixture Encoding**: Multiple bandwidths complicate landscape
- May require finer grid search (higher resolution)
- May need more gradient descent iterations
**Computational Cost:**
Total operations: O(resolution · D + max_iterations · D)
Typical values:
- resolution = 1000, max_iterations = 100, D = 10000
- Total: ~1.1M evaluations
- Runtime: ~0.1-1.0 seconds (CPU), ~0.01-0.1 seconds (GPU)
For real-time applications, reduce resolution or max_iterations:
- resolution = 100 (coarser search)
- max_iterations = 20 (early stopping)
**Comparison to Other Decoders:**
- **Codebook Lookup** (LevelEncoder): O(K · D) for K levels
Faster but discrete, no interpolation
- **Resonator Network** (cleanup): O(iterations · M · D) for M items
Better for structured/compositional decoding
- **FPE Gradient Descent**: O(resolution · D + iterations · D)
Best for continuous scalar recovery
References
----------
- Frady et al. (2021): "Computing on Functions Using Randomized
Vector Representations" - Section on FPE decoding
- Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent
methods and convergence analysis
Examples
--------
>>> # Basic decoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv = encoder.encode(25.0)
>>> decoded = encoder.decode(hv)
>>> print(f"Decoded: {decoded:.2f}") # ≈ 25.00
>>> # Decoding noisy hypervector (bundled encoding)
>>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
>>> decoded_bundle = encoder.decode(hv_bundle)
>>> print(f"Decoded bundle: {decoded_bundle:.2f}") # ≈ 25.5
>>> # Fast decoding (lower resolution/iterations)
>>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
"""
# Coarse search: evaluate on grid
normalized_grid = self.backend.linspace(0, 1, resolution)
best_similarity = -float('inf')
best_normalized = 0.5 # Start in middle
for norm_val_np in self.backend.to_numpy(normalized_grid):
norm_val = float(norm_val_np)
encoded = self.encode(self.denormalize(norm_val))
similarity = float(
self.backend.to_numpy(
self.model.similarity(encoded, hypervector)
)
)
if similarity > best_similarity:
best_similarity = similarity
best_normalized = norm_val
# Fine search: gradient descent around best coarse value
# For simplicity, use finite differences for gradient
current = best_normalized
step_size = 0.01
for _ in range(max_iterations):
# Evaluate at current position
encoded_curr = self.encode(self.denormalize(current))
sim_curr = float(
self.backend.to_numpy(
self.model.similarity(encoded_curr, hypervector)
)
)
# Evaluate at current + epsilon
epsilon = 1e-4
encoded_plus = self.encode(self.denormalize(current + epsilon))
sim_plus = float(
self.backend.to_numpy(
self.model.similarity(encoded_plus, hypervector)
)
)
# Compute gradient
gradient = (sim_plus - sim_curr) / epsilon
# Update (gradient ascent)
new_current = current + step_size * gradient
# Clip to [0, 1]
new_current = max(0.0, min(1.0, new_current))
# Check convergence
if abs(new_current - current) < tolerance:
break
current = new_current
step_size *= 0.95 # Decay step size
# Denormalize and return
return self.denormalize(current)
@property
def is_reversible(self) -> bool:
"""FPE supports approximate decoding."""
return True
@property
def compatible_models(self) -> list[str]:
"""FPE works best with FHRR, also compatible with HRR."""
return ["FHRR", "HRR"]
def __repr__(self) -> str:
"""String representation."""
return (
f"FractionalPowerEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"bandwidth={self.bandwidth}, "
f"phase_dist={self.phase_dist}, "
f"mixture={'yes' if self.mixture_bandwidths else 'no'}, "
f"dimension={self.dimension})"
)
# ====== M2: Learned mixture weights (ridge-style closed form) ======
def learn_mixture_weights(
self,
values: list[float],
labels: list[int],
reg: float = 1e-3,
) -> list[float]:
"""
Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple
ridge-style objective that aligns encoded mixtures to per-class prototypes.
Approach:
- Build class prototypes p_c as the mean of current encodings (using current weights)
- For each sample i, compute per-band encodings E_i = [e_{i1},...,e_{iK}] (shape d×K)
- Solve (Σ E_i^T E_i + reg I) α = Σ E_i^T p_{y_i}
- Project α onto simplex (nonnegative, sum=1)
Args:
values: list of scalar inputs
labels: list of integer class labels (same length as values)
reg: L2 regularization strength (default 1e-3)
Returns:
Learned mixture weights (list of floats summing to 1)
Notes:
- Requires mixture_bandwidths to be set (K>=2)
- Uses numpy for solving normal equations; backend remains unchanged
"""
import numpy as _np
if self.mixture_bandwidths is None or len(self.mixture_bandwidths) < 2:
raise ValueError("learn_mixture_weights requires mixture_bandwidths with K >= 2")
# Prepare classes and group samples
values = list(values)
labels = list(labels)
if len(values) != len(labels):
raise ValueError("values and labels must have same length")
classes = sorted(set(labels))
K = len(self.mixture_bandwidths)
# Build current encodings to compute class prototypes (using current mixture weights)
encodings = [self.encode(v) for v in values]
# Convert to numpy arrays for prototype computation
enc_np = [_np.array(self.model.backend.to_numpy(e)) for e in encodings]
# Class prototypes: mean of encodings per class (vector length d)
prototypes = {}
for c in classes:
idxs = [i for i, y in enumerate(labels) if y == c]
if not idxs:
continue
prototypes[c] = _np.mean(_np.stack([enc_np[i] for i in idxs], axis=0), axis=0)
# Helper to compute per-band encodings matrix E_i (d×K) for a value
def _per_band_matrix(val: float) -> _np.ndarray:
assert self.mixture_bandwidths is not None
norm = self.normalize(val)
cols = []
for beta in self.mixture_bandwidths:
exponent = beta * norm
theta = self.theta if self.theta is not None else self.model.backend.angle(self.base_phasor)
phase = self.model.backend.multiply_scalar(theta, exponent)
if self.is_complex:
ph = self.model.backend.exp(1j * phase)
col = self.model.backend.to_numpy(ph)
else:
col = self.model.backend.to_numpy(self.model.backend.real(self.model.backend.exp(1j * phase)))
cols.append(_np.array(col))
# Stack columns to d×K
return _np.stack(cols, axis=1)
# Accumulate normal equations
A = _np.zeros((K, K), dtype=_np.float64)
b = _np.zeros((K,), dtype=_np.float64)
for v, y in zip(values, labels, strict=True):
E = _per_band_matrix(v) # d×K
p = prototypes[y] # d
# E^T E and E^T p
A += E.T @ E
b += E.T @ p
# Regularization
A += reg * _np.eye(K, dtype=_np.float64)
# Solve
try:
alpha = _np.linalg.solve(A, b)
except _np.linalg.LinAlgError:
alpha = _np.linalg.lstsq(A, b, rcond=None)[0]
# Project to simplex (≥0, sum=1)
alpha = _np.maximum(alpha, 0.0)
s = float(_np.sum(alpha))
if s <= 0:
alpha = _np.ones_like(alpha) / len(alpha)
else:
alpha = alpha / s
# Update in encoder
self.mixture_weights = [float(a) for a in alpha.tolist()]
return self.mixture_weights
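The kernel claim in the docstring above can be checked empirically. A hedged sketch, assuming `similarity` reports the normalized (real) inner product and using NumPy's sinc convention sin(πx)/(πx):

```python
import numpy as np
from holovec import VSA
from holovec.encoders.scalar import FractionalPowerEncoder

model = VSA.create('FHRR', dim=10000, seed=0)
# With min_val=0 and max_val=1, normalization is the identity, so the
# kernel argument below is the raw distance d between inputs.
enc = FractionalPowerEncoder(model, min_val=0.0, max_val=1.0,
                             bandwidth=4.0, seed=0)

z0 = enc.encode(0.5)
for d in (0.0, 0.05, 0.1, 0.2):
    sim = float(model.backend.to_numpy(model.similarity(z0, enc.encode(0.5 + d))))
    print(f"d={d:.2f}  similarity={sim:+.3f}  sinc(β·d)={np.sinc(4.0 * d):+.3f}")
```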
compatible_models
property
FPE works best with FHRR, also compatible with HRR.
is_reversible
property
FPE supports approximate decoding.
__init__(model, min_val, max_val, bandwidth=1.0, seed=None, phase_dist='uniform', mixture_bandwidths=None, mixture_weights=None)
Initialize FractionalPowerEncoder.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `model` | `VSAModel` | VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for exact fractional powers. HRR (real-valued) uses cosine projection. | required |
| `min_val` | `float` | Minimum value of encoding range. Values below this will be clipped. | required |
| `max_val` | `float` | Maximum value of encoding range. Values above this will be clipped. | required |
| `bandwidth` | `float` | Bandwidth parameter β controlling kernel width. Encoding: z(x) = φ^(β·x_normalized); kernel: K(x₁, x₂) ≈ sinc(β·π·\|x₁ - x₂\|) for the uniform phase distribution. Smaller β → wider kernel → more generalization; larger β → narrower kernel → more discrimination. Typical values: β = 0.01 (wide kernel, classification), β = 1.0 (medium, default), β = 10.0 (narrow kernel, regression). | `1.0` |
| `seed` | `int or None` | Random seed for generating the base phasor (for reproducibility). Different seeds produce different random frequency vectors θ. | `None` |
| `phase_dist` | `str` | Distribution for sampling the frequency vector θ: 'uniform' (θⱼ ~ Uniform[-π, π] → sinc kernel, default); 'gaussian' (θⱼ ~ N(0, 1) → Gaussian kernel approximation); 'laplace' (θⱼ ~ Laplace(0, 1) → exponential kernel, heavy tails); 'cauchy' (θⱼ ~ Cauchy(0, 1) → very heavy tails, long-range); 'student' (θⱼ ~ Student-t(df=3) → moderate tails, robust). Different distributions induce different similarity kernels, affecting generalization. | `'uniform'` |
| `mixture_bandwidths` | `list[float] or None` | List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding. Instead of a single bandwidth β, uses the weighted combination z_mix(x) = Σₖ αₖ · φ^(βₖ·x), where αₖ are mixture_weights. This creates a multi-scale representation combining coarse (small β) and fine (large β) kernels. Example: mixture_bandwidths = [0.01, 0.1, 1.0, 10.0] combines local and global similarity. | `None` |
| `mixture_weights` | `list[float] or None` | Weights αₖ for each bandwidth in the mixture (must sum to 1). If None and mixture_bandwidths is provided, uses uniform weights αₖ = 1/K. Weights can be hand-crafted (domain knowledge), learned via `learn_mixture_weights()` (ridge regression), or left uniform (default). | `None` |
Raises:
| Type | Description |
|---|---|
| `ValueError` | If phase_dist is not in the valid set, or if mixture_weights/mixture_bandwidths have mismatched lengths. |
Notes
Mathematical Foundation:
Fractional Power Encoding maps a scalar x to a hypervector via z(x) = φ^(β·x_normalized), where:
- φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθₐ)] is the base phasor (D dimensions)
- θⱼ are random frequencies sampled from phase_dist
- x_normalized ∈ [0, 1] is x mapped to the unit interval
- β is the bandwidth parameter
Inner Product Kernel:
For the uniform phase distribution θⱼ ~ Uniform[-π, π]:
⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)
This sinc kernel has important properties:
- Smooth interpolation between similar values
- Exact at x₁ = x₂ (similarity = 1)
- Decreases monotonically with distance
- Zero-crossings at integer multiples of 1/β
Comparison to Random Fourier Features:
FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007) for kernel approximation:
k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D
where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θₐx), sin(θₐx)].
For complex hypervectors, FPE uses complex exponentials instead:
φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθₐx)]
which provides a more compact representation and supports exact fractional power operations in the frequency domain (a self-contained check follows below).
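The equivalence above can be illustrated without the library: for θⱼ ~ Uniform[-π, π], the averaged complex exponential reproduces the sinc kernel. A self-contained NumPy sketch (np.sinc uses the normalized convention sin(πx)/(πx)):

```python
import numpy as np

rng = np.random.default_rng(0)
D = 100_000
theta = rng.uniform(-np.pi, np.pi, size=D)   # random frequencies θⱼ

def z(x: float) -> np.ndarray:
    """Complex FPE feature map: z(x) = [e^(i·θ₁·x), ..., e^(i·θ_D·x)]."""
    return np.exp(1j * theta * x)

for d in (0.25, 0.5, 1.0, 2.0):
    # ⟨z(x₁), z(x₂)⟩ / D with x₁ = 0, x₂ = d; vdot conjugates the first arg
    est = np.vdot(z(0.0), z(d)).real / D
    print(f"d={d:.2f}  estimate={est:+.4f}  sinc(d)={np.sinc(d):+.4f}")
```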
References
- Frady et al. (2021): "Computing on Functions Using Randomized Vector Representations" - Original FPE paper
- Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
- Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
- Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"
Examples:
>>> # Basic FPE for temperature encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> temp_25 = encoder.encode(25.0)
>>> temp_26 = encoder.encode(26.0)
>>> similarity = model.similarity(temp_25, temp_26) # ≈ 0.95
>>> # Multi-scale mixture encoding
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
... mixture_weights=[0.4, 0.3, 0.2, 0.1] # Emphasize coarse scales
... )
>>> # Alternative kernel via phase distribution
>>> encoder_gauss = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... phase_dist='gaussian' # Gaussian kernel instead of sinc
... )
Source code in holovec/encoders/scalar.py
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
bandwidth: float = 1.0,
seed: int | None = None,
phase_dist: str = "uniform",
mixture_bandwidths: list[float] | None = None,
mixture_weights: list[float] | None = None,
):
"""
Initialize FractionalPowerEncoder.
Parameters
----------
model : VSAModel
VSA model (FHRR or HRR). FHRR (complex-valued) is preferred for
exact fractional powers. HRR (real-valued) uses cosine projection.
min_val : float
Minimum value of encoding range. Values below this will be clipped.
max_val : float
Maximum value of encoding range. Values above this will be clipped.
bandwidth : float, optional
Bandwidth parameter β controlling kernel width (default: 1.0).
**Mathematical Role:**
- Encoding: z(x) = φ^(β·x_normalized)
- Kernel: K(x₁, x₂) ≈ sinc(β·π·|x₁ - x₂|) for uniform phase distribution
- Smaller β → wider kernel → more generalization
- Larger β → narrower kernel → more discrimination
**Typical Values:**
- β = 0.01: Wide kernel, high generalization (classification)
- β = 1.0: Medium kernel (default)
- β = 10.0: Narrow kernel, low generalization (regression)
seed : int or None, optional
Random seed for generating base phasor (for reproducibility).
Different seeds produce different random frequency vectors θ.
phase_dist : str, optional
Distribution for sampling frequency vector θ (default: 'uniform').
**Available Distributions:**
- 'uniform': θⱼ ~ Uniform[-π, π] → sinc kernel (default)
- 'gaussian': θⱼ ~ N(0, 1) → Gaussian kernel approximation
- 'laplace': θⱼ ~ Laplace(0, 1) → Exponential kernel, heavy tails
- 'cauchy': θⱼ ~ Cauchy(0, 1) → Very heavy tails, long-range
- 'student': θⱼ ~ Student-t(df=3) → Moderate tails, robust
Different distributions induce different similarity kernels,
affecting generalization properties.
mixture_bandwidths : List[float] or None, optional
List of K bandwidth values [β₁, β₂, ..., βₖ] for mixture encoding.
**Mixture Encoding:**
Instead of single bandwidth β, use weighted combination:
z_mix(x) = Σₖ αₖ · φ^(βₖ·x)
where αₖ are mixture_weights. This creates multi-scale representation
combining coarse (small β) and fine (large β) kernels.
**Example:**
mixture_bandwidths = [0.01, 0.1, 1.0, 10.0] # 4 scales
Creates encoding with both local and global similarity.
mixture_weights : List[float] or None, optional
Weights αₖ for each bandwidth in mixture (must sum to 1).
If None and mixture_bandwidths is provided, uses uniform weights:
αₖ = 1/K for all k
Weights can be:
1. Hand-crafted (domain knowledge)
2. Learned via `learn_mixture_weights()` (ridge regression)
3. Uniform (default)
Raises
------
ValueError
If phase_dist not in valid set, or if mixture_weights/mixture_bandwidths
have mismatched lengths.
Notes
-----
**Mathematical Foundation:**
Fractional Power Encoding maps scalar x to hypervector via:
z(x) = φ^(β·x_normalized)
where:
- φ = [e^(iθ₁), e^(iθ₂), ..., e^(iθₐ)] is base phasor (D dimensions)
- θⱼ are random frequencies sampled from phase_dist
- x_normalized ∈ [0, 1] is x mapped to unit interval
- β is bandwidth parameter
**Inner Product Kernel:**
For uniform phase distribution θⱼ ~ Uniform[-π, π]:
⟨z(x₁), z(x₂)⟩ / D ≈ sinc(β·π·|x₁ - x₂|)
This sinc kernel has important properties:
- Smooth interpolation between similar values
- Exact at x₁ = x₂ (similarity = 1)
- Decreases monotonically with distance
- Zero-crossings at integer multiples of 1/β
**Comparison to Random Fourier Features:**
FPE is equivalent to Random Fourier Features (Rahimi & Recht, 2007)
for kernel approximation:
k(x₁, x₂) ≈ φ(x₁)ᵀφ(x₂) / D
where φ(x) = [cos(θ₁x), sin(θ₁x), ..., cos(θₐx), sin(θₐx)]
For complex hypervectors, FPE uses complex exponentials instead:
φ(x) = [e^(iθ₁x), e^(iθ₂x), ..., e^(iθₐx)]
which provides more compact representation and supports exact
fractional power operations in frequency domain.
References
----------
- Frady et al. (2021): "Computing on Functions Using Randomized
Vector Representations" - Original FPE paper
- Rahimi & Recht (2007): "Random Features for Large-Scale Kernel Machines"
- Sutherland & Schneider (2015): "On the Error of Random Fourier Features"
- Verges et al. (2025): "Learning Encoding Phasors with Fractional Power Encoding"
Examples
--------
>>> # Basic FPE for temperature encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> temp_25 = encoder.encode(25.0)
>>> temp_26 = encoder.encode(26.0)
>>> similarity = model.similarity(temp_25, temp_26) # ≈ 0.95
>>> # Multi-scale mixture encoding
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 0.1, 1.0, 10.0],
... mixture_weights=[0.4, 0.3, 0.2, 0.1] # Emphasize coarse scales
... )
>>> # Alternative kernel via phase distribution
>>> encoder_gauss = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... phase_dist='gaussian' # Gaussian kernel instead of sinc
... )
"""
super().__init__(model, min_val, max_val)
self.bandwidth = bandwidth
self.seed = seed
# Distribution controls for frequencies (theta)
self.phase_dist = (phase_dist or "uniform").lower()
valid = {"uniform", "gaussian", "laplace", "cauchy", "student"}
if self.phase_dist not in valid:
raise ValueError(f"Unsupported phase_dist '{phase_dist}'. Choose from {sorted(valid)}.")
# Mixture support (optional)
self.mixture_bandwidths = mixture_bandwidths
self.mixture_weights = mixture_weights
if self.mixture_bandwidths is not None:
if len(self.mixture_bandwidths) == 0:
raise ValueError("mixture_bandwidths must be non-empty if provided")
if self.mixture_weights is None:
self.mixture_weights = [1.0 / len(self.mixture_bandwidths)] * len(self.mixture_bandwidths)
if len(self.mixture_weights) != len(self.mixture_bandwidths):
raise ValueError("mixture_weights must match mixture_bandwidths length")
# Normalize weights
s = sum(self.mixture_weights)
if s <= 0:
raise ValueError("mixture_weights must sum to positive value")
self.mixture_weights = [w / s for w in self.mixture_weights]
# Check complex vs real
self.is_complex = self.model.space.space_name == "complex"
# Base phases/frequencies θ_j
# For uniform, we can derive from a random phasor; for others, sample numeric theta
if self.phase_dist == "uniform":
# Maintain backward compatibility using base phasor
self.base_phasor = self._generate_base_phasor(seed)
# Derive angles from the base phasor
self.theta = self.backend.angle(self.base_phasor)
else:
# Numeric theta sampled in init; store as backend array
self.theta = self._generate_theta_distribution(self.phase_dist, seed)
# For complex path we do not need base_phasor; for real path, we’ll compute cos(theta * exponent)
self.base_phasor = None
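A hedged usage sketch for the learned-weights path mentioned above; the values and labels are purely illustrative:

```python
from holovec import VSA
from holovec.encoders.scalar import FractionalPowerEncoder

model = VSA.create('FHRR', dim=4096, seed=0)
enc = FractionalPowerEncoder(
    model, min_val=0.0, max_val=100.0,
    mixture_bandwidths=[0.1, 1.0, 10.0],   # K = 3 scales, uniform weights to start
)

# Illustrative training data: three well-separated scalar classes
values = [5.0, 7.0, 9.0, 48.0, 50.0, 52.0, 90.0, 93.0, 96.0]
labels = [0, 0, 0, 1, 1, 1, 2, 2, 2]

alphas = enc.learn_mixture_weights(values, labels, reg=1e-3)
print(alphas)   # K nonnegative weights summing to 1, stored on the encoder
```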
__repr__()
String representation.
Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
"""String representation."""
return (
f"FractionalPowerEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"bandwidth={self.bandwidth}, "
f"phase_dist={self.phase_dist}, "
f"mixture={'yes' if self.mixture_bandwidths else 'no'}, "
f"dimension={self.dimension})"
)
decode(hypervector, resolution=1000, max_iterations=100, tolerance=1e-06)
Decode hypervector back to scalar value using two-stage optimization.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `hypervector` | `Array` | Hypervector to decode (typically a noisy/bundled encoding). | required |
| `resolution` | `int` | Number of grid points for coarse search. Higher resolution improves the initial guess but increases cost. | `1000` |
| `max_iterations` | `int` | Maximum gradient descent iterations. Typical convergence: 20-50 iterations. | `100` |
| `tolerance` | `float` | Convergence tolerance for gradient descent. Stop when \|Δx\| < tolerance. | `1e-06` |
Returns:
| Type | Description |
|---|---|
| `float` | Decoded scalar value in [min_val, max_val]. |
Notes
Decoding Algorithm:
Uses two-stage optimization to find the value x maximizing similarity:
x* = argmax_x ⟨encode(x), hypervector⟩
Stage 1: Coarse Grid Search (O(resolution · D))
- Evaluate similarity at `resolution` uniformly-spaced points
- Find x₀ with the highest similarity
- Provides a good initialization for gradient descent
Stage 2: Gradient Descent (O(max_iterations · D))
- Starting from x₀, perform gradient ascent: x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
- Gradient computed via finite differences: ∇_x ≈ (sim(x + ε) - sim(x)) / ε
- Step size η_t decays: η_t = η₀ · 0.95^t (prevents oscillation)
- Clips updates to the [0, 1] normalized range
Why This Works:
For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
- The similarity function is unimodal (single peak)
- The peak occurs at x = x_true (the encoded value)
- Gradient descent converges to the global maximum
However, for noisy hypervectors (e.g., bundled encodings):
- Multiple local maxima may exist
- Coarse search reduces the chance of a local-optimum trap
- Wider kernels (small β) → smoother objective → easier optimization
Approximation Quality:
Decoding accuracy depends on several factors:
1. Dimension D: higher D → more accurate encoding → better decoding
- D = 1000: moderate accuracy (similarity ≈ 0.85)
- D = 10000: high accuracy (similarity ≈ 0.99)
2. Signal-to-noise ratio: clean encoding vs bundled/noisy
- Clean: near-perfect recovery (error < 1%)
- Bundled (10 items): good recovery (error ≈ 5-10%)
- Bundled (100 items): degraded (error ≈ 20-30%)
3. Bandwidth β: wider kernels → smoother similarity landscape
- β = 0.01: very smooth, easy to optimize
- β = 10.0: narrow kernel, may have local maxima
4. Mixture encoding: multiple bandwidths complicate the landscape
- May require a finer grid search (higher resolution)
- May need more gradient descent iterations
Computational Cost:
Total operations: O(resolution · D + max_iterations · D)
Typical values: resolution = 1000, max_iterations = 100, D = 10000 → ~1.1M evaluations; runtime ~0.1-1.0 s (CPU), ~0.01-0.1 s (GPU).
For real-time applications, reduce resolution or max_iterations: resolution = 100 (coarser search), max_iterations = 20 (early stopping).
Comparison to Other Decoders:
- Codebook Lookup (LevelEncoder): O(K · D) for K levels. Faster but discrete, no interpolation.
- Resonator Network (cleanup): O(iterations · M · D) for M items. Better for structured/compositional decoding.
- FPE Gradient Descent: O(resolution · D + iterations · D). Best for continuous scalar recovery.
References
- Frady et al. (2021): "Computing on Functions Using Randomized Vector Representations" - Section on FPE decoding
- Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent methods and convergence analysis
Examples:
>>> # Basic decoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv = encoder.encode(25.0)
>>> decoded = encoder.decode(hv)
>>> print(f"Decoded: {decoded:.2f}") # ≈ 25.00
>>> # Decoding noisy hypervector (bundled encoding)
>>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
>>> decoded_bundle = encoder.decode(hv_bundle)
>>> print(f"Decoded bundle: {decoded_bundle:.2f}") # ≈ 25.5
>>> # Fast decoding (lower resolution/iterations)
>>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
Source code in holovec/encoders/scalar.py
def decode(
self,
hypervector: Array,
resolution: int = 1000,
max_iterations: int = 100,
tolerance: float = 1e-6
) -> float:
"""
Decode hypervector back to scalar value using two-stage optimization.
Parameters
----------
hypervector : Array
Hypervector to decode (typically a noisy/bundled encoding).
resolution : int, optional
Number of grid points for coarse search (default: 1000).
Higher resolution improves initial guess but increases cost.
max_iterations : int, optional
Maximum gradient descent iterations (default: 100).
Typical convergence: 20-50 iterations.
tolerance : float, optional
Convergence tolerance for gradient descent (default: 1e-6).
Stop when |Δx| < tolerance.
Returns
-------
float
Decoded scalar value in [min_val, max_val].
Notes
-----
**Decoding Algorithm:**
Uses two-stage optimization to find value x maximizing similarity:
x* = argmax_x ⟨encode(x), hypervector⟩
**Stage 1: Coarse Grid Search** (O(resolution · D))
- Evaluate similarity at `resolution` uniformly-spaced points
- Find x₀ with highest similarity
- Provides good initialization for gradient descent
**Stage 2: Gradient Descent** (O(max_iterations · D))
- Starting from x₀, perform gradient ascent:
x_{t+1} = x_t + η_t · ∇_x ⟨encode(x_t), hypervector⟩
- Gradient computed via finite differences:
∇_x ≈ (sim(x + ε) - sim(x)) / ε
- Step size η_t decays: η_t = η_0 · 0.95^t (prevents oscillation)
- Clips updates to [0, 1] normalized range
**Why This Works:**
For FPE with sinc kernel K(x₁, x₂) = sinc(β·π·|x₁ - x₂|):
- Similarity function is unimodal (single peak)
- Peak occurs at x = x_true (encoded value)
- Gradient descent converges to global maximum
However, for noisy hypervectors (e.g., bundled encodings):
- Multiple local maxima may exist
- Coarse search reduces chance of local minimum trap
- Wider kernels (small β) → smoother objective → easier optimization
**Approximation Quality:**
Decoding accuracy depends on several factors:
1. **Dimension D**: Higher D → more accurate encoding → better decoding
- D = 1000: Moderate accuracy (similarity ≈ 0.85)
- D = 10000: High accuracy (similarity ≈ 0.99)
2. **Signal-to-Noise Ratio**: Clean encoding vs bundled/noisy
- Clean: Near-perfect recovery (error < 1%)
- Bundled (10 items): Good recovery (error ≈ 5-10%)
- Bundled (100 items): Degraded (error ≈ 20-30%)
3. **Bandwidth β**: Wider kernels → smoother similarity landscape
- β = 0.01: Very smooth, easy to optimize
- β = 10.0: Narrow kernel, may have local maxima
4. **Mixture Encoding**: Multiple bandwidths complicate landscape
- May require finer grid search (higher resolution)
- May need more gradient descent iterations
**Computational Cost:**
Total operations: O(resolution · D + max_iterations · D)
Typical values:
- resolution = 1000, max_iterations = 100, D = 10000
- Total: ~1.1M evaluations
- Runtime: ~0.1-1.0 seconds (CPU), ~0.01-0.1 seconds (GPU)
For real-time applications, reduce resolution or max_iterations:
- resolution = 100 (coarser search)
- max_iterations = 20 (early stopping)
**Comparison to Other Decoders:**
- **Codebook Lookup** (LevelEncoder): O(K · D) for K levels
Faster but discrete, no interpolation
- **Resonator Network** (cleanup): O(iterations · M · D) for M items
Better for structured/compositional decoding
- **FPE Gradient Descent**: O(resolution · D + iterations · D)
Best for continuous scalar recovery
References
----------
- Frady et al. (2021): "Computing on Functions Using Randomized
Vector Representations" - Section on FPE decoding
- Nocedal & Wright (2006): "Numerical Optimization" - Gradient descent
methods and convergence analysis
Examples
--------
>>> # Basic decoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv = encoder.encode(25.0)
>>> decoded = encoder.decode(hv)
>>> print(f"Decoded: {decoded:.2f}") # ≈ 25.00
>>> # Decoding noisy hypervector (bundled encoding)
>>> hv_bundle = model.bundle([encoder.encode(25.0), encoder.encode(26.0)])
>>> decoded_bundle = encoder.decode(hv_bundle)
>>> print(f"Decoded bundle: {decoded_bundle:.2f}") # ≈ 25.5
>>> # Fast decoding (lower resolution/iterations)
>>> decoded_fast = encoder.decode(hv, resolution=100, max_iterations=20)
"""
# Coarse search: evaluate on grid
normalized_grid = self.backend.linspace(0, 1, resolution)
best_similarity = -float('inf')
best_normalized = 0.5 # Start in middle
for norm_val_np in self.backend.to_numpy(normalized_grid):
norm_val = float(norm_val_np)
encoded = self.encode(self.denormalize(norm_val))
similarity = float(
self.backend.to_numpy(
self.model.similarity(encoded, hypervector)
)
)
if similarity > best_similarity:
best_similarity = similarity
best_normalized = norm_val
# Fine search: gradient descent around best coarse value
# For simplicity, use finite differences for gradient
current = best_normalized
step_size = 0.01
for _ in range(max_iterations):
# Evaluate at current position
encoded_curr = self.encode(self.denormalize(current))
sim_curr = float(
self.backend.to_numpy(
self.model.similarity(encoded_curr, hypervector)
)
)
# Evaluate at current + epsilon
epsilon = 1e-4
encoded_plus = self.encode(self.denormalize(current + epsilon))
sim_plus = float(
self.backend.to_numpy(
self.model.similarity(encoded_plus, hypervector)
)
)
# Compute gradient
gradient = (sim_plus - sim_curr) / epsilon
# Update (gradient ascent)
new_current = current + step_size * gradient
# Clip to [0, 1]
new_current = max(0.0, min(1.0, new_current))
# Check convergence
if abs(new_current - current) < tolerance:
break
current = new_current
step_size *= 0.95 # Decay step size
# Denormalize and return
return self.denormalize(current)
encode(value)
Encode scalar value to hypervector using fractional power.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `value` | `float` | Scalar value to encode. Will be clipped to [min_val, max_val]. | required |
Returns:
| Type | Description |
|---|---|
| `Array` | Encoded hypervector of shape (dimension,) in backend format. |
Notes
Single Bandwidth Encoding:
For a single bandwidth β, implements z(x) = φ^(β·x_normalized), where:
- x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
- φ = [e^(iθ₁), ..., e^(iθₐ)] is the base phasor with random frequencies θⱼ
- The result is normalized according to the model's space
Element-wise computation:
z_j(x) = e^(i·θⱼ·β·x_normalized) (complex models)
z_j(x) = cos(θⱼ·β·x_normalized) (real models)
Mixture Encoding:
When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses the weighted sum z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized), where αₖ are mixture_weights (default: uniform αₖ = 1/K).
Advantages of Mixture Encoding:
1. Multi-scale representation: combines coarse (small β) and fine (large β) similarity kernels in a single hypervector
2. Improved generalization: coarse scales provide robustness, fine scales provide discrimination
3. Learned weights: weights αₖ can be learned via `learn_mixture_weights()` to optimize for a specific task
4. Kernel combination: a mixture is equivalent to combining multiple kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)
Computational Complexity:
- Single bandwidth: O(D) operations (element-wise exponential)
- Mixture with K bandwidths: O(K·D) operations
- Backend operations (exp, multiply) are vectorized/GPU-accelerated
Normalization:
Output is normalized using the model's normalization scheme:
- FHRR/HRR: L2 normalization (unit norm)
- MAP: element-wise normalization
- BSC/BSDC: no normalization (binary)
This ensures hypervectors are in a valid space for subsequent binding/bundling operations.
Examples:
>>> # Basic encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv_25 = encoder.encode(25.0) # Encode temperature 25°C
>>> hv_26 = encoder.encode(26.0)
>>> similarity = model.similarity(hv_25, hv_26)
>>> print(f"Similarity: {similarity:.3f}") # ≈ 0.950 (close values)
>>> # Mixture encoding for multi-scale representation
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 1.0, 100.0]
... )
>>> hv_mix = encoder_mix.encode(25.0) # Combines 3 scales
>>> # Effect of bandwidth on similarity
>>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
>>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
>>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
>>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
>>> # sim_wide > sim_narrow (wider kernel → more generalization)
Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
"""
Encode scalar value to hypervector using fractional power.
Parameters
----------
value : float
Scalar value to encode. Will be clipped to [min_val, max_val].
Returns
-------
Array
Encoded hypervector of shape (dimension,) in backend format.
Notes
-----
**Single Bandwidth Encoding:**
For single bandwidth β, implements:
z(x) = φ^(β·x_normalized)
where:
- x_normalized = (value - min_val) / (max_val - min_val) ∈ [0, 1]
- φ = [e^(iθ₁), ..., e^(iθₐ)] is base phasor with random frequencies θⱼ
- Result is normalized according to model's space
Element-wise computation:
z_j(x) = e^(i·θⱼ·β·x_normalized) (complex models)
z_j(x) = cos(θⱼ·β·x_normalized) (real models)
**Mixture Encoding:**
When mixture_bandwidths = [β₁, ..., βₖ] is provided, uses weighted sum:
z_mix(x) = Σₖ αₖ · φ^(βₖ·x_normalized)
where αₖ are mixture_weights (default: uniform αₖ = 1/K).
**Advantages of Mixture Encoding:**
1. **Multi-Scale Representation**: Combines coarse (small β) and
fine (large β) similarity kernels in single hypervector
2. **Improved Generalization**: Coarse scales provide robustness,
fine scales provide discrimination
3. **Learned Weights**: Weights αₖ can be learned via
`learn_mixture_weights()` to optimize for specific task
4. **Kernel Combination**: Mixture is equivalent to combining
multiple kernels: K_mix(d) = Σₖ αₖ·K_βₖ(d)
**Computational Complexity:**
- Single bandwidth: O(D) operations (element-wise exponential)
- Mixture with K bandwidths: O(K·D) operations
- Backend operations (exp, multiply) are vectorized/GPU-accelerated
**Normalization:**
Output is normalized using model's normalization scheme:
- FHRR/HRR: L2 normalization (unit norm)
- MAP: Element-wise normalization
- BSC/BSDC: No normalization (binary)
This ensures hypervectors are in valid space for subsequent
binding/bundling operations.
Examples
--------
>>> # Basic encoding
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> hv_25 = encoder.encode(25.0) # Encode temperature 25°C
>>> hv_26 = encoder.encode(26.0)
>>> similarity = model.similarity(hv_25, hv_26)
>>> print(f"Similarity: {similarity:.3f}") # ≈ 0.950 (close values)
>>> # Mixture encoding for multi-scale representation
>>> encoder_mix = FractionalPowerEncoder(
... model, min_val=0, max_val=100,
... mixture_bandwidths=[0.01, 1.0, 100.0]
... )
>>> hv_mix = encoder_mix.encode(25.0) # Combines 3 scales
>>> # Effect of bandwidth on similarity
>>> enc_wide = FractionalPowerEncoder(model, 0, 100, bandwidth=0.1)
>>> enc_narrow = FractionalPowerEncoder(model, 0, 100, bandwidth=10.0)
>>> sim_wide = model.similarity(enc_wide.encode(25), enc_wide.encode(30))
>>> sim_narrow = model.similarity(enc_narrow.encode(25), enc_narrow.encode(30))
>>> # sim_wide > sim_narrow (wider kernel → more generalization)
"""
# Normalize value to [0, 1]
normalized = self.normalize(value)
# Handle mixture: list of beta_k and weights alpha_k
betas: list[float]
alphas: list[float]
if self.mixture_bandwidths is not None:
betas = list(self.mixture_bandwidths)
alphas = list(self.mixture_weights or [])
else:
betas = [self.bandwidth]
alphas = [1.0]
parts = []
for alpha, beta in zip(alphas, betas, strict=True):
exponent = beta * normalized
if self.is_complex:
# Complex: encode as exp(i * theta * exponent)
theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
phase = self.backend.multiply_scalar(theta, exponent)
phasor = self.backend.exp(1j * phase)
parts.append(self.backend.multiply_scalar(phasor, alpha))
else:
# Real: use cosine features directly: cos(theta * exponent)
theta = self.theta if self.theta is not None else self.backend.angle(self.base_phasor)
phase = self.backend.multiply_scalar(theta, exponent)
# cos(phase) = Re(exp(i*phase))
phasor = self.backend.real(self.backend.exp(1j * phase))
parts.append(self.backend.multiply_scalar(phasor, alpha))
if len(parts) == 1:
encoded = parts[0]
else:
encoded = self.backend.sum(self.backend.stack(parts, axis=0), axis=0)
# Normalize output according to space
return self.model.normalize(encoded)
learn_mixture_weights(values, labels, reg=0.001)
Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple ridge-style objective that aligns encoded mixtures to per-class prototypes.
Approach:
- Build class prototypes p_c as the mean of current encodings (using current weights)
- For each sample i, compute per-band encodings E_i = [e_{i1}, ..., e_{iK}] (shape d×K)
- Solve (Σ E_i^T E_i + reg·I) α = Σ E_i^T p_{y_i}
- Project α onto the simplex (nonnegative, sum = 1)
Args:
values: list of scalar inputs
labels: list of integer class labels (same length as values)
reg: L2 regularization strength (default 1e-3)
Returns: Learned mixture weights (list of floats summing to 1)
Notes:
- Requires mixture_bandwidths to be set (K >= 2)
- Uses numpy for solving the normal equations; the backend remains unchanged
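A minimal usage sketch on a toy two-class problem (the values, labels, and bandwidths below are illustrative, not taken from the library's tests):
>>> model = VSA.create('FHRR', dim=10000)
>>> enc = FractionalPowerEncoder(model, min_val=0, max_val=100,
...                              mixture_bandwidths=[0.1, 1.0, 10.0])
>>> values = [5.0, 8.0, 90.0, 95.0]  # two well-separated clusters
>>> labels = [0, 0, 1, 1]            # integer class label per sample
>>> alphas = enc.learn_mixture_weights(values, labels, reg=1e-3)
>>> # alphas is nonnegative and sums to 1 (projected onto the simplex)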
Source code in holovec/encoders/scalar.py
def learn_mixture_weights(
self,
values: list[float],
labels: list[int],
reg: float = 1e-3,
) -> list[float]:
"""
Learn mixture weights (alphas) for fixed mixture_bandwidths using a simple
ridge-style objective that aligns encoded mixtures to per-class prototypes.
Approach:
- Build class prototypes p_c as the mean of current encodings (using current weights)
- For each sample i, compute per-band encodings E_i = [e_{i1},...,e_{iK}] (shape d×K)
- Solve (Σ E_i^T E_i + reg I) α = Σ E_i^T p_{y_i}
- Project α onto simplex (nonnegative, sum=1)
Args:
values: list of scalar inputs
labels: list of integer class labels (same length as values)
reg: L2 regularization strength (default 1e-3)
Returns:
Learned mixture weights (list of floats summing to 1)
Notes:
- Requires mixture_bandwidths to be set (K>=2)
- Uses numpy for solving normal equations; backend remains unchanged
"""
import numpy as _np
if self.mixture_bandwidths is None or len(self.mixture_bandwidths) < 2:
raise ValueError("learn_mixture_weights requires mixture_bandwidths with K >= 2")
# Prepare classes and group samples
values = list(values)
labels = list(labels)
if len(values) != len(labels):
raise ValueError("values and labels must have same length")
classes = sorted(set(labels))
K = len(self.mixture_bandwidths)
# Build current encodings to compute class prototypes (using current mixture weights)
encodings = [self.encode(v) for v in values]
# Convert to numpy arrays for prototype computation
enc_np = [_np.array(self.model.backend.to_numpy(e)) for e in encodings]
# Class prototypes: mean of encodings per class (vector length d)
prototypes = {}
for c in classes:
idxs = [i for i, y in enumerate(labels) if y == c]
if not idxs:
continue
prototypes[c] = _np.mean(_np.stack([enc_np[i] for i in idxs], axis=0), axis=0)
# Helper to compute per-band encodings matrix E_i (d×K) for a value
def _per_band_matrix(val: float) -> _np.ndarray:
assert self.mixture_bandwidths is not None
norm = self.normalize(val)
cols = []
for beta in self.mixture_bandwidths:
exponent = beta * norm
theta = self.theta if self.theta is not None else self.model.backend.angle(self.base_phasor)
phase = self.model.backend.multiply_scalar(theta, exponent)
if self.is_complex:
ph = self.model.backend.exp(1j * phase)
col = self.model.backend.to_numpy(ph)
else:
col = self.model.backend.to_numpy(self.model.backend.real(self.model.backend.exp(1j * phase)))
cols.append(_np.array(col))
# Stack columns to d×K
return _np.stack(cols, axis=1)
# Accumulate normal equations
A = _np.zeros((K, K), dtype=_np.float64)
b = _np.zeros((K,), dtype=_np.float64)
for v, y in zip(values, labels, strict=True):
E = _per_band_matrix(v) # d×K
p = prototypes[y] # d
# E^T E and E^T p
A += E.T @ E
b += E.T @ p
# Regularization
A += reg * _np.eye(K, dtype=_np.float64)
# Solve
try:
alpha = _np.linalg.solve(A, b)
except _np.linalg.LinAlgError:
alpha = _np.linalg.lstsq(A, b, rcond=None)[0]
# Project to simplex (≥0, sum=1)
alpha = _np.maximum(alpha, 0.0)
s = float(_np.sum(alpha))
if s <= 0:
alpha = _np.ones_like(alpha) / len(alpha)
else:
alpha = alpha / s
# Update in encoder
self.mixture_weights = [float(a) for a in alpha.tolist()]
return self.mixture_weights
holovec.encoders.scalar.ThermometerEncoder
Bases: ScalarEncoder
Thermometer encoding for scalar values.
Divides value range into N bins and encodes a value as the bundle of all bins it exceeds. Creates monotonic similarity profile.
Simpler and more robust than FPE, but with coarser granularity. Works with all VSA models.
References: Kanerva (2009): "Hyperdimensional Computing"
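A usage sketch (the range and bin count are illustrative): nearby values share most of their activated bins, so similarity falls off monotonically with distance:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = ThermometerEncoder(model, min_val=0, max_val=100, n_bins=50)
>>> hv_20, hv_25, hv_80 = encoder.encode(20), encoder.encode(25), encoder.encode(80)
>>> model.similarity(hv_20, hv_25)  # higher: activated bins 0..10 vs 0..12 mostly overlap
>>> model.similarity(hv_20, hv_80)  # lower: bins 0..10 are a small fraction of 0..40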
Source code in holovec/encoders/scalar.py
class ThermometerEncoder(ScalarEncoder):
"""
Thermometer encoding for scalar values.
Divides value range into N bins and encodes a value as the bundle
of all bins it exceeds. Creates monotonic similarity profile.
Simpler and more robust than FPE, but with coarser granularity.
Works with all VSA models.
References:
Kanerva (2009): "Hyperdimensional Computing"
"""
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
n_bins: int = 100,
seed: int | None = None
):
"""
Initialize ThermometerEncoder.
Args:
model: VSA model (any)
min_val: Minimum value of encoding range
max_val: Maximum value of encoding range
n_bins: Number of bins to divide range into (default 100)
seed: Random seed for generating bin vectors
Raises:
ValueError: If n_bins < 2
"""
super().__init__(model, min_val, max_val)
if n_bins < 2:
raise ValueError(f"n_bins must be >= 2, got {n_bins}")
self.n_bins = n_bins
self.seed = seed
# Generate random vectors for each bin
self.bin_vectors = [
model.random(seed=seed + i if seed is not None else None)
for i in range(n_bins)
]
# Compute bin edges
self.bin_width = self.range / n_bins
def encode(self, value: float) -> Array:
"""
Encode scalar as bundle of all bins it exceeds.
Args:
value: Scalar value to encode
Returns:
Encoded hypervector (bundle of activated bins)
"""
# Normalize value
normalized = self.normalize(value)
# Determine which bin the value falls into
bin_index = int(normalized * self.n_bins)
bin_index = min(bin_index, self.n_bins - 1) # Handle edge case
# Bundle all bins from 0 to bin_index (inclusive)
if bin_index == 0:
return self.bin_vectors[0]
activated_bins = self.bin_vectors[:bin_index + 1]
return self.model.bundle(activated_bins)
def decode(self, hypervector: Array) -> float:
"""
Decode is not implemented for ThermometerEncoder.
Thermometer encoding is not easily reversible without
storing additional information.
Raises:
NotImplementedError: Always raises
"""
raise NotImplementedError(
"ThermometerEncoder does not support decoding. "
"Use FractionalPowerEncoder if decoding is required."
)
@property
def is_reversible(self) -> bool:
"""Thermometer encoding is not reversible."""
return False
@property
def compatible_models(self) -> list[str]:
"""Works with all VSA models."""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
def __repr__(self) -> str:
"""String representation."""
return (
f"ThermometerEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"n_bins={self.n_bins}, "
f"dimension={self.dimension})"
)
compatible_models
property
Works with all VSA models.
is_reversible
property
Thermometer encoding is not reversible.
__init__(model, min_val, max_val, n_bins=100, seed=None)
Initialize ThermometerEncoder.
Args:
model: VSA model (any)
min_val: Minimum value of encoding range
max_val: Maximum value of encoding range
n_bins: Number of bins to divide range into (default 100)
seed: Random seed for generating bin vectors
Raises: ValueError: If n_bins < 2
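Bin vectors are derived deterministically from the seed, so two encoders built with the same configuration agree exactly (a sketch; the seed value is illustrative):
>>> model = VSA.create('MAP', dim=10000)
>>> e1 = ThermometerEncoder(model, 0, 100, n_bins=50, seed=7)
>>> e2 = ThermometerEncoder(model, 0, 100, n_bins=50, seed=7)
>>> model.similarity(e1.encode(30), e2.encode(30))  # ≈ 1.0: identical bin vectors, identical bundles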
Source code in holovec/encoders/scalar.py
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
n_bins: int = 100,
seed: int | None = None
):
"""
Initialize ThermometerEncoder.
Args:
model: VSA model (any)
min_val: Minimum value of encoding range
max_val: Maximum value of encoding range
n_bins: Number of bins to divide range into (default 100)
seed: Random seed for generating bin vectors
Raises:
ValueError: If n_bins < 2
"""
super().__init__(model, min_val, max_val)
if n_bins < 2:
raise ValueError(f"n_bins must be >= 2, got {n_bins}")
self.n_bins = n_bins
self.seed = seed
# Generate random vectors for each bin
self.bin_vectors = [
model.random(seed=seed + i if seed is not None else None)
for i in range(n_bins)
]
# Compute bin edges
self.bin_width = self.range / n_bins
__repr__()
String representation.
Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
"""String representation."""
return (
f"ThermometerEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"n_bins={self.n_bins}, "
f"dimension={self.dimension})"
)
decode(hypervector)
Decode is not implemented for ThermometerEncoder.
Thermometer encoding is not easily reversible without storing additional information.
Raises: NotImplementedError: Always raises
Source code in holovec/encoders/scalar.py
def decode(self, hypervector: Array) -> float:
"""
Decode is not implemented for ThermometerEncoder.
Thermometer encoding is not easily reversible without
storing additional information.
Raises:
NotImplementedError: Always raises
"""
raise NotImplementedError(
"ThermometerEncoder does not support decoding. "
"Use FractionalPowerEncoder if decoding is required."
)
encode(value)
Encode scalar as bundle of all bins it exceeds.
Args: value: Scalar value to encode
Returns: Encoded hypervector (bundle of activated bins)
Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
"""
Encode scalar as bundle of all bins it exceeds.
Args:
value: Scalar value to encode
Returns:
Encoded hypervector (bundle of activated bins)
"""
# Normalize value
normalized = self.normalize(value)
# Determine which bin the value falls into
bin_index = int(normalized * self.n_bins)
bin_index = min(bin_index, self.n_bins - 1) # Handle edge case
# Bundle all bins from 0 to bin_index (inclusive)
if bin_index == 0:
return self.bin_vectors[0]
activated_bins = self.bin_vectors[:bin_index + 1]
return self.model.bundle(activated_bins)
holovec.encoders.scalar.LevelEncoder
Bases: ScalarEncoder
Level (codebook) encoding for discrete scalar values.
Maps discrete levels to random orthogonal vectors via lookup table. Fast (O(1) encode/decode) and exact for discrete values.
Best used when you have a small number of discrete values rather than continuous range.
Example:
>>> # Encode weekdays (7 discrete values)
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7)
>>> monday = encoder.encode(0)  # Exact encoding
>>> friday = encoder.encode(4)
Source code in holovec/encoders/scalar.py
class LevelEncoder(ScalarEncoder):
"""
Level (codebook) encoding for discrete scalar values.
Maps discrete levels to random orthogonal vectors via lookup table.
Fast (O(1) encode/decode) and exact for discrete values.
Best used when you have a small number of discrete values rather
than continuous range.
Example:
>>> # Encode weekdays (7 discrete values)
>>> model = VSA.create('FHRR', dim=10000)
>>> encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7)
>>> monday = encoder.encode(0) # Exact encoding
>>> friday = encoder.encode(4)
"""
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
n_levels: int,
seed: int | None = None
):
"""
Initialize LevelEncoder.
Args:
model: VSA model (any)
min_val: Minimum value (corresponds to level 0)
max_val: Maximum value (corresponds to level n_levels-1)
n_levels: Number of discrete levels
seed: Random seed for generating level vectors
Raises:
ValueError: If n_levels < 2
"""
super().__init__(model, min_val, max_val)
if n_levels < 2:
raise ValueError(f"n_levels must be >= 2, got {n_levels}")
self.n_levels = n_levels
self.seed = seed
# Generate random vector for each level
self.level_vectors = [
model.random(seed=seed + i if seed is not None else None)
for i in range(n_levels)
]
# Compute level width
self.level_width = self.range / (n_levels - 1)
def encode(self, value: float) -> Array:
"""
Encode scalar to nearest level's hypervector.
Args:
value: Scalar value to encode
Returns:
Hypervector corresponding to nearest level
"""
# Normalize to [0, 1]
normalized = self.normalize(value)
# Map to level index (round to nearest)
level_index = int(round(normalized * (self.n_levels - 1)))
level_index = max(0, min(level_index, self.n_levels - 1))
return self.level_vectors[level_index]
def decode(self, hypervector: Array) -> float:
"""
Decode hypervector to nearest level value.
Args:
hypervector: Hypervector to decode
Returns:
Decoded scalar value (will be one of the discrete levels)
"""
# Find most similar level vector
best_similarity = -float('inf')
best_level = 0
for level_idx, level_vec in enumerate(self.level_vectors):
similarity = float(
self.backend.to_numpy(
self.model.similarity(hypervector, level_vec)
)
)
if similarity > best_similarity:
best_similarity = similarity
best_level = level_idx
# Convert level index back to value
normalized = best_level / (self.n_levels - 1)
return self.denormalize(normalized)
@property
def is_reversible(self) -> bool:
"""Level encoding is reversible (to nearest level)."""
return True
@property
def compatible_models(self) -> list[str]:
"""Works with all VSA models."""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
def __repr__(self) -> str:
"""String representation."""
return (
f"LevelEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"n_levels={self.n_levels}, "
f"dimension={self.dimension})"
)
compatible_models
property
Works with all VSA models.
is_reversible
property
Level encoding is reversible (to nearest level).
__init__(model, min_val, max_val, n_levels, seed=None)
Initialize LevelEncoder.
Args:
model: VSA model (any)
min_val: Minimum value (corresponds to level 0)
max_val: Maximum value (corresponds to level n_levels-1)
n_levels: Number of discrete levels
seed: Random seed for generating level vectors
Raises: ValueError: If n_levels < 2
Source code in holovec/encoders/scalar.py
def __init__(
self,
model: VSAModel,
min_val: float,
max_val: float,
n_levels: int,
seed: int | None = None
):
"""
Initialize LevelEncoder.
Args:
model: VSA model (any)
min_val: Minimum value (corresponds to level 0)
max_val: Maximum value (corresponds to level n_levels-1)
n_levels: Number of discrete levels
seed: Random seed for generating level vectors
Raises:
ValueError: If n_levels < 2
"""
super().__init__(model, min_val, max_val)
if n_levels < 2:
raise ValueError(f"n_levels must be >= 2, got {n_levels}")
self.n_levels = n_levels
self.seed = seed
# Generate random vector for each level
self.level_vectors = [
model.random(seed=seed + i if seed is not None else None)
for i in range(n_levels)
]
# Compute level width
self.level_width = self.range / (n_levels - 1)
__repr__()
String representation.
Source code in holovec/encoders/scalar.py
def __repr__(self) -> str:
"""String representation."""
return (
f"LevelEncoder("
f"model={self.model.model_name}, "
f"range=[{self.min_val}, {self.max_val}], "
f"n_levels={self.n_levels}, "
f"dimension={self.dimension})"
)
decode(hypervector)
Decode hypervector to nearest level value.
Args: hypervector: Hypervector to decode
Returns: Decoded scalar value (will be one of the discrete levels)
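Because decoding snaps to the most similar stored level, an encode/decode round trip is exact for on-grid values (a sketch reusing the weekday setup from the class example):
>>> encoder = LevelEncoder(model, min_val=0, max_val=6, n_levels=7)
>>> hv = encoder.encode(4)
>>> encoder.decode(hv)  # 4.0 — the nearest discrete level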
Source code in holovec/encoders/scalar.py
def decode(self, hypervector: Array) -> float:
"""
Decode hypervector to nearest level value.
Args:
hypervector: Hypervector to decode
Returns:
Decoded scalar value (will be one of the discrete levels)
"""
# Find most similar level vector
best_similarity = -float('inf')
best_level = 0
for level_idx, level_vec in enumerate(self.level_vectors):
similarity = float(
self.backend.to_numpy(
self.model.similarity(hypervector, level_vec)
)
)
if similarity > best_similarity:
best_similarity = similarity
best_level = level_idx
# Convert level index back to value
normalized = best_level / (self.n_levels - 1)
return self.denormalize(normalized)
encode(value)
Encode scalar to nearest level's hypervector.
Args: value: Scalar value to encode
Returns: Hypervector corresponding to nearest level
Source code in holovec/encoders/scalar.py
def encode(self, value: float) -> Array:
"""
Encode scalar to nearest level's hypervector.
Args:
value: Scalar value to encode
Returns:
Hypervector corresponding to nearest level
"""
# Normalize to [0, 1]
normalized = self.normalize(value)
# Map to level index (round to nearest)
level_index = int(round(normalized * (self.n_levels - 1)))
level_index = max(0, min(level_index, self.n_levels - 1))
return self.level_vectors[level_index]
Sequence Encoders
holovec.encoders.sequence.PositionBindingEncoder
Bases: SequenceEncoder
Position binding encoder for sequences using permutation-based positions.
Based on Plate (2003) "Holographic Reduced Representations" and Schlegel et al. (2021) "A comparison of vector symbolic architectures".
Encodes sequences by applying a position-specific permutation to each element and bundling the results:
encode([A, B, C]) = ρ⁰(A) + ρ¹(B) + ρ²(C)
where ρ is the permutation operation and ρⁱ represents i applications (the first element uses i = 0).
This encoding is:
- Order-sensitive: Different positions create different bindings
- Variable-length: Works with any sequence length
- Partial-match capable: Similar sequences have similar encodings
Attributes:
codebook: Dictionary mapping symbols to hypervectors
auto_generate: Whether to auto-generate vectors for unknown symbols
seed: Base seed for generating consistent symbol vectors
Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = PositionBindingEncoder(model)
>>>
>>> # Encode a sequence of symbols
>>> seq = ['hello', 'world', '!']
>>> hv = encoder.encode(seq)
>>>
>>> # Similar sequences have high similarity
>>> seq2 = ['hello', 'world']
>>> hv2 = encoder.encode(seq2)
>>> model.similarity(hv, hv2)  # High (shared prefix)
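Order sensitivity can be checked directly (a sketch continuing the example above; exact similarity values vary with model and dimension):
>>> hv_ab = encoder.encode(['A', 'B'])
>>> hv_ba = encoder.encode(['B', 'A'])
>>> model.similarity(hv_ab, hv_ba)  # low: swapped positions give near-orthogonal encodings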
Source code in holovec/encoders/sequence.py
class PositionBindingEncoder(SequenceEncoder):
"""
Position binding encoder for sequences using permutation-based positions.
Based on Plate (2003) "Holographic Reduced Representations" and
Schlegel et al. (2021) "A comparison of vector symbolic architectures".
Encodes sequences by binding each element with a position-specific
permutation of a base position vector:
encode([A, B, C]) = bind(A, ρ¹) + bind(B, ρ²) + bind(C, ρ³)
where ρ is the permutation operation and ρⁱ represents i applications.
This encoding is:
- Order-sensitive: Different positions create different bindings
- Variable-length: Works with any sequence length
- Partial-match capable: Similar sequences have similar encodings
Attributes:
codebook: Dictionary mapping symbols to hypervectors
auto_generate: Whether to auto-generate vectors for unknown symbols
seed_offset: Offset for generating consistent symbol vectors
Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = PositionBindingEncoder(model)
>>>
>>> # Encode a sequence of symbols
>>> seq = ['hello', 'world', '!']
>>> hv = encoder.encode(seq)
>>>
>>> # Similar sequences have high similarity
>>> seq2 = ['hello', 'world']
>>> hv2 = encoder.encode(seq2)
>>> model.similarity(hv, hv2) # High (shared prefix)
"""
def __init__(
self,
model: VSAModel,
codebook: dict[Symbol, Array] | None = None,
max_length: int | None = None,
auto_generate: bool = True,
seed: int | None = None
):
"""
Initialize position binding encoder.
Args:
model: VSA model instance
codebook: Pre-defined symbol → hypervector mapping (optional)
max_length: Maximum sequence length (None for unlimited)
auto_generate: Auto-generate vectors for unknown symbols (default: True)
seed: Random seed for generating symbol vectors
Raises:
ValueError: If model is not compatible
"""
super().__init__(model, max_length)
self.codebook: dict[Symbol, Array] = dict(codebook) if codebook is not None else {}
self.auto_generate = auto_generate
self.seed = seed
def encode(self, sequence: list[str | int]) -> Array:
"""
Encode sequence using position binding.
Each element is bound with a position-specific permutation and
all bound pairs are bundled:
result = Σᵢ bind(element_i, permute(position_vector, i))
Args:
sequence: List of symbols (strings or integers) to encode
Returns:
Hypervector representing the sequence
Raises:
ValueError: If sequence is empty
ValueError: If sequence exceeds max_length
ValueError: If symbol not in codebook and auto_generate=False
Example:
>>> encoder.encode(['cat', 'sat', 'on', 'mat'])
"""
if not sequence:
raise ValueError("Cannot encode empty sequence")
if self.max_length is not None and len(sequence) > self.max_length:
raise ValueError(
f"Sequence length {len(sequence)} exceeds max_length {self.max_length}"
)
# Get or generate hypervectors for each symbol
symbol_vectors = []
for symbol in sequence:
if symbol not in self.codebook:
if not self.auto_generate:
raise ValueError(
f"Symbol '{symbol}' not in codebook and auto_generate=False"
)
# Generate new vector for this symbol
self.codebook[symbol] = self._generate_symbol_vector(symbol)
symbol_vectors.append(self.codebook[symbol])
# Bind each symbol with its position and bundle
position_bound = []
for i, symbol_vec in enumerate(symbol_vectors):
# Position encoding: permute by position index
# permute(vec, i) applies permutation i times
position_vec = self.model.permute(symbol_vec, k=i)
position_bound.append(position_vec)
# Bundle all position-bound vectors
sequence_hv = self.model.bundle(position_bound)
return sequence_hv
def decode(
self,
hypervector: Array,
max_positions: int = 10,
threshold: float = 0.3
) -> list[Symbol]:
"""
Decode sequence hypervector to recover symbols.
Uses cleanup memory approach: for each position, unpermute and
find most similar symbol in codebook.
Args:
hypervector: Sequence hypervector to decode
max_positions: Maximum positions to try decoding (default: 10)
threshold: Minimum similarity threshold for valid symbols (default: 0.3)
Returns:
List of decoded symbols (may be shorter than original)
Raises:
RuntimeError: If codebook is empty
Note:
Decoding is approximate and works best for sequences shorter
than max_positions with high SNR.
Example:
>>> encoded = encoder.encode(['a', 'b', 'c'])
>>> decoded = encoder.decode(encoded, max_positions=5)
>>> decoded # ['a', 'b', 'c'] (approximate)
"""
if not self.codebook:
raise RuntimeError("Cannot decode: codebook is empty")
# Convert codebook to symbol → vector for faster lookup
symbols = list(self.codebook.keys())
vectors = [self.codebook[s] for s in symbols]
decoded: list[Symbol] = []
for pos in range(max_positions):
# Unpermute by position to recover symbol at this position
unpermuted = self.model.unpermute(hypervector, k=pos)
# Find most similar symbol in codebook
best_similarity = -float('inf')
best_symbol = None
for symbol, symbol_vec in zip(symbols, vectors, strict=True):
sim = float(self.model.similarity(unpermuted, symbol_vec))
if sim > best_similarity:
best_similarity = sim
best_symbol = symbol
# Only include if above threshold
if best_similarity >= threshold:
assert best_symbol is not None
decoded.append(best_symbol)
else:
# No strong match - likely end of sequence
break
return decoded
def _generate_symbol_vector(self, symbol: str | int) -> Array:
"""
Generate a random hypervector for a new symbol.
Uses consistent seeding based on symbol to ensure reproducibility.
Args:
symbol: Symbol to generate vector for
Returns:
Random hypervector for this symbol
"""
payload = json.dumps(
{
"base_seed": self.seed,
"symbol_type": type(symbol).__name__,
"symbol_value": symbol,
},
ensure_ascii=False,
separators=(",", ":"),
sort_keys=True,
).encode("utf-8")
digest = hashlib.blake2b(payload, digest_size=8).digest()
symbol_seed = int.from_bytes(digest, byteorder="big") % (2**31 - 1)
return self.model.random(seed=symbol_seed)
def add_symbol(self, symbol: Symbol, vector: Array | None = None) -> None:
"""
Add a symbol to the codebook.
Args:
symbol: Symbol to add
vector: Hypervector to associate (generated if None)
Example:
>>> # Pre-define a vector for a special symbol
>>> special_vec = model.random(seed=42)
>>> encoder.add_symbol('<START>', special_vec)
"""
if vector is None:
vector = self._generate_symbol_vector(symbol)
self.codebook[symbol] = vector
def get_codebook_size(self) -> int:
"""
Get number of symbols in codebook.
Returns:
Number of symbols stored
"""
return len(self.codebook)
@property
def is_reversible(self) -> bool:
"""
PositionBindingEncoder supports approximate decoding.
Returns:
True (approximate decoding available)
"""
return True
@property
def compatible_models(self) -> list[str]:
"""
Works with all VSA models that support permutation.
Returns:
List of all model names
"""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
def __repr__(self) -> str:
"""String representation."""
return (
f"PositionBindingEncoder("
f"model={self.model.model_name}, "
f"codebook_size={len(self.codebook)}, "
f"max_length={self.max_length}, "
f"auto_generate={self.auto_generate})"
)
compatible_models
property
Works with all VSA models that support permutation.
Returns: List of all model names
is_reversible
property
PositionBindingEncoder supports approximate decoding.
Returns: True (approximate decoding available)
__init__(model, codebook=None, max_length=None, auto_generate=True, seed=None)
Initialize position binding encoder.
Args:
model: VSA model instance
codebook: Pre-defined symbol → hypervector mapping (optional)
max_length: Maximum sequence length (None for unlimited)
auto_generate: Auto-generate vectors for unknown symbols (default: True)
seed: Random seed for generating symbol vectors
Raises: ValueError: If model is not compatible
Source code in holovec/encoders/sequence.py
def __init__(
self,
model: VSAModel,
codebook: dict[Symbol, Array] | None = None,
max_length: int | None = None,
auto_generate: bool = True,
seed: int | None = None
):
"""
Initialize position binding encoder.
Args:
model: VSA model instance
codebook: Pre-defined symbol → hypervector mapping (optional)
max_length: Maximum sequence length (None for unlimited)
auto_generate: Auto-generate vectors for unknown symbols (default: True)
seed: Random seed for generating symbol vectors
Raises:
ValueError: If model is not compatible
"""
super().__init__(model, max_length)
self.codebook: dict[Symbol, Array] = dict(codebook) if codebook is not None else {}
self.auto_generate = auto_generate
self.seed = seed
__repr__()
String representation.
Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
"""String representation."""
return (
f"PositionBindingEncoder("
f"model={self.model.model_name}, "
f"codebook_size={len(self.codebook)}, "
f"max_length={self.max_length}, "
f"auto_generate={self.auto_generate})"
)
add_symbol(symbol, vector=None)
Add a symbol to the codebook.
Args:
symbol: Symbol to add
vector: Hypervector to associate (generated if None)
Example:
>>> # Pre-define a vector for a special symbol
>>> special_vec = model.random(seed=42)
>>> encoder.add_symbol('<START>', special_vec)
Source code in holovec/encoders/sequence.py
def add_symbol(self, symbol: Symbol, vector: Array | None = None) -> None:
"""
Add a symbol to the codebook.
Args:
symbol: Symbol to add
vector: Hypervector to associate (generated if None)
Example:
>>> # Pre-define a vector for a special symbol
>>> special_vec = model.random(seed=42)
>>> encoder.add_symbol('<START>', special_vec)
"""
if vector is None:
vector = self._generate_symbol_vector(symbol)
self.codebook[symbol] = vector
decode(hypervector, max_positions=10, threshold=0.3)
Decode sequence hypervector to recover symbols.
Uses cleanup memory approach: for each position, unpermute and find most similar symbol in codebook.
Args:
hypervector: Sequence hypervector to decode
max_positions: Maximum positions to try decoding (default: 10)
threshold: Minimum similarity threshold for valid symbols (default: 0.3)
Returns: List of decoded symbols (may be shorter than original)
Raises: RuntimeError: If codebook is empty
Note: Decoding is approximate and works best for sequences shorter than max_positions with high SNR.
Example:
>>> encoded = encoder.encode(['a', 'b', 'c'])
>>> decoded = encoder.decode(encoded, max_positions=5)
>>> decoded  # ['a', 'b', 'c'] (approximate)
Source code in holovec/encoders/sequence.py
def decode(
self,
hypervector: Array,
max_positions: int = 10,
threshold: float = 0.3
) -> list[Symbol]:
"""
Decode sequence hypervector to recover symbols.
Uses cleanup memory approach: for each position, unpermute and
find most similar symbol in codebook.
Args:
hypervector: Sequence hypervector to decode
max_positions: Maximum positions to try decoding (default: 10)
threshold: Minimum similarity threshold for valid symbols (default: 0.3)
Returns:
List of decoded symbols (may be shorter than original)
Raises:
RuntimeError: If codebook is empty
Note:
Decoding is approximate and works best for sequences shorter
than max_positions with high SNR.
Example:
>>> encoded = encoder.encode(['a', 'b', 'c'])
>>> decoded = encoder.decode(encoded, max_positions=5)
>>> decoded # ['a', 'b', 'c'] (approximate)
"""
if not self.codebook:
raise RuntimeError("Cannot decode: codebook is empty")
# Convert codebook to symbol → vector for faster lookup
symbols = list(self.codebook.keys())
vectors = [self.codebook[s] for s in symbols]
decoded: list[Symbol] = []
for pos in range(max_positions):
# Unpermute by position to recover symbol at this position
unpermuted = self.model.unpermute(hypervector, k=pos)
# Find most similar symbol in codebook
best_similarity = -float('inf')
best_symbol = None
for symbol, symbol_vec in zip(symbols, vectors, strict=True):
sim = float(self.model.similarity(unpermuted, symbol_vec))
if sim > best_similarity:
best_similarity = sim
best_symbol = symbol
# Only include if above threshold
if best_similarity >= threshold:
assert best_symbol is not None
decoded.append(best_symbol)
else:
# No strong match - likely end of sequence
break
return decoded
encode(sequence)
Encode sequence using position binding.
Each element is permuted according to its position index and all permuted vectors are bundled:
result = Σᵢ permute(elementᵢ, i)
Args: sequence: List of symbols (strings or integers) to encode
Returns: Hypervector representing the sequence
Raises:
ValueError: If sequence is empty
ValueError: If sequence exceeds max_length
ValueError: If symbol not in codebook and auto_generate=False
Example:
>>> encoder.encode(['cat', 'sat', 'on', 'mat'])
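With auto_generate=False, unknown symbols raise immediately, which is useful for enforcing a fixed vocabulary (a sketch reusing the model from earlier examples):
>>> strict = PositionBindingEncoder(model, auto_generate=False)
>>> strict.encode(['unseen'])  # raises ValueError: symbol not in codebook and auto_generate=False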
Source code in holovec/encoders/sequence.py
def encode(self, sequence: list[str | int]) -> Array:
"""
Encode sequence using position binding.
Each element is bound with a position-specific permutation and
all bound pairs are bundled:
result = Σᵢ bind(element_i, permute(position_vector, i))
Args:
sequence: List of symbols (strings or integers) to encode
Returns:
Hypervector representing the sequence
Raises:
ValueError: If sequence is empty
ValueError: If sequence exceeds max_length
ValueError: If symbol not in codebook and auto_generate=False
Example:
>>> encoder.encode(['cat', 'sat', 'on', 'mat'])
"""
if not sequence:
raise ValueError("Cannot encode empty sequence")
if self.max_length is not None and len(sequence) > self.max_length:
raise ValueError(
f"Sequence length {len(sequence)} exceeds max_length {self.max_length}"
)
# Get or generate hypervectors for each symbol
symbol_vectors = []
for symbol in sequence:
if symbol not in self.codebook:
if not self.auto_generate:
raise ValueError(
f"Symbol '{symbol}' not in codebook and auto_generate=False"
)
# Generate new vector for this symbol
self.codebook[symbol] = self._generate_symbol_vector(symbol)
symbol_vectors.append(self.codebook[symbol])
# Bind each symbol with its position and bundle
position_bound = []
for i, symbol_vec in enumerate(symbol_vectors):
# Position encoding: permute by position index
# permute(vec, i) applies permutation i times
position_vec = self.model.permute(symbol_vec, k=i)
position_bound.append(position_vec)
# Bundle all position-bound vectors
sequence_hv = self.model.bundle(position_bound)
return sequence_hv
get_codebook_size()
Get number of symbols in codebook.
Returns: Number of symbols stored
Source code in holovec/encoders/sequence.py
def get_codebook_size(self) -> int:
"""
Get number of symbols in codebook.
Returns:
Number of symbols stored
"""
return len(self.codebook)
holovec.encoders.sequence.NGramEncoder
Bases: SequenceEncoder
N-gram encoder for capturing local sequence patterns using sliding windows.
Based on Plate (2003), Rachkovskij (1996), and Kleyko et al. (2023) Section 3.3.4.
Encodes sequences by extracting n-grams (sliding windows of n consecutive symbols) and encoding each n-gram compositionally:
For sequence [A, B, C, D] with n=2, stride=1:
- Extract n-grams: [A,B], [B,C], [C,D]
- Encode each n-gram using position binding
- Combine via bundling or chaining
Two encoding modes:
1. Bundling mode (bag-of-ngrams): encode(seq) = bundle([encode_ngram([A,B]), encode_ngram([B,C]), ...])
- Order-invariant across n-grams (but preserves order within each n-gram)
- Good for classification (e.g., text categorization)
- Similar to bag-of-words but with local context
2. Chaining mode (ordered n-grams): encode(seq) = Σᵢ bind(encode_ngram(ngramᵢ), ρⁱ)
- Order-sensitive across n-grams
- Good for sequence matching
- Enables partial decoding
Attributes:
n: Size of n-grams (1=unigrams, 2=bigrams, 3=trigrams, etc.)
stride: Step size between n-grams (1=overlapping, n=non-overlapping)
mode: 'bundling' or 'chaining'
ngram_encoder: Internal PositionBindingEncoder for individual n-grams
Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = NGramEncoder(model, n=2, stride=1, mode='bundling')
>>>
>>> # Encode text as bigrams
>>> seq = ['the', 'cat', 'sat', 'on', 'mat']
>>> hv = encoder.encode(seq)  # Bigrams: [the,cat], [cat,sat], [sat,on], [on,mat]
>>>
>>> # Similar text has high similarity
>>> seq2 = ['the', 'cat', 'sat', 'on', 'hat']
>>> hv2 = encoder.encode(seq2)  # Shares 3/4 bigrams
>>> model.similarity(hv, hv2)  # High similarity
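The two modes differ in how they treat n-gram order, which a quick sketch makes concrete (continuing with the model above; similarity values are approximate):
>>> bag = NGramEncoder(model, n=2, mode='bundling')
>>> chain = NGramEncoder(model, n=2, mode='chaining')
>>> s1, s2 = ['A', 'B', 'C'], ['X', 'A', 'B', 'C']
>>> model.similarity(bag.encode(s1), bag.encode(s2))    # higher: bigrams AB and BC are shared
>>> model.similarity(chain.encode(s1), chain.encode(s2))  # lower: shared bigrams sit at shifted positions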
Source code in holovec/encoders/sequence.py
class NGramEncoder(SequenceEncoder):
"""
N-gram encoder for capturing local sequence patterns using sliding windows.
Based on Plate (2003), Rachkovskij (1996), and Kleyko et al. (2023) Section 3.3.4.
Encodes sequences by extracting n-grams (sliding windows of n consecutive symbols)
and encoding each n-gram compositionally:
For sequence [A, B, C, D] with n=2, stride=1:
- Extract n-grams: [A,B], [B,C], [C,D]
- Encode each n-gram using position binding
- Combine via bundling or chaining
Two encoding modes:
1. **Bundling mode** (bag-of-ngrams):
encode(seq) = bundle([encode_ngram([A,B]), encode_ngram([B,C]), ...])
- Order-invariant across n-grams (but preserves within n-gram)
- Good for classification (e.g., text categorization)
- Similar to bag-of-words but with local context
2. **Chaining mode** (ordered n-grams):
encode(seq) = Σᵢ bind(encode_ngram(ngramᵢ), ρⁱ)
- Order-sensitive across n-grams
- Good for sequence matching
- Enables partial decoding
Attributes:
n: Size of n-grams (1=unigrams, 2=bigrams, 3=trigrams, etc.)
stride: Step size between n-grams (1=overlapping, n=non-overlapping)
mode: 'bundling' or 'chaining'
ngram_encoder: Internal PositionBindingEncoder for individual n-grams
Example:
>>> model = VSA.create('MAP', dim=10000)
>>> encoder = NGramEncoder(model, n=2, stride=1, mode='bundling')
>>>
>>> # Encode text as bigrams
>>> seq = ['the', 'cat', 'sat', 'on', 'mat']
>>> hv = encoder.encode(seq) # Bigrams: [the,cat], [cat,sat], [sat,on], [on,mat]
>>>
>>> # Similar text has high similarity
>>> seq2 = ['the', 'cat', 'sat', 'on', 'hat']
>>> hv2 = encoder.encode(seq2) # Shares 3/4 bigrams
>>> model.similarity(hv, hv2) # High similarity
"""
def __init__(
self,
model: VSAModel,
n: int = 2,
stride: int = 1,
mode: str = 'bundling',
codebook: dict[Symbol, Array] | None = None,
auto_generate: bool = True,
seed: int | None = None
):
"""
Initialize n-gram encoder.
Args:
model: VSA model instance
n: Size of n-grams (must be >= 1)
stride: Step between n-grams (must be >= 1)
mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
codebook: Optional pre-defined symbol → hypervector mapping
auto_generate: Auto-generate vectors for unknown symbols
seed: Random seed for symbol vector generation
Raises:
ValueError: If n < 1, stride < 1, or mode is invalid
"""
super().__init__(model, max_length=None)
if n < 1:
raise ValueError(f"n must be >= 1, got {n}")
if stride < 1:
raise ValueError(f"stride must be >= 1, got {stride}")
if mode not in ['bundling', 'chaining']:
raise ValueError(f"mode must be 'bundling' or 'chaining', got '{mode}'")
self.n = n
self.stride = stride
self.mode = mode
# Internal encoder for individual n-grams
# Each n-gram is encoded as a position-bound sequence
self.ngram_encoder = PositionBindingEncoder(
model=model,
codebook=codebook,
max_length=n, # Each n-gram has length n
auto_generate=auto_generate,
seed=seed
)
def encode(self, sequence: list[str | int]) -> Array:
"""
Encode sequence using n-gram representation.
Extracts all n-grams using sliding window with specified stride,
encodes each n-gram, then combines via bundling or chaining.
Args:
sequence: List of symbols to encode
Returns:
Hypervector representing the sequence as n-grams
Raises:
ValueError: If sequence is too short (length < n)
Example:
>>> # Bigrams with stride=1 (overlapping)
>>> encoder = NGramEncoder(model, n=2, stride=1)
>>> encoder.encode(['A', 'B', 'C']) # N-grams: AB, BC
>>>
>>> # Trigrams with stride=2 (partial overlap)
>>> encoder = NGramEncoder(model, n=3, stride=2)
>>> encoder.encode(['A', 'B', 'C', 'D', 'E']) # N-grams: ABC, CDE
"""
if len(sequence) < self.n:
raise ValueError(
f"Sequence length {len(sequence)} is less than n={self.n}"
)
# Extract all n-grams using sliding window
ngrams = []
for i in range(0, len(sequence) - self.n + 1, self.stride):
ngram = sequence[i:i + self.n]
ngrams.append(ngram)
if not ngrams:
raise ValueError("No n-grams extracted from sequence")
# Encode each n-gram using position binding
ngram_hvs = []
for ngram in ngrams:
ngram_hv = self.ngram_encoder.encode(ngram)
ngram_hvs.append(ngram_hv)
# Combine n-gram hypervectors based on mode
if self.mode == 'bundling':
# Bag-of-ngrams: simple bundle (order-invariant)
sequence_hv = self.model.bundle(ngram_hvs)
else: # mode == 'chaining'
# Ordered n-grams: bind each with position
position_bound = []
for i, ngram_hv in enumerate(ngram_hvs):
# Position encoding: permute by n-gram index
position_hv = self.model.permute(ngram_hv, k=i)
position_bound.append(position_hv)
sequence_hv = self.model.bundle(position_bound)
return sequence_hv
def decode(
self,
hypervector: Array,
max_ngrams: int = 10,
threshold: float = 0.3
) -> list[list[str | int]]:
"""
Decode n-gram hypervector to recover n-grams.
Only supported for 'chaining' mode. For 'bundling' mode,
n-grams are order-invariant and cannot be sequentially decoded.
Args:
hypervector: Encoded sequence hypervector
max_ngrams: Maximum number of n-grams to decode
threshold: Minimum similarity threshold for valid n-grams
Returns:
List of decoded n-grams, each as a list of symbols
Raises:
NotImplementedError: If mode is 'bundling' (not decodable)
RuntimeError: If codebook is empty
Example:
>>> encoder = NGramEncoder(model, n=2, mode='chaining')
>>> hv = encoder.encode(['A', 'B', 'C'])
>>> encoder.decode(hv, max_ngrams=3) # [['A', 'B'], ['B', 'C']]
"""
if self.mode != 'chaining':
raise NotImplementedError(
f"Decoding only supported for 'chaining' mode, not '{self.mode}'"
)
if not self.ngram_encoder.codebook:
raise RuntimeError("Cannot decode: codebook is empty")
# For chaining mode, unpermute each position and decode the n-gram
decoded_ngrams = []
for pos in range(max_ngrams):
# Unpermute by position to recover n-gram at this index
unpermuted = self.model.unpermute(hypervector, k=pos)
# Decode the n-gram using ngram_encoder
try:
ngram_symbols = self.ngram_encoder.decode(
unpermuted,
max_positions=self.n,
threshold=threshold
)
# Only include if we got a full n-gram
if len(ngram_symbols) >= self.n:
decoded_ngrams.append(ngram_symbols[:self.n])
else:
# Incomplete n-gram - likely end of sequence
break
except Exception:
# Decoding failed - likely end of sequence
break
return decoded_ngrams
def get_codebook(self) -> dict[Symbol, Array]:
"""
Get the internal symbol codebook.
Returns:
Dictionary mapping symbols to hypervectors
"""
return self.ngram_encoder.codebook
def get_codebook_size(self) -> int:
"""
Get number of unique symbols in codebook.
Returns:
Number of symbols
"""
return self.ngram_encoder.get_codebook_size()
@property
def is_reversible(self) -> bool:
"""
NGramEncoder supports decoding only in 'chaining' mode.
Returns:
True if mode is 'chaining', False if 'bundling'
"""
return self.mode == 'chaining'
@property
def compatible_models(self) -> list[str]:
"""
Works with all VSA models.
Returns:
List of all model names
"""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
def __repr__(self) -> str:
"""String representation."""
return (
f"NGramEncoder("
f"model={self.model.model_name}, "
f"n={self.n}, "
f"stride={self.stride}, "
f"mode='{self.mode}', "
f"codebook_size={self.get_codebook_size()})"
)
compatible_models
property
Works with all VSA models.
Returns: List of all model names
is_reversible
property
NGramEncoder supports decoding only in 'chaining' mode.
Returns: True if mode is 'chaining', False if 'bundling'
__init__(model, n=2, stride=1, mode='bundling', codebook=None, auto_generate=True, seed=None)
Initialize n-gram encoder.
Args:
model: VSA model instance
n: Size of n-grams (must be >= 1)
stride: Step between n-grams (must be >= 1)
mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
codebook: Optional pre-defined symbol → hypervector mapping
auto_generate: Auto-generate vectors for unknown symbols
seed: Random seed for symbol vector generation
Raises: ValueError: If n < 1, stride < 1, or mode is invalid
Source code in holovec/encoders/sequence.py
def __init__(
self,
model: VSAModel,
n: int = 2,
stride: int = 1,
mode: str = 'bundling',
codebook: dict[Symbol, Array] | None = None,
auto_generate: bool = True,
seed: int | None = None
):
"""
Initialize n-gram encoder.
Args:
model: VSA model instance
n: Size of n-grams (must be >= 1)
stride: Step between n-grams (must be >= 1)
mode: 'bundling' for bag-of-ngrams or 'chaining' for ordered n-grams
codebook: Optional pre-defined symbol → hypervector mapping
auto_generate: Auto-generate vectors for unknown symbols
seed: Random seed for symbol vector generation
Raises:
ValueError: If n < 1, stride < 1, or mode is invalid
"""
super().__init__(model, max_length=None)
if n < 1:
raise ValueError(f"n must be >= 1, got {n}")
if stride < 1:
raise ValueError(f"stride must be >= 1, got {stride}")
if mode not in ['bundling', 'chaining']:
raise ValueError(f"mode must be 'bundling' or 'chaining', got '{mode}'")
self.n = n
self.stride = stride
self.mode = mode
# Internal encoder for individual n-grams
# Each n-gram is encoded as a position-bound sequence
self.ngram_encoder = PositionBindingEncoder(
model=model,
codebook=codebook,
max_length=n, # Each n-gram has length n
auto_generate=auto_generate,
seed=seed
)
__repr__()
String representation.
Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
"""String representation."""
return (
f"NGramEncoder("
f"model={self.model.model_name}, "
f"n={self.n}, "
f"stride={self.stride}, "
f"mode='{self.mode}', "
f"codebook_size={self.get_codebook_size()})"
)
decode(hypervector, max_ngrams=10, threshold=0.3)
Decode n-gram hypervector to recover n-grams.
Only supported for 'chaining' mode. For 'bundling' mode, n-grams are order-invariant and cannot be sequentially decoded.
Args:
hypervector: Encoded sequence hypervector
max_ngrams: Maximum number of n-grams to decode
threshold: Minimum similarity threshold for valid n-grams
Returns: List of decoded n-grams, each as a list of symbols
Raises:
NotImplementedError: If mode is 'bundling' (not decodable)
RuntimeError: If codebook is empty
Example:
>>> encoder = NGramEncoder(model, n=2, mode='chaining')
>>> hv = encoder.encode(['A', 'B', 'C'])
>>> encoder.decode(hv, max_ngrams=3)  # [['A', 'B'], ['B', 'C']]
Source code in holovec/encoders/sequence.py
def decode(
self,
hypervector: Array,
max_ngrams: int = 10,
threshold: float = 0.3
) -> list[list[str | int]]:
"""
Decode n-gram hypervector to recover n-grams.
Only supported for 'chaining' mode. For 'bundling' mode,
n-grams are order-invariant and cannot be sequentially decoded.
Args:
hypervector: Encoded sequence hypervector
max_ngrams: Maximum number of n-grams to decode
threshold: Minimum similarity threshold for valid n-grams
Returns:
List of decoded n-grams, each as a list of symbols
Raises:
NotImplementedError: If mode is 'bundling' (not decodable)
RuntimeError: If codebook is empty
Example:
>>> encoder = NGramEncoder(model, n=2, mode='chaining')
>>> hv = encoder.encode(['A', 'B', 'C'])
>>> encoder.decode(hv, max_ngrams=3) # [['A', 'B'], ['B', 'C']]
"""
if self.mode != 'chaining':
raise NotImplementedError(
f"Decoding only supported for 'chaining' mode, not '{self.mode}'"
)
if not self.ngram_encoder.codebook:
raise RuntimeError("Cannot decode: codebook is empty")
# For chaining mode, unpermute each position and decode the n-gram
decoded_ngrams = []
for pos in range(max_ngrams):
# Unpermute by position to recover n-gram at this index
unpermuted = self.model.unpermute(hypervector, k=pos)
# Decode the n-gram using ngram_encoder
try:
ngram_symbols = self.ngram_encoder.decode(
unpermuted,
max_positions=self.n,
threshold=threshold
)
# Only include if we got a full n-gram
if len(ngram_symbols) >= self.n:
decoded_ngrams.append(ngram_symbols[:self.n])
else:
# Incomplete n-gram - likely end of sequence
break
except Exception:
# Decoding failed - likely end of sequence
break
return decoded_ngrams
encode(sequence)
Encode sequence using n-gram representation.
Extracts all n-grams using sliding window with specified stride, encodes each n-gram, then combines via bundling or chaining.
Args: sequence: List of symbols to encode
Returns: Hypervector representing the sequence as n-grams
Raises: ValueError: If sequence is too short (length < n)
Example:
>>> # Bigrams with stride=1 (overlapping)
>>> encoder = NGramEncoder(model, n=2, stride=1)
>>> encoder.encode(['A', 'B', 'C'])  # N-grams: AB, BC
>>>
>>> # Trigrams with stride=2 (partial overlap)
>>> encoder = NGramEncoder(model, n=3, stride=2)
>>> encoder.encode(['A', 'B', 'C', 'D', 'E'])  # N-grams: ABC, CDE
Source code in holovec/encoders/sequence.py
def encode(self, sequence: list[str | int]) -> Array:
"""
Encode sequence using n-gram representation.
Extracts all n-grams using sliding window with specified stride,
encodes each n-gram, then combines via bundling or chaining.
Args:
sequence: List of symbols to encode
Returns:
Hypervector representing the sequence as n-grams
Raises:
ValueError: If sequence is too short (length < n)
Example:
>>> # Bigrams with stride=1 (overlapping)
>>> encoder = NGramEncoder(model, n=2, stride=1)
>>> encoder.encode(['A', 'B', 'C']) # N-grams: AB, BC
>>>
>>> # Trigrams with stride=2 (partial overlap)
>>> encoder = NGramEncoder(model, n=3, stride=2)
>>> encoder.encode(['A', 'B', 'C', 'D', 'E']) # N-grams: ABC, CDE
"""
if len(sequence) < self.n:
raise ValueError(
f"Sequence length {len(sequence)} is less than n={self.n}"
)
# Extract all n-grams using sliding window
ngrams = []
for i in range(0, len(sequence) - self.n + 1, self.stride):
ngram = sequence[i:i + self.n]
ngrams.append(ngram)
if not ngrams:
raise ValueError("No n-grams extracted from sequence")
# Encode each n-gram using position binding
ngram_hvs = []
for ngram in ngrams:
ngram_hv = self.ngram_encoder.encode(ngram)
ngram_hvs.append(ngram_hv)
# Combine n-gram hypervectors based on mode
if self.mode == 'bundling':
# Bag-of-ngrams: simple bundle (order-invariant)
sequence_hv = self.model.bundle(ngram_hvs)
else: # mode == 'chaining'
# Ordered n-grams: bind each with position
position_bound = []
for i, ngram_hv in enumerate(ngram_hvs):
# Position encoding: permute by n-gram index
position_hv = self.model.permute(ngram_hv, k=i)
position_bound.append(position_hv)
sequence_hv = self.model.bundle(position_bound)
return sequence_hv
get_codebook()
Get the internal symbol codebook.
Returns: Dictionary mapping symbols to hypervectors
Source code in holovec/encoders/sequence.py
def get_codebook(self) -> dict[Symbol, Array]:
"""
Get the internal symbol codebook.
Returns:
Dictionary mapping symbols to hypervectors
"""
return self.ngram_encoder.codebook
get_codebook_size()
Get number of unique symbols in codebook.
Returns: Number of symbols
Source code in holovec/encoders/sequence.py
def get_codebook_size(self) -> int:
"""
Get number of unique symbols in codebook.
Returns:
Number of symbols
"""
return self.ngram_encoder.get_codebook_size()
holovec.encoders.sequence.TrajectoryEncoder
Bases: SequenceEncoder
Trajectory encoder for continuous sequences (time series, paths, motion).
Based on Frady et al. (2021) "Computing on Functions" and position binding from Plate (2003), encoding trajectories by binding temporal information with spatial positions.
A trajectory is a sequence of positions over time:
- 1D: time series [v₁, v₂, v₃, ...]
- 2D: path [(x₁,y₁), (x₂,y₂), ...]
- 3D: motion [(x₁,y₁,z₁), (x₂,y₂,z₂), ...]
Encoding strategy:
For each time step tᵢ with position pᵢ:
1. Encode time: time_hv = scalar_encode(tᵢ)
2. Encode position coords: coord_hvs = [scalar_encode(c) for c in pᵢ]
3. Bind coords to dimensions: pos_hv = Σⱼ bind(Dⱼ, coord_hvⱼ)
4. Bind time with position: point_hv = bind(time_hv, pos_hv)
5. Permute by index: indexed_hv = permute(point_hv, i)
trajectory_hv = Σᵢ indexed_hv
This creates an encoding that:
- Preserves temporal ordering (via permutation)
- Captures smooth trajectories (via continuous scalar encoding)
- Enables partial matching and interpolation
- Supports multi-dimensional paths
Attributes:
scalar_encoder: Encoder for continuous values (FPE or Thermometer)
n_dimensions: Dimensionality of trajectory (1D, 2D, or 3D)
time_range: (min_time, max_time) for temporal normalization
dim_vectors: Hypervectors for spatial dimensions (x, y, z)
Example:
>>> from holovec import VSA
>>> from holovec.encoders import FractionalPowerEncoder, TrajectoryEncoder
>>>
>>> model = VSA.create('FHRR', dim=10000)
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> encoder = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=2)
>>>
>>> # Encode a 2D path
>>> path = [(10, 20), (15, 25), (20, 30), (25, 35)]
>>> hv = encoder.encode(path)
>>>
>>> # Similar paths have high similarity
>>> path2 = [(10, 20), (15, 25), (20, 30), (25, 40)]  # Slightly different
>>> hv2 = encoder.encode(path2)
>>> model.similarity(hv, hv2)  # High similarity
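For 1D time series the same machinery applies to scalar points (a sketch continuing the example above; the value range is illustrative, and the scalar encoder must be built on the same model):
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=10)
>>> enc_1d = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=1)
>>> hv_a = enc_1d.encode([1.0, 2.0, 3.0, 4.0])
>>> hv_b = enc_1d.encode([1.0, 2.0, 3.0, 4.5])  # small deviation at the last step
>>> model.similarity(hv_a, hv_b)  # high: trajectories differ only slightly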
Source code in holovec/encoders/sequence.py
class TrajectoryEncoder(SequenceEncoder):
"""
Trajectory encoder for continuous sequences (time series, paths, motion).
Based on Frady et al. (2021) "Computing on Functions" and position binding
from Plate (2003), encoding trajectories by binding temporal information
with spatial positions.
A trajectory is a sequence of positions over time:
- 1D: time series [v₁, v₂, v₃, ...]
- 2D: path [(x₁,y₁), (x₂,y₂), ...]
- 3D: motion [(x₁,y₁,z₁), (x₂,y₂,z₂), ...]
Encoding strategy:
For each time step tᵢ with position pᵢ:
1. Encode time: time_hv = scalar_encode(tᵢ)
2. Encode position coords: coord_hvs = [scalar_encode(c) for c in pᵢ]
3. Bind coords to dimensions: pos_hv = Σⱼ bind(Dⱼ, coord_hv_j)
4. Bind time with position: point_hv = bind(time_hv, pos_hv)
5. Permute by index: indexed_hv = permute(point_hv, i)
trajectory_hv = Σᵢ indexed_hv
This creates an encoding that:
- Preserves temporal ordering (via permutation)
- Captures smooth trajectories (via continuous scalar encoding)
- Enables partial matching and interpolation
- Supports multi-dimensional paths
Attributes:
scalar_encoder: Encoder for continuous values (FPE or Thermometer)
n_dimensions: Dimensionality of trajectory (1D, 2D, or 3D)
time_range: (min_time, max_time) for temporal normalization
dim_vectors: Hypervectors for spatial dimensions (x, y, z)
Example:
>>> from holovec import VSA
>>> from holovec.encoders import FractionalPowerEncoder, TrajectoryEncoder
>>>
>>> model = VSA.create('FHRR', dim=10000)
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=100)
>>> encoder = TrajectoryEncoder(model, scalar_encoder=scalar_enc, n_dimensions=2)
>>>
>>> # Encode a 2D path
>>> path = [(10, 20), (15, 25), (20, 30), (25, 35)]
>>> hv = encoder.encode(path)
>>>
>>> # Similar paths have high similarity
>>> path2 = [(10, 20), (15, 25), (20, 30), (25, 40)] # Slightly different
>>> hv2 = encoder.encode(path2)
>>> model.similarity(hv, hv2) # High similarity
"""
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
n_dimensions: int = 1,
time_range: tuple[float, float] | None = None,
seed: int | None = None
):
"""
Initialize trajectory encoder.
Args:
model: VSA model instance
scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended)
n_dimensions: Trajectory dimensionality (1, 2, or 3)
time_range: (min, max) time values for normalization (optional)
seed: Random seed for dimension vector generation
Raises:
ValueError: If n_dimensions not in {1, 2, 3}
TypeError: If scalar_encoder is not reversible
"""
super().__init__(model, max_length=None)
if n_dimensions not in {1, 2, 3}:
raise ValueError(
f"n_dimensions must be 1, 2, or 3, got {n_dimensions}"
)
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(
f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
)
# Check model compatibility
if model != scalar_encoder.model:
raise ValueError(
"scalar_encoder must use the same VSA model as TrajectoryEncoder"
)
self.scalar_encoder = scalar_encoder
self.n_dimensions = n_dimensions
self.time_range = time_range
self.seed = seed
# Generate dimension hypervectors (for x, y, z coordinates)
self.dim_vectors: list[Array] = []
for i in range(n_dimensions):
dim_seed = (seed + i) if seed is not None else (1000 + i)
self.dim_vectors.append(model.random(seed=dim_seed))
def encode(self, trajectory: list[float | tuple[float, ...]]) -> Array:
"""
Encode a trajectory as a hypervector.
Each point in the trajectory is encoded with temporal information,
then all points are combined with position-based permutation.
Args:
trajectory: List of points
- 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
- 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
- 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]
Returns:
Hypervector representing the trajectory
Raises:
ValueError: If trajectory is empty or points have wrong dimensionality
Example:
>>> # 1D time series
>>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
>>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
>>>
>>> # 2D path
>>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
>>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])
"""
if len(trajectory) == 0:
raise ValueError("Cannot encode empty trajectory")
# Encode each point with temporal binding
point_hvs = []
for i, point in enumerate(trajectory):
coords: tuple[float, ...]
# Normalize point to tuple format
if self.n_dimensions == 1:
# 1D: scalar → (scalar,)
if isinstance(point, int | float):
coords = (float(point),)
else:
coords = (float(point[0]),)
else:
# 2D/3D: accept tuple, list, or array-like
if isinstance(point, int | float):
raise ValueError(
f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
)
try:
# Convert to tuple (works for tuple, list, numpy array, etc.)
coords = tuple(float(c) for c in point)
except (TypeError, ValueError) as exc:
raise ValueError(
f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
) from exc
# Validate dimensionality
if len(coords) != self.n_dimensions:
raise ValueError(
f"Expected {self.n_dimensions}D point, got {len(coords)}D: {coords}"
)
# Encode time (index as time if no time_range specified)
if self.time_range is not None:
# Normalize time to range
t = i / len(trajectory)  # in [0, 1)
t_scaled = self.time_range[0] + t * (self.time_range[1] - self.time_range[0])
time_hv = self.scalar_encoder.encode(t_scaled)
else:
# Use index directly
time_hv = self.scalar_encoder.encode(float(i))
# Encode position (bind each coordinate with its dimension)
coord_hvs = []
for j, coord_val in enumerate(coords):
coord_hv = self.scalar_encoder.encode(coord_val)
dim_hv = self.dim_vectors[j]
bound_coord = self.model.bind(dim_hv, coord_hv)
coord_hvs.append(bound_coord)
# Bundle coordinates to create position hypervector
pos_hv = self.model.bundle(coord_hvs)
# Bind time with position
point_hv = self.model.bind(time_hv, pos_hv)
# Apply position-specific permutation (for ordering)
indexed_hv = self.model.permute(point_hv, k=i)
point_hvs.append(indexed_hv)
# Bundle all points
trajectory_hv = self.model.bundle(point_hvs)
return trajectory_hv
def decode(self, hypervector: Array, max_points: int = 10) -> list[tuple[float, ...]]:
"""
Decode trajectory hypervector to recover approximate points.
Note: Trajectory decoding is not yet implemented. It requires:
1. Unpermuting each position
2. Unbinding time from position
3. Unbinding each coordinate from dimension vectors
4. Decoding scalar values
5. Interpolation for smooth trajectories
Args:
hypervector: Encoded trajectory hypervector
max_points: Maximum points to decode
Returns:
List of decoded points (not implemented yet)
Raises
------
NotImplementedError
Trajectory decoding requires solving nested binding inverse problem.
Notes
-----
Trajectory decoding is not implemented because it requires multi-level
unbinding with cascading error accumulation:
**Mathematical Challenge:**
The encoding process creates nested bindings:
trajectory_hv = bundle([
bind(time(t), bind(dimension(d), scalar(coord[t,d])))
for all t, d
])
To decode a single point at time t:
1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
2. For each dimension d:
a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])
**Why This Is Intractable:**
- **Two-level unbinding**: Time then dimension (or vice versa)
- **Error compounding**: Each unbind adds noise
- **No known time points**: Must search over possible time values
- **Interpolation complexity**: Smooth trajectory requires dense sampling
- **Computational cost**:
* For T time points, D dimensions
* Requires: T × D × (decode_iterations) evaluations
* Example: 100 points × 3D × 100 iterations = 30,000 evals
**Additional Challenges:**
1. **Order Ambiguity**: Don't know which time point comes first
2. **Density Unknown**: Don't know temporal sampling rate
3. **Dimension Count**: Must know dimensionality a priori
4. **Coordinate Ranges**: Scalar decoder needs value bounds
**Possible Approaches (Future Work):**
1. **Constrained Decoding**: If time points are known:
- Unbind each known time point
- Decode coordinates independently
- Complexity: O(T × D × decode_cost)
2. **Template Matching**: Pre-encode common trajectory patterns
- Create codebook of canonical trajectories
- Use cleanup to find nearest match
- Works for classification, not reconstruction
3. **Learned Decoder**: Train neural network trajectory_hv → points
- Requires large training dataset
- Can learn to handle noise and ambiguity
- See: Imani et al. (2019) for similar approach
4. **Iterative Resonator**: Use resonator cleanup at each level
- Unbind time with resonator cleanup
- Unbind dimension with resonator cleanup
- Requires codebooks for both time and coordinates
**Current Recommendation:**
Use TrajectoryEncoder for one-way encoding in applications like:
- Trajectory classification (gesture recognition, motion analysis)
- Trajectory similarity search (find similar paths)
- Trajectory clustering (group similar motions)
For reconstruction, consider storing original trajectories separately
and using hypervector encoding only for similarity queries.
References
----------
- Plate (2003): "Holographic Reduced Representations" - Section 4.3
on error accumulation in multi-level binding
- Räsänen & Saarinen (2016): "Sequence prediction with sparse
distributed hyperdimensional coding" - Analysis of temporal binding
"""
raise NotImplementedError(
"Trajectory decoding is not implemented due to nested binding complexity. "
"See docstring for detailed mathematical explanation. "
"For reconstruction tasks, store original trajectories and use "
"hypervector encoding for similarity-based retrieval only."
)
@property
def is_reversible(self) -> bool:
"""
TrajectoryEncoder does not yet support decoding.
Returns:
False (decoding not implemented)
Note:
Decoding requires multi-level unbinding and interpolation,
which will be implemented in a future version.
"""
return False
@property
def compatible_models(self) -> list[str]:
"""
Works with all VSA models.
Returns:
List of all model names
"""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
@property
def input_type(self) -> str:
"""Input type description."""
dim_names = {1: "1D time series", 2: "2D path", 3: "3D trajectory"}
return dim_names[self.n_dimensions]
def __repr__(self) -> str:
"""String representation."""
return (
f"TrajectoryEncoder("
f"model={self.model.model_name}, "
f"scalar_encoder={type(self.scalar_encoder).__name__}, "
f"n_dimensions={self.n_dimensions}, "
f"time_range={self.time_range})"
)
compatible_models
property
Works with all VSA models.
Returns: List of all model names
input_type
property
Input type description.
is_reversible
property
TrajectoryEncoder does not yet support decoding.
Returns: False (decoding not implemented)
Note: Decoding requires multi-level unbinding and interpolation, which will be implemented in a future version.
__init__(model, scalar_encoder, n_dimensions=1, time_range=None, seed=None)
Initialize trajectory encoder.
Args: model: VSA model instance scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended) n_dimensions: Trajectory dimensionality (1, 2, or 3) time_range: (min, max) time values for normalization (optional) seed: Random seed for dimension vector generation
Raises: ValueError: If n_dimensions not in {1, 2, 3} TypeError: If scalar_encoder is not reversible
Source code in holovec/encoders/sequence.py
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
n_dimensions: int = 1,
time_range: tuple[float, float] | None = None,
seed: int | None = None
):
"""
Initialize trajectory encoder.
Args:
model: VSA model instance
scalar_encoder: Encoder for continuous values (FPE or Thermometer recommended)
n_dimensions: Trajectory dimensionality (1, 2, or 3)
time_range: (min, max) time values for normalization (optional)
seed: Random seed for dimension vector generation
Raises:
ValueError: If n_dimensions not in {1, 2, 3}
TypeError: If scalar_encoder is not reversible
"""
super().__init__(model, max_length=None)
if n_dimensions not in {1, 2, 3}:
raise ValueError(
f"n_dimensions must be 1, 2, or 3, got {n_dimensions}"
)
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(
f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
)
# Check model compatibility
if model != scalar_encoder.model:
raise ValueError(
"scalar_encoder must use the same VSA model as TrajectoryEncoder"
)
self.scalar_encoder = scalar_encoder
self.n_dimensions = n_dimensions
self.time_range = time_range
self.seed = seed
# Generate dimension hypervectors (for x, y, z coordinates)
self.dim_vectors: list[Array] = []
for i in range(n_dimensions):
dim_seed = (seed + i) if seed is not None else (1000 + i)
self.dim_vectors.append(model.random(seed=dim_seed))
__repr__()
String representation.
Source code in holovec/encoders/sequence.py
def __repr__(self) -> str:
"""String representation."""
return (
f"TrajectoryEncoder("
f"model={self.model.model_name}, "
f"scalar_encoder={type(self.scalar_encoder).__name__}, "
f"n_dimensions={self.n_dimensions}, "
f"time_range={self.time_range})"
)
decode(hypervector, max_points=10)
Decode trajectory hypervector to recover approximate points.
Note: Trajectory decoding is not yet implemented. It requires:
1. Unpermuting each position
2. Unbinding time from position
3. Unbinding each coordinate from dimension vectors
4. Decoding scalar values
5. Interpolation for smooth trajectories
Args: hypervector: Encoded trajectory hypervector max_points: Maximum points to decode
Returns: List of decoded points (not implemented yet)
Raises:
| Type | Description |
|---|---|
| NotImplementedError | Trajectory decoding requires solving nested binding inverse problem. |
Notes
Trajectory decoding is not implemented because it requires multi-level unbinding with cascading error accumulation:
Mathematical Challenge:
The encoding process creates nested bindings:
    trajectory_hv = bundle([
        bind(time(t), bind(dimension(d), scalar(coord[t,d])))
        for all t, d
    ])
To decode a single point at time t:
1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
2. For each dimension d:
   a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
   b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])
Why This Is Intractable:
- Two-level unbinding: Time then dimension (or vice versa)
- Error compounding: Each unbind adds noise
- No known time points: Must search over possible time values
- Interpolation complexity: Smooth trajectory requires dense sampling
- Computational cost:
  - For T time points, D dimensions
  - Requires: T × D × (decode_iterations) evaluations
  - Example: 100 points × 3D × 100 iterations = 30,000 evals
Additional Challenges:
- Order Ambiguity: Don't know which time point comes first
- Density Unknown: Don't know temporal sampling rate
- Dimension Count: Must know dimensionality a priori
- Coordinate Ranges: Scalar decoder needs value bounds
Possible Approaches (Future Work):
1. Constrained Decoding: If time points are known (see the sketch after this list):
   - Unbind each known time point
   - Decode coordinates independently
   - Complexity: O(T × D × decode_cost)
2. Template Matching: Pre-encode common trajectory patterns
   - Create codebook of canonical trajectories
   - Use cleanup to find nearest match
   - Works for classification, not reconstruction
3. Learned Decoder: Train neural network trajectory_hv → points
   - Requires large training dataset
   - Can learn to handle noise and ambiguity
   - See: Imani et al. (2019) for similar approach
4. Iterative Resonator: Use resonator cleanup at each level
   - Unbind time with resonator cleanup
   - Unbind dimension with resonator cleanup
   - Requires codebooks for both time and coordinates
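As a rough illustration of approach 1, a constrained decoder for known time indices might look like the following. This is a hypothetical sketch, not library code: it assumes permute accepts a negative k to invert the index permutation, and that the scalar encoder is reversible via a decode method.
>>> def decode_known_times(traj_hv, indices, model, scalar_enc, dim_vectors):
...     points = []
...     for i in indices:
...         point_hv = model.permute(traj_hv, k=-i)         # assumed: negative k inverts permute
...         pos_hv = model.unbind(point_hv, scalar_enc.encode(float(i)))
...         coords = []
...         for d in dim_vectors:
...             coord_hv = model.unbind(pos_hv, d)
...             coords.append(scalar_enc.decode(coord_hv))  # assumed: reversible scalar encoder
...         points.append(tuple(coords))
...     return points  # noisy: other bundled points act as crosstalk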
Current Recommendation:
Use TrajectoryEncoder for one-way encoding in applications like:
- Trajectory classification (gesture recognition, motion analysis)
- Trajectory similarity search (find similar paths)
- Trajectory clustering (group similar motions)
For reconstruction, consider storing original trajectories separately and using hypervector encoding only for similarity queries.
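For the similarity-search use case this reduces to a few lines (a usage sketch; `query` and `candidates` are illustrative trajectories in the encoder's input format):
>>> query_hv = encoder.encode(query)
>>> scored = [(model.similarity(query_hv, encoder.encode(p)), p) for p in candidates]
>>> best_score, best_path = max(scored, key=lambda s: s[0])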
References
- Plate (2003): "Holographic Reduced Representations" - Section 4.3 on error accumulation in multi-level binding
- Räsänen & Saarinen (2016): "Sequence prediction with sparse distributed hyperdimensional coding" - Analysis of temporal binding
Source code in holovec/encoders/sequence.py
def decode(self, hypervector: Array, max_points: int = 10) -> list[tuple[float, ...]]:
"""
Decode trajectory hypervector to recover approximate points.
Note: Trajectory decoding is not yet implemented. It requires:
1. Unpermuting each position
2. Unbinding time from position
3. Unbinding each coordinate from dimension vectors
4. Decoding scalar values
5. Interpolation for smooth trajectories
Args:
hypervector: Encoded trajectory hypervector
max_points: Maximum points to decode
Returns:
List of decoded points (not implemented yet)
Raises
------
NotImplementedError
Trajectory decoding requires solving nested binding inverse problem.
Notes
-----
Trajectory decoding is not implemented because it requires multi-level
unbinding with cascading error accumulation:
**Mathematical Challenge:**
The encoding process creates nested bindings:
trajectory_hv = bundle([
bind(time(t), bind(dimension(d), scalar(coord[t,d])))
for all t, d
])
To decode a single point at time t:
1. Unbind time: point_hv[t] = unbind(trajectory_hv, time(t))
2. For each dimension d:
a. Unbind dimension: coord_hv[d] = unbind(point_hv[t], dimension(d))
b. Decode scalar: coord[t,d] = scalar_decode(coord_hv[d])
**Why This Is Intractable:**
- **Two-level unbinding**: Time then dimension (or vice versa)
- **Error compounding**: Each unbind adds noise
- **No known time points**: Must search over possible time values
- **Interpolation complexity**: Smooth trajectory requires dense sampling
- **Computational cost**:
* For T time points, D dimensions
* Requires: T × D × (decode_iterations) evaluations
* Example: 100 points × 3D × 100 iterations = 30,000 evals
**Additional Challenges:**
1. **Order Ambiguity**: Don't know which time point comes first
2. **Density Unknown**: Don't know temporal sampling rate
3. **Dimension Count**: Must know dimensionality a priori
4. **Coordinate Ranges**: Scalar decoder needs value bounds
**Possible Approaches (Future Work):**
1. **Constrained Decoding**: If time points are known:
- Unbind each known time point
- Decode coordinates independently
- Complexity: O(T × D × decode_cost)
2. **Template Matching**: Pre-encode common trajectory patterns
- Create codebook of canonical trajectories
- Use cleanup to find nearest match
- Works for classification, not reconstruction
3. **Learned Decoder**: Train neural network trajectory_hv → points
- Requires large training dataset
- Can learn to handle noise and ambiguity
- See: Imani et al. (2019) for similar approach
4. **Iterative Resonator**: Use resonator cleanup at each level
- Unbind time with resonator cleanup
- Unbind dimension with resonator cleanup
- Requires codebooks for both time and coordinates
**Current Recommendation:**
Use TrajectoryEncoder for one-way encoding in applications like:
- Trajectory classification (gesture recognition, motion analysis)
- Trajectory similarity search (find similar paths)
- Trajectory clustering (group similar motions)
For reconstruction, consider storing original trajectories separately
and using hypervector encoding only for similarity queries.
References
----------
- Plate (2003): "Holographic Reduced Representations" - Section 4.3
on error accumulation in multi-level binding
- Räsänen & Saarinen (2016): "Sequence prediction with sparse
distributed hyperdimensional coding" - Analysis of temporal binding
"""
raise NotImplementedError(
"Trajectory decoding is not implemented due to nested binding complexity. "
"See docstring for detailed mathematical explanation. "
"For reconstruction tasks, store original trajectories and use "
"hypervector encoding for similarity-based retrieval only."
)
encode(trajectory)
Encode a trajectory as a hypervector.
Each point in the trajectory is encoded with temporal information, then all points are combined with position-based permutation.
Args:
trajectory: List of points
- 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
- 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
- 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]
Returns: Hypervector representing the trajectory
Raises: ValueError: If trajectory is empty or points have wrong dimensionality
Example:
>>> # 1D time series
>>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
>>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
>>>
>>> # 2D path
>>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
>>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])
Source code in holovec/encoders/sequence.py
def encode(self, trajectory: list[float | tuple[float, ...]]) -> Array:
"""
Encode a trajectory as a hypervector.
Each point in the trajectory is encoded with temporal information,
then all points are combined with position-based permutation.
Args:
trajectory: List of points
- 1D: List[float] e.g., [1.0, 2.5, 3.7, ...]
- 2D: List[Tuple[float, float]] e.g., [(1,2), (3,4), ...]
- 3D: List[Tuple[float, float, float]] e.g., [(1,2,3), ...]
Returns:
Hypervector representing the trajectory
Raises:
ValueError: If trajectory is empty or points have wrong dimensionality
Example:
>>> # 1D time series
>>> encoder_1d = TrajectoryEncoder(model, scalar_enc, n_dimensions=1)
>>> hv = encoder_1d.encode([1.0, 2.5, 3.7, 5.2])
>>>
>>> # 2D path
>>> encoder_2d = TrajectoryEncoder(model, scalar_enc, n_dimensions=2)
>>> hv = encoder_2d.encode([(0,0), (1,1), (2,2)])
"""
if len(trajectory) == 0:
raise ValueError("Cannot encode empty trajectory")
# Encode each point with temporal binding
point_hvs = []
for i, point in enumerate(trajectory):
coords: tuple[float, ...]
# Normalize point to tuple format
if self.n_dimensions == 1:
# 1D: scalar → (scalar,)
if isinstance(point, int | float):
coords = (float(point),)
else:
coords = (float(point[0]),)
else:
# 2D/3D: accept tuple, list, or array-like
if isinstance(point, int | float):
raise ValueError(
f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
)
try:
# Convert to tuple (works for tuple, list, numpy array, etc.)
coords = tuple(float(c) for c in point)
except (TypeError, ValueError) as exc:
raise ValueError(
f"Expected iterable for {self.n_dimensions}D point, got {type(point)}"
) from exc
# Validate dimensionality
if len(coords) != self.n_dimensions:
raise ValueError(
f"Expected {self.n_dimensions}D point, got {len(coords)}D: {coords}"
)
# Encode time (index as time if no time_range specified)
if self.time_range is not None:
# Normalize time to range
t = i / len(trajectory)  # in [0, 1)
t_scaled = self.time_range[0] + t * (self.time_range[1] - self.time_range[0])
time_hv = self.scalar_encoder.encode(t_scaled)
else:
# Use index directly
time_hv = self.scalar_encoder.encode(float(i))
# Encode position (bind each coordinate with its dimension)
coord_hvs = []
for j, coord_val in enumerate(coords):
coord_hv = self.scalar_encoder.encode(coord_val)
dim_hv = self.dim_vectors[j]
bound_coord = self.model.bind(dim_hv, coord_hv)
coord_hvs.append(bound_coord)
# Bundle coordinates to create position hypervector
pos_hv = self.model.bundle(coord_hvs)
# Bind time with position
point_hv = self.model.bind(time_hv, pos_hv)
# Apply position-specific permutation (for ordering)
indexed_hv = self.model.permute(point_hv, k=i)
point_hvs.append(indexed_hv)
# Bundle all points
trajectory_hv = self.model.bundle(point_hvs)
return trajectory_hv
holovec.encoders.residue.ResidueEncoder
Residue Hyperdimensional Computing encoder.
Encodes integers using residue number system with multiple co-prime moduli. Enables both additive (Hadamard) and multiplicative (star) binding.
Based on Kymn et al. 2024 "Computing With Residue Numbers in HD".
Properties:
- Range: M = ∏ᵢ mᵢ (product of moduli via Chinese Remainder Theorem)
- Codebook size: ∑ᵢ mᵢ (sum of moduli - logarithmic scaling)
- Addition: z(x₁+x₂) = z(x₁) ⊙ z(x₂) (Hadamard product)
- Subtraction: z(x₁-x₂) = z(x₁) ⊙ z(x₂)* (Hadamard with conjugate)
Args: dim: Hypervector dimension (default 1000) moduli: List of co-prime moduli (e.g., [3, 5, 7] for range 105) model: FHRR model instance (optional, creates one if None) seed: Random seed for reproducibility
Raises: ValueError: If moduli are not pairwise co-prime
Example:
>>> encoder = ResidueEncoder(dim=2000, moduli=[3, 5, 7])
>>> z_20 = encoder.encode(20)  # 20 = [2 mod 3, 0 mod 5, 6 mod 7]
>>> z_5 = encoder.encode(5)
>>> z_25 = encoder.add(z_20, z_5)  # Represents 25
>>> decoded = encoder.decode(z_25)  # Returns 25
Attributes: dim: Hypervector dimension moduli: List of co-prime moduli M: Total range (product of moduli) K: Number of moduli model: FHRR model for complex phasor operations backend: Computational backend
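To make the CRT recovery concrete, here is the example value 25 decoded from its residues in plain Python (a standalone sketch mirroring _chinese_remainder_theorem below; no hypervectors involved):
>>> import math
>>> moduli = [3, 5, 7]
>>> M = math.prod(moduli)                   # total range: 105
>>> residues = {m: 25 % m for m in moduli}  # {3: 1, 5: 0, 7: 4}
>>> x = 0
>>> for m in moduli:
...     M_i = M // m             # product of the other moduli
...     y_i = pow(M_i, -1, m)    # modular inverse of M_i mod m
...     x += residues[m] * M_i * y_i
>>> x % M
25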
Source code in holovec/encoders/residue.py
class ResidueEncoder:
"""Residue Hyperdimensional Computing encoder.
Encodes integers using residue number system with multiple co-prime moduli.
Enables both additive (Hadamard) and multiplicative (star) binding.
Based on Kymn et al. 2024 "Computing With Residue Numbers in HD".
Properties:
- Range: M = ∏ᵢ mᵢ (product of moduli via Chinese Remainder Theorem)
- Codebook size: ∑ᵢ mᵢ (sum of moduli - logarithmic scaling)
- Addition: z(x₁+x₂) = z(x₁) ⊙ z(x₂) (Hadamard product)
- Subtraction: z(x₁-x₂) = z(x₁) ⊙ z(x₂)* (Hadamard with conjugate)
Args:
dim: Hypervector dimension (default 1000)
moduli: List of co-prime moduli (e.g., [3, 5, 7] for range 105)
model: FHRR model instance (optional, creates one if None)
seed: Random seed for reproducibility
Raises:
ValueError: If moduli are not pairwise co-prime
Example:
>>> encoder = ResidueEncoder(dim=2000, moduli=[3, 5, 7])
>>> z_20 = encoder.encode(20) # 20 = [2 mod 3, 0 mod 5, 6 mod 7]
>>> z_5 = encoder.encode(5)
>>> z_25 = encoder.add(z_20, z_5) # Represents 25
>>> decoded = encoder.decode(z_25) # Returns 25
Attributes:
dim: Hypervector dimension
moduli: List of co-prime moduli
M: Total range (product of moduli)
K: Number of moduli
model: FHRR model for complex phasor operations
backend: Computational backend
"""
def __init__(
self,
dim: int = 1000,
moduli: list[int] | None = None,
model: FHRRModel | None = None,
seed: int | None = None,
):
"""Initialize ResidueEncoder.
Args:
dim: Hypervector dimension (default 1000)
moduli: List of co-prime moduli (default [3, 5, 7] for range 105)
model: FHRR model instance (optional, creates one if None)
seed: Random seed for reproducibility
"""
self.dim = dim
self.moduli = moduli or [3, 5, 7] # Default: range 105
self._validate_coprime(self.moduli)
self.M = math.prod(self.moduli) # Total range
self.K = len(self.moduli) # Number of moduli
# Create FHRR model if not provided
if model is None:
model = FHRRModel(dimension=dim, seed=seed)
self.model = model
self.backend = model.backend
self._seed = seed
# Generate base vectors for each modulus (mth roots of unity)
self._base_vectors = self._generate_base_vectors()
# Pre-compute codebooks for efficient decoding
self._codebooks = self._generate_codebooks()
def _validate_coprime(self, moduli: list[int]) -> None:
"""Verify all moduli are pairwise co-prime.
Args:
moduli: List of moduli to validate
Raises:
ValueError: If any pair of moduli share a common factor > 1
"""
for i, m1 in enumerate(moduli):
if m1 < 2:
raise ValueError(f"Moduli must be >= 2, got {m1}")
for m2 in moduli[i + 1 :]:
if math.gcd(m1, m2) != 1:
raise ValueError(
f"Moduli must be co-prime: gcd({m1}, {m2}) = {math.gcd(m1, m2)} != 1"
)
def _generate_base_vectors(self) -> dict[int, Array]:
"""Generate base phasor for each modulus using mth roots of unity.
For modulus m, phases are restricted to {2πk/m | k=0,...,m-1}.
This ensures z_m(x + m) = z_m(x) (periodicity).
The key insight from Kymn et al. is that constraining phases to
mth roots of unity enables exact periodic arithmetic modulo m.
Returns:
Dictionary mapping modulus m to its base phasor vector.
"""
base_vectors = {}
rng = np.random.default_rng(self._seed)
for m in self.moduli:
# Sample random integers k ∈ {0, 1, ..., m-1} for each dimension
# Each dimension gets a random frequency from the allowed set
k_values = rng.integers(0, m, size=self.dim)
# Convert to phases: θ = 2πk/m (restricts to mth roots of unity)
phases = 2 * np.pi * k_values / m
# Create phasor: z = exp(iθ)
phasor = np.exp(1j * phases).astype(np.complex64)
base_vectors[m] = self.backend.from_numpy(phasor)
return base_vectors
def _generate_codebooks(self) -> dict[int, list[Array]]:
"""Pre-compute codebook vectors for each modulus.
For modulus m, generates vectors z_m(0), z_m(1), ..., z_m(m-1)
where z_m(i) = base^i (element-wise exponentiation).
Returns:
Dictionary mapping modulus m to list of m codebook vectors.
"""
codebooks: dict[int, list[Array]] = {}
for m in self.moduli:
base = self._base_vectors[m]
base_np = self.backend.to_numpy(base)
# Generate z_m(0), z_m(1), ..., z_m(m-1)
codebook = []
for i in range(m):
# z_m(i) = base^i (element-wise exponentiation)
# For phasor e^(iθ), raising to power i gives e^(i·i·θ)
phasor_i = np.power(base_np, i).astype(np.complex64)
codebook.append(self.backend.from_numpy(phasor_i))
codebooks[m] = codebook
return codebooks
def encode(self, x: int) -> Array:
"""Encode integer x into hypervector.
Uses residue number system encoding:
z(x) = z_m₁(x mod m₁) ⊙ z_m₂(x mod m₂) ⊙ ... ⊙ z_mₖ(x mod mₖ)
Where each z_mₖ(rₖ) = base_mₖ^rₖ is the rₖth codebook vector for modulus mₖ.
Args:
x: Integer to encode (0 ≤ x < M)
Returns:
Complex hypervector encoding x
Raises:
ValueError: If x is outside valid range [0, M)
"""
if not (0 <= x < self.M):
raise ValueError(f"x must be in [0, {self.M}), got {x}")
# Start with first modulus's residue encoding
first_m = self.moduli[0]
result = self._codebooks[first_m][x % first_m]
# Hadamard product with remaining moduli
for m in self.moduli[1:]:
remainder = x % m
z_m = self._codebooks[m][remainder]
result = self.backend.multiply(result, z_m)
return result
def add(self, z1: Array, z2: Array) -> Array:
"""Additive binding: result represents x₁ + x₂ (mod M).
z(x₁ + x₂) = z(x₁) ⊙ z(x₂) (Hadamard product)
This works because for each modulus m:
z_m(r₁) ⊙ z_m(r₂) = base^r₁ ⊙ base^r₂ = base^(r₁+r₂)
= z_m((r₁ + r₂) mod m)
Args:
z1: Encoded hypervector for x₁
z2: Encoded hypervector for x₂
Returns:
Encoded hypervector representing x₁ + x₂ (mod M)
"""
return self.backend.multiply(z1, z2)
def subtract(self, z1: Array, z2: Array) -> Array:
"""Subtractive unbinding: result represents x₁ - x₂ (mod M).
z(x₁ - x₂) = z(x₁) ⊙ z(x₂)* (Hadamard with conjugate)
This works because conjugate inverts the phase:
z_m(r)* = base^(-r) = base^(m-r) = z_m(-r mod m)
Args:
z1: Encoded hypervector for x₁
z2: Encoded hypervector for x₂
Returns:
Encoded hypervector representing x₁ - x₂ (mod M)
"""
z2_conj = self.backend.conjugate(z2)
return self.backend.multiply(z1, z2_conj)
def negate(self, z: Array) -> Array:
"""Negate: result represents -x (mod M).
z(-x) = z(x)* (conjugate)
Args:
z: Encoded hypervector for x
Returns:
Encoded hypervector representing -x (mod M)
"""
return self.backend.conjugate(z)
def multiply_from_values(self, x1: int, x2: int) -> Array:
"""Multiply two known integer values.
z(x₁ × x₂) computed directly from values.
Note: Full multiply() on encoded vectors requires factorization and
is deferred to Phase 2. Use this method when you know the values.
Args:
x1: First integer
x2: Second integer
Returns:
Encoded hypervector representing x₁ × x₂ (mod M)
"""
product = (x1 * x2) % self.M
return self.encode(product)
def decode(self, z: Array, method: str = "auto") -> int:
"""Decode hypervector back to integer.
Args:
z: Encoded hypervector
method: Decoding method:
- "auto": Use brute force for range <= 10000, else iterative
- "brute_force": Try all possible values (exact but slow)
- "iterative": Use iterative unbinding (fast but approximate)
Returns:
Decoded integer value in [0, M)
"""
if method == "auto":
method = "brute_force" if self.M <= 10000 else "iterative"
if method == "brute_force":
return self._decode_brute_force(z)
else:
remainders = self._decode_iterative(z)
return self._chinese_remainder_theorem(remainders)
def _decode_brute_force(self, z: Array) -> int:
"""Decode by trying all possible values.
For each integer x in [0, M), compute encode(x) and compare with z.
Return the x with highest similarity.
This is exact but O(M * D) complexity.
Args:
z: Encoded hypervector
Returns:
Decoded integer with highest similarity
"""
z_np = self.backend.to_numpy(z)
dim = self.dim
best_sim = -float("inf")
best_x = 0
for x in range(self.M):
encoded = self.encode(x)
encoded_np = self.backend.to_numpy(encoded)
similarity = float(np.real(np.vdot(encoded_np, z_np))) / dim
if similarity > best_sim:
best_sim = similarity
best_x = x
return best_x
def _decode_iterative(
self, z: Array, max_iterations: int = 10
) -> dict[int, int]:
"""Decode using iterative unbinding with multiple starting points.
Algorithm:
1. Try different initializations for the first modulus
2. For each initialization, run iterative refinement:
- For each modulus m: unbind others, find best match
- Repeat until convergence
3. Pick the result with highest reconstruction similarity
This handles the cold start problem by trying multiple starting
points and selecting the one that reconstructs best.
Args:
z: Encoded hypervector
max_iterations: Maximum iterations per starting point
Returns:
Dictionary mapping modulus to decoded remainder
"""
dim = self.dim
z_np = self.backend.to_numpy(z)
# Try different starting points for the first (smallest) modulus
first_m = self.moduli[0]
best_remainders = dict.fromkeys(self.moduli, 0)
best_reconstruction_sim = -float("inf")
for start_i in range(first_m):
# Initialize with this starting point
remainders = dict.fromkeys(self.moduli, 0)
remainders[first_m] = start_i
# Iterative refinement
for _iteration in range(max_iterations):
changed = False
for m in self.moduli:
# Compute product of other moduli's current estimates
other_product = self._compute_other_product(remainders, m)
# Unbind other moduli from z
unbound = self.backend.multiply(
z, self.backend.conjugate(other_product)
)
unbound_np = self.backend.to_numpy(unbound)
# Find best matching codebook entry for this modulus
best_sim = -float("inf")
best_match = remainders[m]
for i in range(m):
codebook_np = self.backend.to_numpy(self._codebooks[m][i])
similarity = float(np.real(np.vdot(codebook_np, unbound_np))) / dim
if similarity > best_sim:
best_sim = similarity
best_match = i
if best_match != remainders[m]:
remainders[m] = best_match
changed = True
if not changed:
break
# Evaluate reconstruction quality
reconstruction = self._reconstruct(remainders)
reconstruction_np = self.backend.to_numpy(reconstruction)
reconstruction_sim = float(np.real(np.vdot(reconstruction_np, z_np))) / dim
if reconstruction_sim > best_reconstruction_sim:
best_reconstruction_sim = reconstruction_sim
best_remainders = remainders.copy()
return best_remainders
def _reconstruct(self, remainders: dict[int, int]) -> Array:
"""Reconstruct encoded vector from remainders.
Args:
remainders: Dictionary mapping modulus to remainder
Returns:
Reconstructed hypervector
"""
result = self._codebooks[self.moduli[0]][remainders[self.moduli[0]]]
for m in self.moduli[1:]:
result = self.backend.multiply(result, self._codebooks[m][remainders[m]])
return result
def _compute_other_product(
self, remainders: dict[int, int], exclude_m: int
) -> Array:
"""Compute product of all moduli's encodings except one.
Args:
remainders: Current residue estimates for all moduli
exclude_m: Modulus to exclude from product
Returns:
Hadamard product of codebook entries for other moduli
"""
result = None
for m in self.moduli:
if m == exclude_m:
continue
z_m = self._codebooks[m][remainders[m]]
if result is None:
result = z_m
else:
result = self.backend.multiply(result, z_m)
if result is None:
# Only one modulus - return ones vector (identity)
ones = np.ones(self.dim, dtype=np.complex64)
return self.backend.from_numpy(ones)
return result
def _chinese_remainder_theorem(self, remainders: dict[int, int]) -> int:
"""Combine remainders to recover original integer via CRT.
Given x ≡ rᵢ (mod mᵢ) for all i, finds unique x in [0, M).
The Chinese Remainder Theorem guarantees a unique solution exists
when the moduli are pairwise co-prime.
Algorithm:
x = Σᵢ rᵢ · Mᵢ · yᵢ (mod M)
where:
- Mᵢ = M / mᵢ (product of all other moduli)
- yᵢ = Mᵢ⁻¹ (mod mᵢ) (modular multiplicative inverse)
Args:
remainders: Dictionary mapping modulus to remainder
Returns:
Recovered integer x in [0, M)
"""
x = 0
for m in self.moduli:
r = remainders[m]
# M_i = M / m_i
M_i = self.M // m
# y_i such that M_i * y_i ≡ 1 (mod m_i)
y_i = pow(M_i, -1, m)
x += r * M_i * y_i
return x % self.M
def encode_with_residues(self, x: int) -> tuple[Array, dict[int, int]]:
"""Encode integer and return both hypervector and residues.
Useful for debugging or when you need access to intermediate values.
Args:
x: Integer to encode (0 ≤ x < M)
Returns:
Tuple of (encoded hypervector, dict of modulus → remainder)
"""
residues = {m: x % m for m in self.moduli}
z = self.encode(x)
return z, residues
@property
def range(self) -> int:
"""Maximum encodable value (exclusive)."""
return self.M
@property
def codebook_size(self) -> int:
"""Total number of codebook vectors needed (sum of moduli)."""
return sum(self.moduli)
@property
def dimension(self) -> int:
"""Hypervector dimension."""
return self.dim
def __repr__(self) -> str:
"""String representation."""
return (
f"ResidueEncoder(dim={self.dim}, moduli={self.moduli}, "
f"range={self.M}, codebook_size={self.codebook_size})"
)
codebook_size
property
Total number of codebook vectors needed (sum of moduli).
dimension
property
Hypervector dimension.
range
property
Maximum encodable value (exclusive).
__init__(dim=1000, moduli=None, model=None, seed=None)
Initialize ResidueEncoder.
Args: dim: Hypervector dimension (default 1000) moduli: List of co-prime moduli (default [3, 5, 7] for range 105) model: FHRR model instance (optional, creates one if None) seed: Random seed for reproducibility
Source code in holovec/encoders/residue.py
def __init__(
self,
dim: int = 1000,
moduli: list[int] | None = None,
model: FHRRModel | None = None,
seed: int | None = None,
):
"""Initialize ResidueEncoder.
Args:
dim: Hypervector dimension (default 1000)
moduli: List of co-prime moduli (default [3, 5, 7] for range 105)
model: FHRR model instance (optional, creates one if None)
seed: Random seed for reproducibility
"""
self.dim = dim
self.moduli = moduli or [3, 5, 7] # Default: range 105
self._validate_coprime(self.moduli)
self.M = math.prod(self.moduli) # Total range
self.K = len(self.moduli) # Number of moduli
# Create FHRR model if not provided
if model is None:
model = FHRRModel(dimension=dim, seed=seed)
self.model = model
self.backend = model.backend
self._seed = seed
# Generate base vectors for each modulus (mth roots of unity)
self._base_vectors = self._generate_base_vectors()
# Pre-compute codebooks for efficient decoding
self._codebooks = self._generate_codebooks()
__repr__()
String representation.
Source code in holovec/encoders/residue.py
def __repr__(self) -> str:
"""String representation."""
return (
f"ResidueEncoder(dim={self.dim}, moduli={self.moduli}, "
f"range={self.M}, codebook_size={self.codebook_size})"
)
add(z1, z2)
Additive binding: result represents x₁ + x₂ (mod M).
z(x₁ + x₂) = z(x₁) ⊙ z(x₂) (Hadamard product)
This works because for each modulus m: z_m(r₁) ⊙ z_m(r₂) = base^r₁ ⊙ base^r₂ = base^(r₁+r₂) = z_m((r₁ + r₂) mod m)
Args: z1: Encoded hypervector for x₁ z2: Encoded hypervector for x₂
Returns: Encoded hypervector representing x₁ + x₂ (mod M)
Source code in holovec/encoders/residue.py
def add(self, z1: Array, z2: Array) -> Array:
"""Additive binding: result represents x₁ + x₂ (mod M).
z(x₁ + x₂) = z(x₁) ⊙ z(x₂) (Hadamard product)
This works because for each modulus m:
z_m(r₁) ⊙ z_m(r₂) = base^r₁ ⊙ base^r₂ = base^(r₁+r₂)
= z_m((r₁ + r₂) mod m)
Args:
z1: Encoded hypervector for x₁
z2: Encoded hypervector for x₂
Returns:
Encoded hypervector representing x₁ + x₂ (mod M)
"""
return self.backend.multiply(z1, z2)
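A usage sketch tying add() to its conjugate-based counterparts, reusing the values from the class example (decode takes the exact brute-force path for this small range):
>>> enc = ResidueEncoder(dim=2000, moduli=[3, 5, 7])
>>> z20, z5 = enc.encode(20), enc.encode(5)
>>> enc.decode(enc.add(z20, z5))              # 20 + 5
25
>>> enc.decode(enc.subtract(z20, z5))         # 20 - 5
15
>>> enc.decode(enc.add(z5, enc.negate(z20)))  # 5 - 20 ≡ 90 (mod 105)
90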
decode(z, method='auto')
Decode hypervector back to integer.
Args:
z: Encoded hypervector
method: Decoding method:
- "auto": Use brute force for range <= 10000, else iterative
- "brute_force": Try all possible values (exact but slow)
- "iterative": Use iterative unbinding (fast but approximate)
Returns: Decoded integer value in [0, M)
Source code in holovec/encoders/residue.py
def decode(self, z: Array, method: str = "auto") -> int:
"""Decode hypervector back to integer.
Args:
z: Encoded hypervector
method: Decoding method:
- "auto": Use brute force for range <= 10000, else iterative
- "brute_force": Try all possible values (exact but slow)
- "iterative": Use iterative unbinding (fast but approximate)
Returns:
Decoded integer value in [0, M)
"""
if method == "auto":
method = "brute_force" if self.M <= 10000 else "iterative"
if method == "brute_force":
return self._decode_brute_force(z)
else:
remainders = self._decode_iterative(z)
return self._chinese_remainder_theorem(remainders)
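For example, to force a strategy instead of relying on "auto" (an illustrative sketch; with the default moduli the range is 105, so "auto" already picks brute force):
>>> exact = encoder.decode(z, method="brute_force")  # exact, O(M × D)
>>> approx = encoder.decode(z, method="iterative")   # faster for large ranges, approximate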
encode(x)
Encode integer x into hypervector.
Uses residue number system encoding: z(x) = z_m₁(x mod m₁) ⊙ z_m₂(x mod m₂) ⊙ ... ⊙ z_mₖ(x mod mₖ)
Where each z_mₖ(rₖ) = base_mₖ^rₖ is the rₖth codebook vector for modulus mₖ.
Args: x: Integer to encode (0 ≤ x < M)
Returns: Complex hypervector encoding x
Raises: ValueError: If x is outside valid range [0, M)
Source code in holovec/encoders/residue.py
def encode(self, x: int) -> Array:
"""Encode integer x into hypervector.
Uses residue number system encoding:
z(x) = z_m₁(x mod m₁) ⊙ z_m₂(x mod m₂) ⊙ ... ⊙ z_mₖ(x mod mₖ)
Where each z_mₖ(rₖ) = base_mₖ^rₖ is the rₖth codebook vector for modulus mₖ.
Args:
x: Integer to encode (0 ≤ x < M)
Returns:
Complex hypervector encoding x
Raises:
ValueError: If x is outside valid range [0, M)
"""
if not (0 <= x < self.M):
raise ValueError(f"x must be in [0, {self.M}), got {x}")
# Start with first modulus's residue encoding
first_m = self.moduli[0]
result = self._codebooks[first_m][x % first_m]
# Hadamard product with remaining moduli
for m in self.moduli[1:]:
remainder = x % m
z_m = self._codebooks[m][remainder]
result = self.backend.multiply(result, z_m)
return result
encode_with_residues(x)
Encode integer and return both hypervector and residues.
Useful for debugging or when you need access to intermediate values.
Args: x: Integer to encode (0 ≤ x < M)
Returns: Tuple of (encoded hypervector, dict of modulus → remainder)
Source code in holovec/encoders/residue.py
def encode_with_residues(self, x: int) -> tuple[Array, dict[int, int]]:
"""Encode integer and return both hypervector and residues.
Useful for debugging or when you need access to intermediate values.
Args:
x: Integer to encode (0 ≤ x < M)
Returns:
Tuple of (encoded hypervector, dict of modulus → remainder)
"""
residues = {m: x % m for m in self.moduli}
z = self.encode(x)
return z, residues
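For instance, with the default moduli [3, 5, 7] (a usage sketch):
>>> z, residues = encoder.encode_with_residues(20)
>>> residues
{3: 2, 5: 0, 7: 6}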
multiply_from_values(x1, x2)
Multiply two known integer values.
z(x₁ × x₂) computed directly from values.
Note: Full multiply() on encoded vectors requires factorization and is deferred to Phase 2. Use this method when you know the values.
Args: x1: First integer x2: Second integer
Returns: Encoded hypervector representing x₁ × x₂ (mod M)
Source code in holovec/encoders/residue.py
def multiply_from_values(self, x1: int, x2: int) -> Array:
"""Multiply two known integer values.
z(x₁ × x₂) computed directly from values.
Note: Full multiply() on encoded vectors requires factorization and
is deferred to Phase 2. Use this method when you know the values.
Args:
x1: First integer
x2: Second integer
Returns:
Encoded hypervector representing x₁ × x₂ (mod M)
"""
product = (x1 * x2) % self.M
return self.encode(product)
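A usage sketch (4 × 5 = 20 stays inside the default range of 105):
>>> z = encoder.multiply_from_values(4, 5)
>>> encoder.decode(z)
20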
negate(z)
Negate: result represents -x (mod M).
z(-x) = z(x)* (conjugate)
Args: z: Encoded hypervector for x
Returns: Encoded hypervector representing -x (mod M)
Source code in holovec/encoders/residue.py
def negate(self, z: Array) -> Array:
"""Negate: result represents -x (mod M).
z(-x) = z(x)* (conjugate)
Args:
z: Encoded hypervector for x
Returns:
Encoded hypervector representing -x (mod M)
"""
return self.backend.conjugate(z)
subtract(z1, z2)
Subtractive unbinding: result represents x₁ - x₂ (mod M).
z(x₁ - x₂) = z(x₁) ⊙ z(x₂)* (Hadamard with conjugate)
This works because conjugate inverts the phase: z_m(r)* = base^(-r) = base^(m-r) = z_m(-r mod m)
Args: z1: Encoded hypervector for x₁ z2: Encoded hypervector for x₂
Returns: Encoded hypervector representing x₁ - x₂ (mod M)
Source code in holovec/encoders/residue.py
def subtract(self, z1: Array, z2: Array) -> Array:
"""Subtractive unbinding: result represents x₁ - x₂ (mod M).
z(x₁ - x₂) = z(x₁) ⊙ z(x₂)* (Hadamard with conjugate)
This works because conjugate inverts the phase:
z_m(r)* = base^(-r) = base^(m-r) = z_m(-r mod m)
Args:
z1: Encoded hypervector for x₁
z2: Encoded hypervector for x₂
Returns:
Encoded hypervector representing x₁ - x₂ (mod M)
"""
z2_conj = self.backend.conjugate(z2)
return self.backend.multiply(z1, z2_conj)
Spatial Encoders
holovec.encoders.spatial.ImageEncoder
Bases: Encoder
Image encoder for 2D images (grayscale, RGB, or RGBA).
Encodes images by binding spatial positions (x, y) with pixel values. For color images, each channel is bound to a channel dimension vector before being combined with position information.
Encoding strategy: For each pixel at position (x, y) with value v:
1. Encode position: pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
2. Encode value(s):
   - Grayscale: val_hv = enc(v)
   - RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)), bind(B, enc(b))])
3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
4. Bundle all pixels: image_hv = bundle([all pixel_hvs])
This creates a distributed representation that preserves both spatial structure and pixel values, enabling similarity-based image comparison.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| model | VSAModel | The VSA model to use for encoding operations. | required |
| scalar_encoder | ScalarEncoder | Encoder for continuous pixel values (0-255 typically). | required |
| normalize_pixels | bool | Whether to normalize pixel values to [0, 1] before encoding. Default is True. | True |
| seed | int | Random seed for reproducibility. Default is None. | None |
Attributes:
| Name | Type | Description |
|---|---|---|
| n_channels | int | Number of channels in the last encoded image (1, 3, or 4). |
| image_shape | tuple | Shape (height, width, channels) of the last encoded image. |
Examples:
>>> from holovec import VSA
>>> from holovec.encoders import ImageEncoder, ThermometerEncoder
>>> import numpy as np
>>>
>>> model = VSA.create('MAP', dim=10000, seed=42)
>>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
>>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
>>>
>>> # Encode a small grayscale image
>>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
>>> hv = encoder.encode(image)
>>> print(hv.shape) # (10000,)
>>>
>>> # Encode RGB image
>>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
>>> hv_rgb = encoder.encode(rgb_image)
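Because spatial structure and pixel values are both preserved, nearby images score high under model.similarity. Continuing the example above (an illustrative sketch; the exact score depends on dim and the scalar encoder):
>>> image2 = np.array([[100, 150], [200, 251]], dtype=np.uint8)  # one pixel nudged
>>> hv2 = encoder.encode(image2)
>>> model.similarity(hv, hv2)  # high similarity for near-identical images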
Source code in holovec/encoders/spatial.py
class ImageEncoder(Encoder):
"""
Image encoder for 2D images (grayscale, RGB, or RGBA).
Encodes images by binding spatial positions (x, y) with pixel values.
For color images, each channel is bound to a channel dimension vector
before being combined with position information.
Encoding strategy:
For each pixel at position (x, y) with value v:
1. Encode position: pos_hv = bundle([bind(X, enc(x)), bind(Y, enc(y))])
2. Encode value(s):
- Grayscale: val_hv = enc(v)
- RGB: val_hv = bundle([bind(R, enc(r)), bind(G, enc(g)), bind(B, enc(b))])
3. Bind position with value: pixel_hv = bind(pos_hv, val_hv)
4. Bundle all pixels: image_hv = bundle([all pixel_hvs])
This creates a distributed representation that preserves both spatial
structure and pixel values, enabling similarity-based image comparison.
Parameters
----------
model : VSAModel
The VSA model to use for encoding operations.
scalar_encoder : ScalarEncoder
Encoder for continuous pixel values (0-255 typically).
normalize_pixels : bool, optional
Whether to normalize pixel values to [0, 1] before encoding.
Default is True.
seed : int, optional
Random seed for reproducibility. Default is None.
Attributes
----------
n_channels : int
Number of channels in the last encoded image (1, 3, or 4).
image_shape : tuple
Shape (height, width, channels) of the last encoded image.
Examples
--------
>>> from holovec import VSA
>>> from holovec.encoders import ImageEncoder, ThermometerEncoder
>>> import numpy as np
>>>
>>> model = VSA.create('MAP', dim=10000, seed=42)
>>> scalar_enc = ThermometerEncoder(model, min_val=0, max_val=1, n_bins=256, seed=42)
>>> encoder = ImageEncoder(model, scalar_enc, normalize_pixels=True, seed=42)
>>>
>>> # Encode a small grayscale image
>>> image = np.array([[100, 150], [200, 250]], dtype=np.uint8)
>>> hv = encoder.encode(image)
>>> print(hv.shape) # (10000,)
>>>
>>> # Encode RGB image
>>> rgb_image = np.random.randint(0, 256, (28, 28, 3), dtype=np.uint8)
>>> hv_rgb = encoder.encode(rgb_image)
"""
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
normalize_pixels: bool = True,
seed: int | None = None,
):
"""Initialize ImageEncoder."""
# Validate and set scalar_encoder BEFORE calling super().__init__
# because base class checks compatible_models which references it
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}")
if scalar_encoder.model != model:
raise ValueError("scalar_encoder must use the same VSA model as the ImageEncoder")
self.scalar_encoder = scalar_encoder
self.normalize_pixels = normalize_pixels
super().__init__(model)
# Generate dimension vectors for spatial coordinates
base_seed = seed if seed is not None else 2000
self.X = model.random(seed=base_seed) # X dimension
self.Y = model.random(seed=base_seed + 1) # Y dimension
# Generate dimension vectors for color channels (RGB/RGBA)
self.R = model.random(seed=base_seed + 2) # Red channel
self.G = model.random(seed=base_seed + 3) # Green channel
self.B = model.random(seed=base_seed + 4) # Blue channel
self.A = model.random(seed=base_seed + 5) # Alpha channel
# Track last encoded image properties
self.n_channels: int | None = None
self.image_shape: tuple[int, ...] | None = None
def encode(self, image: Array | np.ndarray) -> Array:
"""
Encode an image into a hypervector.
Parameters
----------
image : array-like
Image array with shape (height, width) for grayscale or
(height, width, channels) for color images.
Pixel values should be in range [0, 255] for uint8 or
[0, 1] for float.
Typically a NumPy array from PIL, OpenCV, or similar libraries.
Returns
-------
Array
Hypervector encoding of the image.
Raises
------
ValueError
If image has invalid shape or number of channels.
Notes
-----
This encoder accepts images as NumPy arrays (the standard format from
image libraries like PIL, OpenCV, scikit-image) and processes them using
the configured backend. While input must be NumPy, internal VSA operations
use the model's backend (NumPy/PyTorch/JAX).
"""
# Import numpy locally to avoid module-level backend dependency
# Images from external sources (PIL, OpenCV) are numpy arrays
import numpy as _np
# Convert to numpy array if needed (handles lists, tuples, etc.)
if not isinstance(image, _np.ndarray):
image = _np.array(image)
# Validate and normalize image shape
if image.ndim == 2:
# Grayscale image
height, width = image.shape
n_channels = 1
# Add channel dimension: (H, W) -> (H, W, 1)
image = _np.expand_dims(image, axis=-1)
elif image.ndim == 3:
height, width, n_channels = image.shape
if n_channels not in [1, 3, 4]:
raise ValueError(f"Image must have 1, 3, or 4 channels, got {n_channels}")
else:
raise ValueError(f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}")
# Store image properties
self.n_channels = n_channels
self.image_shape = (height, width, n_channels)
# Normalize pixel values if requested
if self.normalize_pixels:
# Check dtype using string representation to avoid dtype dependency
dtype_str = str(image.dtype)
if "uint8" in dtype_str:
image = image.astype(_np.float32) / 255.0
elif "int" in dtype_str:
# Other integer types: normalize assuming 0-255 range
image = image.astype(_np.float32) / 255.0
# If already float, assume it's in [0, 1]
# Encode all pixels
pixel_hvs = []
for y in range(height):
for x in range(width):
# Encode spatial position
x_hv = self.scalar_encoder.encode(float(x))
y_hv = self.scalar_encoder.encode(float(y))
x_bound = self.model.bind(self.X, x_hv)
y_bound = self.model.bind(self.Y, y_hv)
pos_hv = self.model.bundle([x_bound, y_bound])
# Encode pixel value(s)
if n_channels == 1:
# Grayscale: just encode the intensity
val_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
elif n_channels == 3:
# RGB: bind each channel to its dimension vector
r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
r_bound = self.model.bind(self.R, r_hv)
g_bound = self.model.bind(self.G, g_hv)
b_bound = self.model.bind(self.B, b_hv)
val_hv = self.model.bundle([r_bound, g_bound, b_bound])
else: # n_channels == 4
# RGBA: bind each channel including alpha
r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
a_hv = self.scalar_encoder.encode(float(image[y, x, 3]))
r_bound = self.model.bind(self.R, r_hv)
g_bound = self.model.bind(self.G, g_hv)
b_bound = self.model.bind(self.B, b_hv)
a_bound = self.model.bind(self.A, a_hv)
val_hv = self.model.bundle([r_bound, g_bound, b_bound, a_bound])
# Bind position with value
pixel_hv = self.model.bind(pos_hv, val_hv)
pixel_hvs.append(pixel_hv)
# Bundle all pixels to create image hypervector
image_hv = self.model.bundle(pixel_hvs)
return image_hv
def decode(
self, hypervector: Array, height: int, width: int, n_channels: int = 1
) -> np.ndarray:
"""
Decode a hypervector to reconstruct an approximate image.
Note: Image decoding is approximate and requires knowing the target
image dimensions. Reconstruction quality depends on the scalar encoder's
decoding capabilities and may require candidate value search.
Parameters
----------
hypervector : Array
The hypervector to decode.
height : int
Target image height.
width : int
Target image width.
n_channels : int, optional
Number of channels (1, 3, or 4). Default is 1.
Returns
-------
np.ndarray
Reconstructed image with shape (height, width) for grayscale
or (height, width, n_channels) for color.
Raises
------
NotImplementedError
Image decoding is computationally intractable without additional constraints.
Notes
-----
Image decoding is not implemented because it requires solving a high-dimensional
inverse problem that is fundamentally ill-posed:
**Mathematical Challenge:**
The encoding process binds pixel values with position vectors:
image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])
To decode, we must:
1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])
**Why This Is Intractable:**
- Unbinding is approximate (except for FHRR with exact inverse)
- Each unbind operation introduces noise
- For H×W image: H×W unbind operations compound errors
- Scalar decoding via optimization (1000 evals × 100 iterations)
- Total: ~100M evaluations for 100×100 image
- No gradient available for joint optimization
**Alternative Approaches:**
1. **Database Retrieval**: Encode query image, find nearest match in database
- Complexity: O(N) for N known images
- Works well for classification/recognition tasks
2. **Iterative Resonator**: Use resonator cleanup with pixel codebook
- Requires pre-built codebook of common pixel patterns
- May reconstruct coarse structure but not fine details
3. **Neural Decoder**: Train neural network image_hv → image
- Requires supervised training data
- Can learn inverse mapping empirically
- See: Imani et al. (2019) "VoiceHD" for similar approach
For practical applications, use ImageEncoder for one-way encoding
(e.g., image→hypervector→classifier) rather than reconstruction.
References
----------
- Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for
Efficient Speech Recognition"
- Plate (2003): "Holographic Reduced Representations" - Chapter 4 on
approximate unbinding and error accumulation
"""
raise NotImplementedError(
"Image decoding is not implemented due to computational intractability. "
"See docstring for detailed mathematical explanation and alternatives. "
"For reconstruction tasks, use similarity-based retrieval from a database "
"of known images, or train a neural decoder network."
)
@property
def is_reversible(self) -> bool:
"""
Whether the encoder supports decoding.
Returns
-------
bool
False - image decoding not yet implemented.
"""
return False
@property
def compatible_models(self) -> list[str]:
"""
List of compatible VSA model names.
Returns
-------
list of str
All VSA models supported (depends on scalar encoder compatibility).
"""
return self.scalar_encoder.compatible_models
@property
def input_type(self) -> str:
"""
Description of expected input type.
Returns
-------
str
Description of input format.
"""
if self.n_channels is None or self.image_shape is None:
return "2D array (grayscale) or 3D array (color) with shape (H, W) or (H, W, C)"
if self.n_channels == 1:
return f"Grayscale image ({self.image_shape[0]}x{self.image_shape[1]})"
if self.n_channels == 3:
return f"RGB image ({self.image_shape[0]}x{self.image_shape[1]}x3)"
return f"RGBA image ({self.image_shape[0]}x{self.image_shape[1]}x4)"
def __repr__(self) -> str:
"""Return string representation."""
return (
f"ImageEncoder(model={self.model.model_name}, "
f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
f"normalize_pixels={self.normalize_pixels})"
)
compatible_models
property
List of compatible VSA model names.
Returns:
| Type | Description |
|---|---|
| list of str | All VSA models supported (depends on scalar encoder compatibility). |
input_type
property
Description of expected input type.
Returns:
| Type | Description |
|---|---|
| str | Description of input format. |
is_reversible
property
Whether the encoder supports decoding.
Returns:
| Type | Description |
|---|---|
| bool | False - image decoding not yet implemented. |
__init__(model, scalar_encoder, normalize_pixels=True, seed=None)
Initialize ImageEncoder.
Source code in holovec/encoders/spatial.py
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
normalize_pixels: bool = True,
seed: int | None = None,
):
"""Initialize ImageEncoder."""
# Validate and set scalar_encoder BEFORE calling super().__init__
# because base class checks compatible_models which references it
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}")
if scalar_encoder.model != model:
raise ValueError("scalar_encoder must use the same VSA model as the ImageEncoder")
self.scalar_encoder = scalar_encoder
self.normalize_pixels = normalize_pixels
super().__init__(model)
# Generate dimension vectors for spatial coordinates
base_seed = seed if seed is not None else 2000
self.X = model.random(seed=base_seed) # X dimension
self.Y = model.random(seed=base_seed + 1) # Y dimension
# Generate dimension vectors for color channels (RGB/RGBA)
self.R = model.random(seed=base_seed + 2) # Red channel
self.G = model.random(seed=base_seed + 3) # Green channel
self.B = model.random(seed=base_seed + 4) # Blue channel
self.A = model.random(seed=base_seed + 5) # Alpha channel
# Track last encoded image properties
self.n_channels: int | None = None
self.image_shape: tuple[int, ...] | None = None
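For orientation, a minimal construction sketch; the import path for ImageEncoder is assumed from the source listing above (holovec/encoders/spatial.py), and the scalar-encoder range is a modelling choice since the same encoder also encodes pixel coordinates:

```python
from holovec import VSA
from holovec.encoders import FractionalPowerEncoder
from holovec.encoders.spatial import ImageEncoder  # path assumed from the source listing

model = VSA.create('FHRR', dim=10000)
# One scalar encoder serves both normalized pixel values and (x, y) coordinates.
scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1)
encoder = ImageEncoder(model, scalar_encoder=scalar_enc, seed=42)

# Re-using the seed reproduces the same X/Y/R/G/B/A dimension vectors.
encoder_again = ImageEncoder(model, scalar_encoder=scalar_enc, seed=42)
```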
__repr__()
Return string representation.
Source code in holovec/encoders/spatial.py
def __repr__(self) -> str:
"""Return string representation."""
return (
f"ImageEncoder(model={self.model.model_name}, "
f"scalar_encoder={self.scalar_encoder.__class__.__name__}, "
f"normalize_pixels={self.normalize_pixels})"
)
decode(hypervector, height, width, n_channels=1)
Decode a hypervector to reconstruct an approximate image.
Note: Image decoding is approximate and requires knowing the target image dimensions. Reconstruction quality depends on the scalar encoder's decoding capabilities and may require candidate value search.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| hypervector | Array | The hypervector to decode. | required |
| height | int | Target image height. | required |
| width | int | Target image width. | required |
| n_channels | int | Number of channels (1, 3, or 4). Default is 1. | 1 |
Returns:
| Type | Description |
|---|---|
| ndarray | Reconstructed image with shape (height, width) for grayscale or (height, width, n_channels) for color. |
Raises:
| Type | Description |
|---|---|
| NotImplementedError | Image decoding is computationally intractable without additional constraints. |
Notes
Image decoding is not implemented because it requires solving a high-dimensional inverse problem that is fundamentally ill-posed:
Mathematical Challenge:
The encoding process binds pixel values with position vectors:
image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])
To decode, we must:
1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])
Why This Is Intractable:
- Unbinding is approximate (except for FHRR with exact inverse)
- Each unbind operation introduces noise
- For H×W image: H×W unbind operations compound errors
- Scalar decoding via optimization (1000 evals × 100 iterations)
- Total: ~100M evaluations for 100×100 image
- No gradient available for joint optimization
Alternative Approaches:
1. Database Retrieval: Encode query image, find nearest match in database (see the retrieval sketch after the source listing below)
   - Complexity: O(N) for N known images
   - Works well for classification/recognition tasks
2. Iterative Resonator: Use resonator cleanup with pixel codebook
   - Requires pre-built codebook of common pixel patterns
   - May reconstruct coarse structure but not fine details
3. Neural Decoder: Train neural network image_hv → image
   - Requires supervised training data
   - Can learn inverse mapping empirically
   - See: Imani et al. (2019) "VoiceHD" for similar approach
For practical applications, use ImageEncoder for one-way encoding (e.g., image→hypervector→classifier) rather than reconstruction.
References
- Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for Efficient Speech Recognition"
- Plate (2003): "Holographic Reduced Representations" - Chapter 4 on approximate unbinding and error accumulation
Source code in holovec/encoders/spatial.py
def decode(
self, hypervector: Array, height: int, width: int, n_channels: int = 1
) -> np.ndarray:
"""
Decode a hypervector to reconstruct an approximate image.
Note: Image decoding is approximate and requires knowing the target
image dimensions. Reconstruction quality depends on the scalar encoder's
decoding capabilities and may require candidate value search.
Parameters
----------
hypervector : Array
The hypervector to decode.
height : int
Target image height.
width : int
Target image width.
n_channels : int, optional
Number of channels (1, 3, or 4). Default is 1.
Returns
-------
np.ndarray
Reconstructed image with shape (height, width) for grayscale
or (height, width, n_channels) for color.
Raises
------
NotImplementedError
Image decoding is computationally intractable without additional constraints.
Notes
-----
Image decoding is not implemented because it requires solving a high-dimensional
inverse problem that is fundamentally ill-posed:
**Mathematical Challenge:**
The encoding process binds pixel values with position vectors:
image_hv = bundle([bind(position(i,j), scalar(pixel[i,j])) for all i,j])
To decode, we must:
1. Unbind each position: pixel_hv[i,j] = unbind(image_hv, position(i,j))
2. Decode each scalar: pixel[i,j] = scalar_decode(pixel_hv[i,j])
**Why This Is Intractable:**
- Unbinding is approximate (except for FHRR with exact inverse)
- Each unbind operation introduces noise
- For H×W image: H×W unbind operations compound errors
- Scalar decoding via optimization (1000 evals × 100 iterations)
- Total: ~100M evaluations for 100×100 image
- No gradient available for joint optimization
**Alternative Approaches:**
1. **Database Retrieval**: Encode query image, find nearest match in database
- Complexity: O(N) for N known images
- Works well for classification/recognition tasks
2. **Iterative Resonator**: Use resonator cleanup with pixel codebook
- Requires pre-built codebook of common pixel patterns
- May reconstruct coarse structure but not fine details
3. **Neural Decoder**: Train neural network image_hv → image
- Requires supervised training data
- Can learn inverse mapping empirically
- See: Imani et al. (2019) "VoiceHD" for similar approach
For practical applications, use ImageEncoder for one-way encoding
(e.g., image→hypervector→classifier) rather than reconstruction.
References
----------
- Imani et al. (2019): "VoiceHD: Hyperdimensional Computing for
Efficient Speech Recognition"
- Plate (2003): "Holographic Reduced Representations" - Chapter 4 on
approximate unbinding and error accumulation
"""
raise NotImplementedError(
"Image decoding is not implemented due to computational intractability. "
"See docstring for detailed mathematical explanation and alternatives. "
"For reconstruction tasks, use similarity-based retrieval from a database "
"of known images, or train a neural decoder network."
)
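As the notes above recommend, the practical substitute for decoding is retrieval. A minimal sketch using the ItemStore documented later on this page (the toy image dictionary is a placeholder for real data; model and encoder as in the construction sketch above):

```python
import numpy as np
from holovec.retrieval.itemstore import ItemStore  # path per the ItemStore source listing below

# Toy "database" of reference images (placeholders for real data).
known_images = {f'img_{i}': np.random.randint(0, 256, (8, 8), dtype=np.uint8) for i in range(3)}

store = ItemStore(model)
store.fit({label: encoder.encode(img) for label, img in known_images.items()})

# "Decode" by nearest-neighbour lookup instead of inverting the encoding.
query_hv = encoder.encode(known_images['img_1'])
[(best_label, similarity)] = store.query(query_hv, k=1)  # -> ('img_1', high similarity)
```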
encode(image)
Encode an image into a hypervector.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| image | array-like | Image array with shape (height, width) for grayscale or (height, width, channels) for color images. Pixel values should be in range [0, 255] for uint8 or [0, 1] for float. Typically a NumPy array from PIL, OpenCV, or similar libraries. | required |
Returns:
| Type | Description |
|---|---|
| Array | Hypervector encoding of the image. |
Raises:
| Type | Description |
|---|---|
| ValueError | If image has invalid shape or number of channels. |
Notes
This encoder accepts images as NumPy arrays (the standard format from image libraries like PIL, OpenCV, scikit-image) and processes them using the configured backend. While input must be NumPy, internal VSA operations use the model's backend (NumPy/PyTorch/JAX).
Source code in holovec/encoders/spatial.py
def encode(self, image: Array | np.ndarray) -> Array:
"""
Encode an image into a hypervector.
Parameters
----------
image : array-like
Image array with shape (height, width) for grayscale or
(height, width, channels) for color images.
Pixel values should be in range [0, 255] for uint8 or
[0, 1] for float.
Typically a NumPy array from PIL, OpenCV, or similar libraries.
Returns
-------
Array
Hypervector encoding of the image.
Raises
------
ValueError
If image has invalid shape or number of channels.
Notes
-----
This encoder accepts images as NumPy arrays (the standard format from
image libraries like PIL, OpenCV, scikit-image) and processes them using
the configured backend. While input must be NumPy, internal VSA operations
use the model's backend (NumPy/PyTorch/JAX).
"""
# Import numpy locally to avoid module-level backend dependency
# Images from external sources (PIL, OpenCV) are numpy arrays
import numpy as _np
# Convert to numpy array if needed (handles lists, tuples, etc.)
if not isinstance(image, _np.ndarray):
image = _np.array(image)
# Validate and normalize image shape
if image.ndim == 2:
# Grayscale image
height, width = image.shape
n_channels = 1
# Add channel dimension: (H, W) -> (H, W, 1)
image = _np.expand_dims(image, axis=-1)
elif image.ndim == 3:
height, width, n_channels = image.shape
if n_channels not in [1, 3, 4]:
raise ValueError(f"Image must have 1, 3, or 4 channels, got {n_channels}")
else:
raise ValueError(f"Image must be 2D (grayscale) or 3D (color), got shape {image.shape}")
# Store image properties
self.n_channels = n_channels
self.image_shape = (height, width, n_channels)
# Normalize pixel values if requested
if self.normalize_pixels:
# Check dtype using string representation to avoid dtype dependency
dtype_str = str(image.dtype)
if "uint8" in dtype_str:
image = image.astype(_np.float32) / 255.0
elif "int" in dtype_str:
# Other integer types: normalize assuming 0-255 range
image = image.astype(_np.float32) / 255.0
# If already float, assume it's in [0, 1]
# Encode all pixels
pixel_hvs = []
for y in range(height):
for x in range(width):
# Encode spatial position
x_hv = self.scalar_encoder.encode(float(x))
y_hv = self.scalar_encoder.encode(float(y))
x_bound = self.model.bind(self.X, x_hv)
y_bound = self.model.bind(self.Y, y_hv)
pos_hv = self.model.bundle([x_bound, y_bound])
# Encode pixel value(s)
if n_channels == 1:
# Grayscale: just encode the intensity
val_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
elif n_channels == 3:
# RGB: bind each channel to its dimension vector
r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
r_bound = self.model.bind(self.R, r_hv)
g_bound = self.model.bind(self.G, g_hv)
b_bound = self.model.bind(self.B, b_hv)
val_hv = self.model.bundle([r_bound, g_bound, b_bound])
else: # n_channels == 4
# RGBA: bind each channel including alpha
r_hv = self.scalar_encoder.encode(float(image[y, x, 0]))
g_hv = self.scalar_encoder.encode(float(image[y, x, 1]))
b_hv = self.scalar_encoder.encode(float(image[y, x, 2]))
a_hv = self.scalar_encoder.encode(float(image[y, x, 3]))
r_bound = self.model.bind(self.R, r_hv)
g_bound = self.model.bind(self.G, g_hv)
b_bound = self.model.bind(self.B, b_hv)
a_bound = self.model.bind(self.A, a_hv)
val_hv = self.model.bundle([r_bound, g_bound, b_bound, a_bound])
# Bind position with value
pixel_hv = self.model.bind(pos_hv, val_hv)
pixel_hvs.append(pixel_hv)
# Bundle all pixels to create image hypervector
image_hv = self.model.bundle(pixel_hvs)
return image_hv
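Continuing the construction sketch above, a short usage example on a tiny grayscale image (exact similarity values depend on the model, dimension, and scalar encoder; a lightly perturbed image is expected to stay close under this encoding):

```python
import numpy as np

img = np.random.randint(0, 256, size=(4, 4), dtype=np.uint8)  # tiny grayscale image
hv = encoder.encode(img)                                      # one hypervector per image

# Perturb pixel values slightly; the encodings should remain similar.
noise = np.random.randint(-5, 6, size=img.shape)
noisy = np.clip(img.astype(int) + noise, 0, 255).astype(np.uint8)
print(model.similarity(hv, encoder.encode(noisy)))            # expected: high, near 1.0
```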
holovec.encoders.structured.VectorEncoder
Bases: StructuredEncoder
Vector encoder for multi-dimensional numeric data using role-filler binding.
Encodes vectors by binding each dimension with its scalar-encoded value:
encode([v₁, v₂, ..., vₐ]) = Σᵢ bind(Dᵢ, scalar_encode(vᵢ))
where:
- Dᵢ is a random hypervector for dimension i
- scalar_encode(vᵢ) encodes the scalar value using FPE/Thermometer/Level
- bind() creates a role-filler binding
- Σ bundles all dimension-value pairs
This creates a compositional encoding where:
- Each dimension has explicit representation (Dᵢ)
- Similar values in corresponding dimensions → higher similarity
- Supports partial matching across dimensions
- Enables approximate decoding via unbinding
Attributes:
scalar_encoder: Encoder for individual scalar values
n_dimensions: Number of dimensions in input vectors
dim_vectors: List of dimension hypervectors (Dᵢ)
normalize_input: Whether to normalize input vectors
Example:
>>> from holovec import VSA
>>> from holovec.encoders import FractionalPowerEncoder, VectorEncoder
>>>
>>> model = VSA.create('FHRR', dim=10000)
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1)
>>> encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dimensions=128)
>>>
>>> # Encode a feature vector (list or any backend array)
>>> features = [0.5] * 128  # Can also use numpy/torch/jax arrays
>>> hv = encoder.encode(features)
>>>
>>> # Similar vectors have high similarity
>>> features2 = [0.51] * 128  # Slightly different
>>> hv2 = encoder.encode(features2)
>>> model.similarity(hv, hv2)  # High similarity
>>>
>>> # Decode to recover approximate values
>>> recovered = encoder.decode(hv)
>>> # Verify approximate recovery via similarity
>>> model.similarity(encoder.encode(recovered), hv) > 0.9
Source code in holovec/encoders/structured.py
class VectorEncoder(StructuredEncoder):
"""
Vector encoder for multi-dimensional numeric data using role-filler binding.
Encodes vectors by binding each dimension with its scalar-encoded value:
encode([v₁, v₂, ..., vₐ]) = Σᵢ bind(Dᵢ, scalar_encode(vᵢ))
where:
- Dᵢ is a random hypervector for dimension i
- scalar_encode(vᵢ) encodes the scalar value using FPE/Thermometer/Level
- bind() creates a role-filler binding
- Σ bundles all dimension-value pairs
This creates a compositional encoding where:
- Each dimension has explicit representation (Dᵢ)
- Similar values in corresponding dimensions → higher similarity
- Supports partial matching across dimensions
- Enables approximate decoding via unbinding
Attributes:
scalar_encoder: Encoder for individual scalar values
n_dimensions: Number of dimensions in input vectors
dim_vectors: List of dimension hypervectors (Dᵢ)
normalize_input: Whether to normalize input vectors
Example:
>>> from holovec import VSA
>>> from holovec.encoders import FractionalPowerEncoder, VectorEncoder
>>>
>>> model = VSA.create('FHRR', dim=10000)
>>> scalar_enc = FractionalPowerEncoder(model, min_val=0, max_val=1)
>>> encoder = VectorEncoder(model, scalar_encoder=scalar_enc, n_dimensions=128)
>>>
>>> # Encode a feature vector (list or any backend array)
>>> features = [0.5] * 128 # Can also use numpy/torch/jax arrays
>>> hv = encoder.encode(features)
>>>
>>> # Similar vectors have high similarity
>>> features2 = [0.51] * 128 # Slightly different
>>> hv2 = encoder.encode(features2)
>>> model.similarity(hv, hv2) # High similarity
>>>
>>> # Decode to recover approximate values
>>> recovered = encoder.decode(hv)
>>> # Verify approximate recovery via similarity
>>> model.similarity(encoder.encode(recovered), hv) > 0.9
"""
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
n_dimensions: int,
normalize_input: bool = False,
seed: int | None = None
):
"""
Initialize vector encoder.
Args:
model: VSA model instance
scalar_encoder: Encoder for individual scalar values
n_dimensions: Number of dimensions in input vectors
normalize_input: Whether to normalize input vectors to unit length
seed: Random seed for dimension vector generation
Raises:
ValueError: If n_dimensions < 1
TypeError: If scalar_encoder is not a ScalarEncoder
"""
super().__init__(model)
if n_dimensions < 1:
raise ValueError(f"n_dimensions must be >= 1, got {n_dimensions}")
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(
f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
)
# Check model compatibility
if model != scalar_encoder.model:
raise ValueError(
"scalar_encoder must use the same VSA model as VectorEncoder"
)
self.scalar_encoder = scalar_encoder
self.n_dimensions = n_dimensions
self.normalize_input = normalize_input
self.seed = seed
# Generate dimension hypervectors (one per dimension)
# These are the "roles" in role-filler binding
self.dim_vectors: list[Array] = []
for i in range(n_dimensions):
# Use deterministic seeding for reproducibility
if seed is not None:
dim_seed = seed + i
else:
dim_seed = i + 1000 # Offset to avoid collision with symbol seeds
self.dim_vectors.append(model.random(seed=dim_seed))
def encode(self, vector: Array) -> Array:
"""
Encode a vector using dimension binding.
Each element is bound with its corresponding dimension vector:
result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))
Args:
vector: Input vector to encode, shape (n_dimensions,)
Returns:
Hypervector representing the vector
Raises:
ValueError: If vector shape doesn't match n_dimensions
Example:
>>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
>>> vector = [1.0, 2.0, 3.0] # Can also be numpy/torch/jax array
>>> hv = encoder.encode(vector)
"""
# Convert to backend array if needed
vector = self.backend.array(vector)
if vector.shape != (self.n_dimensions,):
raise ValueError(
f"Expected vector of shape ({self.n_dimensions},), "
f"got {vector.shape}"
)
# Optional: normalize to unit length
if self.normalize_input:
vector = self.backend.normalize(vector)
# Bind each dimension with its scalar-encoded value
bound_dims = []
for i, value in enumerate(vector):
# Encode scalar value as hypervector
value_hv = self.scalar_encoder.encode(float(value))
# Bind dimension role with value filler
dim_hv = self.dim_vectors[i]
bound = self.model.bind(dim_hv, value_hv)
bound_dims.append(bound)
# Bundle all dimension-value bindings
vector_hv = self.model.bundle(bound_dims)
return vector_hv
def decode(self, hypervector: Array) -> Array:
"""
Decode vector hypervector to recover approximate values.
For each dimension i:
1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)
Args:
hypervector: Vector hypervector to decode, shape (dimension,)
Returns:
Decoded vector, shape (n_dimensions,) (backend array type)
Raises:
NotImplementedError: If scalar_encoder doesn't support decoding
Note:
Decoding is approximate and quality depends on:
- VSA model (exact vs. approximate binding)
- Scalar encoder precision
- Number of dimensions (more dims → more noise)
Example:
>>> original = [1.0, 2.0, 3.0]
>>> encoded = encoder.encode(original)
>>> decoded = encoder.decode(encoded)
>>> # Check approximate recovery (using backend operations)
>>> model.similarity(encoder.encode(decoded), encoded) > 0.9
"""
if not self.scalar_encoder.is_reversible:
raise NotImplementedError(
f"Cannot decode: scalar_encoder {type(self.scalar_encoder).__name__} "
"does not support decoding"
)
decoded_values = []
for i in range(self.n_dimensions):
# Unbind dimension to recover value hypervector
dim_hv = self.dim_vectors[i]
value_hv = self.model.unbind(hypervector, dim_hv)
# Decode scalar value
value = self.scalar_encoder.decode(value_hv)
decoded_values.append(value)
return self.backend.array(decoded_values)
@property
def is_reversible(self) -> bool:
"""
VectorEncoder supports approximate decoding if scalar_encoder does.
Returns:
True if scalar_encoder supports decoding, False otherwise
"""
return self.scalar_encoder.is_reversible
@property
def compatible_models(self) -> list[str]:
"""
Works with all VSA models.
Decoding quality varies:
- Exact models (FHRR, MAP): High accuracy
- Approximate models (HRR, BSC): Moderate accuracy
Returns:
List of all model names
"""
return ["MAP", "FHRR", "HRR", "BSC", "GHRR", "VTB", "BSDC"]
@property
def input_type(self) -> str:
"""Input type description."""
return f"{self.n_dimensions}-dimensional vector"
def __repr__(self) -> str:
"""String representation."""
return (
f"VectorEncoder("
f"model={self.model.model_name}, "
f"scalar_encoder={type(self.scalar_encoder).__name__}, "
f"n_dimensions={self.n_dimensions}, "
f"normalize_input={self.normalize_input})"
)
compatible_models
property
Works with all VSA models.
Decoding quality varies:
- Exact models (FHRR, MAP): High accuracy
- Approximate models (HRR, BSC): Moderate accuracy
Returns: List of all model names
input_type
property
Input type description.
is_reversible
property
VectorEncoder supports approximate decoding if scalar_encoder does.
Returns: True if scalar_encoder supports decoding, False otherwise
__init__(model, scalar_encoder, n_dimensions, normalize_input=False, seed=None)
Initialize vector encoder.
Args:
model: VSA model instance
scalar_encoder: Encoder for individual scalar values
n_dimensions: Number of dimensions in input vectors
normalize_input: Whether to normalize input vectors to unit length
seed: Random seed for dimension vector generation
Raises:
ValueError: If n_dimensions < 1
TypeError: If scalar_encoder is not a ScalarEncoder
Source code in holovec/encoders/structured.py
def __init__(
self,
model: VSAModel,
scalar_encoder: ScalarEncoder,
n_dimensions: int,
normalize_input: bool = False,
seed: int | None = None
):
"""
Initialize vector encoder.
Args:
model: VSA model instance
scalar_encoder: Encoder for individual scalar values
n_dimensions: Number of dimensions in input vectors
normalize_input: Whether to normalize input vectors to unit length
seed: Random seed for dimension vector generation
Raises:
ValueError: If n_dimensions < 1
TypeError: If scalar_encoder is not a ScalarEncoder
"""
super().__init__(model)
if n_dimensions < 1:
raise ValueError(f"n_dimensions must be >= 1, got {n_dimensions}")
if not isinstance(scalar_encoder, ScalarEncoder):
raise TypeError(
f"scalar_encoder must be a ScalarEncoder, got {type(scalar_encoder)}"
)
# Check model compatibility
if model != scalar_encoder.model:
raise ValueError(
"scalar_encoder must use the same VSA model as VectorEncoder"
)
self.scalar_encoder = scalar_encoder
self.n_dimensions = n_dimensions
self.normalize_input = normalize_input
self.seed = seed
# Generate dimension hypervectors (one per dimension)
# These are the "roles" in role-filler binding
self.dim_vectors: list[Array] = []
for i in range(n_dimensions):
# Use deterministic seeding for reproducibility
if seed is not None:
dim_seed = seed + i
else:
dim_seed = i + 1000 # Offset to avoid collision with symbol seeds
self.dim_vectors.append(model.random(seed=dim_seed))
__repr__()
String representation.
Source code in holovec/encoders/structured.py
def __repr__(self) -> str:
"""String representation."""
return (
f"VectorEncoder("
f"model={self.model.model_name}, "
f"scalar_encoder={type(self.scalar_encoder).__name__}, "
f"n_dimensions={self.n_dimensions}, "
f"normalize_input={self.normalize_input})"
)
decode(hypervector)
Decode vector hypervector to recover approximate values.
For each dimension i:
1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)
Args:
hypervector: Vector hypervector to decode, shape (dimension,)
Returns: Decoded vector, shape (n_dimensions,) (backend array type)
Raises: NotImplementedError: If scalar_encoder doesn't support decoding
Note: Decoding is approximate and quality depends on:
- VSA model (exact vs. approximate binding)
- Scalar encoder precision
- Number of dimensions (more dims → more noise)
Example:
>>> original = [1.0, 2.0, 3.0]
>>> encoded = encoder.encode(original)
>>> decoded = encoder.decode(encoded)
>>> # Check approximate recovery (using backend operations)
>>> model.similarity(encoder.encode(decoded), encoded) > 0.9
Source code in holovec/encoders/structured.py
def decode(self, hypervector: Array) -> Array:
"""
Decode vector hypervector to recover approximate values.
For each dimension i:
1. Unbind dimension: value_hv = unbind(hypervector, Dᵢ)
2. Decode scalar: value ≈ scalar_encoder.decode(value_hv)
Args:
hypervector: Vector hypervector to decode, shape (dimension,)
Returns:
Decoded vector, shape (n_dimensions,) (backend array type)
Raises:
NotImplementedError: If scalar_encoder doesn't support decoding
Note:
Decoding is approximate and quality depends on:
- VSA model (exact vs. approximate binding)
- Scalar encoder precision
- Number of dimensions (more dims → more noise)
Example:
>>> original = [1.0, 2.0, 3.0]
>>> encoded = encoder.encode(original)
>>> decoded = encoder.decode(encoded)
>>> # Check approximate recovery (using backend operations)
>>> model.similarity(encoder.encode(decoded), encoded) > 0.9
"""
if not self.scalar_encoder.is_reversible:
raise NotImplementedError(
f"Cannot decode: scalar_encoder {type(self.scalar_encoder).__name__} "
"does not support decoding"
)
decoded_values = []
for i in range(self.n_dimensions):
# Unbind dimension to recover value hypervector
dim_hv = self.dim_vectors[i]
value_hv = self.model.unbind(hypervector, dim_hv)
# Decode scalar value
value = self.scalar_encoder.decode(value_hv)
decoded_values.append(value)
return self.backend.array(decoded_values)
encode(vector)
Encode a vector using dimension binding.
Each element is bound with its corresponding dimension vector:
result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))
Args: vector: Input vector to encode, shape (n_dimensions,)
Returns: Hypervector representing the vector
Raises: ValueError: If vector shape doesn't match n_dimensions
Example:
>>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
>>> vector = [1.0, 2.0, 3.0]  # Can also be numpy/torch/jax array
>>> hv = encoder.encode(vector)
Source code in holovec/encoders/structured.py
def encode(self, vector: Array) -> Array:
"""
Encode a vector using dimension binding.
Each element is bound with its corresponding dimension vector:
result = Σᵢ bind(Dᵢ, scalar_encode(vector[i]))
Args:
vector: Input vector to encode, shape (n_dimensions,)
Returns:
Hypervector representing the vector
Raises:
ValueError: If vector shape doesn't match n_dimensions
Example:
>>> encoder = VectorEncoder(model, scalar_enc, n_dimensions=3)
>>> vector = [1.0, 2.0, 3.0] # Can also be numpy/torch/jax array
>>> hv = encoder.encode(vector)
"""
# Convert to backend array if needed
vector = self.backend.array(vector)
if vector.shape != (self.n_dimensions,):
raise ValueError(
f"Expected vector of shape ({self.n_dimensions},), "
f"got {vector.shape}"
)
# Optional: normalize to unit length
if self.normalize_input:
vector = self.backend.normalize(vector)
# Bind each dimension with its scalar-encoded value
bound_dims = []
for i, value in enumerate(vector):
# Encode scalar value as hypervector
value_hv = self.scalar_encoder.encode(float(value))
# Bind dimension role with value filler
dim_hv = self.dim_vectors[i]
bound = self.model.bind(dim_hv, value_hv)
bound_dims.append(bound)
# Bundle all dimension-value bindings
vector_hv = self.model.bundle(bound_dims)
return vector_hv
Retrieval
Codebook
holovec.retrieval.codebook.Codebook
Thin wrapper for label→vector mappings with convenience methods.
Keeps insertion order of labels. Vectors are backend arrays.
Source code in holovec/retrieval/codebook.py
class Codebook:
"""Thin wrapper for label→vector mappings with convenience methods.
Keeps insertion order of labels. Vectors are backend arrays.
"""
FORMAT_VERSION = 2
def __init__(self, items: dict[str, Array] | None = None, backend: Backend | None = None):
self._items: dict[str, Array] = {}
self._backend: Backend = backend if backend is not None else get_backend("numpy")
if items:
self.extend(items)
# Basic operations
def add(self, label: str, vector: Array) -> None:
self._items[label] = vector
def extend(self, items: dict[str, Array]) -> None:
for k, v in items.items():
self.add(k, v)
@property
def labels(self) -> list[str]:
return list(self._items.keys())
@property
def size(self) -> int:
return len(self._items)
# Dict-like interface
def __getitem__(self, label: str) -> Array:
"""Get vector by label. Raises KeyError if not found."""
return self._items[label]
def __contains__(self, label: str) -> bool:
"""Check if label exists in codebook."""
return label in self._items
def __len__(self) -> int:
"""Return number of items in codebook."""
return len(self._items)
def __iter__(self) -> Iterator[str]:
"""Iterate over labels."""
return iter(self._items)
def items(self) -> ItemsView[str, Array]:
"""Return iterator over (label, vector) pairs."""
return self._items.items()
def keys(self) -> KeysView[str]:
"""Return iterator over labels."""
return self._items.keys()
def values(self) -> ValuesView[Array]:
"""Return iterator over vectors."""
return self._items.values()
def get(self, label: str, default: Array | None = None) -> Array | None:
"""Get vector by label, returning default if not found."""
return self._items.get(label, default)
def as_list(self) -> list[tuple[str, Array]]:
return list(self._items.items())
def as_matrix(self, backend: Backend | None = None) -> tuple[list[str], Array]:
"""Return (labels, matrix) where matrix has shape (L, D)."""
be = backend or self._backend
if self.size == 0:
return [], be.zeros((0,), dtype="float32")
labels = self.labels
stacked = be.stack([self._items[lbl] for lbl in labels], axis=0)
return labels, stacked
# Persistence (npz)
def save(self, path: str) -> None:
labels, mat = self.as_matrix()
mat_np = self._backend.to_numpy(mat)
np.savez(
path,
format_version=np.array(self.FORMAT_VERSION, dtype=np.int64),
labels=np.asarray(labels, dtype=np.str_),
matrix=mat_np,
)
@classmethod
def load(
cls,
path: str,
backend: Backend | None = None,
*,
allow_unsafe_legacy: bool = False,
) -> "Codebook":
be = backend or get_backend("numpy")
try:
with np.load(path, allow_pickle=False) as data:
format_version = int(np.asarray(data["format_version"]).item())
if format_version != cls.FORMAT_VERSION:
raise ValueError(
f"Unsupported codebook format version {format_version}; "
f"expected {cls.FORMAT_VERSION}"
)
labels = [str(x) for x in data["labels"].tolist()]
mat = data["matrix"]
except KeyError as exc:
if not allow_unsafe_legacy:
raise ValueError(
"Legacy codebook files require unsafe pickle-backed loading. "
"Re-run with allow_unsafe_legacy=True, then re-save the file."
) from exc
with np.load(path, allow_pickle=True) as data:
labels = [str(x) for x in data["labels"].tolist()]
mat = data["matrix"]
except ValueError as exc:
message = str(exc)
is_pickle_error = "Object arrays cannot be loaded when allow_pickle=False" in message
if not is_pickle_error or not allow_unsafe_legacy:
if is_pickle_error:
raise ValueError(
"Legacy codebook files require unsafe pickle-backed loading. "
"Re-run with allow_unsafe_legacy=True, then re-save the file."
) from exc
raise
with np.load(path, allow_pickle=True) as data:
labels = [str(x) for x in data["labels"].tolist()]
mat = data["matrix"]
items: dict[str, Array] = {}
for i, lbl in enumerate(labels):
items[lbl] = be.from_numpy(mat[i])
return cls(items=items, backend=be)
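A brief usage sketch (model as created in the examples above; the file name is illustrative):

```python
from holovec.retrieval.codebook import Codebook  # path per the source listing above

cb = Codebook(backend=model.backend)
cb.add('apple', model.random(seed=1))
cb.add('banana', model.random(seed=2))

labels, matrix = cb.as_matrix()        # matrix has shape (2, model.dimension)
assert 'apple' in cb and len(cb) == 2

cb.save('fruit_codebook.npz')          # format version 2; no pickle required
restored = Codebook.load('fruit_codebook.npz', backend=model.backend)
```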
__contains__(label)
Check if label exists in codebook.
Source code in holovec/retrieval/codebook.py
def __contains__(self, label: str) -> bool:
"""Check if label exists in codebook."""
return label in self._items
__getitem__(label)
Get vector by label. Raises KeyError if not found.
Source code in holovec/retrieval/codebook.py
def __getitem__(self, label: str) -> Array:
"""Get vector by label. Raises KeyError if not found."""
return self._items[label]
__iter__()
Iterate over labels.
Source code in holovec/retrieval/codebook.py
def __iter__(self) -> Iterator[str]:
"""Iterate over labels."""
return iter(self._items)
__len__()
Return number of items in codebook.
Source code in holovec/retrieval/codebook.py
def __len__(self) -> int:
"""Return number of items in codebook."""
return len(self._items)
as_matrix(backend=None)
Return (labels, matrix) where matrix has shape (L, D).
Source code in holovec/retrieval/codebook.py
def as_matrix(self, backend: Backend | None = None) -> tuple[list[str], Array]:
"""Return (labels, matrix) where matrix has shape (L, D)."""
be = backend or self._backend
if self.size == 0:
return [], be.zeros((0,), dtype="float32")
labels = self.labels
stacked = be.stack([self._items[lbl] for lbl in labels], axis=0)
return labels, stacked
get(label, default=None)
Get vector by label, returning default if not found.
Source code in holovec/retrieval/codebook.py
def get(self, label: str, default: Array | None = None) -> Array | None:
"""Get vector by label, returning default if not found."""
return self._items.get(label, default)
items()
Return iterator over (label, vector) pairs.
Source code in holovec/retrieval/codebook.py
def items(self) -> ItemsView[str, Array]:
"""Return iterator over (label, vector) pairs."""
return self._items.items()
keys()
Return iterator over labels.
Source code in holovec/retrieval/codebook.py
def keys(self) -> KeysView[str]:
"""Return iterator over labels."""
return self._items.keys()
values()
Return iterator over vectors.
Source code in holovec/retrieval/codebook.py
def values(self) -> ValuesView[Array]:
"""Return iterator over vectors."""
return self._items.values()
ItemStore
holovec.retrieval.itemstore.ItemStore
Thin retrieval wrapper around a Codebook and a CleanupStrategy.
Provides nearest-neighbor queries and multi-factor factorization via the configured cleanup strategy.
Source code in holovec/retrieval/itemstore.py
class ItemStore:
"""Thin retrieval wrapper around a Codebook and a CleanupStrategy.
Provides nearest-neighbor queries and multi-factor factorization via
the configured cleanup strategy.
"""
def __init__(
self,
model: VSAModel,
cleanup: CleanupStrategy | None = None,
) -> None:
self.model = model
self.cleanup: CleanupStrategy = cleanup if cleanup is not None else BruteForceCleanup()
self.codebook = Codebook(backend=model.backend)
def fit(self, items: dict[str, Array] | Codebook) -> "ItemStore":
if isinstance(items, Codebook):
self.codebook = items
else:
self.codebook = Codebook(items, backend=self.model.backend)
return self
def add(self, label: str, vector: Array) -> None:
self.codebook.add(label, vector)
def extend(self, items: dict[str, Array]) -> None:
self.codebook.extend(items)
def query(
self,
vec: Array,
k: int = 1,
return_similarities: bool = True,
fast: bool = True,
) -> list[tuple[str, float]]:
"""Query top-k nearest items.
If fast=True, uses a batched matrix routine when possible, otherwise
falls back to scalar nearest_neighbors.
"""
if fast and self.codebook.size > 0:
labels, mat = self.codebook.as_matrix(self.model.backend)
be = self.model.backend
# Continuous spaces: cosine-like; ComplexSpace handled specially
space_name = self.model.space.space_name
try:
if space_name.startswith("complex"):
# sim = Re(conj(C) @ v) / D
v = vec
conjC = be.conjugate(mat)
dots = be.matmul(conjC, v) # (L,)
sims_arr = be.real(dots)
sims_np = be.to_numpy(sims_arr) / float(self.model.dimension)
else:
# cosine: (C v) / (||C_i|| * ||v||)
dots = be.matmul(mat, vec) # (L,)
# norms per row
# norm(C_i) = sqrt(sum(C_i^2)) → use l2 along axis=1
row_norms = be.norm(mat, ord=2, axis=1)
v_norm = be.norm(vec, ord=2)
denom = be.multiply(row_norms, v_norm)
sims_arr = be.divide(dots, denom)
sims_np = be.to_numpy(sims_arr)
# Prepare top-k
import numpy as _np
sims_np = sims_np.astype(float)
if k >= len(labels):
order = _np.argsort(-sims_np)
else:
# partial sort then full sort within top-k
idx_part = _np.argpartition(-sims_np, kth=k - 1)[:k]
order = idx_part[_np.argsort(-sims_np[idx_part])]
out = [(labels[i], float(sims_np[i])) for i in order[:k]]
if return_similarities:
return out
else:
return [(lbl, 0.0) for lbl, _ in out]
except (AttributeError, NotImplementedError, TypeError, ValueError):
# Fallback to scalar path on any backend issues
pass
labels, sims = nearest_neighbors(
vec, self.codebook._items, self.model, k=k, return_similarities=True
)
return (
list(zip(labels, sims or [], strict=True))
if return_similarities
else [(lbl, 0.0) for lbl in labels]
)
def factorize(
self,
vec: Array,
n_factors: int,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
) -> tuple[list[str], list[float]]:
return self.cleanup.factorize(
vec,
self.codebook._items,
self.model,
n_factors=n_factors,
max_iterations=max_iterations,
threshold=threshold,
temperature=temperature,
top_k=top_k,
patience=patience,
min_delta=min_delta,
mode=mode,
)
# Persistence delegates to Codebook
def save(self, path: str) -> None:
self.codebook.save(path)
@classmethod
def load(
cls,
model: VSAModel,
path: str,
cleanup: CleanupStrategy | None = None,
*,
allow_unsafe_legacy: bool = False,
) -> "ItemStore":
store = cls(model=model, cleanup=cleanup)
store.codebook = Codebook.load(
path,
backend=model.backend,
allow_unsafe_legacy=allow_unsafe_legacy,
)
return store
query(vec, k=1, return_similarities=True, fast=True)
Query top-k nearest items.
If fast=True, uses a batched matrix routine when possible, otherwise falls back to scalar nearest_neighbors.
Source code in holovec/retrieval/itemstore.py
def query(
self,
vec: Array,
k: int = 1,
return_similarities: bool = True,
fast: bool = True,
) -> list[tuple[str, float]]:
"""Query top-k nearest items.
If fast=True, uses a batched matrix routine when possible, otherwise
falls back to scalar nearest_neighbors.
"""
if fast and self.codebook.size > 0:
labels, mat = self.codebook.as_matrix(self.model.backend)
be = self.model.backend
# Continuous spaces: cosine-like; ComplexSpace handled specially
space_name = self.model.space.space_name
try:
if space_name.startswith("complex"):
# sim = Re(conj(C) @ v) / D
v = vec
conjC = be.conjugate(mat)
dots = be.matmul(conjC, v) # (L,)
sims_arr = be.real(dots)
sims_np = be.to_numpy(sims_arr) / float(self.model.dimension)
else:
# cosine: (C v) / (||C_i|| * ||v||)
dots = be.matmul(mat, vec) # (L,)
# norms per row
# norm(C_i) = sqrt(sum(C_i^2)) → use l2 along axis=1
row_norms = be.norm(mat, ord=2, axis=1)
v_norm = be.norm(vec, ord=2)
denom = be.multiply(row_norms, v_norm)
sims_arr = be.divide(dots, denom)
sims_np = be.to_numpy(sims_arr)
# Prepare top-k
import numpy as _np
sims_np = sims_np.astype(float)
if k >= len(labels):
order = _np.argsort(-sims_np)
else:
# partial sort then full sort within top-k
idx_part = _np.argpartition(-sims_np, kth=k - 1)[:k]
order = idx_part[_np.argsort(-sims_np[idx_part])]
out = [(labels[i], float(sims_np[i])) for i in order[:k]]
if return_similarities:
return out
else:
return [(lbl, 0.0) for lbl, _ in out]
except (AttributeError, NotImplementedError, TypeError, ValueError):
# Fallback to scalar path on any backend issues
pass
labels, sims = nearest_neighbors(
vec, self.codebook._items, self.model, k=k, return_similarities=True
)
return (
list(zip(labels, sims or [], strict=True))
if return_similarities
else [(lbl, 0.0) for lbl in labels]
)
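A minimal end-to-end query sketch (random vectors stand in for real encodings):

```python
from holovec import VSA
from holovec.retrieval.itemstore import ItemStore

model = VSA.create('MAP', dim=10000)
store = ItemStore(model)                  # BruteForceCleanup is the default strategy
store.fit({f'item_{i}': model.random(seed=i) for i in range(100)})

results = store.query(store.codebook['item_7'], k=3)  # [(label, similarity), ...]
assert results[0][0] == 'item_7'          # an exactly stored vector ranks first
```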
AssocStore
holovec.retrieval.assocstore.AssocStore
Lean heteroassociative store: keys → values via aligned codebooks.
Stores two codebooks with aligned label order. Query by a key vector returns the best-matching key label and its corresponding value label/vector.
Source code in holovec/retrieval/assocstore.py
class AssocStore:
"""Lean heteroassociative store: keys → values via aligned codebooks.
Stores two codebooks with aligned label order. Query by a key vector returns
the best-matching key label and its corresponding value label/vector.
"""
def __init__(self, model: VSAModel) -> None:
self.model = model
self.keys = Codebook(backend=model.backend)
self.values = Codebook(backend=model.backend)
def fit(self, key_items: dict[str, Array], value_items: dict[str, Array]) -> "AssocStore":
# Intersect labels and preserve deterministic order
labels = [lbl for lbl in key_items.keys() if lbl in value_items]
self.keys = Codebook({lbl: key_items[lbl] for lbl in labels}, backend=self.model.backend)
self.values = Codebook(
{lbl: value_items[lbl] for lbl in labels}, backend=self.model.backend
)
return self
def add(self, label: str, key_vec: Array, value_vec: Array) -> None:
self.keys.add(label, key_vec)
self.values.add(label, value_vec)
def query_label(self, key_vec: Array, k: int = 1) -> list[tuple[str, float]]:
labels, sims = nearest_neighbors(
key_vec, self.keys._items, self.model, k=k, return_similarities=True
)
return list(zip(labels, sims or [], strict=True))
def query_value(self, key_vec: Array) -> tuple[str, Array]:
lbls = self.query_label(key_vec, k=1)
if not lbls:
raise ValueError("No items in store")
lbl = lbls[0][0]
return lbl, self.values._items[lbl]
def save(self, keys_path: str, values_path: str) -> None:
self.keys.save(keys_path)
self.values.save(values_path)
@classmethod
def load(
cls,
model: VSAModel,
keys_path: str,
values_path: str,
*,
allow_unsafe_legacy: bool = False,
) -> "AssocStore":
st = cls(model)
st.keys = Codebook.load(
keys_path,
backend=model.backend,
allow_unsafe_legacy=allow_unsafe_legacy,
)
st.values = Codebook.load(
values_path,
backend=model.backend,
allow_unsafe_legacy=allow_unsafe_legacy,
)
return st
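A heteroassociative lookup sketch under the same conventions (labels must align between the key and value codebooks):

```python
from holovec import VSA
from holovec.retrieval.assocstore import AssocStore  # path per the source listing above

model = VSA.create('MAP', dim=10000)
names = ['cat', 'dog']
keys = {n: model.random(seed=i) for i, n in enumerate(names)}
values = {n: model.random(seed=100 + i) for i, n in enumerate(names)}

store = AssocStore(model).fit(keys, values)
label, value_vec = store.query_value(keys['cat'])  # -> ('cat', values['cat'])
```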
Cleanup
holovec.utils.cleanup.BruteForceCleanup
Bases: CleanupStrategy
Brute-force cleanup via exhaustive codebook search.
Source code in holovec/utils/cleanup/bruteforce.py
class BruteForceCleanup(CleanupStrategy):
"""Brute-force cleanup via exhaustive codebook search."""
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Find best match via exhaustive search."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
codebook_iter = iter(codebook.items())
best_label, best_vector = next(codebook_iter)
best_similarity = float(model.similarity(query, best_vector))
for label, vector in codebook_iter:
similarity = model.similarity(query, vector)
if similarity > best_similarity:
best_similarity = similarity
best_label = label
return best_label, float(best_similarity)
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via iterative cleanup and unbinding."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if max_iterations is None:
max_iterations = 20
if threshold is None:
threshold = 0.99
if not isinstance(max_iterations, int):
raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
if not isinstance(threshold, int | float):
raise TypeError(f"threshold must be numeric, got {type(threshold)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
if max_iterations < 1:
raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
if not (0.0 <= threshold <= 1.0):
raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
labels: list[str] = []
similarities: list[float] = []
current = query
for _ in range(n_factors):
label, similarity = self.cleanup(current, codebook, model)
labels.append(label)
similarities.append(similarity)
factor_vector = codebook[label]
current = model.unbind(current, factor_vector)
return labels, similarities
cleanup(query, codebook, model)
Find best match via exhaustive search.
Source code in holovec/utils/cleanup/bruteforce.py
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Find best match via exhaustive search."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
codebook_iter = iter(codebook.items())
best_label, best_vector = next(codebook_iter)
best_similarity = float(model.similarity(query, best_vector))
for label, vector in codebook_iter:
similarity = model.similarity(query, vector)
if similarity > best_similarity:
best_similarity = similarity
best_label = label
return best_label, float(best_similarity)
factorize(query, codebook, model, n_factors=2, max_iterations=None, threshold=None, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard', **kwargs)
Factorize via iterative cleanup and unbinding.
Source code in holovec/utils/cleanup/bruteforce.py
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via iterative cleanup and unbinding."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if max_iterations is None:
max_iterations = 20
if threshold is None:
threshold = 0.99
if not isinstance(max_iterations, int):
raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
if not isinstance(threshold, int | float):
raise TypeError(f"threshold must be numeric, got {type(threshold)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
if max_iterations < 1:
raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
if not (0.0 <= threshold <= 1.0):
raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
labels: list[str] = []
similarities: list[float] = []
current = query
for _ in range(n_factors):
label, similarity = self.cleanup(current, codebook, model)
labels.append(label)
similarities.append(similarity)
factor_vector = codebook[label]
current = model.unbind(current, factor_vector)
return labels, similarities
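A quick cleanup sketch: the query is a bundle of a stored vector and a random distractor, so the exhaustive search should still recover the stored label (the bundled noise lowers the similarity below 1.0):

```python
from holovec import VSA
from holovec.utils.cleanup import BruteForceCleanup

model = VSA.create('MAP', dim=10000)
codebook = {f'sym_{i}': model.random(seed=i) for i in range(20)}

noisy = model.bundle([codebook['sym_3'], model.random(seed=999)])
label, sim = BruteForceCleanup().cleanup(noisy, codebook, model)
print(label, sim)  # expected: 'sym_3', with similarity well above chance
```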
holovec.utils.cleanup.ResonatorCleanup
Bases: CleanupStrategy
Resonator network cleanup via iterative refinement.
Source code in holovec/utils/cleanup/resonator.py
class ResonatorCleanup(CleanupStrategy):
"""Resonator network cleanup via iterative refinement."""
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Single-factor cleanup reduces to brute force."""
return BruteForceCleanup().cleanup(query, codebook, model)
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via resonator network iteration."""
if max_iterations is None:
max_iterations = 20
if threshold is None:
threshold = 0.99
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if not isinstance(max_iterations, int):
raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
if not isinstance(threshold, int | float):
raise TypeError(f"threshold must be numeric, got {type(threshold)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
if max_iterations < 1:
raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
if not (0.0 <= threshold <= 1.0):
raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
codebook_labels = list(codebook.keys())
estimates: list[Array] = []
estimate_labels: list[str] = []
for i in range(n_factors):
label = codebook_labels[i % len(codebook_labels)]
estimates.append(codebook[label])
estimate_labels.append(label)
best_avg = -1.0
no_improve = 0
for _iteration in range(max_iterations):
converged = True
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
sims: list[tuple[str, float]] = []
for lbl, vec in codebook.items():
sims.append((lbl, float(model.similarity(isolated, vec))))
sims.sort(key=lambda t: t[1], reverse=True)
use_soft = (mode == "soft") or (top_k > 1)
if not use_soft:
label, similarity = sims[0]
estimates[i] = codebook[label]
estimate_labels[i] = label
else:
k = min(max(2, top_k), len(sims))
top = sims[:k]
import numpy as _np
vals = _np.array([s for _, s in top], dtype=_np.float64)
logits = vals * float(temperature)
logits = logits - logits.max()
w = _np.exp(logits)
w = w / (w.sum() + 1e-12)
parts = []
for (lbl, _score), wt in zip(top, w.tolist(), strict=True):
parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
estimate_labels[i] = top[0][0]
similarity = float(top[0][1])
if similarity < threshold:
converged = False
curr_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
curr_sims.append(float(model.similarity(isolated, estimates[i])))
avg_sim = sum(curr_sims) / max(1, len(curr_sims))
if avg_sim > best_avg + min_delta:
best_avg = avg_sim
no_improve = 0
else:
no_improve += 1
if converged or no_improve >= patience:
break
similarities: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
similarities.append(float(model.similarity(isolated, estimates[i])))
return estimate_labels, similarities
def factorize_verbose(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int = 20,
threshold: float = 0.99,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
) -> tuple[list[str], list[float], list[float]]:
"""Like factorize(), but also returns average-similarity history."""
codebook_labels = list(codebook.keys())
estimates: list[Array] = []
estimate_labels: list[str] = []
for i in range(n_factors):
label = codebook_labels[i % len(codebook_labels)]
estimates.append(codebook[label])
estimate_labels.append(label)
history: list[float] = []
best_avg = -1.0
no_improve = 0
for _iter in range(max_iterations):
converged = True
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
sims = [(lbl, float(model.similarity(isolated, vec))) for lbl, vec in codebook.items()]
sims.sort(key=lambda t: t[1], reverse=True)
use_soft = (mode == "soft") or (top_k > 1)
if not use_soft:
label, similarity = sims[0]
estimates[i] = codebook[label]
estimate_labels[i] = label
else:
k = min(max(2, top_k), len(sims))
top = sims[:k]
import numpy as _np
vals = _np.array([s for _, s in top], dtype=_np.float64)
logits = vals * float(temperature)
logits = logits - logits.max()
w = _np.exp(logits)
w = w / (w.sum() + 1e-12)
parts = []
for (lbl, _score), wt in zip(top, w.tolist(), strict=True):
parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
estimate_labels[i] = top[0][0]
similarity = float(top[0][1])
if similarity < threshold:
converged = False
curr_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
curr_sims.append(float(model.similarity(isolated, estimates[i])))
avg_sim = sum(curr_sims) / max(1, len(curr_sims))
history.append(avg_sim)
if avg_sim > best_avg + min_delta:
best_avg = avg_sim
no_improve = 0
else:
no_improve += 1
if converged or no_improve >= patience:
break
final_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
final_sims.append(float(model.similarity(isolated, estimates[i])))
return estimate_labels, final_sims, history
cleanup(query, codebook, model)
Single-factor cleanup reduces to brute force.
Source code in holovec/utils/cleanup/resonator.py
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Single-factor cleanup reduces to brute force."""
return BruteForceCleanup().cleanup(query, codebook, model)
factorize(query, codebook, model, n_factors=2, max_iterations=None, threshold=None, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard', **kwargs)
Factorize via resonator network iteration.
Source code in holovec/utils/cleanup/resonator.py
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via resonator network iteration."""
if max_iterations is None:
max_iterations = 20
if threshold is None:
threshold = 0.99
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if not isinstance(max_iterations, int):
raise TypeError(f"max_iterations must be int, got {type(max_iterations)}")
if not isinstance(threshold, int | float):
raise TypeError(f"threshold must be numeric, got {type(threshold)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
if max_iterations < 1:
raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
if not (0.0 <= threshold <= 1.0):
raise ValueError(f"threshold must be in [0.0, 1.0], got {threshold}")
try:
query_shape = model.backend.shape(query)
expected_shape = (model.dimension,)
if query_shape != expected_shape:
raise ValueError(
f"query must have shape {expected_shape}, got {query_shape}. "
f"Ensure query is a 1-D hypervector matching model dimension."
)
except (AttributeError, TypeError) as e:
raise TypeError(
f"query must be a valid array compatible with model backend, got {type(query)}. "
f"Backend error: {e}"
) from e
codebook_labels = list(codebook.keys())
estimates: list[Array] = []
estimate_labels: list[str] = []
for i in range(n_factors):
label = codebook_labels[i % len(codebook_labels)]
estimates.append(codebook[label])
estimate_labels.append(label)
best_avg = -1.0
no_improve = 0
for _iteration in range(max_iterations):
converged = True
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
sims: list[tuple[str, float]] = []
for lbl, vec in codebook.items():
sims.append((lbl, float(model.similarity(isolated, vec))))
sims.sort(key=lambda t: t[1], reverse=True)
use_soft = (mode == "soft") or (top_k > 1)
if not use_soft:
label, similarity = sims[0]
estimates[i] = codebook[label]
estimate_labels[i] = label
else:
k = min(max(2, top_k), len(sims))
top = sims[:k]
import numpy as _np
vals = _np.array([s for _, s in top], dtype=_np.float64)
logits = vals * float(temperature)
logits = logits - logits.max()
w = _np.exp(logits)
w = w / (w.sum() + 1e-12)
parts = []
for (lbl, _score), wt in zip(top, w.tolist(), strict=True):
parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
estimate_labels[i] = top[0][0]
similarity = float(top[0][1])
if similarity < threshold:
converged = False
curr_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
curr_sims.append(float(model.similarity(isolated, estimates[i])))
avg_sim = sum(curr_sims) / max(1, len(curr_sims))
if avg_sim > best_avg + min_delta:
best_avg = avg_sim
no_improve = 0
else:
no_improve += 1
if converged or no_improve >= patience:
break
similarities: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
similarities.append(float(model.similarity(isolated, estimates[i])))
return estimate_labels, similarities
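A worked two-factor sketch: bind two codebook vectors, then recover both labels (class name assumed as above). Passing mode='soft' with top_k > 1 replaces the hard argmax update with a softmax-weighted blend of the top-k codebook vectors, scaled by temperature, which can help when early estimates are ambiguous:

>>> from holovec import VSA
>>> from holovec.utils.cleanup import ResonatorCleanup  # assumed export name/path
>>> model = VSA.create('FHRR', dim=2048, seed=42)
>>> codebook = {c: model.random() for c in 'abcdef'}
>>> query = model.bind(codebook['b'], codebook['e'])
>>> labels, sims = ResonatorCleanup().factorize(query, codebook, model, n_factors=2)
>>> # sorted(labels) == ['b', 'e'] on successful convergence; per-factor order may vary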
factorize_verbose(query, codebook, model, n_factors=2, max_iterations=20, threshold=0.99, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard')
Like factorize(), but also returns the per-iteration average-similarity history.
Source code in holovec/utils/cleanup/resonator.py
def factorize_verbose(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int = 20,
threshold: float = 0.99,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
) -> tuple[list[str], list[float], list[float]]:
"""Like factorize(), but also returns average-similarity history."""
codebook_labels = list(codebook.keys())
estimates: list[Array] = []
estimate_labels: list[str] = []
for i in range(n_factors):
label = codebook_labels[i % len(codebook_labels)]
estimates.append(codebook[label])
estimate_labels.append(label)
history: list[float] = []
best_avg = -1.0
no_improve = 0
for _iter in range(max_iterations):
converged = True
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
sims = [(lbl, float(model.similarity(isolated, vec))) for lbl, vec in codebook.items()]
sims.sort(key=lambda t: t[1], reverse=True)
use_soft = (mode == "soft") or (top_k > 1)
if not use_soft:
label, similarity = sims[0]
estimates[i] = codebook[label]
estimate_labels[i] = label
else:
k = min(max(2, top_k), len(sims))
top = sims[:k]
import numpy as _np
vals = _np.array([s for _, s in top], dtype=_np.float64)
logits = vals * float(temperature)
logits = logits - logits.max()
w = _np.exp(logits)
w = w / (w.sum() + 1e-12)
parts = []
for (lbl, _score), wt in zip(top, w.tolist(), strict=True):
parts.append(model.backend.multiply_scalar(codebook[lbl], float(wt)))
estimates[i] = model.backend.sum(model.backend.stack(parts, axis=0), axis=0)
estimate_labels[i] = top[0][0]
similarity = float(top[0][1])
if similarity < threshold:
converged = False
curr_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
curr_sims.append(float(model.similarity(isolated, estimates[i])))
avg_sim = sum(curr_sims) / max(1, len(curr_sims))
history.append(avg_sim)
if avg_sim > best_avg + min_delta:
best_avg = avg_sim
no_improve = 0
else:
no_improve += 1
if converged or no_improve >= patience:
break
final_sims: list[float] = []
for i in range(n_factors):
isolated = query
for j in range(n_factors):
if j != i:
isolated = model.unbind(isolated, estimates[j])
final_sims.append(float(model.similarity(isolated, estimates[i])))
return estimate_labels, final_sims, history
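The verbose variant additionally returns one average cross-factor similarity per iteration, which is useful for diagnosing convergence and tuning patience and min_delta. Continuing the sketch above:

>>> labels, sims, history = ResonatorCleanup().factorize_verbose(
...     query, codebook, model, n_factors=2, mode='soft', top_k=3)
>>> len(history)  # one entry per iteration actually run (at most max_iterations)
>>> history[-1]   # average cross-factor similarity at the final iteration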
holovec.utils.cleanup.AttentionResonatorCleanup
Bases: CleanupStrategy
Attention-based resonator network using the modern Hopfield update rule.
Source code in holovec/utils/cleanup/attention.py
class AttentionResonatorCleanup(CleanupStrategy):
"""Attention-based resonator network using the modern Hopfield update rule."""
def __init__(
self,
beta: float = 250.0,
max_iterations: int = 100,
convergence_threshold: float = 0.99,
patience: int = 5,
) -> None:
if beta <= 0:
raise ValueError(f"beta must be positive, got {beta}")
if max_iterations < 1:
raise ValueError(f"max_iterations must be >= 1, got {max_iterations}")
if not (0.0 < convergence_threshold <= 1.0):
raise ValueError(
f"convergence_threshold must be in (0, 1], got {convergence_threshold}"
)
if patience < 1:
raise ValueError(f"patience must be >= 1, got {patience}")
self.beta = beta
self.max_iterations = max_iterations
self.convergence_threshold = convergence_threshold
self.patience = patience
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Find best match via attention-weighted cleanup."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
labels = list(codebook.keys())
vectors = [codebook[lbl] for lbl in labels]
similarities = np.array([float(model.similarity(query, vec)) for vec in vectors])
best_idx = int(np.argmax(similarities))
return labels[best_idx], float(similarities[best_idx])
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via attention-based resonator network."""
if max_iterations is None:
max_iterations = self.max_iterations
if threshold is None:
threshold = self.convergence_threshold
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
labels = list(codebook.keys())
n_items = len(labels)
dim = model.dimension
codebook_vectors = [codebook[lbl] for lbl in labels]
is_complex = hasattr(model.space, "is_complex") and model.space.is_complex
codebook_stacked = model.backend.stack(codebook_vectors, axis=0)
codebook_mean = model.backend.mean(codebook_stacked, axis=0)
if is_complex and hasattr(model, "normalize"):
codebook_mean = model.normalize(codebook_mean)
estimates = [codebook_mean for _ in range(n_factors)]
best_avg_sim = -1.0
no_improve_count = 0
for _iteration in range(max_iterations):
converged = True
for j in range(n_factors):
other_product = self._compute_other_product(estimates, j, model, is_complex)
noisy_estimate = model.unbind(query, other_product)
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
noisy_estimate,
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(noisy_estimate, codebook_vectors[i])) for i in range(n_items)]
)
attention_weights = _softmax(self.beta * similarities)
new_estimate = self._weighted_combination(codebook_vectors, attention_weights, model)
if hasattr(model, "normalize"):
new_estimate = model.normalize(new_estimate)
estimates[j] = new_estimate
best_sim = float(np.max(similarities))
if best_sim < threshold:
converged = False
avg_sim = self._compute_avg_similarity(
query, estimates, codebook_vectors, model, is_complex, dim
)
if avg_sim > best_avg_sim + min_delta:
best_avg_sim = avg_sim
no_improve_count = 0
else:
no_improve_count += 1
if converged or no_improve_count >= self.patience:
break
final_labels: list[str] = []
final_similarities: list[float] = []
for j in range(n_factors):
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
estimates[j],
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(estimates[j], codebook_vectors[i])) for i in range(n_items)]
)
best_idx = int(np.argmax(similarities))
final_labels.append(labels[best_idx])
final_similarities.append(float(similarities[best_idx]))
return final_labels, final_similarities
def _compute_other_product(
self,
estimates: list[Array],
exclude_idx: int,
model: VSAModel,
is_complex: bool,
) -> Array:
"""Compute the product of all estimates except one."""
n_factors = len(estimates)
if n_factors == 1:
if is_complex:
return model.backend.ones(model.dimension, dtype="complex128")
return model.backend.ones(model.dimension, dtype="float64")
others: list[Array] = []
for i in range(n_factors):
if i != exclude_idx:
others.append(estimates[i])
result = others[0]
for vec in others[1:]:
result = model.bind(result, vec)
return result
def _weighted_combination(
self,
vectors: list[Array],
weights: np.ndarray,
model: VSAModel,
) -> Array:
"""Compute a weighted combination of vectors."""
weighted: list[Array] = []
for vec, w in zip(vectors, weights, strict=True):
weighted.append(model.backend.multiply_scalar(vec, float(w)))
stacked = model.backend.stack(weighted, axis=0)
return model.backend.sum(stacked, axis=0)
def _compute_avg_similarity(
self,
query: Array,
estimates: list[Array],
codebook_vectors: list[Array],
model: VSAModel,
is_complex: bool,
dim: int,
) -> float:
"""Compute average max similarity across factors for early stopping."""
n_factors = len(estimates)
n_items = len(codebook_vectors)
total_sim = 0.0
for j in range(n_factors):
if is_complex:
similarities = [
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
estimates[j],
)
)
)
)
/ dim
for i in range(n_items)
]
else:
similarities = [
float(model.similarity(estimates[j], codebook_vectors[i])) for i in range(n_items)
]
total_sim += max(similarities)
return total_sim / n_factors
def factorize_verbose(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
) -> tuple[list[str], list[float], list[float]]:
"""Like factorize(), but also returns average-similarity history."""
if max_iterations is None:
max_iterations = self.max_iterations
if threshold is None:
threshold = self.convergence_threshold
labels_list = list(codebook.keys())
n_items = len(labels_list)
dim = model.dimension
codebook_vectors = [codebook[lbl] for lbl in labels_list]
is_complex = hasattr(model.space, "is_complex") and model.space.is_complex
codebook_stacked = model.backend.stack(codebook_vectors, axis=0)
codebook_mean = model.backend.mean(codebook_stacked, axis=0)
if is_complex and hasattr(model, "normalize"):
codebook_mean = model.normalize(codebook_mean)
estimates = [codebook_mean for _ in range(n_factors)]
history: list[float] = []
best_avg_sim = -1.0
no_improve_count = 0
for _iteration in range(max_iterations):
converged = True
for j in range(n_factors):
other_product = self._compute_other_product(estimates, j, model, is_complex)
noisy_estimate = model.unbind(query, other_product)
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
noisy_estimate,
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(noisy_estimate, codebook_vectors[i])) for i in range(n_items)]
)
attention_weights = _softmax(self.beta * similarities)
new_estimate = self._weighted_combination(codebook_vectors, attention_weights, model)
if hasattr(model, "normalize"):
new_estimate = model.normalize(new_estimate)
estimates[j] = new_estimate
if float(np.max(similarities)) < threshold:
converged = False
avg_sim = self._compute_avg_similarity(
query, estimates, codebook_vectors, model, is_complex, dim
)
history.append(avg_sim)
if avg_sim > best_avg_sim + 1e-4:
best_avg_sim = avg_sim
no_improve_count = 0
else:
no_improve_count += 1
if converged or no_improve_count >= self.patience:
break
final_labels: list[str] = []
final_similarities: list[float] = []
for j in range(n_factors):
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
estimates[j],
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(estimates[j], codebook_vectors[i])) for i in range(n_items)]
)
best_idx = int(np.argmax(similarities))
final_labels.append(labels_list[best_idx])
final_similarities.append(float(similarities[best_idx]))
return final_labels, final_similarities, history
cleanup(query, codebook, model)
Find the best match for a single query vector. With one factor the attention update reduces to a direct similarity search over the codebook.
Source code in holovec/utils/cleanup/attention.py
def cleanup(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
) -> tuple[str, float]:
"""Find best match via attention-weighted cleanup."""
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
labels = list(codebook.keys())
vectors = [codebook[lbl] for lbl in labels]
similarities = np.array([float(model.similarity(query, vec)) for vec in vectors])
best_idx = int(np.argmax(similarities))
return labels[best_idx], float(similarities[best_idx])
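A usage sketch; the constructor arguments are the hyperparameters validated in __init__ above:

>>> from holovec import VSA
>>> from holovec.utils.cleanup import AttentionResonatorCleanup
>>> model = VSA.create('MAP', dim=4096, seed=0)
>>> codebook = {name: model.random() for name in ('red', 'green', 'blue')}
>>> strategy = AttentionResonatorCleanup(beta=250.0, max_iterations=100)
>>> noisy = model.backend.add(codebook['green'], model.random())  # 'green' plus noise
>>> label, score = strategy.cleanup(noisy, codebook, model)
>>> # label == 'green' with overwhelming probability at this dimension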
factorize(query, codebook, model, n_factors=2, max_iterations=None, threshold=None, temperature=20.0, top_k=1, patience=3, min_delta=0.0001, mode='hard', **kwargs)
Factorize via attention-based resonator network.
Source code in holovec/utils/cleanup/attention.py
def factorize(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
temperature: float = 20.0,
top_k: int = 1,
patience: int = 3,
min_delta: float = 1e-4,
mode: str = "hard",
**kwargs: object,
) -> tuple[list[str], list[float]]:
"""Factorize via attention-based resonator network."""
if max_iterations is None:
max_iterations = self.max_iterations
if threshold is None:
threshold = self.convergence_threshold
if query is None:
raise TypeError("query cannot be None")
if not isinstance(codebook, dict):
raise TypeError(f"codebook must be dict, got {type(codebook)}")
if not isinstance(model, VSAModel):
raise TypeError(f"model must be VSAModel, got {type(model)}")
if not isinstance(n_factors, int):
raise TypeError(f"n_factors must be int, got {type(n_factors)}")
if n_factors < 1:
raise ValueError(f"n_factors must be >= 1, got {n_factors}")
if len(codebook) == 0:
raise ValueError("codebook must not be empty")
labels = list(codebook.keys())
n_items = len(labels)
dim = model.dimension
codebook_vectors = [codebook[lbl] for lbl in labels]
is_complex = hasattr(model.space, "is_complex") and model.space.is_complex
codebook_stacked = model.backend.stack(codebook_vectors, axis=0)
codebook_mean = model.backend.mean(codebook_stacked, axis=0)
if is_complex and hasattr(model, "normalize"):
codebook_mean = model.normalize(codebook_mean)
estimates = [codebook_mean for _ in range(n_factors)]
best_avg_sim = -1.0
no_improve_count = 0
for _iteration in range(max_iterations):
converged = True
for j in range(n_factors):
other_product = self._compute_other_product(estimates, j, model, is_complex)
noisy_estimate = model.unbind(query, other_product)
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
noisy_estimate,
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(noisy_estimate, codebook_vectors[i])) for i in range(n_items)]
)
attention_weights = _softmax(self.beta * similarities)
new_estimate = self._weighted_combination(codebook_vectors, attention_weights, model)
if hasattr(model, "normalize"):
new_estimate = model.normalize(new_estimate)
estimates[j] = new_estimate
best_sim = float(np.max(similarities))
if best_sim < threshold:
converged = False
avg_sim = self._compute_avg_similarity(
query, estimates, codebook_vectors, model, is_complex, dim
)
if avg_sim > best_avg_sim + min_delta:
best_avg_sim = avg_sim
no_improve_count = 0
else:
no_improve_count += 1
if converged or no_improve_count >= self.patience:
break
final_labels: list[str] = []
final_similarities: list[float] = []
for j in range(n_factors):
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
estimates[j],
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(estimates[j], codebook_vectors[i])) for i in range(n_items)]
)
best_idx = int(np.argmax(similarities))
final_labels.append(labels[best_idx])
final_similarities.append(float(similarities[best_idx]))
return final_labels, final_similarities
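A factorization sketch, continuing the example above. Each update blends all codebook vectors with softmax(beta * similarities) weights, so beta controls how sharply an update commits: large beta approaches the hard argmax update, while small beta keeps the estimates as soft mixtures for longer:

>>> query = model.bind(codebook['red'], codebook['blue'])
>>> labels, sims = strategy.factorize(query, codebook, model, n_factors=2)
>>> # sorted(labels) == ['blue', 'red'] on successful convergence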
factorize_verbose(query, codebook, model, n_factors=2, max_iterations=None, threshold=None)
Like factorize(), but also returns the per-iteration average-similarity history.
Source code in holovec/utils/cleanup/attention.py
def factorize_verbose(
self,
query: Array,
codebook: dict[str, Array],
model: VSAModel,
n_factors: int = 2,
max_iterations: int | None = None,
threshold: float | None = None,
) -> tuple[list[str], list[float], list[float]]:
"""Like factorize(), but also returns average-similarity history."""
if max_iterations is None:
max_iterations = self.max_iterations
if threshold is None:
threshold = self.convergence_threshold
labels_list = list(codebook.keys())
n_items = len(labels_list)
dim = model.dimension
codebook_vectors = [codebook[lbl] for lbl in labels_list]
is_complex = hasattr(model.space, "is_complex") and model.space.is_complex
codebook_stacked = model.backend.stack(codebook_vectors, axis=0)
codebook_mean = model.backend.mean(codebook_stacked, axis=0)
if is_complex and hasattr(model, "normalize"):
codebook_mean = model.normalize(codebook_mean)
estimates = [codebook_mean for _ in range(n_factors)]
history: list[float] = []
best_avg_sim = -1.0
no_improve_count = 0
for _iteration in range(max_iterations):
converged = True
for j in range(n_factors):
other_product = self._compute_other_product(estimates, j, model, is_complex)
noisy_estimate = model.unbind(query, other_product)
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
noisy_estimate,
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(noisy_estimate, codebook_vectors[i])) for i in range(n_items)]
)
attention_weights = _softmax(self.beta * similarities)
new_estimate = self._weighted_combination(codebook_vectors, attention_weights, model)
if hasattr(model, "normalize"):
new_estimate = model.normalize(new_estimate)
estimates[j] = new_estimate
if float(np.max(similarities)) < threshold:
converged = False
avg_sim = self._compute_avg_similarity(
query, estimates, codebook_vectors, model, is_complex, dim
)
history.append(avg_sim)
if avg_sim > best_avg_sim + 1e-4:
best_avg_sim = avg_sim
no_improve_count = 0
else:
no_improve_count += 1
if converged or no_improve_count >= self.patience:
break
final_labels: list[str] = []
final_similarities: list[float] = []
for j in range(n_factors):
if is_complex:
similarities = np.array(
[
float(
np.real(
model.backend.sum(
model.backend.multiply(
model.backend.conjugate(codebook_vectors[i]),
estimates[j],
)
)
)
)
/ dim
for i in range(n_items)
]
)
else:
similarities = np.array(
[float(model.similarity(estimates[j], codebook_vectors[i])) for i in range(n_items)]
)
best_idx = int(np.argmax(similarities))
final_labels.append(labels_list[best_idx])
final_similarities.append(float(similarities[best_idx]))
return final_labels, final_similarities, history
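As with the plain resonator, the verbose variant exposes the per-iteration average similarity, making visible the plateau that triggers the patience-based early stop (no improvement greater than 1e-4 for `patience` consecutive iterations). Continuing the sketch above:

>>> labels, sims, history = strategy.factorize_verbose(query, codebook, model, n_factors=2)
>>> # history typically rises and then plateaus; its length is the number of iterations run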
Backends
holovec.backends.base.Backend
Bases: ABC
Abstract base class for computational backends.
All backends must implement these operations to support VSA computations across different frameworks (NumPy, PyTorch, JAX).
Source code in holovec/backends/base.py
class Backend(ABC):
"""Abstract base class for computational backends.
All backends must implement these operations to support VSA computations
across different frameworks (NumPy, PyTorch, JAX).
"""
@property
@abstractmethod
def name(self) -> str:
"""Return the backend name (e.g., 'numpy', 'torch', 'jax')."""
pass
@abstractmethod
def is_available(self) -> bool:
"""Check if the backend is available in the current environment."""
pass
# ===== Capability Probes =====
def supports_complex(self) -> bool:
"""Check if backend supports complex number operations.
Complex operations are required for FHRR (Fourier HRR) and other
frequency-domain VSA models.
Returns:
True if backend can handle complex dtypes (complex64, complex128)
"""
return True # Default: assume support (override in backend if needed)
def supports_sparse(self) -> bool:
"""Check if backend supports sparse array operations.
Sparse operations are beneficial for BSC (Binary Spatter Codes) and
BSDC (Binary Sparse Distributed Codes) which have high sparsity.
Returns:
True if backend has native sparse array support
"""
return False # Default: no sparse support (override if available)
def supports_gpu(self) -> bool:
"""Check if backend has GPU acceleration support.
GPU support enables significant speedups for large-scale operations
and is critical for production deployments.
Returns:
True if backend can utilize GPU hardware
"""
return False # Default: CPU only (override for PyTorch/JAX)
def supports_jit(self) -> bool:
"""Check if backend supports Just-In-Time (JIT) compilation.
JIT compilation can provide 10-100x speedups for certain operations
by compiling Python code to optimized machine code.
Returns:
True if backend has JIT compilation (e.g., JAX, Numba)
"""
return False # Default: no JIT (override for JAX)
def supports_device(self, device: str) -> bool:
"""Check if backend supports a specific device.
Args:
device: Device identifier (e.g., 'cpu', 'cuda', 'cuda:0', 'mps')
Returns:
True if the specified device is available
Examples:
>>> backend.supports_device('cpu') # Always True
>>> backend.supports_device('cuda') # True if CUDA GPU available
>>> backend.supports_device('mps') # True if Apple Metal available
"""
# Default: only CPU supported
return device.lower() in ('cpu', 'cpu:0')
# ===== Array Creation =====
@abstractmethod
def zeros(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
"""Create an array of zeros with the given shape and dtype."""
pass
@abstractmethod
def ones(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
"""Create an array of ones with the given shape and dtype."""
pass
@abstractmethod
def random_normal(
self,
shape: int | tuple[int, ...],
mean: float = 0.0,
std: float = 1.0,
dtype: str = 'float32',
seed: int | None = None
) -> Array:
"""Create an array of random values from a normal distribution."""
pass
@abstractmethod
def random_uniform(
self,
shape: int | tuple[int, ...],
low: float = 0.0,
high: float = 1.0,
dtype: str = 'float32',
seed: int | None = None
) -> Array:
"""Create an array of random values from a uniform distribution."""
pass
@abstractmethod
def random_binary(
self,
shape: int | tuple[int, ...],
p: float = 0.5,
dtype: str = 'int32',
seed: int | None = None
) -> Array:
"""Create a binary array with probability p of being 1."""
pass
@abstractmethod
def randint(
self,
shape: int | tuple[int, ...],
low: int,
high: int,
dtype: str = 'int32',
seed: int | None = None,
) -> Array:
"""Create an integer array with values sampled from [low, high)."""
pass
@abstractmethod
def random_bipolar(
self,
shape: int | tuple[int, ...],
p: float = 0.5,
dtype: str = 'float32',
seed: int | None = None
) -> Array:
"""Create a bipolar array {-1, +1} with probability p of being +1."""
pass
@abstractmethod
def random_phasor(
self,
shape: int | tuple[int, ...],
dtype: str = 'complex64',
seed: int | None = None
) -> Array:
"""Create an array of random unit phasors (complex numbers with magnitude 1)."""
pass
@abstractmethod
def array(self, data: Any, dtype: str | None = None) -> Array:
"""Create an array from Python data (list, tuple, etc.)."""
pass
# ===== Element-wise Operations =====
@abstractmethod
def multiply(self, a: Array, b: Array) -> Array:
"""Element-wise multiplication."""
pass
@abstractmethod
def add(self, a: Array, b: Array) -> Array:
"""Element-wise addition."""
pass
@abstractmethod
def subtract(self, a: Array, b: Array) -> Array:
"""Element-wise subtraction."""
pass
@abstractmethod
def divide(self, a: Array, b: Array) -> Array:
"""Element-wise division."""
pass
@abstractmethod
def xor(self, a: Array, b: Array) -> Array:
"""Element-wise XOR (for binary/bipolar)."""
pass
@abstractmethod
def conjugate(self, a: Array) -> Array:
"""Complex conjugate (for complex arrays)."""
pass
@abstractmethod
def exp(self, a: Array) -> Array:
"""Element-wise exponential: e^a.
Args:
a: Input array
Returns:
Array with exp applied element-wise
"""
pass
@abstractmethod
def log(self, a: Array) -> Array:
"""Element-wise natural logarithm: ln(a).
Args:
a: Input array (must be positive)
Returns:
Array with log applied element-wise
"""
pass
# ===== Additional Element-wise Utilities =====
@abstractmethod
def power(self, a: Array, exponent: float) -> Array:
"""Element-wise power: a**exponent."""
pass
@abstractmethod
def angle(self, a: Array) -> Array:
"""Element-wise phase/angle for complex arrays (radians)."""
pass
@abstractmethod
def real(self, a: Array) -> Array:
"""Element-wise real part of (possibly complex) array."""
pass
@abstractmethod
def imag(self, a: Array) -> Array:
"""Element-wise imaginary part of (possibly complex) array."""
pass
@abstractmethod
def multiply_scalar(self, a: Array, scalar: float) -> Array:
"""Multiply array by a Python scalar."""
pass
@abstractmethod
def linspace(self, start: float, stop: float, num: int) -> Array:
"""Create linearly spaced array of length num in [start, stop]."""
pass
# ===== Reductions =====
@abstractmethod
def sum(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
"""Sum along an axis."""
pass
@abstractmethod
def mean(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
"""Mean along an axis."""
pass
@abstractmethod
def norm(self, a: Array, ord: int | str = 2, axis: int | None = None) -> Array:
"""Compute the norm of an array."""
pass
@abstractmethod
def dot(self, a: Array, b: Array) -> Array:
"""Dot product of two vectors."""
pass
@abstractmethod
def max(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
"""Maximum value along an axis.
Args:
a: Input array
axis: Axis along which to compute max (None for global max)
keepdims: Whether to keep dimensions
Returns:
Maximum value(s)
"""
pass
@abstractmethod
def min(self, a: Array, axis: int | None = None, keepdims: bool = False) -> Array:
"""Minimum value along an axis.
Args:
a: Input array
axis: Axis along which to compute min (None for global min)
keepdims: Whether to keep dimensions
Returns:
Minimum value(s)
"""
pass
@abstractmethod
def argmax(self, a: Array, axis: int | None = None) -> Array:
"""Index of maximum value along an axis.
Args:
a: Input array
axis: Axis along which to find argmax (None for global argmax)
Returns:
Index/indices of maximum value(s)
"""
pass
@abstractmethod
def argmin(self, a: Array, axis: int | None = None) -> Array:
"""Index of minimum value along an axis.
Args:
a: Input array
axis: Axis along which to find argmin (None for global argmin)
Returns:
Index/indices of minimum value(s)
"""
pass
# ===== Normalization =====
@abstractmethod
def normalize(self, a: Array, ord: int | str = 2, axis: int | None = None, eps: float = 1e-12) -> Array:
"""Normalize an array to unit norm."""
pass
@abstractmethod
def softmax(self, a: Array, axis: int = -1) -> Array:
"""Softmax function with numerical stability.
Computes: softmax(x_i) = exp(x_i - max(x)) / Σ exp(x_j - max(x))
The max subtraction provides numerical stability by preventing overflow
in the exponential function.
Args:
a: Input array
axis: Axis along which to compute softmax
Returns:
Array with softmax applied along specified axis
References:
- Bricken & Pehlevan (2022): "Attention Approximates Sparse Distributed Memory"
- Furlong & Eliasmith (2023): "Fractional binding in VSAs"
"""
pass
# ===== FFT Operations =====
@abstractmethod
def fft(self, a: Array) -> Array:
"""1D Fast Fourier Transform."""
pass
@abstractmethod
def ifft(self, a: Array) -> Array:
"""1D Inverse Fast Fourier Transform."""
pass
# ===== Circular Operations =====
@abstractmethod
def circular_convolve(self, a: Array, b: Array) -> Array:
"""Circular convolution of two vectors."""
pass
@abstractmethod
def circular_correlate(self, a: Array, b: Array) -> Array:
"""Circular correlation of two vectors."""
pass
# ===== Permutations =====
@abstractmethod
def permute(self, a: Array, indices: Array) -> Array:
"""Permute array elements according to indices."""
pass
@abstractmethod
def roll(self, a: Array, shift: int, axis: int | None = None) -> Array:
"""Roll array elements along an axis."""
pass
# ===== Similarity Measures =====
@abstractmethod
def cosine_similarity(self, a: Array, b: Array) -> float:
"""Compute cosine similarity between two vectors."""
pass
@abstractmethod
def hamming_distance(self, a: Array, b: Array) -> float:
"""Compute Hamming distance between two binary/bipolar vectors."""
pass
@abstractmethod
def euclidean_distance(self, a: Array, b: Array) -> float:
"""Compute Euclidean distance between two vectors."""
pass
# ===== Utilities =====
@abstractmethod
def shape(self, a: Array) -> tuple[int, ...]:
"""Return the shape of an array."""
pass
@abstractmethod
def dtype(self, a: Array) -> str:
"""Return the dtype of an array as a string."""
pass
@abstractmethod
def to_numpy(self, a: Array) -> Any:
"""Convert array to NumPy array (for compatibility)."""
pass
@abstractmethod
def from_numpy(self, a: Any) -> Array:
"""Create backend array from NumPy array."""
pass
@abstractmethod
def clip(self, a: Array, min_val: float, max_val: float) -> Array:
"""Clip array values to [min_val, max_val]."""
pass
@abstractmethod
def abs(self, a: Array) -> Array:
"""Element-wise absolute value."""
pass
@abstractmethod
def sign(self, a: Array) -> Array:
"""Element-wise sign."""
pass
@abstractmethod
def astype(self, a: Array, dtype: str) -> Array:
"""Convert an array to the requested dtype."""
pass
@abstractmethod
def threshold(self, a: Array, threshold: float, above: float = 1.0, below: float = 0.0) -> Array:
"""Threshold array values."""
pass
@abstractmethod
def where(self, condition: Array, x: Array, y: Array) -> Array:
"""Select elements from x or y depending on boolean condition."""
pass
@abstractmethod
def stack(self, arrays: Sequence[Array], axis: int = 0) -> Array:
"""Stack arrays along a new axis."""
pass
@abstractmethod
def concatenate(self, arrays: Sequence[Array], axis: int = 0) -> Array:
"""Concatenate arrays along an existing axis."""
pass
# ===== Matrix Operations (for GHRR, VTB) =====
@abstractmethod
def matmul(self, a: Array, b: Array) -> Array:
"""Matrix multiplication (or batched matrix multiplication).
Args:
a: Matrix or batch of matrices
b: Matrix or batch of matrices
Returns:
Matrix product
"""
pass
@abstractmethod
def matrix_transpose(self, a: Array) -> Array:
"""Transpose last two dimensions of array.
For 2D: standard transpose
For 3D+: transpose last two dimensions (batch transpose)
Args:
a: Array with at least 2 dimensions
Returns:
Transposed array
"""
pass
@abstractmethod
def matrix_trace(self, a: Array) -> Array:
"""Compute trace of matrix or batch of matrices.
For 2D array: returns scalar
For 3D+ array: returns trace of each matrix in batch
Args:
a: Matrix or batch of matrices (last 2 dims are matrix)
Returns:
Scalar or array of traces
"""
pass
@abstractmethod
def svd(self, a: Array, full_matrices: bool = True) -> tuple[Array, Array, Array]:
"""Compute Singular Value Decomposition (SVD).
Decomposes matrix A as A = U @ diag(S) @ Vh, where:
- U: left singular vectors (unitary)
- S: singular values (non-negative, sorted descending)
- Vh: conjugate transpose of right singular vectors (unitary)
For batched matrices (3D+), computes SVD for each matrix in batch.
Args:
a: Matrix or batch of matrices (shape [..., m, n])
full_matrices: If True, U and Vh have shapes [..., m, m] and [..., n, n].
If False, shapes are [..., m, k] and [..., k, n] where k=min(m,n).
Returns:
Tuple of (U, S, Vh) arrays
Examples:
>>> A = backend.random_normal((3, 3))
>>> U, S, Vh = backend.svd(A)
>>> # Verify: A ≈ U @ diag(S) @ Vh
"""
pass
@abstractmethod
def reshape(self, a: Array, shape: tuple[int, ...]) -> Array:
"""Reshape array to new shape.
Args:
a: Array to reshape
shape: Target shape
Returns:
Reshaped array
"""
pass
@abstractmethod
def eye(self, n: int, dtype: str = 'float32') -> Array:
"""Create an identity matrix with shape (n, n)."""
pass
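The capability probes support backend-agnostic feature gating and device selection. A sketch using only the methods defined above, obtaining the backend instance through a model (a standalone backend factory, if one exists, would work the same way):

>>> from holovec import VSA
>>> backend = VSA.create('MAP', dim=1024).backend
>>> def pick_device(backend) -> str:
...     """Prefer CUDA, then Apple MPS, then CPU."""
...     for dev in ('cuda', 'mps', 'cpu'):
...         if backend.supports_device(dev):
...             return dev
...     return 'cpu'
>>> pick_device(backend)  # e.g. 'cpu' on a CPU-only install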
zeros(shape, dtype='float32')
abstractmethod
Create an array of zeros with the given shape and dtype.
Source code in holovec/backends/base.py
@abstractmethod
def zeros(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
"""Create an array of zeros with the given shape and dtype."""
pass
ones(shape, dtype='float32')
abstractmethod
Create an array of ones with the given shape and dtype.
Source code in holovec/backends/base.py
@abstractmethod
def ones(self, shape: int | tuple[int, ...], dtype: str = 'float32') -> Array:
"""Create an array of ones with the given shape and dtype."""
pass
random_normal(shape, mean=0.0, std=1.0, dtype='float32', seed=None)
abstractmethod
Create an array of random values from a normal distribution.
Source code in holovec/backends/base.py
@abstractmethod
def random_normal(
self,
shape: int | tuple[int, ...],
mean: float = 0.0,
std: float = 1.0,
dtype: str = 'float32',
seed: int | None = None
) -> Array:
"""Create an array of random values from a normal distribution."""
pass
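The per-call seed makes individual draws reproducible without touching global RNG state, assuming the backend seeds deterministically as the signature suggests:

>>> import numpy as np
>>> from holovec import VSA
>>> backend = VSA.create('MAP', dim=1024).backend
>>> x1 = backend.random_normal((4,), mean=0.0, std=1.0, seed=123)
>>> x2 = backend.random_normal((4,), seed=123)
>>> np.allclose(backend.to_numpy(x1), backend.to_numpy(x2))  # True: same seed, same draw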
add(a, b)
abstractmethod
Element-wise addition.
Source code in holovec/backends/base.py
@abstractmethod
def add(self, a: Array, b: Array) -> Array:
"""Element-wise addition."""
pass
multiply(a, b)
abstractmethod
Element-wise multiplication.
Source code in holovec/backends/base.py
@abstractmethod
def multiply(self, a: Array, b: Array) -> Array:
"""Element-wise multiplication."""
pass
dot(a, b)
abstractmethod
Dot product of two vectors.
Source code in holovec/backends/base.py
@abstractmethod
def dot(self, a: Array, b: Array) -> Array:
"""Dot product of two vectors."""
pass
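dot and norm together define cosine similarity, which is what cosine_similarity computes for real vectors. The relation, in plain NumPy for concreteness:

>>> import numpy as np
>>> a, b = np.array([1.0, 0.0]), np.array([1.0, 1.0])
>>> float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
0.7071067811865475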
fft(a)
abstractmethod
1D Fast Fourier Transform.
Source code in holovec/backends/base.py
@abstractmethod
def fft(self, a: Array) -> Array:
"""1D Fast Fourier Transform."""
pass
ifft(a)
abstractmethod
1D Inverse Fast Fourier Transform.
Source code in holovec/backends/base.py
@abstractmethod
def ifft(self, a: Array) -> Array:
"""1D Inverse Fast Fourier Transform."""
pass
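fft and ifft together implement circular convolution, the HRR binding operation: by the convolution theorem, element-wise multiplication in the frequency domain equals circular convolution in the original domain. A self-contained NumPy check of the identity that circular_convolve builds on:

>>> import numpy as np
>>> rng = np.random.default_rng(0)
>>> a, b = rng.standard_normal(8), rng.standard_normal(8)
>>> via_fft = np.real(np.fft.ifft(np.fft.fft(a) * np.fft.fft(b)))
>>> direct = np.array([sum(a[k] * b[(n - k) % 8] for k in range(8)) for n in range(8)])
>>> np.allclose(via_fft, direct)
True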
norm(a, ord=2, axis=None)
abstractmethod
Compute the norm of an array.
Source code in holovec/backends/base.py
@abstractmethod
def norm(self, a: Array, ord: int | str = 2, axis: int | None = None) -> Array:
"""Compute the norm of an array."""
pass
normalize(a, ord=2, axis=None, eps=1e-12)
abstractmethod
Normalize an array to unit norm.
Source code in holovec/backends/base.py
@abstractmethod
def normalize(self, a: Array, ord: int | str = 2, axis: int | None = None, eps: float = 1e-12) -> Array:
"""Normalize an array to unit norm."""
pass
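The contract in one line: the result has unit norm (up to eps). For example:

>>> from holovec import VSA
>>> backend = VSA.create('MAP', dim=1024).backend
>>> v = backend.array([3.0, 4.0])
>>> backend.to_numpy(backend.normalize(v))  # array([0.6, 0.8]), i.e. unit L2 norm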
to_numpy(a)
abstractmethod
Convert array to NumPy array (for compatibility).
Source code in holovec/backends/base.py
@abstractmethod
def to_numpy(self, a: Array) -> Any:
"""Convert array to NumPy array (for compatibility)."""
pass
from_numpy(a)
abstractmethod
Create backend array from NumPy array.
Source code in holovec/backends/base.py
@abstractmethod
def from_numpy(self, a: Any) -> Array:
"""Create backend array from NumPy array."""
pass
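to_numpy and from_numpy give a lossless round trip for interop with NumPy-based code (dtype permitting):

>>> import numpy as np
>>> from holovec import VSA
>>> backend = VSA.create('MAP', dim=1024).backend
>>> src = np.array([1.0, 2.0, 3.0])
>>> np.allclose(backend.to_numpy(backend.from_numpy(src)), src)
True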
supports_gpu()
Check if backend has GPU acceleration support.
GPU support enables significant speedups for large-scale operations and is often essential for production-scale workloads.
Returns: True if backend can utilize GPU hardware
Source code in holovec/backends/base.py
def supports_gpu(self) -> bool:
"""Check if backend has GPU acceleration support.
GPU support enables significant speedups for large-scale operations
and is critical for production deployments.
Returns:
True if backend can utilize GPU hardware
"""
return False # Default: CPU only (override for PyTorch/JAX)
supports_complex()
Check if backend supports complex number operations.
Complex operations are required for FHRR (Fourier HRR) and other frequency-domain VSA models.
Returns: True if backend can handle complex dtypes (complex64, complex128)
Source code in holovec/backends/base.py
def supports_complex(self) -> bool:
"""Check if backend supports complex number operations.
Complex operations are required for FHRR (Fourier HRR) and other
frequency-domain VSA models.
Returns:
True if backend can handle complex dtypes (complex64, complex128)
"""
return True # Default: assume support (override in backend if needed)
supports_sparse()
Check if backend supports sparse array operations.
Sparse operations are beneficial for BSC (Binary Spatter Codes) and BSDC (Binary Sparse Distributed Codes) which have high sparsity.
Returns: True if backend has native sparse array support
Source code in holovec/backends/base.py
def supports_sparse(self) -> bool:
"""Check if backend supports sparse array operations.
Sparse operations are beneficial for BSC (Binary Spatter Codes) and
BSDC (Binary Sparse Distributed Codes) which have high sparsity.
Returns:
True if backend has native sparse array support
"""
return False # Default: no sparse support (override if available)
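Because every probe has a conservative default (complex: assumed supported; sparse, GPU, and JIT: assumed unsupported), feature-gating code can call them on any backend without hasattr checks:

>>> from holovec import VSA
>>> backend = VSA.create('MAP', dim=1024).backend
>>> backend.supports_complex()  # True unless a backend overrides it
>>> backend.supports_sparse()   # False unless a backend adds native sparse support
>>> if backend.supports_jit():
...     pass  # e.g. enable a JIT-compiled fast path on JAX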