
Feat/jaxtyping #746

Merged: 26 commits (feat/jaxtyping → main), Sep 4, 2024

Changes from 23 commits

Commits
e23b174  feat: continue adding jaxtyping (db091756, Aug 22, 2024)
a4c4b99  remove files pulled from different PR (db091756, Aug 22, 2024)
f9871da  feat: finished typing pass (db091756, Aug 23, 2024)
3dc5d0b  Merge branch 'main' into feat/jaxtyping (db091756, Aug 23, 2024)
a07db43  fix: add back lost ruff disable (db091756, Aug 23, 2024)
2c5eb04  feat: another jaxtyping pass (db091756, Aug 23, 2024)
8b91eb9  fix: doc build errors (db091756, Aug 23, 2024)
aa188a6  feat: added back jit (db091756, Aug 23, 2024)
abbd5a2  responding to reviewer comments (db091756, Aug 29, 2024)
d574777  feat: added further jaxtyping (db091756, Aug 29, 2024)
16a4bc4  fix: replaced n with m in pairwise (db091756, Aug 29, 2024)
e2a1cf1  responding to reviewer comments (db091756, Aug 29, 2024)
751406e  responding to reviewer comments (db091756, Aug 29, 2024)
82271f9  feat: added overloads the compute mean method (db091756, Aug 29, 2024)
b897254  Merge branch 'main' into feat/jaxtyping (db091756, Aug 29, 2024)
b94f0c4  feat: added ruff rules (db091756, Aug 30, 2024)
419a639  feat: added ... to the exclude_also (db091756, Aug 30, 2024)
46bb0ea  responding to reviewer comments (db091756, Aug 30, 2024)
64e7215  fix: fix doc build error (db091756, Aug 30, 2024)
4041e0f  fix: we do not need to do the as_array conversion when the function i… (db091756, Aug 30, 2024)
713ac4c  fix: fixed missed typing (db091756, Aug 30, 2024)
dce12f7  fix: reverted dimensional control in score_matching.py, improved over… (db091756, Sep 3, 2024)
46f2c24  docs: changed 'd' to 'n' in overloads (db091756, Sep 3, 2024)
860ad4b  Merge branch 'main' into feat/jaxtyping (db091756, Sep 4, 2024)
bf51a87  docs: improved overloads (db091756, Sep 4, 2024)
c302936  tests: add back old tests (db091756, Sep 4, 2024)
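
Several of the commits above reference overloads added to a compute-mean method. As background, the general pattern (a hypothetical sketch, not coreax's actual signatures) uses typing.overload so that the declared return shape tracks the input shape:

    from typing import Union, overload

    import jax.numpy as jnp
    from jax import Array
    from jaxtyping import Shaped

    @overload
    def compute_mean(x: Shaped[Array, " n d"]) -> Shaped[Array, " d"]: ...

    @overload
    def compute_mean(x: Shaped[Array, " d"]) -> Shaped[Array, ""]: ...

    def compute_mean(x: Union[Shaped[Array, " n d"], Shaped[Array, " d"]]) -> Array:
        # Reduce over the leading axis; the overloads document the shape contract.
        return jnp.mean(x, axis=0)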
2 changes: 1 addition & 1 deletion .cspell/custom_misc.txt
@@ -25,7 +25,7 @@ kernelised
 kernelized
 KSD
 linewidth
-Matérn
+Matern
 ml.p3.8xlarge
 ndmin
 parsable
1 change: 1 addition & 0 deletions .cspell/people.txt
@@ -14,6 +14,7 @@ Jiaxin
 Jitkrittum
 Kanagawa
 Martinsson
+Matérn
 Motonobu
 Nystr
 Nystrom
124 changes: 88 additions & 36 deletions coreax/approximation.py
@@ -36,10 +36,10 @@
 import jax.numpy as jnp
 import jax.random as jr
 from jax import Array
-from jax.typing import ArrayLike
+from jaxtyping import Shaped
 from typing_extensions import TYPE_CHECKING, Literal, override

-from coreax.data import Data
+from coreax.data import Data, _atleast_2d_consistent
 from coreax.kernels import UniCompositeKernel
 from coreax.util import KeyArrayLike

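For readers new to jaxtyping: annotations like Shaped[Array, " n d"] encode an array's shape in the type, with letters naming symbolic dimensions that must agree wherever they are reused. A minimal illustrative sketch (the function here is invented for illustration, not part of coreax):

    import jax.numpy as jnp
    from jax import Array
    from jaxtyping import Shaped

    def pairwise_dot(
        x: Shaped[Array, " n d"], y: Shaped[Array, " m d"]
    ) -> Shaped[Array, " n m"]:
        # "d" must match between x and y; the result pairs every row of x with y.
        return x @ y.T

    out = pairwise_dot(jnp.ones((3, 2)), jnp.ones((5, 2)))  # shape (3, 5)
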
@@ -77,26 +77,28 @@ def _random_indices(

 def _random_least_squares(
     key: KeyArrayLike,
-    data: Array,
-    features: Array,
+    data: Shaped[Array, " n p"],
+    features: Shaped[Array, " n n"],
     num_indices: int,
-    target_map: Callable[[Array], Array] = lambda x: x,
-) -> Array:
+    target_map: Callable[[Shaped[Array, " n p"]], Shaped[Array, " n p"]] = lambda x: x,
+) -> Shaped[Array, " n p"]:
     r"""
     Solve the least-squares problem on a random subset of the system.

-    A linear system :math:`Ax = b`, solved via least-squares as :math:`x = A^+ b`, can
-    be approximated by random least-square as `x \approx \hat{x} = \hat{A}^+ \hat{b}`,
-    where :math:`\hat{A} = A_i\ \text{and}\ \hat{b} = b_i\, \forall i \in I]`. `I` is a
-    random subset of indices for the original system of equations.
+    A linear system :math:`AX = B`, solved via least-squares as :math:`X = A^+ B`, can
+    be approximated by random least-squares as
+    :math:`X \approx \hat{X} = \hat{A}^+ \hat{B}`, where
+    :math:`\hat{A} = A_{i\cdot}\ \text{and}\ \hat{B} = B_{i\cdot}\, \forall i \in I`.
+    :math:`I` is a random subset of indices for the original system of equations.

     :param key: RNG key for seeding the random selection
-    :param data: The data :math:`z`; yields :math:`b` when pushed through the target map
-    :param features: The feature matrix :math:`A`
+    :param data: The data :math:`Z \in \mathbb{R}^{n \times p}`; yields
+        :math:`B \in \mathbb{R}^{n \times p}` when pushed through the target map
+    :param features: The feature matrix :math:`A \in \mathbb{R}^{n \times n}`
     :param num_indices: The size of the random subset of indices :math:`I`
     :param target_map: The target map :math:`\phi` which defines :math:`b := \phi(z)`,
         where :math:`z` is the input ``data``
-    :return: The push-forward of the approximate solution :math:`A\hat{x}`
+    :return: The push-forward of the approximate solution :math:`A\hat{X}`
     """
     num_data_points = len(data)
     train_idx = _random_indices(key, num_data_points, num_indices, mode="train")
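
To make the approximation concrete, here is a self-contained sketch of the same idea in plain JAX (variable names are invented for the example; this is not coreax code):

    import jax.numpy as jnp
    import jax.random as jr

    key = jr.PRNGKey(0)
    n, p, m = 100, 3, 20                  # m is the size of the random subset I
    A = jr.normal(key, (n, n))            # feature matrix A
    B = jr.normal(jr.PRNGKey(1), (n, p))  # targets B

    idx = jr.choice(key, n, shape=(m,), replace=False)  # random subset I
    A_hat, B_hat = A[idx], B[idx]                       # row subsets of A and B
    X_hat = jnp.linalg.lstsq(A_hat, B_hat)[0]           # \hat{X} = \hat{A}^+ \hat{B}
    push_forward = A @ X_hat                            # approximates A X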
@@ -120,19 +122,19 @@ class ApproximateKernel(UniCompositeKernel):
     """

     @override
-    def compute_elementwise(self, x: ArrayLike, y: ArrayLike) -> Array:
+    def compute_elementwise(self, x, y):
         return self.base_kernel.compute_elementwise(x, y)

     @override
-    def grad_x_elementwise(self, x: ArrayLike, y: ArrayLike) -> Array:
+    def grad_x_elementwise(self, x, y):
         return self.base_kernel.grad_x_elementwise(x, y)

     @override
-    def grad_y_elementwise(self, x: ArrayLike, y: ArrayLike) -> Array:
+    def grad_y_elementwise(self, x, y):
         return self.base_kernel.grad_y_elementwise(x, y)

     @override
-    def divergence_x_grad_y_elementwise(self, x: ArrayLike, y: ArrayLike) -> Array:
+    def divergence_x_grad_y_elementwise(self, x, y):
         return self.base_kernel.divergence_x_grad_y_elementwise(x, y)

@@ -173,7 +175,18 @@ class MonteCarloApproximateKernel(RandomRegressionKernel):
     :param num_train_points: Number of training points used to fit kernel regression
     """

-    def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
+    def gramian_row_mean(
+        self,
+        x: Union[
+            Shaped[Array, " n d"],
+            Shaped[Array, " d"],
+            Shaped[Array, ""],
+            float,
+            int,
+            Data,
+        ],
+        **kwargs,
+    ) -> Shaped[Array, " n"]:
         r"""
         Approximate the Gramian row-mean by Monte-Carlo sampling.

@@ -184,17 +197,22 @@ def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
         :return: Approximation of the base kernel's Gramian row-mean
         """
         del kwargs
-        data = jnp.atleast_2d(jnp.asarray(x))
-        num_data_points = len(data)
+        # This method does not support weighted computation of the mean, therefore
+        # we need to handle the case where `x` is passed as a `Data` instance
+        if isinstance(x, Data):
+            x = x.data
+        x = _atleast_2d_consistent(x)
+
+        num_data_points = len(x)
         key = self.random_key
         features_idx = _random_indices(key, num_data_points, self.num_kernel_points - 1)
-        features = self.base_kernel.compute(data, data[features_idx])
+        features = self.base_kernel.compute(x, x[features_idx])
         return _random_least_squares(
             key,
-            data,
+            x,
             features,
             self.num_train_points,
-            partial(self.base_kernel.compute_mean, data, axis=0),
+            partial(self.base_kernel.compute_mean, x, axis=0),
         )

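The quantity being approximated in gramian_row_mean is the row-mean of the base kernel's Gram matrix, i.e. (1/n) * sum_j k(x_i, x_j) for each i. A brute-force reference implementation, useful for checking the regression-based approximations on small inputs (a sketch with a made-up RBF kernel; it materialises the full O(n^2) Gram matrix, which is exactly what the approximations avoid):

    import jax.numpy as jnp

    def gramian_row_mean_exact(x, kernel):
        # Full (n, n) Gram matrix, then the mean of each row.
        gram = kernel(x[:, None, :], x[None, :, :])
        return gram.mean(axis=1)

    rbf = lambda a, b: jnp.exp(-jnp.sum((a - b) ** 2, axis=-1))
    x = jnp.arange(12.0).reshape(4, 3)
    exact = gramian_row_mean_exact(x, rbf)  # shape (4,)
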
@@ -212,7 +230,18 @@ class ANNchorApproximateKernel(RandomRegressionKernel):
     :param num_train_points: Number of training points used to fit kernel regression
     """

-    def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
+    def gramian_row_mean(
+        self,
+        x: Union[
+            Shaped[Array, " n d"],
+            Shaped[Array, " d"],
+            Shaped[Array, ""],
+            float,
+            int,
+            Data,
+        ],
+        **kwargs,
+    ) -> Shaped[Array, " n"]:
         r"""
         Approximate the Gramian row-mean by random regression on ANNchor points.

@@ -224,12 +253,19 @@ def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
         :return: Approximation of the base kernel's Gramian row-mean
         """
         del kwargs
-        data = jnp.atleast_2d(jnp.asarray(x))
-        num_data_points = len(data)
+        # This method does not support weighted computation of the mean, therefore
+        # we need to handle the case where `x` is passed as a `Data` instance
+        if isinstance(x, Data):
+            x = x.data
+        x = _atleast_2d_consistent(x)
+
+        num_data_points = len(x)
         features = jnp.zeros((num_data_points, self.num_kernel_points))
-        features = features.at[:, 0].set(self.base_kernel.compute(data, data[0])[:, 0])
+        features = features.at[:, 0].set(self.base_kernel.compute(x, x[0])[:, 0])

-        def _annchor_body(idx: int, _features: Array) -> Array:
+        def _annchor_body(
+            idx: int, _features: Shaped[Array, " n num_kernel_points"]
+        ) -> Shaped[Array, " n num_kernel_points"]:
             r"""
             Execute main loop of the ANNchor construction.

@@ -239,17 +275,17 @@ def _annchor_body(idx: int, _features: Array) -> Array:
             """
             max_entry = _features.max(axis=1).argmin()
             _features = _features.at[:, idx].set(
-                self.base_kernel.compute(data, data[max_entry])[:, 0]
+                self.base_kernel.compute(x, x[max_entry])[:, 0]
             )
             return _features

         features = jax.lax.fori_loop(1, self.num_kernel_points, _annchor_body, features)
         return _random_least_squares(
             self.random_key,
-            data,
+            x,
             features,
             self.num_train_points,
-            partial(self.base_kernel.compute_mean, data, axis=0),
+            partial(self.base_kernel.compute_mean, x, axis=0),
         )

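Since _annchor_body runs under jax.lax.fori_loop, it may help to recall that fori_loop is semantically equivalent to a plain Python loop over a carried value (a reference sketch of the semantics, matching the JAX documentation):

    def fori_loop_reference(lower, upper, body_fun, init_val):
        # Pure-Python equivalent of jax.lax.fori_loop(lower, upper, body_fun, init_val)
        val = init_val
        for i in range(lower, upper):
            val = body_fun(i, val)
        return val

Here each iteration fills column idx of the feature matrix with kernel evaluations against the point that is currently worst-covered, i.e. the point with the smallest maximum similarity to the anchors chosen so far.
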
@@ -267,7 +303,18 @@ class NystromApproximateKernel(RandomRegressionKernel):
     :param num_train_points: Number of training points used to fit kernel regression
     """

-    def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
+    def gramian_row_mean(
+        self,
+        x: Union[
+            Shaped[Array, " n d"],
+            Shaped[Array, " d"],
+            Shaped[Array, ""],
+            float,
+            int,
+            Data,
+        ],
+        **kwargs,
+    ) -> Shaped[Array, " n"]:
         r"""
         Approximate the Gramian row-mean by Nystrom approximation.

@@ -280,15 +327,20 @@ def gramian_row_mean(self, x: Union[ArrayLike, Data], **kwargs) -> Array:
         :return: Approximation of the base kernel's Gramian row-mean
         """
         del kwargs
-        data = jnp.atleast_2d(jnp.asarray(x))
-        num_data_points = len(data)
+        # This method does not support weighted computation of the mean, therefore
+        # we need to handle the case where `x` is passed as a `Data` instance
+        if isinstance(x, Data):
+            x = x.data
+        x = _atleast_2d_consistent(x)
+
+        num_data_points = len(x)
         feature_idx = _random_indices(
             self.random_key, num_data_points, self.num_kernel_points
         )
-        features = self.base_kernel.compute(data, data[feature_idx])
+        features = self.base_kernel.compute(x, x[feature_idx])
         return _random_least_squares(
             self.random_key,  # intentional key reuse to ensure train_idx = feature_idx
-            data,
+            x,
             features,
             self.num_train_points,
             self.base_kernel.gramian_row_mean,
53 changes: 31 additions & 22 deletions coreax/coreset.py
@@ -21,7 +21,7 @@
 from jaxtyping import Array, Shaped
 from typing_extensions import Self

-from coreax.data import Data, SupervisedData, as_data
+from coreax.data import Data, as_data, as_supervised_data
 from coreax.metrics import Metric
 from coreax.weights import WeightsOptimiser

@@ -73,9 +73,25 @@ class Coreset(eqx.Module, Generic[_Data]):
     :param pre_coreset_data: The dataset :math:`X` used to construct the coreset.
     """

-    nodes: Data = eqx.field(converter=as_data)
+    nodes: _Data
     pre_coreset_data: _Data

+    def __init__(self, nodes: _Data, pre_coreset_data: _Data):
+        """Handle type conversion of ``nodes`` and ``pre_coreset_data``."""
+        if isinstance(nodes, Array):
+            self.nodes = as_data(nodes)
+        elif isinstance(nodes, tuple):
+            self.nodes = as_supervised_data(nodes)
+        else:
+            self.nodes = nodes
+
+        if isinstance(pre_coreset_data, Array):
+            self.pre_coreset_data = as_data(pre_coreset_data)
+        elif isinstance(pre_coreset_data, tuple):
+            self.pre_coreset_data = as_supervised_data(pre_coreset_data)
+        else:
+            self.pre_coreset_data = pre_coreset_data
+
     def __check_init__(self):
         """Check that coreset has fewer 'nodes' than the 'pre_coreset_data'."""
         if len(self.nodes) > len(self.pre_coreset_data):
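
With this __init__, raw arrays are promoted to Data and (data, supervision) tuples to SupervisedData automatically. A usage sketch under those semantics (hypothetical values; constructor behaviour as shown in the diff above):

    import jax.numpy as jnp
    from coreax.coreset import Coreset

    points = jnp.arange(20.0).reshape(10, 2)  # pre-coreset dataset
    nodes = points[:3]                        # candidate coreset nodes

    # Both arguments are raw arrays, so both are converted via as_data.
    coreset = Coreset(nodes, points)

    # A (data, supervision) tuple would instead be converted via as_supervised_data.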
@@ -89,21 +105,23 @@ def __len__(self):
         return len(self.nodes)

     @property
-    def coreset(self) -> Data:
+    def coreset(self) -> _Data:
         """Materialised coreset."""
         return self.nodes

-    def solve_weights(self, solver: WeightsOptimiser, **solver_kwargs) -> Self:
+    def solve_weights(self, solver: WeightsOptimiser[_Data], **solver_kwargs) -> Self:
         """Return a copy of 'self' with weights solved by 'solver'."""
         weights = solver.solve(self.pre_coreset_data, self.coreset, **solver_kwargs)
         return eqx.tree_at(lambda x: x.nodes.weights, self, weights)

-    def compute_metric(self, metric: Metric, **metric_kwargs) -> Array:
+    def compute_metric(
+        self, metric: Metric[_Data], **metric_kwargs
+    ) -> Shaped[Array, ""]:
         """Return metric-distance between `self.pre_coreset_data` and `self.coreset`."""
         return metric.compute(self.pre_coreset_data, self.coreset, **metric_kwargs)


-class Coresubset(Coreset[_Data], Generic[_Data]):
+class Coresubset(Coreset[Data], Generic[_Data]):
     r"""
     Data structure for representing a coresubset.

@@ -131,26 +149,17 @@ class Coresubset(Coreset[_Data], Generic[_Data]):
     :param pre_coreset_data: The dataset :math:`X` used to construct the coreset.
     """

-    # Incompatibility between Pylint and eqx.field. Pyright handles this correctly.
-    # pylint: disable=no-member
+    def __init__(self, nodes: Data, pre_coreset_data: _Data):
+        """Handle typing of ``nodes`` being a `Data` instance."""
+        super().__init__(nodes, pre_coreset_data)

     @property
-    def coreset(self) -> Data:
+    def coreset(self) -> _Data:
         """Materialise the coresubset from the indices and original data."""
-        coreset_data = self.pre_coreset_data.data[self.unweighted_indices]
-        if isinstance(self.pre_coreset_data, SupervisedData):
-            coreset_supervision = self.pre_coreset_data.supervision[
-                self.unweighted_indices
-            ]
-            return SupervisedData(
-                data=coreset_data,
-                supervision=coreset_supervision,
-                weights=self.nodes.weights,
-            )
-        return Data(data=coreset_data, weights=self.nodes.weights)
+        coreset_data = self.pre_coreset_data[self.unweighted_indices]
+        return eqx.tree_at(lambda x: x.weights, coreset_data, self.nodes.weights)

     @property
     def unweighted_indices(self) -> Shaped[Array, " n"]:
         """Unweighted Coresubset indices - attribute access helper."""
         return jnp.squeeze(self.nodes.data)
-
-    # pylint: enable=no-member
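
The rewritten coreset property relies on equinox's tree_at to replace a single leaf of a pytree out-of-place, which works uniformly for Data and SupervisedData. A standalone sketch of that pattern (toy module, not coreax types):

    import equinox as eqx
    import jax.numpy as jnp
    from jax import Array

    class Weighted(eqx.Module):
        data: Array
        weights: Array

    old = Weighted(data=jnp.ones(3), weights=jnp.zeros(3))
    # Returns a copy of `old` in which only `weights` has been replaced.
    new = eqx.tree_at(lambda m: m.weights, old, jnp.full(3, 0.5))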