Source code for PAsampling.wrappers.dafps_sampler

from ..native_functions import *
from scipy.spatial import cKDTree
import numpy as np

[docs] class DAFPS: """ Implements the Density-Aware Farthest Point Sampling (DA-FPS) algorithm. This class provides a wrapper around the da_fps_np function, allowing for the selection of a subset of samples from a dataset based on the DA-FPS strategy. The selection can be performed using different distance functions and can handle precomputed distance matrices. Attributes: ----------- X : numpy.ndarray (n_samples, n_features) Input points, representing a set of data points. d : numpy.ndarray (n_samples, knn), optional (default=None) knn distance matrix between points. The i-th row contains the sorted distances between the i-th point and its k nearest neighbors. knn : int, optional (default=100) The number of nearest neighbors to consider when computing the knn distance matrix. weights : numpy.ndarray (n_samples, ), optional (default=None) Array of weights for each point. If provided, these weights are used to adjust the selection process. precomputed_distances : bool, optional (default=False) If True, the input X is assumed to be a precomputed distance matrix. """ def __init__(self, X, d=None, knn=100, weights=None, precomputed_distances=False): self.knn = knn self.X = X self.precomputed_distances = precomputed_distances self.weights = weights self.d = d if self.precomputed_distances is False: if self.weights is None: if self.d is None: # If also knn is not provided, raise an error if self.knn is None: raise ValueError("At least one among the following input values must be provided: weights, d (knn matrix) and knn.") else: # Compute knn distance matrix using cKDTree tree = cKDTree(X) self.d, _ = tree.query(X, knn, workers=-1) # If weights are provided, knn distance matrix not needed else: self.d = None else: # If precomputed distances are provided and weights are not provided if self.weights is None: # If also knn is not provided, raise an error if self.knn is None: raise ValueError("At least one among the following input values must be provided: weights, d (knn matrix) and knn.") else: # Compute knn distance matrix from precomputed distances using argsort nearest_indices = np.argsort(X, axis=1)[:, :self.knn] self.d = np.take_along_axis(X, nearest_indices, axis=1) # If weights are provided, knn distance matrix not needed else: self.d = None
[docs] def fit(self, initial_subset, b_samples, mu=0, distance_func=None, verbose=False): """Fits the model to the data X and returns the indices of the selected samples. Parameters: ----------- initial_subset : list List of indices (rows of the input points matrix) representing the initial set of selected elements. b_samples : int The desired number of points to select. mu : int, optional (default=0) The number of initial points to select using FPS before applying the DA-FPS algorithm. mu is expressed as a percentage of the total number of samples in the dataset. Default is 0%. distance_func : callable, optional (default=None) A function to compute pairwise distances. If None, Euclidean distance is used. verbose : bool, optional (default=False) If True, progress messages are printed. Returns: -------- samples : list List of indices representing the selected points using the DAFPS algorithm. """ if int(self.X.shape[0]/100*mu)>= b_samples: raise ValueError("Choose mu smaller. The value of mu exceeds or equals the labeling budget.") if mu == 0: return da_fps_np(self.X, initial_subset, b_samples, self.d, distance_func, self.precomputed_distances, self.weights, verbose) elif mu > 0: initial_subset_fps = fps_np(self.X, initial_subset, int((len(self.X) / 100) * mu)) return da_fps_np(self.X, initial_subset_fps, b_samples, self.d, distance_func, self.precomputed_distances, self.weights, verbose) else: raise ValueError("Invalid mu value. Choose a positive integer.")