Source code for PAsampling.wrappers.fps_plus_sampler

import numpy as np
from ..native_functions import  fps_np
from .kmedoids_sampler import Kmedoids
from .facility_location_sampler import FacilityLocation
from .twin_sampler import Twin
import random

class FPS_plus:
    """
    Implements a modified version of the Farthest Point Sampling (FPS) algorithm.

    This class provides a wrapper around the ``fps_np`` function and integrates
    it with various sampling strategies. It allows for the selection of a subset
    of samples from a dataset based on the FPS strategy, followed by additional
    selection using different methods such as k-medoids, facility location,
    random sampling, and twinning.

    Attributes:
    -----------
    method : str, optional (default='kmedoids')
        The sampling method to use after the initial FPS selection.
        Options are 'kmedoids', 'facility_location', 'random', and 'twin'.
    mu : int, optional (default=3)
        The number of initial points to select using FPS before applying the
        respective strategy. mu is expressed as a percentage of the total
        number of samples in the dataset. Default is 3%.
    """

    def __init__(self, method='kmedoids', mu=3):
        # The method name is validated lazily in ``fit``, not here.
        self.method = method
        self.mu = mu

    def fit(self, X, initial_subset, b_samples, metric='euclidean', ratio=5,
            idx_initial_point=0, init_kmedoids='k-medoids++', random_state=None):
        """Fits the model to the data X and returns the indices of the selected samples.

        Parameters:
        -----------
        X : numpy.ndarray (n_samples, n_features)
            Input points, representing a set of data points.
        initial_subset : list
            List of indices (rows of the input points matrix) representing the
            initial set of selected elements.
        b_samples : int
            The desired number of points to select. Not forwarded to the
            'twin' strategy, whose output size is governed by ``ratio``.
        metric : str, optional (default='euclidean')
            The metric to use for computing distances. Options are
            'euclidean', 'manhattan', etc. Used by 'kmedoids' and
            'facility_location' only.
        ratio : int, optional (default=5)
            The ratio parameter for the twinning method.
        idx_initial_point : int, optional (default=0)
            The initial point index for the twinning method.
        init_kmedoids : str, optional (default='k-medoids++')
            The method for initialization in k-medoids. Options are 'random',
            'heuristic', 'k-medoids++', and 'build'.
        random_state : int, optional (default=None)
            The seed used by the random number generator ('kmedoids' and
            'random' strategies).

        Returns:
        --------
        samples : list
            List of indices representing the selected points using the
            modified FPS algorithm.

        Raises:
        -------
        ValueError
            If ``self.method`` is not one of the supported strategy names.
        """
        if self.method == 'kmedoids':
            return self.fps_kmedoids(X, initial_subset, b_samples, metric,
                                     init_kmedoids, random_state)
        if self.method == 'facility_location':
            return self.fps_facility_location(X, initial_subset, b_samples, metric)
        if self.method == 'random':
            return self.fps_random(X, initial_subset, b_samples,
                                   random_state=random_state)
        if self.method == 'twin':
            # Bound-method call: ``self`` must not be passed explicitly
            # (doing so shifted every argument and raised TypeError).
            return self.fps_twinning(X, initial_subset, ratio, idx_initial_point)
        raise ValueError("Invalid method. Choose 'kmedoids', 'facility_location', 'random', or 'twin'.")

    def _fps_stage(self, X, initial_subset):
        """Run the initial FPS stage shared by every strategy.

        Returns a tuple ``(idx_fps, remaining)`` where ``idx_fps`` is a fresh
        list holding whatever ``fps_np`` returned and ``remaining`` is the
        ascending list of row indices of ``X`` that are not in ``idx_fps``.
        """
        # mu is a percentage of the dataset size.
        n_fps = int((len(X) / 100) * self.mu)
        # Copy into a list so later concatenation never mutates (or
        # element-wise adds to) the object returned by fps_np.
        idx_fps = list(fps_np(X, initial_subset, n_fps))
        # Sort explicitly so the ordering does not depend on set iteration
        # order; this keeps seeded sampling and ``idx_initial_point``
        # lookups reproducible.
        remaining = sorted(set(np.arange(X.shape[0])).difference(idx_fps))
        return idx_fps, remaining

    def fps_kmedoids(self, X, initial_subset, b_samples, metric,
                     init_kmedoids='k-medoids++', random_state=0):
        """FPS stage followed by k-medoids on the points FPS did not select.

        Returns the FPS indices followed by the k-medoids picks, the latter
        mapped back to row indices of ``X``.
        """
        idx_fps, remaining = self._fps_stage(X, initial_subset)
        if random_state is None:
            # No seed supplied: draw one so the sampler still gets an int.
            random_state = random.randint(0, 1000)
        # Budget left for the second stage after the FPS picks.
        b = b_samples - len(idx_fps)
        kmedoids_sampler = Kmedoids(b_samples=b, metric=metric,
                                    init=init_kmedoids, random_state=random_state)
        # The sampler sees only the remaining rows, so its output indexes
        # into ``remaining`` and must be translated back.
        kmedoids_indices = kmedoids_sampler.fit(X[remaining])
        return idx_fps + list(np.asarray(remaining)[np.asarray(kmedoids_indices)])

    def fps_facility_location(self, X, initial_subset, b_samples, metric='euclidean'):
        """FPS stage followed by facility-location selection seeded with the FPS picks.

        Returns the FPS indices followed by the indices returned by the
        facility-location sampler (which is fitted on the full ``X``).
        """
        idx_fps, _ = self._fps_stage(X, initial_subset)
        b = b_samples - len(idx_fps)
        facility_location_sampler = FacilityLocation(initial_subset=list(idx_fps),
                                                     b_samples=b, metric=metric)
        facility_location_indices = facility_location_sampler.fit(X)
        return idx_fps + list(facility_location_indices)

    def fps_random(self, X, initial_subset, b_samples, random_state=None):
        """FPS stage followed by uniform sampling without replacement.

        Returns the FPS indices followed by ``b_samples - len(idx_fps)``
        indices drawn from the rows FPS did not select.
        """
        idx_fps, remaining = self._fps_stage(X, initial_subset)
        # A private generator yields the same draw as seeding the module-level
        # RNG, without reseeding global state for the rest of the process.
        rng = random.Random(random_state) if random_state is not None else random
        return idx_fps + rng.sample(remaining, b_samples - len(idx_fps))

    def fps_twinning(self, X, initial_subset, ratio, idx_initial_point):
        """FPS stage followed by twinning on the points FPS did not select.

        Returns the FPS indices followed by the twinning picks, the latter
        mapped back to row indices of ``X``.
        """
        idx_fps, remaining = self._fps_stage(X, initial_subset)
        # NOTE(review): u1 receives a row index of the full X while the
        # sampler is fitted on X[remaining]; confirm which index space Twin
        # expects for u1.
        twin_sampler = Twin(ratio=ratio, u1=remaining[idx_initial_point])
        twin_indices = twin_sampler.fit(X[remaining])
        return idx_fps + list(np.asarray(remaining)[np.asarray(twin_indices)])