Source code for neural_network.util.weighted_partitioner

import math
import random
from typing import List
import pandas as pd

from .partitioner import Partitioner



[docs]
class WeightedPartitioner(Partitioner):
    """Class to create sets of at most `m` integers drawn from `n` integers,
    weighted by which ground truth class each integer lies in.

    Integer ``i`` belongs to the class derived from the ``'y'`` value of the
    dataframe row labelled ``i``. When the partitioner is called, a class is
    first drawn uniformly at random and then a member of that class is drawn
    uniformly at random, so every class is equally likely to be represented
    regardless of how many integers it contains.
    """

    def __init__(self, n: int, m: int, df: pd.DataFrame,
                 do_regression: bool = False, bins: int = 10):
        """Constructor method

        Parameters
        ----------
        n : int
            Number of integers
        m : int
            Number of sets for the partition
        df : pd.DataFrame
            The classes for the integers, read from the ``'y'`` column. Rows
            are looked up by label (``.loc``) for ``0 .. n - 1``, so the
            dataframe is expected to carry those index labels
        do_regression : bool
            Whether we are partitioning regressional or classificational data
        bins : int
            If regression is True, this represents the number of bins to
            split the data in to. Otherwise, this parameter is ignored

        Raises
        ------
        ValueError
            If ``len(df)`` differs from ``n``, or if ``do_regression`` is
            True and ``bins`` is smaller than 1
        """
        super().__init__(n, m)
        if len(df) != n:
            raise ValueError(f"n must equal the length of the dataframe "
                             f"(n = {n}, len(df) = {len(df)})")
        if do_regression and bins < 1:
            raise ValueError(f"bins must be a positive integer when "
                             f"do_regression is True (bins = {bins})")

        targets = df['y']
        if do_regression:
            min_y, max_y = min(targets), max(targets)
            class_width = (max_y - min_y) / bins

        # Group the integers by a raw class key: the bin index for
        # regression targets, the integer value of the label otherwise.
        groups = {}
        for i in range(len(df)):
            y = targets.loc[i]
            if not do_regression:
                key = int(y)
            elif class_width == 0:
                # Every target is identical, so there is only one bin and
                # dividing by the zero width must be avoided.
                key = 0
            else:
                # The maximum target lands exactly on the upper edge of the
                # last bin; clamp it back inside.
                key = min(int((y - min_y) / class_width), bins - 1)
            groups.setdefault(key, []).append(i)

        # Relabel the non-empty classes as 0 .. k - 1 (in ascending key
        # order) so that `__call__` can draw a class index from a plain
        # range. Empty bins and gaps in the label values vanish here.
        self._class_dict = {
            new_label: groups[key]
            for new_label, key in enumerate(sorted(groups))
        }
        self._num_classes = len(self._class_dict)

    def __call__(self) -> List[List[int]]:
        """Uses equal weights for each class to create sets of size `m`
        containing integers (sampled with replacement).

        ``self._n`` integers are drawn in total and split into consecutive
        sets of ``self._m``; the final set holds the remainder and may be
        shorter. Both attributes are read here but not assigned by this
        class (presumably they are set by ``Partitioner.__init__``).

        Returns
        -------
        List[List[int]]
            The list of sets
        """
        # Produces a list of `n` class indices
        chosen_classes = random.choices(population=range(self._num_classes),
                                        k=self._n)
        # One member per chosen class, drawn in the same order as the
        # class indices.
        samples = [random.choice(self._class_dict[chosen_class])
                   for chosen_class in chosen_classes]
        return [samples[start:start + self._m]
                for start in range(0, self._n, self._m)]
Source code for neural_network.util.weighted_partitioner

Basic Neural Network Project

Navigation

Related Topics