# Copyright (c) 2022 Copyright holder of the paper Structural Kernel Search via Bayesian Optimization and Symbolical Optimal Transport submitted to NeurIPS 2022 for review.
# All rights reserved.
import numpy as np
from typing import Tuple, Optional
from abc import ABC, abstractmethod


class BaseDataset(ABC):
    """
    Abstract interface for datasets that expose their data as numpy arrays.

    Every method is abstract, so a concrete subclass has to implement all of
    them before it can be instantiated. Throughout the interface, n denotes
    the number of datapoints, d the input dimension and m the output dimension.
    """

    @abstractmethod
    def load_data_set(self):
        """
        Loads the dataset (probably from some file - implementation dependent).
        """
        raise NotImplementedError

    @abstractmethod
    def get_complete_dataset(self, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Retrieves the complete dataset (of size n with input dimensions d and
        output dimensions m) as numpy arrays.

        Arguments:
            **kwargs - implementation-specific options

        Returns
        np.ndarray - x (input values) with shape (n,d)
        np.ndarray - y (output values) with shape (n,m)
        """
        raise NotImplementedError

    @abstractmethod
    def sample(self, n: int, **kwargs) -> Tuple[np.ndarray, np.ndarray]:
        """
        Retrieves a sample of size n from the dataset (without replacement).

        Arguments:
            n - int specifying how many datapoints should be sampled
            **kwargs - implementation-specific options

        Returns
        np.ndarray - x (input values) with shape (n,d)
        np.ndarray - y (output values) with shape (n,m)
        """
        raise NotImplementedError

    @abstractmethod
    def sample_train_test(
        self, use_absolute: bool, n_train: int, n_test: int, fraction_train: float
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Retrieves train and test data (mutually exclusive samples), either in
        absolute numbers or as a fraction of the complete dataset.

        Arguments:
            use_absolute - bool specifying if absolute numbers of training and test data should be used or fraction of complete dataset
            n_train - int specifying how many training datapoints should be sampled
            n_test - int specifying how many test datapoints should be sampled
            fraction_train - fraction of complete dataset that is used as training data

        Returns
        np.ndarray - x train data with shape (n_train,d)
        np.ndarray - y train data with shape (n_train,m)
        np.ndarray - x test data with shape (n_test,d)
        np.ndarray - y test data with shape (n_test,m)
        """
        raise NotImplementedError

    @abstractmethod
    def get_name(self):
        """
        Returns the name of the dataset.

        NOTE(review): the return type is not fixed by this interface -
        presumably a str; confirm against the concrete subclasses.
        """
        raise NotImplementedError
