Skip to content

Utilities

gptempest.utils.load_prepare_data(input, dtype)

Load and prepare the data. Returns a TensorDataset.

Source code in src/gptempest/utils.py
def load_prepare_data(input, dtype):
    """Load a feature table from a text file and wrap it in a TensorDataset.

    The file is read with ``np.loadtxt``, the resulting array is passed
    through a freshly fitted ``MinMaxScaler`` (default settings), and every
    row is paired with its row index, which serves as the time stamp.

    Args:
        input: File name (or anything else ``np.loadtxt`` accepts) holding
            the feature values.
        dtype: Torch dtype used for both the feature and the time tensors.

    Returns:
        A ``TensorDataset`` whose first tensor holds the scaled features and
        whose second tensor holds the row indices ``0 .. N-1`` as a column
        vector of shape ``(N, 1)``.
    """
    raw = np.loadtxt(input)
    # NOTE(review): MinMaxScaler is presumably scikit-learn's and expects a
    # 2-D array (rows = samples) -- confirm against the module imports.
    scaled = MinMaxScaler().fit_transform(raw)
    # One integer time stamp per row, shaped as a column vector.
    row_index = np.arange(len(raw)).reshape(-1, 1)
    return TensorDataset(
        torch.tensor(scaled, dtype=dtype),
        torch.tensor(row_index, dtype=dtype),
    )

gptempest.utils.generate_yaml_config(output_file)

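Generates a YAML config file populated with commented default parameters. If the requested file already exists, a numeric suffix is appended to the name instead of overwriting it, and the path actually written is returned.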

Source code in src/gptempest/utils.py
def generate_yaml_config(output_file):
    """Write a commented template config with default values to a YAML file.

    An existing file is never overwritten: while the target path exists, a
    suffix ``_1``, ``_2``, ... is inserted before the file extension until an
    unused name is found.

    Args:
        output_file: Desired path of the config file.

    Returns:
        The path that was actually written (may differ from ``output_file``
        if a numeric suffix had to be added).
    """
    stem, suffix = os.path.splitext(output_file)
    attempt = 0
    while os.path.exists(output_file):
        attempt += 1
        output_file = f'{stem}_{attempt}{suffix}'

    # (key, default value, explanatory comment) in the order they are written.
    entries = (
        ('data_path', 'path/to/default/data/file',
         'File path containing the coordinates.'),
        ('inducing_points_path', 'path/to/default/inducing_points/file',
         'File path containing the timestamps of the inducing points.'),
        ('save_path', 'path/to/output/files/',
         'File path which will contain all output files.'),
        ('cuda', True,
         'If set, the training runs on GPUs (they must be CUDA-compatible)'),
        ('dim_input', 2,
         'Dimensionality of the input layer (number of features)'),
        ('dim_latent', 2,
         'Dimensionality of the latent space'),
        ('neurons_ae', [32, 32, 32],
         'The dimensions of the hidden layers of the autoencoder [q(z|x)].'),
        ('epochs', 100,
         'Number of epochs for the model to train.'),
        ('batch_size', 1024,
         'Batch size: number of samples passed through the network at a time'),
        ('learning_rate', 1e-4,
         'Learning rate (usually 1e-2 - 1e-6)'),
        ('weight_decay', 1e-6,
         'The decay rate for the optimizer'),
        ('beta', 50,
         'Weight of the Gaussian process loss term'),
        ('kernel_nu', 1.5,
         'The parameter nu in the Matern kernel'),
        ('kernel_scale', 1e3,
         'The scale parameter in the Matern kernel (time scale)'),
    )

    with open(output_file, 'w') as handle:
        for key, default, comment in entries:
            # Each parameter is dumped on its own so that its comment line
            # can be placed directly above it.
            handle.write(f"# {comment}\n")
            yaml.dump({key: default}, handle, default_flow_style=False)
    return output_file

gptempest.utils.yaml_config_reader(config)

Parse all parameters from the YAML config file.

Source code in src/gptempest/utils.py
def yaml_config_reader(config: str):
    """Parse all parameters from the YAML config file.

    Args:
        config: Path to the YAML configuration file.

    Returns:
        A 15-tuple ``(data_path, inducing_points_path, save_path, cuda,
        dim_input, dim_latent, neurons_ae, epochs, batch_size, learning_rate,
        weight_decay, beta, kernel_nu, kernel_scale, header)``.
        The first four entries are returned exactly as read (``None`` if
        absent); the remaining parameters are converted to ``int``, ``list``
        or ``float``. ``header`` is a ``#``-prefixed multi-line string listing
        every parameter as read from the file, for reproducibility.

    Raises:
        TypeError: If the file does not contain a YAML mapping (e.g. it is
            empty), or if any of the converted parameters is missing or null.
        ValueError: If a value cannot be converted to its numeric type.
    """
    with open(config, 'r') as stream:
        params = yaml.safe_load(stream)

    # An empty file yields None and a top-level list/scalar has no .get();
    # fail with a message that names the file instead of an AttributeError.
    if not isinstance(params, dict):
        raise TypeError(
            f"Config file '{config}' must contain a YAML mapping of "
            f"parameters, got {type(params).__name__}."
        )

    # Returned as read from the file, without conversion.
    passthrough_keys = (
        'data_path',
        'inducing_points_path',
        'save_path',
        'cuda',
    )
    # Parameters that are coerced to a fixed type, in return order.
    converted_keys = (
        ('dim_input', int),
        ('dim_latent', int),
        ('neurons_ae', list),
        ('epochs', int),
        ('batch_size', int),
        ('learning_rate', float),
        ('weight_decay', float),
        ('beta', float),
        ('kernel_nu', float),
        ('kernel_scale', float),
    )

    # int(None) / float(None) / list(None) would raise a TypeError that does
    # not say which key is at fault. TypeError is kept as the exception type
    # so existing handlers keep working, but the message names the keys.
    missing = [key for key, _ in converted_keys if params.get(key) is None]
    if missing:
        raise TypeError(
            f"Config file '{config}' is missing required parameter(s): "
            f"{', '.join(missing)}"
        )

    # Create header string with all parameters for reproducibility
    all_keys = passthrough_keys + tuple(key for key, _ in converted_keys)
    header = "\n".join(
        ["# TEMPEST model configuration Parameters:"]
        + [f"# {key}: {params.get(key)}" for key in all_keys]
    )

    return (
        *(params.get(key) for key in passthrough_keys),
        *(convert(params[key]) for key, convert in converted_keys),
        header,
    )