Source code for pca

import numpy as np


class PcaModel:
    """Abstraction for a pca model.

    Reads a stored model file and exposes its five entries as attributes.
    The file is expected to hold a five-element object array in the order
    ``[Vt, s, n_components, [mean_values], triangles]`` (the layout that
    ``save`` in this module writes).

    Attributes:
        Vt: entry 0 of the stored array.
        s: entry 1 of the stored array.
        n_components: entry 2 of the stored array.
        mean_values: first item of entry 3 (the mean is stored wrapped in a
            one-element list).
        triangles: entry 4 of the stored array.
    """

    def __init__(self, model_file):
        """Load the model stored at ``model_file``.

        Args:
            model_file: path or file object accepted by ``np.load``.
        """
        # The model is an object array, which ``np.load`` refuses to read
        # unless pickling is explicitly allowed (the default is False since
        # NumPy 1.16.3).
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # model files that come from a trusted source.
        Vtm = np.load(model_file, allow_pickle=True)

        self.Vt = Vtm[0]
        self.s = Vtm[1]
        self.n_components = Vtm[2]
        # The mean is stored as ``[mean_values]``; unwrap it.
        self.mean_values = Vtm[3][0]
        self.triangles = Vtm[4]
def pca(data, mean_values, variance_percentage=90):
    """Perform a Singular Value Decomposition on mean-centred data.

    Args:
        data (ndarray): the samples; ``mean_values`` is subtracted from it
            before the decomposition.
        mean_values (ndarray): values subtracted from ``data`` (must be
            broadcastable against it).
        variance_percentage (float): threshold, in percent, used to choose
            ``n_components``.  Note that the ratio is computed on the plain
            singular values (``s``), not on their squares.

    Returns:
        tuple: ``(U, s, Vt, n_components)``

        U (ndarray): U matrix
        s (ndarray): 1d singular values (diagonal in array form)
        Vt (ndarray): Vt matrix
        n_components (int): number of leading singular values accumulated
            until their share of ``s.sum()`` exceeds ``variance_percentage``;
            never larger than ``len(s)``.
    """
    # subtract mean
    zero_mean = data - mean_values

    U, s, Vt = np.linalg.svd(zero_mean, full_matrices=False)

    # Accumulate singular values until their cumulative share passes the
    # requested percentage.
    total = s.sum()
    subtotal = 0.0
    n_components = 0
    # The bounds check keeps the loop from indexing past the end of ``s``
    # when the threshold is 100 or more (or is never exceeded because of
    # rounding); in that case every component is kept.
    while (n_components < len(s)
           and (subtotal * 100.0) / total <= variance_percentage):
        subtotal += s[n_components]
        n_components += 1

    return U, s, Vt, n_components
def reconstruct(feature_vector, Vt, mean_values, n_components=None):
    """Reconstruct a feature vector from its leading principal components.

    The mean is subtracted from ``feature_vector``, the result is projected
    onto the first ``n_components`` rows of ``Vt``, mapped back with the
    transpose of those rows, and the mean is added again.

    Args:
        feature_vector (numpy ndarray): the vector to reconstruct.
        Vt (numpy ndarray): two dimensional array whose rows are used as the
            projection basis (presumably the ``Vt`` returned by ``pca``).
        mean_values (numpy ndarray): mean values of the features of the
            model; subtracted before and added after the projection.
        n_components (int, optional): number of leading rows of ``Vt`` to
            use.  Defaults to ``Vt.shape[1]``.

    Returns:
        numpy ndarray: the reconstructed feature vector.
    """
    # The default is the column count of Vt; when that exceeds the number
    # of rows, the slice below simply selects every row.
    count = Vt.shape[1] if n_components is None else n_components
    basis = Vt[:count]

    centred = feature_vector - mean_values
    # Coordinates of the centred vector in the truncated basis.
    weights = basis.dot(centred.T)
    # Back to feature space, then undo the centring.
    return basis.T.dot(weights) + mean_values
def save(Vt, s, n_components, mean_values, triangles, filename):
    """Store the model (Vt, s, n_components, mean and triangles) in a file.

    The five entries are packed into a one dimensional object array and
    written with ``np.save``::

        [Vt, s, n_components, [mean_values], triangles]

    They can be read back with::

        Vtm = np.load(filename, allow_pickle=True)
        Vt = Vtm[0]
        s = Vtm[1]
        n_components = Vtm[2]
        mean_values = Vtm[3][0]
        triangles = Vtm[4]

    Args:
        Vt: stored as entry 0.
        s: stored as entry 1.
        n_components: stored as entry 2.
        mean_values: stored as entry 3, wrapped in a one-element list.
        triangles: stored as entry 4.
        filename: target passed to ``np.save``.
    """
    entries = (Vt, s, n_components, [mean_values], triangles)

    # Fill a pre-allocated object array slot by slot.  Passing the ragged
    # list straight to ``np.asarray`` raises ValueError on NumPy >= 1.24
    # (and could mis-broadcast on older versions).
    saving = np.empty(len(entries), dtype=object)
    for index, entry in enumerate(entries):
        saving[index] = entry

    np.save(filename, saving)
def load(filename):
    """Load a model file laid out as written by ``save`` in this module.

    The file is expected to hold a five-element object array::

        [Vt, s, n_components, [mean_values], triangles]

    Args:
        filename: path or file object accepted by ``np.load``.

    Returns:
        (tuple): Vt, s, n_components, mean_values, triangles

        Vt: entry 0 of the stored array.
        s: entry 1 of the stored array.
        n_components: entry 2 of the stored array.
        mean_values: first item of entry 3 (the mean is stored wrapped in a
            one-element list).
        triangles: entry 4 of the stored array.
    """
    # The model is an object array, which ``np.load`` refuses to read unless
    # pickling is explicitly allowed (the default is False since
    # NumPy 1.16.3).
    # NOTE(security): unpickling can execute arbitrary code -- only load
    # model files that come from a trusted source.
    Vtm = np.load(filename, allow_pickle=True)

    Vt = Vtm[0]
    s = Vtm[1]
    n_components = Vtm[2]
    # The mean is stored as ``[mean_values]``; unwrap it.
    mean_values = Vtm[3][0]
    triangles = Vtm[4]

    return Vt, s, n_components, mean_values, triangles
#def load_model(filename): # # load the stored model file # return PcaModel(filename)
def flatten_feature_vectors(data, dim=0):
    """Flatten the feature vectors inside a ndarray.

    Example:
        input:
            [
                [[1, 2], [3, 4], [5, 6]],
                ...
                [[1, 2], [3, 4], [5, 6]]
            ]
        output:
            [
                [1, 2, 3, 4, 5, 6],
                ...
                [1, 2, 3, 4, 5, 6]
            ]

    Args:
        data (numpy array): array of feature vectors
        dim (int): axis of ``data`` that enumerates the feature vectors;
            every slice taken along this axis is flattened into one row.

    Returns:
        numpy array: array of flattened feature vectors, one row per entry
        along ``dim``.
    """
    # Bring the requested axis to the front so that iterating yields one
    # feature vector per step.  Previously ``dim`` only determined the
    # iteration count while indexing always ran along axis 0, which broke
    # for any ``dim`` other than 0.
    vectors = np.moveaxis(data, dim, 0)
    return np.array([vector.flatten() for vector in vectors])