Source code for mmcci.sc

import numpy as np

from . import tl


def dissimilarity_score(
    m1, m2, lmbda=0.5, normalise=False, binary=False, trim=False, only_non_zero=False
):
    """Calculates a dissimilarity score between two matrices.

    The score is a weighted blend of two terms, each averaged over the number
    of edges (matrix cells) considered:

    * a weighted term: ``|m1 - m2| / (m1 + m2)`` summed over all cells, and
    * a binary term: the number of cells whose values differ.

    Args:
        m1, m2 (pd.DataFrame): Two matrices to compare.
        lmbda (float) (optional): Weighting factor for weighted vs binary
            dissimilarity (0-1). 0 is fully binary and 1 is fully weighted.
            Defaults to 0.5.
        normalise (bool) (optional): Normalizes matrices before comparison by
            dividing each by its total sum. If exactly one of the matrices sums
            to zero, neither matrix is rescaled. Defaults to False.
        binary (bool) (optional): Treats matrices as binary (0 or 1). Defaults
            to False.
        trim (bool) (optional): Trims matrices to common rows and columns.
            Otherwise pads 0s to uncommon rows and columns. Defaults to False.
        only_non_zero (bool) (optional): Only considers non-zero edges for
            calculation. Defaults to False.

    Returns:
        float: The dissimilarity score between the two matrices. 0 is returned
        when there is nothing to compare (both matrices sum to zero with
        ``normalise=True``, or no edges are left to average over).
    """
    if trim:
        # Set order is arbitrary, but both frames are indexed with the same
        # label lists, so cells stay aligned with each other.
        common_rows = list(set(m1.index) & set(m2.index))
        common_cols = list(set(m1.columns) & set(m2.columns))
        m1 = m1.loc[common_rows, common_cols]
        m2 = m2.loc[common_rows, common_cols]
    else:
        m1, m2 = tl.align_dataframes(m1, m2)

    m1 = m1.values
    m2 = m2.values

    if normalise:
        total_1 = m1.sum()
        total_2 = m2.sum()
        if total_1 == 0 and total_2 == 0:
            return 0
        # Rescale only when both totals are non-zero; otherwise one of the
        # divisions would be by zero.
        if total_1 != 0 and total_2 != 0:
            m1 = m1 / total_1
            m2 = m2 / total_2

    if binary:
        m1 = np.where(m1 > 0, 1, 0)
        m2 = np.where(m2 > 0, 1, 0)

    if only_non_zero:
        n_of_edges = int(np.count_nonzero((m1 + m2) > 0))
    else:
        # Total number of cells; also correct for non-square matrices
        # (e.g. after trimming), unlike len(m1) ** 2.
        n_of_edges = m1.size

    # No edges to average over: report "no dissimilarity" instead of
    # dividing by zero and returning NaN.
    if n_of_edges == 0:
        return 0

    abs_weight_difference = np.abs(m1 - m2)
    weight_sum = m1 + m2
    # Avoid division by zero: where both entries are zero the difference is
    # zero as well, so any non-zero denominator yields a zero contribution.
    weight_sum[weight_sum == 0] = -1
    norm_weight_difference_sum = np.sum(abs_weight_difference / weight_sum)

    wt_dissim = lmbda * (norm_weight_difference_sum / n_of_edges)

    n_diff = int(np.count_nonzero(abs_weight_difference > 0))
    bin_dissim = (1 - lmbda) * (n_diff / n_of_edges)

    return wt_dissim + bin_dissim
def multiply_non_zero_values(dataframes, strict=False):
    """Multiply non-zero values across a list of pandas DataFrames.

    For every cell, the non-zero values found at that position across all
    frames are multiplied together, and the product is then raised to the
    power ``1 / len(dataframes)``.

    Args:
        dataframes (list): A list of pandas DataFrames. They are aligned with
            ``tl.align_dataframes`` before being combined. The list passed in
            is not modified.
        strict (bool) (optional): If True, only interactions where more than
            50% of the values are non-zero will be multiplied. Defaults to
            False.

    Returns:
        pd.DataFrame: A new DataFrame where each cell contains the
        ``len(dataframes)``-th root of the product of the non-zero values.
        A cell is zero when all of its values are zero, or, with
        ``strict=True``, when 50% or fewer of its values are non-zero. Cells
        whose root is undefined (NaN) are also set to zero.

    Raises:
        IndexError: If ``dataframes`` is empty.
    """
    # Work on a private copy so the caller's list is not rebound in place.
    frames = list(dataframes)
    result_df = frames[0]

    # Align twice, as the original code did. NOTE(review): presumably the first
    # pass grows result_df to cover every label and the second pass pads the
    # frames aligned early on -- confirm against tl.align_dataframes.
    for _ in range(2):
        for idx, frame in enumerate(frames):
            frames[idx], result_df = tl.align_dataframes(frame, result_df)

    result_df = result_df.astype(np.float64)
    n_frames = len(frames)

    for i in result_df.index:
        for j in result_df.columns:
            values = [df.loc[i, j] for df in frames]
            non_zero_values = [value for value in values if value != 0]

            if strict:
                keep = len(non_zero_values) / n_frames > 0.5
            else:
                keep = len(non_zero_values) > 0

            if keep:
                result_df.loc[i, j] = np.prod(non_zero_values, dtype=np.float64)
            else:
                result_df.loc[i, j] = 0

    # Use the frame count directly: it equals len(values) whenever the loop
    # runs, and stays defined when the aligned frame has no cells at all.
    return np.power(result_df, 1 / n_frames).fillna(0)