Source code for mmcci.io

import pandas as pd
import numpy as np
import scanpy
import pickle
import json
from .CCIData_class import CCIData


[docs]
def read_stLearn(path, key="cell_type", save_anndata=True) -> CCIData:
    """Read a stLearn ligand-receptor analysis output into a CCIData object.

    The file is loaded with ``scanpy.read_h5ad`` and the resulting AnnData is
    handed to :func:`convert_stLearn`, so both entry points build the CCIData
    object in exactly the same way.

    Args:
        path (str): The path to the stLearn ligand-receptor analysis output
            (an ``.h5ad`` file).
        key (str) (optional): The key in adata.obs that was used for CCI. It
            selects ``adata.uns["per_lr_cci_raw_<key>"]`` and
            ``adata.uns["per_lr_cci_pvals_<key>"]``. Defaults to "cell_type".
        save_anndata (bool) (optional): Whether to save the AnnData object in
            the CCIData object. Defaults to True.

    Returns:
        CCIData: The CCIData object.

    Raises:
        KeyError: If the expected ``per_lr_cci_*`` entries are missing from
            ``adata.uns``.
    """

    adata = scanpy.read_h5ad(path)

    # Delegate instead of repeating the construction logic of convert_stLearn.
    return convert_stLearn(adata, key=key, save_anndata=save_anndata)




[docs]
def convert_stLearn(adata, key="cell_type", save_anndata=True) -> CCIData:
    """Converts an in-memory stLearn ligand-receptor analysis output to a CCIData object.

    Args:
        adata (AnnData): The stLearn ligand-receptor analysis output.
        key (str) (optional): The key in adata.obs that was used for CCI. It
            selects ``adata.uns["per_lr_cci_raw_<key>"]`` (scores) and
            ``adata.uns["per_lr_cci_pvals_<key>"]`` (p-values). Defaults to "cell_type".
        save_anndata (bool) (optional): Whether to save the AnnData object in the CCIData object. Defaults to True.

    Returns:
        CCIData: The CCIData object.
    """

    # Arguments shared by both variants; n_spots is the first dimension of adata.
    arguments = {
        "n_spots": adata.shape[0],
        "cci_scores": adata.uns[f"per_lr_cci_raw_{key}"],
        "p_values": adata.uns[f"per_lr_cci_pvals_{key}"],
    }

    # The AnnData object is only attached on request.
    if save_anndata:
        arguments["adata"] = adata

    return CCIData(**arguments)




[docs]
def _cellphonedb_matrices(path, fill_value=None):
    """Parse one CellPhoneDB result table into one matrix per interacting pair.

    Every column after "classification" is treated as a cell-pair column whose
    header has the form ``"A|B"``. For each table row the values are pivoted
    into a DataFrame indexed by ``A`` (rows) and ``B`` (columns).

    Args:
        path (str): Path to a tab-separated CellPhoneDB txt file.
        fill_value (optional): If not None, missing entries of every matrix
            are replaced by this value.

    Returns:
        dict: Maps each "interacting_pair" value to its matrix. When a pair
        name occurs more than once, the last row wins.
    """
    table = pd.read_csv(path, delimiter="\t")

    # Slice the cell-pair columns as a frame first: taking a row from this
    # sub-frame keeps its numeric dtype, whereas a row of the full table (which
    # also holds text columns) is upcast to object.
    first_pair_col = table.columns.get_loc("classification") + 1
    pair_values = table.iloc[:, first_pair_col:]
    pair_names = table["interacting_pair"]

    matrices = {}
    for pos in range(len(table)):
        values = pair_values.iloc[pos]

        long_form = pd.DataFrame({"Index": values.index, "Value": values.values})
        long_form[["Row", "Col"]] = long_form["Index"].str.split("|", expand=True)
        long_form = long_form.drop("Index", axis=1)

        matrix = long_form.pivot(index="Row", columns="Col", values="Value")
        matrix = matrix.rename_axis(None, axis=1).rename_axis(None)
        if fill_value is not None:
            matrix = matrix.fillna(fill_value)

        matrices[pair_names.iloc[pos]] = matrix

    return matrices


def read_CellPhoneDB(means_path, pvals_path, n_spots=None) -> CCIData:
    """Reads CellPhoneDB interaction scores txt files and converts them to a CCIData object.

    Both files are parsed the same way (see ``_cellphonedb_matrices``); missing
    p-values are additionally replaced by 1.

    Args:
        means_path (str): Path to the means txt file.
        pvals_path (str): Path to the pvals txt file.
        n_spots (int) (optional): The number of spots. Defaults to None.

    Returns:
        CCIData: The CCIData object.

    Raises:
        KeyError: If a file lacks the "classification" or "interacting_pair"
            column.
    """

    lr_dict = _cellphonedb_matrices(means_path)
    p_vals_dict = _cellphonedb_matrices(pvals_path, fill_value=1)

    cci_data = CCIData(n_spots=n_spots, cci_scores=lr_dict, p_values=p_vals_dict)

    return cci_data




[docs]
def _squidpy_matrices(table, lr_pairs, cluster_pairs, cell_type_set, fill_value=None):
    """Build one cell-type x cell-type matrix per ligand-receptor pair.

    Args:
        table: One of the Squidpy result tables (``result["means"]`` or
            ``result["pvalues"]``), addressed as ``table[c1][c2][lig][rec]``.
        lr_pairs (list): Row labels of the table; the first two entries of
            each label are used as ligand and receptor.
        cluster_pairs (list): Column labels of the table; the first two
            entries of each label are used as the two clusters.
        cell_type_set (np.ndarray): Sorted unique cluster names used as the
            index and columns of every matrix.
        fill_value (optional): If not None, NaN entries are replaced by it.

    Returns:
        dict: Maps ``"_".join(lr_pair)`` to its DataFrame.
    """
    position = {name: k for k, name in enumerate(cell_type_set.tolist())}
    size = len(cell_type_set)

    matrices = {}
    for lr_pair in lr_pairs:
        lig, rec = lr_pair[0], lr_pair[1]

        # Cluster pairs absent from the table keep the initial value 0.
        int_matrix = np.zeros((size, size))
        for cluster_pair in cluster_pairs:
            c1, c2 = cluster_pair[0], cluster_pair[1]
            int_matrix[position[c1], position[c2]] = table[c1][c2][lig][rec]

        frame = pd.DataFrame(int_matrix, index=cell_type_set, columns=cell_type_set)
        if fill_value is not None:
            frame = frame.fillna(fill_value)
        matrices["_".join(lr_pair)] = frame

    return matrices


def read_Squidpy(result, n_spots=None) -> CCIData:
    """Reads a Squidpy ligand-receptor analysis output and converts it to a CCIData
    object.

    The row and column labels of ``result["pvalues"]`` define which
    ligand-receptor pairs and cluster pairs are read from both
    ``result["means"]`` and ``result["pvalues"]``. NaN p-values are replaced
    by 1; NaN means are kept.

    Args:
        result (dict): The output from squidpy.gr.ligrec.
        n_spots (int) (optional): The number of spots. Defaults to None.

    Returns:
        CCIData: The CCIData object.
    """

    pvals = pd.DataFrame(result["pvalues"])

    lr_pairs = list(pvals.index)
    # Use the column tuples directly rather than joining and re-splitting
    # them on "--", which breaks for cluster names that contain "--".
    cluster_pairs = list(pvals.columns)

    # Collect clusters from both column levels so that a cluster appearing
    # only as a target still gets a row/column instead of failing the lookup.
    cell_type_set = np.unique(
        [name for pair in cluster_pairs for name in (pair[0], pair[1])]
    )

    lr_dict = _squidpy_matrices(
        result["means"], lr_pairs, cluster_pairs, cell_type_set
    )
    p_vals_dict = _squidpy_matrices(
        result["pvalues"], lr_pairs, cluster_pairs, cell_type_set, fill_value=1
    )

    cci_data = CCIData(n_spots=n_spots, cci_scores=lr_dict, p_values=p_vals_dict)

    return cci_data




[docs]
def read_CellChat(path, n_spots=None) -> CCIData:
    """Reads a CellChat ligand-receptor analysis output (cellchat@dr) and converts it to
    a CCIData object.

    The file must be a CSV with the columns "source", "target",
    "interaction_name", "prob" and "pval". For every interaction name one
    score matrix (default 0) and one p-value matrix (default 1) is built, with
    rows indexed by source and columns by target cell type. NaN p-values are
    replaced by 1.

    Args:
        path (str): Path to the CSV file holding the output from cellchat@dr.
        n_spots (int) (optional): The number of spots. Defaults to None.

    Returns:
        CCIData: The CCIData object.
    """

    result = pd.read_csv(path)

    cell_type_set = np.unique(np.concatenate([result["source"], result["target"]]))
    position = {name: k for k, name in enumerate(cell_type_set.tolist())}
    size = len(cell_type_set)

    # Accumulate into plain ndarrays and wrap them in DataFrames once at the
    # end. Writing into ``DataFrame.values`` of an already stored frame is
    # avoided on purpose: under pandas Copy-on-Write that array can be a
    # read-only view, and rebuilding a DataFrame per CSV row is wasteful.
    score_arrays = {}
    pval_arrays = {}

    rows = zip(
        result["interaction_name"],
        result["source"],
        result["target"],
        result["prob"],
        result["pval"],
    )
    for lr_, source, target, prob, pval in rows:
        if lr_ not in score_arrays:
            score_arrays[lr_] = np.zeros((size, size))
            pval_arrays[lr_] = np.ones((size, size))
        row, col = position[source], position[target]
        score_arrays[lr_][row, col] = prob
        pval_arrays[lr_][row, col] = pval

    lr_dict = {
        lr_: pd.DataFrame(matrix, index=cell_type_set, columns=cell_type_set)
        for lr_, matrix in score_arrays.items()
    }
    pvals_dict = {
        lr_: pd.DataFrame(
            matrix, index=cell_type_set, columns=cell_type_set
        ).fillna(1)
        for lr_, matrix in pval_arrays.items()
    }

    cci_data = CCIData(n_spots=n_spots, cci_scores=lr_dict, p_values=pvals_dict)

    return cci_data




[docs]
def read_NATMI(path, n_spots=None) -> CCIData:
    """Reads a NATMI ligand-receptor analysis output (Edges_lrc2p.csv) and converts it
    to a CCIData object.

    The CSV must contain the columns "Ligand symbol", "Receptor symbol",
    "Sending cluster", "Target cluster" and "Edge average expression weight".
    One matrix is built per ``"<ligand>_<receptor>"`` name, with rows indexed
    by sending cluster and columns by target cluster; cluster pairs that do
    not occur keep the value 0. No p-values are read.

    Args:
        path (str): The path to Edges_lrc2p.csv.
        n_spots (int) (optional): The number of spots. Defaults to None.

    Returns:
        CCIData: The CCIData object.
    """

    result = pd.read_csv(path)

    cell_type_set = np.unique(np.concatenate(
        [result["Sending cluster"], result["Target cluster"]]))
    position = {name: k for k, name in enumerate(cell_type_set.tolist())}
    size = len(cell_type_set)

    # Fill plain ndarrays and convert to DataFrames once at the end. This
    # avoids writing into ``DataFrame.values`` of a stored frame (which can be
    # a read-only view under pandas Copy-on-Write) and avoids rebuilding a
    # DataFrame for every CSV row.
    weight_arrays = {}

    edges = zip(
        result["Ligand symbol"],
        result["Receptor symbol"],
        result["Sending cluster"],
        result["Target cluster"],
        result["Edge average expression weight"],
    )
    for ligand, receptor, sender, target, weight in edges:
        lr_ = ligand + "_" + receptor
        if lr_ not in weight_arrays:
            weight_arrays[lr_] = np.zeros((size, size))
        weight_arrays[lr_][position[sender], position[target]] = weight

    lr_dict = {
        lr_: pd.DataFrame(matrix, index=cell_type_set, columns=cell_type_set)
        for lr_, matrix in weight_arrays.items()
    }

    cci_data = CCIData(n_spots=n_spots, cci_scores=lr_dict)

    return cci_data




[docs]
def read_CCIData(path: str) -> CCIData:
    """Loads a CCIData object from JSON or pickle file.

    The format is chosen from the file extension: ``.json`` files are parsed
    and rebuilt with :func:`from_dict`; ``.pkl`` files are unpickled and the
    stored object is returned as-is.

    Args:
        path (str): Path to the saved CCIData file. ``os.PathLike`` objects
            such as ``pathlib.Path`` are accepted as well.

    Returns:
        CCIData: The loaded CCIData object

    Raises:
        ValueError: If the path ends in neither ``.json`` nor ``.pkl``.
    """
    import os

    # Normalise path-like objects to str so the suffix checks below work for
    # pathlib.Path too (Path has no ``endswith``).
    path = os.fspath(path)

    if path.endswith('.json'):
        # Load from JSON file
        with open(path, 'r') as f:
            data_dict = json.load(f)
        return from_dict(data_dict)

    if path.endswith('.pkl'):
        # Load from pickle file for backwards compatibility.
        # SECURITY: unpickling can execute arbitrary code; only load .pkl
        # files that come from a trusted source.
        with open(path, 'rb') as f:
            return pickle.load(f)

    raise ValueError("File must be .json or .pkl format")

    
    

[docs]
def read_network(path: str, metadata: "dict | None" = None) -> CCIData:
    """Loads a single network from a csv file.

    The CSV is read with its first column as the index and stored as the
    ``network`` of a new CCIData object.

    Args:
        path (str): Path to the saved network file. ``os.PathLike`` objects
            such as ``pathlib.Path`` are accepted as well.
        metadata (dict) (optional): Dictionary containing other metadata,
            passed on as ``other_metadata``. Defaults to None.

    Returns:
        CCIData: The loaded CCIData object

    Raises:
        ValueError: If the path does not end in ``.csv``.
    """
    import os

    # Normalise path-like objects to str so the suffix check works for
    # pathlib.Path too (Path has no ``endswith``).
    path = os.fspath(path)

    if not path.endswith('.csv'):
        raise ValueError("File must be .csv format")

    # Load from CSV file
    data = pd.read_csv(path, index_col=0)
    return CCIData(network=data, other_metadata=metadata)




[docs]
def from_dict(data_dict: dict) -> CCIData:
    """Rebuild a CCIData object from its dictionary representation.

    Args:
        data_dict: Dictionary containing CCIData representation. It must have
            an 'assays' mapping (assay name -> fields) and a 'metadata' entry.

    Returns:
        CCIData: Reconstructed CCIData object
    """
    # Fields that hold a dict of DataFrames rather than a single DataFrame.
    frame_collections = ['cci_scores', 'p_values']

    restored = {}
    for assay_name, stored_fields in data_dict['assays'].items():
        fields = {}
        for field, payload in stored_fields.items():
            if field in frame_collections:
                # One DataFrame per entry of the collection.
                fields[field] = {
                    name: pd.DataFrame.from_dict(table)
                    for name, table in payload.items()
                }
            elif isinstance(payload, dict):
                # Any other dict is taken to be a single serialized DataFrame.
                fields[field] = pd.DataFrame.from_dict(payload)
            else:
                # Everything else is passed through unchanged.
                fields[field] = payload
        restored[assay_name] = fields

    return CCIData(other_metadata=data_dict['metadata'], assays=restored)