Source code for scATAcat.bulk_data_functions

import pandas as pd
import anndata
import numpy as np 
import scipy.sparse
# Seed NumPy's global (legacy) random state with a fixed value at import time.
# NOTE(review): none of the functions visible here draw random numbers;
# presumably kept for reproducibility of downstream code -- confirm before removing.
np.random.seed(1234)
def generate_bulk_AnnData(bulk_df, var_key='cCREs', obs_key='cell_types'):
    '''
    Generate a dense AnnData object from a DataFrame.

    The input is transposed, so the resulting AnnData has one observation
    per column of `bulk_df` and one variable per row of `bulk_df`. The count
    matrix is stored as a dense float32 array.

    Parameters:
    - bulk_df (DataFrame): Dataframe of prototype data:
        - columns: cell types / samples
        - rows: features (cCREs)
    - var_key (str, optional): Name of the column (and index) holding the
      feature labels in `AnnData.var`. Default is 'cCREs'.
    - obs_key (str, optional): Name of the column (and index) holding the
      sample labels in `AnnData.obs`. Default is 'cell_types'.

    Returns:
    - AnnData of prototypes.
    '''
    # Feature annotation: a single column of row labels, also used as the index.
    features = pd.DataFrame(bulk_df.index)
    features.columns = [var_key]
    features.index = features[var_key]

    # Sample annotation: a single column of column labels, also used as the index.
    cell_types = pd.DataFrame(bulk_df.columns)
    cell_types.columns = [obs_key]
    cell_types.index = cell_types[obs_key]

    # AnnData expects observations in rows, hence the transpose.
    counts = np.array(bulk_df.T, dtype=np.float32)
    return anndata.AnnData(counts, var=features, obs=cell_types)
def generate_bulk_sparse_AnnData(bulk_df, var_key='cCREs', obs_key='cell_types'):
    '''
    Generate an AnnData object from a DataFrame. The count matrix is sparse.

    The input is transposed, so the resulting AnnData has one observation
    per column of `bulk_df` and one variable per row of `bulk_df`. The count
    matrix is stored as a float32 CSR matrix.

    Parameters:
    - bulk_df (DataFrame): Dataframe of prototype data:
        - columns: cell types / samples
        - rows: features (cCREs)
    - var_key (str, optional): Name of the column (and index) holding the
      feature labels in `AnnData.var`. Default is 'cCREs'.
    - obs_key (str, optional): Name of the column (and index) holding the
      sample labels in `AnnData.obs`. Default is 'cell_types'.

    Returns:
    - AnnData of prototypes.
    '''
    # Feature annotation: a single column of row labels, also used as the index.
    features = pd.DataFrame(bulk_df.index)
    features.columns = [var_key]
    features.index = features[var_key]

    # Sample annotation: a single column of column labels, also used as the index.
    cell_types = pd.DataFrame(bulk_df.columns)
    cell_types.columns = [obs_key]
    cell_types.index = cell_types[obs_key]

    # Build the CSR matrix straight from a float32 ndarray; going through
    # np.matrix is unnecessary and that class is no longer recommended by NumPy.
    dense_counts = np.asarray(bulk_df.T.values, dtype=np.float32)
    bulk_csr = scipy.sparse.csr_matrix(dense_counts)
    return anndata.AnnData(bulk_csr, var=features, obs=cell_types)
def preprocess_bulk_adata(bulk_adata, remove_chrY=True, var_key = 'cCREs', copy=False):
    '''
    Preprocess a prototype count matrix in AnnData format.

    Features whose total count across all observations is zero are dropped.
    Optionally, features whose name starts with "chrY" are dropped as well;
    in that case the retained features are also re-ordered by sorted name.

    The input object is never modified. If `copy` is False the result is
    obtained purely by subsetting `bulk_adata` (an AnnData view); if `copy`
    is True the filtered result is copied once at the end, so the returned
    object is independent of the input.

    Parameters:
    - bulk_adata (AnnData): An AnnData object containing the prototype count matrix.
    - remove_chrY (bool, optional): Whether to remove features associated with
      chromosome Y. Default is True.
    - var_key (str, optional): Currently unused; the chrY filter is applied to
      `bulk_adata.var_names`. Kept for interface compatibility. Default is 'cCREs'.
    - copy (bool, optional): If True, a copy of the filtered data is returned;
      if False, a subset of the original object is returned. Default is False.

    Returns:
    - AnnData: The preprocessed AnnData object.
    '''
    # For a scipy sparse matrix, X.sum(0) is a (1, n_vars) np.matrix; flatten it
    # so the boolean mask is 1-D for dense and sparse X alike.
    feature_coverage = np.asarray(bulk_adata.X.sum(0)).ravel()
    filtered = bulk_adata[:, feature_coverage > 0]

    if remove_chrY:
        # Selecting by the sorted name list both drops chrY features and
        # re-orders the remaining ones.
        kept_features = sorted(
            name for name in filtered.var_names if not name.startswith("chrY")
        )
        filtered = filtered[:, kept_features]

    # Copy last so that copy=True never hands back a view.
    return filtered.copy() if copy else filtered