|
10 | 10 | import sqlite3
|
11 | 11 | import pickle
|
12 | 12 | import anndata
|
13 |
| -import diffxpy.api as de |
14 | 13 | from statsmodels.stats.multitest import multipletests
|
15 | 14 | import multiprocessing
|
16 | 15 | from itertools import repeat
|
@@ -336,14 +335,30 @@ def abundance_to_adata(self,
|
336 | 335 | df = df.T
|
337 | 336 |
|
338 | 337 | # get adata components - obs, var, and X
|
| 338 | + |
| 339 | + # var |
339 | 340 | var = df.columns.to_frame()
|
340 | 341 | var.columns = [id_col]
|
341 | 342 | var.index.name = 'tid'
|
| 343 | + |
| 344 | + # obs |
342 | 345 | obs = df.index.to_frame()
|
343 | 346 | obs.columns = ['dataset']
|
| 347 | + |
| 348 | + # if we already have transcript abundance and we're adding genes, |
| 349 | + # copy the obs information there |
| 350 | + if how == 'gene' and self.has_abundance(): |
| 351 | + self.adata.obs = reset_dupe_index(self.adata.obs, 'dataset') |
| 352 | + obs = obs.merge(self.adata.obs, how='left', on='dataset') |
| 353 | + obs.drop('dataset_back', axis=1, inplace=True) |
| 354 | + self.adata.obs = set_dupe_index(self.adata.obs, 'dataset') |
| 355 | + |
344 | 356 | obs.index.name = 'dataset'
|
| 357 | + |
| 358 | + # X |
345 | 359 | X = sparse.csr_matrix(df.to_numpy())
|
346 | 360 |
|
| 361 | + |
347 | 362 | # create transcript-level adata object and filter out unexpressed transcripts
|
348 | 363 | adata = anndata.AnnData(var=var, obs=obs, X=X)
|
349 | 364 | genes, _ = sc.pp.filter_genes(adata, min_counts=1, inplace=False)
|
|
0 commit comments