From 86330a9ec434623ab371375293d9bee64e1a7212 Mon Sep 17 00:00:00 2001 From: falexwolf Date: Thu, 27 Jul 2017 16:24:23 +0200 Subject: [PATCH] fixed bugs in data_graph, dpt and tsne --- scanpy/data_structs/data_graph.py | 29 +++++++++++++++-------------- scanpy/tools/dpt.py | 4 ++-- scanpy/tools/tsne.py | 6 +++--- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/scanpy/data_structs/data_graph.py b/scanpy/data_structs/data_graph.py index 3e4238e745..d60edd9d4c 100644 --- a/scanpy/data_structs/data_graph.py +++ b/scanpy/data_structs/data_graph.py @@ -174,12 +174,19 @@ def __init__(self, recompute_graph=None, flavor='haghverdi16'): self.sym = True # we do not allow asymetric cases + self.flavor = flavor # this is to experiment around + self.n_pcs = n_pcs + self.n_dcs = n_dcs # use the graph in adata if (not recompute_graph + # make sure X_diffmap is there and 'X_diffmap' in adata.smp - and adata.smp['X_diffmap'].shape[1] >= n_dcs-1): - self.n_pcs = n_pcs - self.n_dcs = n_dcs + # make sure enough DCs are there + and adata.smp['X_diffmap'].shape[1] >= n_dcs-1 + # make sure that it's sparse + and issparse(adata.add['Ktilde']) == knn + # make sure n_neighbors matches + and k == adata.add['distance'][0].nonzero()[0].size + 1): self.init_iroot_directly(adata) self.X = adata.X # this is a hack, PCA? self.knn = issparse(adata.add['Ktilde']) @@ -188,7 +195,7 @@ def __init__(self, if self.knn: self.k = adata.add['distance'][0].nonzero()[0].size + 1 else: - self.k = adata.X.shape[0] + self.k = None # currently do not store this, is unknown # for output of spectrum self.X_diffmap = adata.smp['X_diffmap'][:, :n_dcs-1] self.evals = np.r_[1, adata.add['diffmap_evals'][:n_dcs-1]] @@ -205,7 +212,7 @@ def __init__(self, # recompute the graph else: self.k = k if k is not None else 30 - logg.info('compute data graph with `n_neighbors={}`' + logg.info(' computing data graph with n_neighbors = {} ' .format(self.k)) self.evals = None self.rbasis = None @@ -214,13 +221,8 @@ def __init__(self, self.Dsq = None self.knn = knn self.n_jobs = sett.n_jobs if n_jobs is None else n_jobs - self.n_pcs = n_pcs - self.n_dcs = n_dcs - self.flavor = flavor # this is to experiment around - self.iroot = None self.X = adata.X # might be overwritten with X_pca below self.Dchosen = None - self.M = None self.init_iroot_and_X_from_PCA(adata, recompute_pca, n_pcs) if False: # TODO # in case we already computed distance relations @@ -233,16 +235,15 @@ def __init__(self, self.Dsq = adata.add['distance'] def init_iroot_directly(self, adata): + self.iroot = None if 'iroot' in adata.add: if adata.add['iroot'] >= adata.n_smps: logg.warn('Root cell index {} does not exist for {} samples. ' 'Is ignored.' .format(adata.add['iroot'], adata.n_smps)) - self.iroot = None else: self.iroot = adata.add['iroot'] - - + def init_iroot_and_X_from_PCA(self, adata, recompute_pca, n_pcs): # retrieve xroot xroot = None @@ -281,7 +282,7 @@ def update_diffmap(self, n_comps=None): self.n_dcs = n_comps logg.info(' updating number of DCs to', self.n_dcs) if self.evals is None or self.evals.size < self.n_dcs: - logg.info('computing spectral decomposition ("diffmap") with', + logg.info(' computing spectral decomposition ("diffmap") with', self.n_dcs, 'components', r=True) self.compute_transition_matrix() self.embed(n_evals=self.n_dcs) diff --git a/scanpy/tools/dpt.py b/scanpy/tools/dpt.py index f5f7364d5c..69c50422e0 100644 --- a/scanpy/tools/dpt.py +++ b/scanpy/tools/dpt.py @@ -107,6 +107,7 @@ def dpt(adata, n_branchings=0, n_neighbors=30, knn=True, n_pcs=50, n_dcs=10, logg.hint(msg) if n_branchings == 0: logg.m('set parameter `n_branchings` > 0 to detect branchings', v='hint') + logg.m('perform Diffusion Pseudotime analysis', r=True) dpt = DPT(adata, n_neighbors=n_neighbors, knn=knn, n_pcs=n_pcs, n_dcs=n_dcs, min_group_size=min_group_size, n_jobs=n_jobs, recompute_graph=recompute_graph, recompute_pca=recompute_pca, @@ -118,8 +119,7 @@ def dpt(adata, n_branchings=0, n_neighbors=30, knn=True, n_pcs=50, n_dcs=10, adata.add['diffmap_evals'] = dpt.evals[1:] if knn: adata.add['distance'] = dpt.Dsq if knn: adata.add['Ktilde'] = dpt.Ktilde - logg.m('perform Diffusion Pseudotime analysis', r=True) - if n_branchings > 1: logg.info('... this uses a hierarchical implementation') + if n_branchings > 1: logg.info(' this uses a hierarchical implementation') # compute DPT distance matrix, which we refer to as 'Ddiff' if dpt.iroot is not None: dpt.set_pseudotime() # pseudotimes are distances from root point diff --git a/scanpy/tools/tsne.py b/scanpy/tools/tsne.py index d15c14d464..e48c299b35 100644 --- a/scanpy/tools/tsne.py +++ b/scanpy/tools/tsne.py @@ -64,7 +64,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None, Multicore-TSNE GitHub (2017) """ - logg.info('compute tSNE', r=True) + logg.info('computing tSNE', r=True) adata = adata.copy() if copy else adata # preprocessing by PCA if 'X_pca' in adata.smp and adata.smp['X_pca'].shape[1] >= n_pcs: @@ -74,7 +74,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None, else: if n_pcs > 0 and adata.X.shape[1] > n_pcs: logg.info(' preprocess using PCA with', n_pcs, 'PCs') - logg.info('avoid this by setting n_pcs = 0', v='hint') + logg.hint('avoid this by setting n_pcs = 0') X = pca(adata.X, random_state=random_state, n_comps=n_pcs) adata.smp['X_pca'] = X logg.info(' using', n_pcs, 'principal components') @@ -95,7 +95,7 @@ def tsne(adata, random_state=0, n_pcs=50, perplexity=30, learning_rate=None, from MulticoreTSNE import MulticoreTSNE as TSNE params_sklearn['learning_rate'] = 200 if learning_rate is None else learning_rate tsne = TSNE(n_jobs=n_jobs, **params_sklearn) - logg.info(' using package MulticoreTSNE') + logg.info(' using package MulticoreTSNE by D. Ulyanov') X_tsne = tsne.fit_transform(X.astype(np.float64)) except ImportError: multicore_failed = True