#!/usr/bin/env python
# coding: utf-8
"""Set up paths and scanpy settings, then load the CKP25 metadata and counts."""
import re
import socket

import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

# The data root differs between hosts; pick it from the hostname.
if 'private' in socket.gethostname():
    topdir = '/home/cboix/data/DEVTRAJ/'
else:
    topdir = '/home/cboix/DEVTRAJ/'

# Figure parameters and directory:
dbdir = topdir + 'db/'
# verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80, color_map='viridis')
sc.logging.print_versions()
results_file = dbdir + 'CKP25/full_run_010621.h5ad'
pltprefix = 'full_sc_results'
sc.settings.figdir = topdir + 'img/CKP25'
figdir = sc.settings.figdir
# This second call overrides the dpi / color_map chosen just above.
sc.set_figure_params(scanpy=True, dpi=150, dpi_save=250, frameon=True,
                     vector_friendly=True, color_map=None, format='pdf',
                     transparent=False)

markergenes = ['Plp1', 'Gfap', 'Camk2a', 'Gria2', 'Cdkn1a', 'Ubb', 'Bcan',
               'Gad1', 'Csf1r']

# Read in the metadata:
# ---------------------
meta = pd.read_csv(dbdir + "Annotation/CKP25_mouse_metadata_010621.tsv",
                   sep="\t")
# Derive the sample label from the first fastq name. The dots are escaped so
# that only the literal "_1_sequence.fastq" text is stripped.
meta['label'] = [re.sub(r"_1_sequence\.fastq", "", x)
                 for x in meta.fastq1.to_numpy()]
meta.index = meta['label']
meta.index.names = ['index']

# Read in the counts and column/row names:
# ----------------------------------------
datapref = dbdir + "CKP25/all_aggregated"
# The matrix is transposed after reading, then named from the side files.
adata = sc.read(datapref + ".counts_mat.tsv", cache=True).T
adata.var_names = pd.read_csv(datapref + '.rn.symb.tsv', header=None)[0]
adata.obs_names = pd.read_csv(datapref + '.cn.tsv', header=None, sep="\t")[0]
adata.var_names_make_unique()
adata.var['gene'] = pd.read_csv(datapref + '.rn.tsv', header=None)[0].tolist()

# Merge the data with the metadata
# --------------------------------
adata.obs['cell'] = adata.obs_names
mdf = pd.merge(adata.obs, meta, how='left',
               left_index=True, right_index=True)
# Note adata must be first - othw disorganized.
# Copy the merged per-sample annotations onto the cells as categoricals.
adata.obs['cellid'] = mdf['label'].astype('category')
adata.obs['genotype'] = mdf['genotype'].astype('str').astype('category')
adata.obs['celltype'] = mdf['celltype'].astype('category')
adata.obs['mouse_id'] = mdf['mouse_id'].astype('category')
adata.obs['timepoint'] = mdf['timepoint'].astype('category')

# Flag the cells listed in the transgene sample file.
stg = pd.read_csv(dbdir + 'CKP25/samp_with_transgene.txt',
                  header=None)[0].tolist()
# `isin` hashes the list once instead of scanning it for every cell.
adata.obs['p25dummy'] = adata.obs['cell'].isin(stg)
adata.obs['has_p25'] = adata.obs['p25dummy'].astype('category')

# Read in fusions data
# --------------------
gfdf = pd.read_csv(dbdir + 'CKP25/allprojid_aggregated.starfusion.pred.tsv',
                   sep="\t")
sfdf = pd.read_csv(dbdir + 'CKP25/sf_cellcounts.tsv', sep="\t",
                   header=None, names=['sample', 'fcount'])
ifdf = pd.read_csv(dbdir + 'CKP25/sf_intra_cellcounts.tsv', sep="\t",
                   header=None, names=['sample', 'icount'])
# 'sample' is the only shared column, so this is the key the default merge
# was already using.
sfdf = pd.merge(sfdf, ifdf, how='left', on='sample')
sfdf.index = sfdf['sample']
sfdf.index.names = ['index']

# Cells with no row in the fusion tables get a count of zero.
sdf = pd.merge(adata.obs, sfdf, how='left',
               left_index=True, right_index=True)
sdf['fcount'] = sdf['fcount'].fillna(0)
sdf['icount'] = sdf['icount'].fillna(0)
adata.obs['fcount'] = sdf['fcount']
adata.obs['icount'] = sdf['icount']
adata.obs['fdummy'] = sdf['fcount'] > 0
adata.obs['idummy'] = sdf['icount'] > 0

# Read in gene metadata to choose only protein coding (+ other subtypes?):
# ------------------------------------------------------------------------
anno = pd.read_csv(
    dbdir + 'Annotation/' + 'gencode.vM25.basic.annotation.genes.tsv',
    sep="\t", header=None)
anno = anno.rename(columns={0: 'chr', 1: 'from', 2: 'elem', 3: 'start',
                            4: 'end', 5: 'v1', 6: 'strand', 7: 'v2',
                            8: 'ENSG', 9: 'type', 10: 'symbol'})

# Compile list of genes:
keep_type = ['protein_coding']
pc_genes = list(anno.ENSG[anno['type'].isin(keep_type)])
print(len(pc_genes))

# Remove non-protein coding genes
# -------------------------------
filter_topc = True  # Keep PC only
if filter_topc:
    # Vectorised membership test; the per-gene `name in pc_genes` list scan
    # was quadratic in the number of genes.
    in_pc = adata.var['gene'].isin(pc_genes).to_numpy()
    print("Keeping only the " + str(sum(in_pc)) + " protein coding genes.")
    # Take a real copy: the statements below modify the object in place,
    # which would otherwise happen on a view of the full matrix.
    adata = adata[:, in_pc].copy()

# Filter cells:
adata.obs['cell'] = adata.obs_names
sc.pp.filter_cells(adata, min_genes=100)
print(adata)

# Compute metrics for each cell:
adata.obs['n_genes'] = np.sum(adata.X > 0, axis=1)
adata.obs['n_counts'] = adata.X.sum(axis=1)

# Normalization:
# NOTE: ORIGINAL = no norm, no scale:
# --------------
# Per-cell normalisation and scaling are intentionally skipped; only the
# log1p transform is applied (customary, similar pseudotime results).
sc.pp.log1p(adata)
print(adata)

# Compute PCA, scatterplot:
# -------------------------
sc.tl.pca(adata)
# PCA with original labels and of markers:
sc.pl.pca_scatter(adata, color='celltype',
                  components=['1,2', '2,3', '1,3'], frameon=False)
sc.pl.pca_variance_ratio(adata, log=True)
adata.write(results_file)

# Compute dimensionality reductions:
# ----------------------------------
adata = sc.read(results_file)
# t-SNE with original labels:
sc.tl.tsne(adata, random_state=0, perplexity=20)
sc.pl.tsne(adata, color='celltype', size=40, frameon=False)
sc.pl.tsne(adata, color=['celltype', 'genotype', 'mouse_id', 'has_p25'],
           size=30, frameon=False)
# Run this to get colors first!
# Checkpoint the t-SNE results before moving on.
adata.write(results_file)

# Compute neighborhood graph + UMAP:
# ----------------------------------
adata = sc.read(results_file)
sc.pp.neighbors(adata, n_neighbors=20)
sc.tl.umap(adata, min_dist=.2, maxiter=None, random_state=2)
sc.pl.umap(adata, color='celltype', size=30, frameon=False)
sc.pl.umap(adata, color=['genotype', 'mouse_id', 'has_p25', 'timepoint'],
           size=30, frameon=False, legend_fontweight='bold')

# One titled UMAP panel per annotation column.
titled_panels = [
    ('celltype', 'Cell Type'),
    ('genotype', 'Genotype'),
    ('timepoint', 'Timepoint'),
    ('mouse_id', 'Mouse ID'),
    ('has_p25', 'Expresses p25 transgene'),
]
for obs_key, panel_title in titled_panels:
    sc.pl.umap(adata, color=obs_key, size=30, frameon=False,
               legend_fontweight='bold', title=panel_title)

sns.countplot(x='genotype', hue='has_p25', data=adata.obs)

# Cluster the graph + plot summary pages:
sc.tl.leiden(adata, resolution=2)
adata.obs['clusters'] = adata.obs.leiden

ondata_style = dict(legend_loc='on data', size=25, legend_fontsize=8,
                    frameon=False)
sc.pl.umap(adata, color=['leiden'],
           save=f"{pltprefix}umap_leidenonly_label_ondata.pdf",
           **ondata_style)
sc.pl.tsne(adata, color=['leiden', 'celltype'],
           save=f"{pltprefix}leiden_label_ondata.pdf",
           **ondata_style)

# Transgene status counted per cluster / cell type / mouse, with the
# x tick labels rotated to stay legible.
for obs_key in ('leiden', 'celltype', 'mouse_id'):
    ax = sns.countplot(x=obs_key, hue='has_p25', data=adata.obs)
    lb = ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

extra_markers = ['Gria2', 'Celsr2', 'Hjurp', 'Exoc4', 'Lig1', 'Cdkn1a',
                 'Ubb', 'Fth1']
sc.pl.umap(adata, color=extra_markers, size=35,
           save=f"{pltprefix}other_markers.pdf", frameon=False)

# Label clusters from
# marker genes
# --------------------------------
for group in ('leiden', 'celltype'):
    rank_pdf = f"{pltprefix}_{group}_rankgenes.pdf"
    sc.tl.rank_genes_groups(adata, group)
    sc.pl.rank_genes_groups(adata, n_genes=20, save='.pdf', show=False)
    sc.pl.rank_genes_groups_matrixplot(adata, n_genes=10, groupby=group,
                                       cmap='Blues', save=rank_pdf)
    sc.pl.rank_genes_groups_dotplot(adata, n_genes=10, groupby=group,
                                    save=rank_pdf)

# Merge cells/clusters based on the markers:
# ------------------------------------------
merge_dict = {
    'Excitatory': ['4', '5', '0', '8', '2', '14', '10'],
    'Inhibitory': ['6', '11'],
    'Stage 2': ['3', '7', '15'],
    'Oligodendrocyte': ['1', '13'],
    'OPC': ['12'],
    'Microglia': ['9'],
    'Batch': ['16'],
}
merge_dict2 = {
    'Ex0': ['4', '5', '0', '8'],
    'Ex1': ['2'],
    'Ex2': ['14'],
    'Ex3': ['10'],
    'In1': ['11'],
    'In0': ['6'],
    'Stage 2': ['3'],
    'Deid': ['7', '15'],
    'Oligodendrocyte': ['1', '13'],
    'OPC': ['12'],
    'Microglia': ['9'],
    'Batch': ['16'],
}

# Invert each {label: [clusters]} table into {cluster: label}, translate the
# leiden assignment through it, and mark any unlisted cluster 'Unlabeled'.
cluster_ids = adata.obs['leiden'].astype(str)
for obs_column, table in (('label', merge_dict), ('sublabel', merge_dict2)):
    cluster_to_name = {cluster: name
                       for name, members in table.items()
                       for cluster in members}
    named = cluster_ids.map(cluster_to_name).fillna('Unlabeled')
    adata.obs[obs_column] = named.astype('category')

# Plot with the appropriate colors:
# ---------------------------------
adata.uns['label_colors'] = [
    'lightgrey', '#33A02C', '#1F78B4', '#6A3D9A', '#B15928', '#FF7F00',
    '#E31A1C']
adata.uns['sublabel_colors'] = [
    'lightgrey', 'lightgrey', '#33A02C', '#B2DF8A', '#D6BC91', '#FB9A99',
    '#1F78B4', '#A6CEE3', '#6A3D9A', '#B15928', '#FF7F00', '#E31A1C']

for obs_column in ('label', 'sublabel'):
    sc.pl.umap(adata, color=[obs_column], legend_loc='on data', size=25,
               legend_fontsize=8, frameon=False,
               save=f"{pltprefix}umap_{obs_column}only_label_ondata.pdf")

# Marker ranking for the merged labels (matrix plot only).
for group in ('label', 'sublabel'):
    sc.tl.rank_genes_groups(adata, group)
    sc.pl.rank_genes_groups_matrixplot(
        adata, n_genes=10, groupby=group, cmap='Blues',
        save=f"{pltprefix}_{group}_rankgenes.pdf")

ax = sns.countplot(x='sublabel', hue='timepoint', data=adata.obs)
legend_labels, _ = ax.get_legend_handles_labels()
out = ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

# Set up sets for heatmaps:
# -------------------------
# NOTE(review): genes missing from adata.var_names are dropped silently
# below. 'Tir3' and 'Pkr' look like they may be meant as 'Tlr3' and an alias
# of 'Eif2ak2' - confirm against the annotation.
sig_dict = {
    'Cytokine': ['Ccl2', 'Ccl20', 'Cxcl10', 'Cxcl16', 'Il1a', 'Il6',
                 'Il15', 'Il18'],
    'ImmuneReg': ['Ifitm3', 'Ifnar1', 'Ifngr1', 'Irf7', 'Irf9', 'Jak3',
                  'Myd88', 'Nfkbia', 'Rela', 'Relb', 'Socs3', 'Tir3',
                  'Traf2'],
    'Inflam.': ['Psmb8', 'Psmb9'],
    'Nuc. Acid Sens.': ['Cgas', 'Ddx41', 'Ddx58', 'Ifih1', 'Ifit1', 'Isg20',
                        'Oasl2', 'Pkr', 'Zbp1'],
    'Cyclin': ['Cdk5', 'Cdk5r1', 'Cdkn1a', 'Cdkn2a'],
}

# Flatten the gene sets into one list, recording for each non-empty set its
# name and the inclusive (first, last) positions it occupies in that list.
ifl_markers, ifl_tplist, ifl_lblist = [], [], []
for set_name, candidates in sig_dict.items():
    present = [gene for gene in candidates if gene in adata.var_names]
    if not present:
        continue
    first = len(ifl_markers)
    ifl_markers.extend(present)
    ifl_lblist.append(set_name)
    ifl_tplist.append((first, len(ifl_markers) - 1))

group = 'sublabel'
fig = sc.pl.matrixplot(
    adata, ifl_markers, groupby=group, log=True, dendrogram=True,
    var_group_positions=ifl_tplist, var_group_labels=ifl_lblist,
    cmap='Blues',
    save=f"{pltprefix}{group}_inflamm_markers_reord.pdf")

# Make heatmap plot:
fig = sc.pl.heatmap(
    adata, ifl_markers, groupby=group, var_group_rotation=90,
    var_group_positions=ifl_tplist, log=True, cmap='Blues',
    var_group_labels=ifl_lblist,
    save=f"{pltprefix}{group}_inflamm_markers_heatmap.pdf")

sc.pl.umap(adata, ncols=6, color=ifl_markers, size=30,
           save=f"{pltprefix}_inflamm_umap_markers.pdf", frameon=False)

# Plot the heatmap of
# markers:
# ----------------------------
trimarker_dict = {
    'Inhibitory': ['Gad1', 'Gad2'],
    'Excitatory': ['Camk2a', 'Gria2', 'Syt1'],
    'Astrocyte': ['Gfap'],
    'Microglia': ['Cd33', 'Csf1r'],
    'OPC': ['Bcan'],
    'Oligodendrocyte': ['Plp1', 'Mbp'],
    'DNA Damage': ['Hjurp', 'Lig1', 'Zgrf1'],
    'Senescence': ['Cdkn1a', 'Cdkn2a', 'H2ax', 'Ubb'],
}
markergenes = ['Plp1', 'Gfap', 'Camk2a', 'Gria2', 'Cdkn1a', 'Ubb', 'Bcan',
               'Gad1', 'Csf1r']


def _group_markers(marker_sets, var_names):
    """Flatten named gene sets into a gene list plus bracket positions.

    Genes absent from ``var_names`` are dropped and sets left empty are
    skipped.

    Returns a ``(genes, positions, labels)`` tuple: the flat gene list, the
    inclusive ``(first, last)`` index span of each kept set within that
    list, and the matching set names.
    """
    genes, positions, labels = [], [], []
    for set_name, candidates in marker_sets.items():
        present = [gene for gene in candidates if gene in var_names]
        if not present:
            continue
        first = len(genes)
        genes.extend(present)
        positions.append((first, len(genes) - 1))
        labels.append(set_name)
    return genes, positions, labels


# set up sets for heatmaps:
pltmarkers, tplist, lblist = _group_markers(trimarker_dict, adata.var_names)

# Cells outside the 'Batch' label and the 'Deid' sublabel; this does not
# depend on the grouping, so it is computed once rather than per iteration.
subadata = adata[(adata.obs.label != 'Batch')
                 & (adata.obs.sublabel != 'Deid')]

# Make Heatmap plot (plain + reord)
# NOTE(review): the original indentation is lost; the per-cell heatmap is
# assumed to run for each grouping, like the matrixplot - confirm.
for group in ['label', 'sublabel']:
    fig = sc.pl.matrixplot(
        adata, pltmarkers, groupby=group, log=True, dendrogram=True,
        var_group_positions=tplist, var_group_labels=lblist, cmap='Blues',
        save=pltprefix + "_" + group + '_markers_reord.pdf')
    # Make heatmap plot:
    fig = sc.pl.heatmap(
        subadata, pltmarkers, groupby=group, var_group_rotation=90,
        var_group_positions=tplist, var_group_labels=lblist, log=True,
        cmap='Blues', save=pltprefix + "_" + group + '_markers.pdf')

# The properly ordered dictionary:
# --------------------------------
trimarker_dict2 = {
    'Inhibitory': ['Gad1', 'Gad2'],
    'Excitatory': ['Camk2a', 'Gria2', 'Syt1'],
    'DNA Damage': ['Hjurp', 'Lig1', 'Zgrf1'],
    'Senescence': ['Cdkn1a', 'H2ax', 'Ubb', 'Apoe'],
    'Astrocyte': ['Gfap'],
    'Microglia': ['Cd33', 'Csf1r'],
    'OPC': ['Bcan'],
    'Oligodendrocyte': ['Plp1', 'Mbp'],
}

# set up sets for heatmaps:
pltmarkers, tplist, lblist = _group_markers(trimarker_dict2, adata.var_names)

# Prefix each sublabel so that sorting the names gives the plotting order.
sfrom = ['Batch', 'Deid', 'In0', 'In1', 'Ex0', 'Ex1', 'Ex2', 'Ex3',
         'Stage 2', 'Microglia', 'OPC', 'Oligodendrocyte']
sto = ['0Batch', '01Deid', '1In0', '2In1', '3Ex0', '4Ex1', '5Ex2', '6Ex3',
       '7Stage 2', '8Microglia', '9OPC', '99Oligodendrocyte']
sublabel_rename = dict(zip(sfrom, sto))
# Build the column in a single assignment. The previous
# `adata.obs['sublabelord'].loc[mask] = value` was chained assignment, which
# pandas does not guarantee to write back to `adata.obs`. Sublabels without
# an entry in the table keep their name.
adata.obs['sublabelord'] = adata.obs['sublabel'].astype('str').map(
    lambda name: sublabel_rename.get(name, name))
adata.uns['sublabelord_colors'] = [
    '#1F78B4', '#A6CEE3', '#33A02C', '#B2DF8A', '#D6BC91', '#FB9A99',
    '#E31A1C', '#6A3D9A', '#B15928', '#FF7F00', 'lightgrey', 'lightgrey']

# Make heatmap plot:
group = 'sublabelord'
# Re-subset so the new column is present; copied so that plotting works on
# a standalone object rather than a view of `adata`.
subadata = adata[(adata.obs.label != 'Batch')
                 & (adata.obs.sublabel != 'Deid')].copy()
# Same heatmap saved in both formats.
for extension in ('.pdf', '.png'):
    fig = sc.pl.heatmap(
        subadata, pltmarkers, groupby=group, var_group_rotation=90,
        var_group_positions=tplist, var_group_labels=lblist, log=True,
        cmap='Blues',
        save=pltprefix + "_" + group + '_markers' + extension)

# Trajectory with diffusion and PAGA?
# -----------------------------------
# Copy the subset: the sc.tl.* calls below write results into the object,
# which should not happen on a view of `adata`.
subadata = adata[adata.obs.sublabel.isin(
    ['Ex0', 'Ex1', 'Ex2', 'Ex3', 'Stage 2'])].copy()
sc.tl.paga(subadata, groups='sublabel')
sc.pl.paga(subadata, color=['sublabel', 'Gria2', 'Cdkn1a', 'Lig1'],
           save=pltprefix + "_" + "sublabel" + '_paga_genes.pdf')

sc.tl.diffmap(subadata, n_comps=20)
# The root cell has to be set before sc.tl.dpt, otherwise no
# 'dpt_pseudotime' is computed and the diffmap plot below cannot colour
# by it.
subadata.uns['iroot'] = 1
sc.tl.dpt(subadata, n_branchings=2, n_dcs=15)
sc.pl.diffmap(subadata, color=['dpt_pseudotime', 'dpt_groups', 'sublabel'])
# TODO: `sc.pl.paga_path(adata)` was called here, but `nodes` and `keys` are
# required arguments, so the call raised and stopped the script. PAGA is
# also only computed on `subadata` above. Re-enable with an explicit node
# path and gene list.

# ------------------------------
group = 'sublabel'
sc.tl.rank_genes_groups(subadata, group)
sc.tl.dendrogram(subadata, group)
sc.pl.rank_genes_groups_matrixplot(
    subadata, n_genes=10, groupby=group, cmap='Blues',
    save=pltprefix + '_' + group + '_subsetted_rankgenes.pdf')

# Top 10 ranked genes per group, flattened group by group.
a = subadata.uns['rank_genes_groups']['names']
pltlist = np.ravel(pd.DataFrame(a).to_numpy()[0:10, :].T)

# Top 20 ranked genes for each of the first five groups.
mat = pd.DataFrame(a).to_numpy()[0:20, ].T
anam = list(a.dtype.names)
learned_markers = dict(zip(anam[:5], mat))


def _group_markers(marker_sets, var_names):
    """Flatten named gene sets into a gene list plus bracket positions.

    Genes absent from ``var_names`` are dropped and sets left empty are
    skipped.

    Returns a ``(genes, positions, labels)`` tuple: the flat gene list, the
    inclusive ``(first, last)`` index span of each kept set within that
    list, and the matching set names.
    """
    genes, positions, labels = [], [], []
    for set_name, candidates in marker_sets.items():
        present = [gene for gene in candidates if gene in var_names]
        if not present:
            continue
        first = len(genes)
        genes.extend(present)
        positions.append((first, len(genes) - 1))
        labels.append(set_name)
    return genes, positions, labels


# set up sets for heatmaps:
pltmarkers, tplist, lblist = _group_markers(learned_markers, adata.var_names)

# Make Heatmap plot (plain + reord)
fig = sc.pl.matrixplot(
    subadata, pltmarkers, groupby=group, log=True, dendrogram=True,
    var_group_positions=tplist, var_group_labels=lblist, cmap='Blues',
    save=pltprefix + group + '_subsetted_learned_markers_reord.pdf')
# Make heatmap plot:
fig = sc.pl.heatmap(
    subadata, pltmarkers, groupby=group, var_group_rotation=90,
    var_group_positions=tplist, var_group_labels=lblist, log=True,
    show_gene_labels=True, cmap='Blues',
    save=pltprefix + group + '_subsetted_learned_markers.pdf')

# WRITE DATA:
# Work on a copy so the embedding columns are not added to adata.obs itself.
metadf = adata.obs.copy()
metadf['Xtsne1'] = adata.obsm['X_tsne'][:, 0]
metadf['Xtsne2'] = adata.obsm['X_tsne'][:, 1]
metadf['Xumap1'] = adata.obsm['X_umap'][:, 0]
metadf['Xumap2'] = adata.obsm['X_umap'][:, 1]
metadf.to_csv(dbdir + 'Annotation/CKP25_UMAP_clusters_dataframe.tsv',
              sep="\t", header=True, index=False)

# Look at OPCs - are there expression differences
group = 'genotype'
subadata = adata[adata.obs.label == 'OPC'].copy()
sc.tl.rank_genes_groups(subadata, group)
sc.tl.dendrogram(subadata, group)
sc.pl.rank_genes_groups_matrixplot(
    subadata, n_genes=10, groupby=group, cmap='Blues',
    save=pltprefix + '_' + group + '_subsetted_rankgenes.pdf')

# Make extended plot: top 20 genes of the first two genotype groups plus a
# fixed OPC gene list.
a = subadata.uns['rank_genes_groups']['names']
mat = pd.DataFrame(a).to_numpy()[0:20, ].T
anam = list(a.dtype.names)
learned_markers = dict(zip(anam[:2], mat))
learned_markers['OPC'] = [
    'Apoe', 'Zbtb20', 'Ptprz1', 'Bcan', 'Slc1a3', 'Atp1a2', 'Slc1a2',
    'Ntsr2', 'Plpp3', 'Fgfr3']

# set up sets for heatmaps:
pltmarkers, tplist, lblist = _group_markers(learned_markers, adata.var_names)

# Make Heatmap plot (plain + reord)
fig = sc.pl.matrixplot(
    subadata, pltmarkers, groupby=group, log=True, dendrogram=True,
    var_group_positions=tplist, var_group_labels=lblist, cmap='Blues',
    save=pltprefix + group + '_subsetted_opc_markers_reord.pdf')
# Make heatmap plot:
fig = sc.pl.heatmap(
    subadata, pltmarkers, groupby=group, var_group_rotation=90,
    var_group_positions=tplist, var_group_labels=lblist, log=True,
    show_gene_labels=True, cmap='Blues',
    save=pltprefix + group + '_subsetted_opc_markers.pdf')

sc.pl.violin(adata, ['n_counts', 'n_genes'], groupby='leiden',
             multi_panel=True, save=pltprefix + 'stats_density.pdf')

# The per-celltype and per-leiden fusion plots used to share one file name,
# so the first was overwritten. The celltype plot now has its own name; the
# leiden plot keeps the original one.
sc.pl.violin(adata, ['fcount', 'fdummy'], groupby='celltype',
             multi_panel=True,
             save=pltprefix + 'fusions_celltype_density.pdf')
sc.pl.violin(adata, ['fcount', 'fdummy'], groupby='leiden',
             multi_panel=True, save=pltprefix + 'fusions_density.pdf')


def _fusion_countplot(flag_column, legend_title):
    """Plot cell counts per sublabel, split by a boolean obs column.

    Relabels the legend entries 'False'/'True' under ``legend_title``,
    rotates the x tick labels, and returns the axes.
    """
    axis = sns.countplot(x='sublabel', hue=flag_column, data=adata.obs)
    handles, _ = axis.get_legend_handles_labels()
    axis.legend(handles, ['False', 'True'], title=legend_title)
    axis.set_xticklabels(axis.get_xticklabels(), rotation=90)
    return axis


ax = _fusion_countplot('fdummy', '1+ fusion(s)')
ax = _fusion_countplot('idummy', '1+ intra-chrom fusion(s)')

# Fusions not counted as intra-chromosomal.
adata.obs['tcount'] = adata.obs['fcount'] - adata.obs['icount']
adata.obs['tdummy'] = adata.obs['tcount'] > 0
ax = _fusion_countplot('tdummy', '1+ inter-chrom fusion(s)')

adata.obs['fcat'] = adata.obs['fdummy'].astype('category')
sc.pl.violin(adata, ['n_counts', 'n_genes'], groupby='fcat',
             xlabel='1+ gene fusion(s)', multi_panel=True,
             save=pltprefix + 'fusions_counts_density.pdf')