-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e5e49a9
commit 2f71378
Showing
5 changed files
with
121,802 additions
and
22 deletions.
There are no files selected for viewing
299 changes: 299 additions & 0 deletions
299
tutorials/analysis_code/.ipynb_checkpoints/compute_contact_prob-checkpoint.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "3ef4e4c3-bbff-4170-9390-b51feae5efb6", | ||
"metadata": {}, | ||
"source": [ | ||
"## Import the necessary packages" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "be874cb0-b8ff-4af7-ac51-7fd3c914291a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import sys\n", | ||
"import numpy as np\n", | ||
"import MDAnalysis as mda\n", | ||
"import sklearn.cluster as sk\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import copy\n", | ||
"import subprocess\n", | ||
"import scipy.stats\n", | ||
"import matplotlib.patches as mpatches\n", | ||
"import matplotlib as mpl\n", | ||
"\n", | ||
"mpl.rcParams['pdf.fonttype'] = 42\n", | ||
"\n", | ||
"plt.rcParams['font.family'] = 'Helvetica'\n", | ||
"plt.rcParams['font.size'] = 22" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "b5d8c8a3-795d-423e-bf4b-a55f5f0289b5", | ||
"metadata": {}, | ||
"source": [ | ||
"We defined the important parameters in the next block." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "7b3ae3b2-9dc7-4d74-a860-0b35631f3c28", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# The next two parameters are used when computing the DamID and TSASeq signals\n", | ||
"sigma_tanh = 4\n", | ||
"rc_tanh = 0.75\n", | ||
"\n", | ||
"N_chr_beads = 60642 #Number of chromosome particles \n", | ||
"\n", | ||
"first_frame = 0 #The analysis starts from the first frame, currently, 500\n", | ||
"N_nucleolus_particles = 300 #The number of nucleolus particles\n", | ||
"N_speckles_particles = 1600 #The number of speckle particles\n", | ||
"N_lamina_particles = 8000 #The number of lamina particles\n", | ||
"radius_nucleus = 13.0 #The radius of cell nucleus, 13.0 (LJ unit) = 5.0 µm\n", | ||
"\n", | ||
"\"\"\"Info files\"\"\"\n", | ||
"gLength = np.loadtxt(\"mol_info/gLengthFile.txt\",dtype=int) #The length of each chromosome\n", | ||
"maternalIdx = np.loadtxt(\"mol_info/maternalIdxFile.txt\",dtype=int) #Index of each maternal chromosome\n", | ||
"paternalIdx = np.loadtxt(\"mol_info/paternalIdxFile.txt\",dtype=int) #Index of each paternal chromosome" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "d5c1c187-a093-40c5-b67f-94ffc8c8806e", | ||
"metadata": {}, | ||
"source": [ | ||
"## Compute the DamID and TSASeq\n", | ||
"\n", | ||
"The next block defines a function to calculate the DamID and TSASeq, and is similar to OpenNucleome/openNucleome/utility/DamID_TSASeq_calculation.py" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "29222bbb-8773-4571-ac97-5756c1570785", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def DamID_TSASeq_calculation(traj_data):\n", | ||
" N_frame = len(traj_data)-first_frame\n", | ||
" exp_tsa_seq = np.zeros(N_chr_beads)\n", | ||
" exp_damid = np.zeros(N_chr_beads)\n", | ||
" exp_tsa_seq_all_frames = np.zeros((N_frame,N_chr_beads))\n", | ||
" exp_damid_all_frames = np.zeros((N_frame,N_chr_beads))\n", | ||
" N_speckle = []\n", | ||
" \n", | ||
" for frame_number in range(first_frame,len(traj_data),1):\n", | ||
" chr_I_data = traj_data.trajectory[frame_number].positions[:N_chr_beads]\n", | ||
" speckles_data = traj_data.trajectory[frame_number].positions[(N_chr_beads+N_nucleolus_particles):\n", | ||
" (N_chr_beads+N_nucleolus_particles+N_speckles_particles)]\n", | ||
" lamina_data = traj_data.trajectory[frame_number].positions[(N_chr_beads+N_nucleolus_particles+N_speckles_particles):]\n", | ||
"\n", | ||
" #Following code identifies the speckles clusters in the system and calculates and saves center of mass positions\n", | ||
" \"\"\"Code Snippet from DBSCAN Python Page\"\"\"\n", | ||
" \"\"\"https://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html#sphx-glr-auto-examples-cluster-plot-dbscan-py\"\"\"\n", | ||
" db = sk.DBSCAN(eps=1.0).fit(speckles_data)\n", | ||
" core_samples_mask = np.zeros_like(db.labels_, dtype=bool)\n", | ||
" core_samples_mask[db.core_sample_indices_] = True\n", | ||
" labels = db.labels_\n", | ||
" # Number of clusters in labels, ignoring noise if present.\n", | ||
" n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n", | ||
" n_noise_ = list(labels).count(-1)\n", | ||
" \n", | ||
" N_speckle.append(n_clusters)\n", | ||
"\n", | ||
" cluster_com_master = np.zeros((n_clusters_,3))\n", | ||
" radius_cluster_master = np.zeros(n_clusters_)\n", | ||
" i = 0\n", | ||
" while(i<=max(labels)):\n", | ||
" #Go cluster by cluster\n", | ||
" r_points_cluster = copy.deepcopy(speckles_data[labels==i])\n", | ||
" cluster_com = np.mean(r_points_cluster,axis=0)\n", | ||
" cluster_com_master[i] = copy.deepcopy(cluster_com)\n", | ||
" radius_cluster_master[i] = (np.sum((r_points_cluster-cluster_com)**2,axis=None)/len(r_points_cluster))**0.5\n", | ||
" i +=1\n", | ||
"\n", | ||
" for i in range(len(chr_I_data)):\n", | ||
" ###Speckles\n", | ||
" distances_from_speckles = np.sum((cluster_com_master-chr_I_data[i])**2,axis=1)**0.5\n", | ||
" distances_from_speckles -= radius_cluster_master\n", | ||
"\n", | ||
" #Do this only for speckles\n", | ||
" exp_tsa_seq_all_frames[frame_number-first_frame,i] = \n", | ||
" np.sum(0.5*(1.0+np.tanh(sigma_tanh*(rc_tanh-distances_from_speckles))),axis=None)\n", | ||
" #exp_tsa_seq will be normalized by total (itself) while getting enrichment and so it does not matter if I divide by number of speckles\n", | ||
" #But have to divide exp_tsa_seq_all_frames by number of speckles so that the values can be interpretted as probabilities\n", | ||
" #when calculating correlations\n", | ||
" exp_tsa_seq_all_frames[frame_number-first_frame,i] /= float(n_clusters_)\n", | ||
" exp_tsa_seq[i] += copy.deepcopy(exp_tsa_seq_all_frames[frame_number-first_frame,i])\n", | ||
"\n", | ||
" ###Lamina\n", | ||
" distances_from_lamina = np.sum((lamina_data-chr_I_data[i])**2,axis=1)**0.5\n", | ||
"\n", | ||
" #Do this only for lamina\n", | ||
" exp_damid_all_frames[frame_number-first_frame,i] = \n", | ||
" np.sum(0.5*(1.0+np.tanh(sigma_tanh*(rc_tanh-distances_from_lamina))),axis=None)\n", | ||
" exp_damid[i] += copy.deepcopy(exp_damid_all_frames[frame_number-first_frame,i])\n", | ||
"\n", | ||
"\n", | ||
" damid_all_frames_haploid = np.zeros((N_frame,int(N_chr_beads/2)))\n", | ||
" tsaseq_all_frames_haploid = np.zeros((N_frame,int(N_chr_beads/2)))\n", | ||
" for i in range(23):\n", | ||
" damid_all_frames_haploid[:,gLength[i]:gLength[i+1]] = \n", | ||
" 0.5*(exp_damid_all_frames[:,maternalIdx[i][0]-1:maternalIdx[i][1]]\n", | ||
" +exp_damid_all_frames[:,paternalIdx[i][0]-1:paternalIdx[i][1]])\n", | ||
" tsaseq_all_frames_haploid[:,gLength[i]:gLength[i+1]] = \n", | ||
" 0.5*(exp_tsa_seq_all_frames[:,maternalIdx[i][0]-1:maternalIdx[i][1]]\n", | ||
" +exp_tsa_seq_all_frames[:,paternalIdx[i][0]-1:paternalIdx[i][1]])\n", | ||
" \n", | ||
" \n", | ||
" return (np.sum(damid_all_frames_haploid,axis=0), np.sum(tsaseq_all_frames_haploid,axis=0), N_speckle) " | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f1056cbd", | ||
"metadata": {}, | ||
"source": [ | ||
"In the next step, we run a small sample (10 frames from the simulation) with the function above and plotted the simulated DamID and TSASeq against experimental results." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "fd8eec86", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"damid_simulated, tsaseq_simulated, N_spec_clusters = DamID_TSASeq_calculation(\"DUMP_FILE.dcd\")\n", | ||
"\n", | ||
"damid_data_low_res = np.loadtxt(\"mol_info/DAM-ID_HFF_100KB.txt\",usecols=[1])\n", | ||
"tsa_data_low_res = np.loadtxt(\"mol_info/TSA-Seq_SON_HFF_100KB.txt\",usecols=[1])\n", | ||
"\n", | ||
"\n", | ||
"chr_choose = 7\n", | ||
"dpi_ = 3000\n", | ||
"bin_width_ = 0.8\n", | ||
"yaxis_labels = [\"DamID\\n(Model)\",\"DamID\\n(Expt)\",\"TSA-Seq\\n(Model)\",\"TSA-Seq\\n(Expt)\"]\n", | ||
"subplot_dict = dict({\"L\":0,\"S\":2})\n", | ||
"\n", | ||
"x_axis = np.linspace(0,gLength[chr_choose]-gLength[chr_choose-1]-1,\n", | ||
" gLength[chr_choose]-gLength[chr_choose-1],dtype=int)\n", | ||
"fig1, axs1 = plt.subplots(4,figsize=(9,6.3),sharex=True,gridspec_kw={'height_ratios':[1,1,1,1]},dpi=dpi_)\n", | ||
"\n", | ||
"plt.subplots_adjust(hspace=0)\n", | ||
"axs1[0].set_title('Chromosome 7')\n", | ||
"\n", | ||
"damid_data_low_res_chr = copy.deepcopy(damid_data_low_res[paternalIdx[chr_disp-1][0]-1\n", | ||
" :paternalIdx[chr_disp-1][1]])\n", | ||
"tsa_data_low_res_chr = copy.deepcopy(tsa_data_low_res[paternalIdx[chr_disp-1][0]-1\n", | ||
" :paternalIdx[chr_disp-1][1]])\n", | ||
"damid_data_low_res_chr_positive = damid_data_low_res_chr>=0.0\n", | ||
"tsa_data_low_res_positive = tsa_data_low_res_chr>=0.0\n", | ||
"\n", | ||
"for nuclear_body in [\"L\",\"S\"]:\n", | ||
"\n", | ||
" if nuclear_body == 'L':\n", | ||
" laf_chr = damid_simulated[gLength[chr_choose-1]:gLength[chr_choose]]\n", | ||
" else:\n", | ||
" laf_chr = tsaseq_simulated[gLength[chr_choose-1]:gLength[chr_choose]]\n", | ||
"\n", | ||
" laf_chr_OE = (laf_chr/(np.mean(laf_chr[:])))\n", | ||
"\n", | ||
" log2_laf_chr = np.zeros(len(laf_chr_OE))\n", | ||
" non_zero_indices = (laf_chr_OE!=0.0)\n", | ||
" log2_laf_chr[non_zero_indices] = np.log2(laf_chr_OE[non_zero_indices])\n", | ||
"\n", | ||
" enriched_indices = log2_laf_chr>=0.0\n", | ||
" axs1[subplot_dict[nuclear_body]].bar(x_axis[enriched_indices],log2_laf_chr[enriched_indices],\n", | ||
" width=bin_width_,color=\"orange\",alpha=0.8)\n", | ||
" axs1[subplot_dict[nuclear_body]].bar(x_axis[~enriched_indices],log2_laf_chr[~enriched_indices],\n", | ||
" width=bin_width_,color=\"blue\",alpha=0.8)\n", | ||
" axs1[subplot_dict[nuclear_body]].set_ylim([-2.0,2.0])\n", | ||
" axs1[subplot_dict[nuclear_body]].set_xlim([0,x_axis[-1]])\n", | ||
" axs1[subplot_dict[nuclear_body]].spines['top'].set_linewidth('2.0')\n", | ||
" axs1[subplot_dict[nuclear_body]].spines['right'].set_linewidth('2.0')\n", | ||
" axs1[subplot_dict[nuclear_body]].spines['left'].set_linewidth('2.0')\n", | ||
" axs1[subplot_dict[nuclear_body]].spines['bottom'].set_linewidth('2.0')\n", | ||
" \n", | ||
"axs1[1].bar(x_axis[damid_data_low_res_chr_positive ],\n", | ||
" damid_data_low_res_chr[damid_data_low_res_chr_positive],color='orange',width=bin_width_,alpha=0.8)\n", | ||
"axs1[1].bar(x_axis[~damid_data_low_res_chr_positive ],\n", | ||
" damid_data_low_res_chr[~damid_data_low_res_chr_positive],color='blue',width=bin_width_,alpha=0.8)\n", | ||
"axs1[1].set_ylim([-2.0,2.0])\n", | ||
"axs1[3].bar(x_axis[tsa_data_low_res_positive],\n", | ||
" tsa_data_low_res_chr[tsa_data_low_res_positive],color='orange',width=bin_width_, alpha=0.8)\n", | ||
"axs1[3].bar(x_axis[~tsa_data_low_res_positive],\n", | ||
" tsa_data_low_res_chr[~tsa_data_low_res_positive],color='blue',width=bin_width_, alpha=0.8)\n", | ||
"axs1[3].set_ylim([-2.0,2.0])\n", | ||
"axs1[3].spines['top'].set_linewidth('2.0')\n", | ||
"axs1[3].spines['right'].set_linewidth('2.0')\n", | ||
"axs1[3].spines['left'].set_linewidth('2.0')\n", | ||
"axs1[3].spines['bottom'].set_linewidth('2.0')\n", | ||
"axs1[1].spines['top'].set_linewidth('2.0')\n", | ||
"axs1[1].spines['right'].set_linewidth('2.0')\n", | ||
"axs1[1].spines['left'].set_linewidth('2.0')\n", | ||
"axs1[1].spines['bottom'].set_linewidth('2.0')\n", | ||
"\n", | ||
"for i in range(4):\n", | ||
" axs1[i].set_ylabel(yaxis_labels[i],fontsize=22)\n", | ||
"axs1[3].set_xlabel(\"Genomic position (100KB)\",fontsize=24)\n", | ||
"\n", | ||
"location_array = axs1[0].get_position().get_points()\n", | ||
"x_legend,y_legend = location_array[0]+0.5*(location_array[1]-location_array[0])\n", | ||
"\n", | ||
"y_legend += 1.5 \n", | ||
"\n", | ||
"plt.tight_layout()\n", | ||
"plt.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "15f5efce-885f-4716-80fa-7e66a8333457", | ||
"metadata": {}, | ||
"source": [ | ||
"## Compute the chromosome contact probabilities" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "7dd7e0df-e4aa-491f-842a-65f44879c955", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"subprocess.call([\"gfortran -o chromsome_contact_calculation ../../openNucleome/utility/chromsome_contact_calculation.f90\"],shell=True,stdout=subprocess.PIPE)\n", | ||
"subprocess.call([\"./chromsome_contact_calculation DUMP_FILE.dcd 1 -1 ./contact_prob/ 1 counter.txt\"],shell=True,stdout=subprocess.PIPE)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.13" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.