store the changes

ZhangGroup-MITChemistry · Sep 29, 2023 · 2f71378 · 2f71378
1 parent e5e49a9
commit 2f71378
Show file tree

Hide file tree

Showing 5 changed files with 121,802 additions and 22 deletions.
diff --git a/tutorials/analysis_code/.ipynb_checkpoints/compute_contact_prob-checkpoint.ipynb b/tutorials/analysis_code/.ipynb_checkpoints/compute_contact_prob-checkpoint.ipynb
@@ -0,0 +1,299 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3ef4e4c3-bbff-4170-9390-b51feae5efb6",
+   "metadata": {},
+   "source": [
+    "## Import the necessary packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "be874cb0-b8ff-4af7-ac51-7fd3c914291a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import numpy as np\n",
+    "import MDAnalysis as mda\n",
+    "import sklearn.cluster as sk\n",
+    "import matplotlib.pyplot as plt\n",
+    "import copy\n",
+    "import subprocess\n",
+    "import scipy.stats\n",
+    "import matplotlib.patches as mpatches\n",
+    "import matplotlib as mpl\n",
+    "\n",
+    "mpl.rcParams['pdf.fonttype'] = 42\n",
+    "\n",
+    "plt.rcParams['font.family'] = 'Helvetica'\n",
+    "plt.rcParams['font.size'] = 22"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5d8c8a3-795d-423e-bf4b-a55f5f0289b5",
+   "metadata": {},
+   "source": [
+    "We defined the important parameters in the next block."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "7b3ae3b2-9dc7-4d74-a860-0b35631f3c28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# The next two parameters are used when computing the DamID and TSASeq signals\n",
+    "sigma_tanh          =  4\n",
+    "rc_tanh             =  0.75\n",
+    "\n",
+    "N_chr_beads         = 60642            #Number of chromosome particles \n",
+    "\n",
+    "first_frame                 = 0      #The analysis starts from the first frame, currently, 500\n",
+    "N_nucleolus_particles       = 300      #The number of nucleolus particles\n",
+    "N_speckles_particles        = 1600     #The number of speckle particles\n",
+    "N_lamina_particles          = 8000     #The number of lamina particles\n",
+    "radius_nucleus              = 13.0     #The radius of cell nucleus, 13.0 (LJ unit) = 5.0 µm\n",
+    "\n",
+    "\"\"\"Info files\"\"\"\n",
+    "gLength             = np.loadtxt(\"mol_info/gLengthFile.txt\",dtype=int)      #The length of each chromosome\n",
+    "maternalIdx         = np.loadtxt(\"mol_info/maternalIdxFile.txt\",dtype=int)  #Index of each maternal chromosome\n",
+    "paternalIdx         = np.loadtxt(\"mol_info/paternalIdxFile.txt\",dtype=int)  #Index of each paternal chromosome"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5c1c187-a093-40c5-b67f-94ffc8c8806e",
+   "metadata": {},
+   "source": [
+    "## Compute the DamID and TSASeq\n",
+    "\n",
+    "The next block defines a function to calculate the DamID and TSASeq, and is similar to OpenNucleome/openNucleome/utility/DamID_TSASeq_calculation.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29222bbb-8773-4571-ac97-5756c1570785",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def DamID_TSASeq_calculation(traj_data):\n",
+    "    N_frame                     = len(traj_data)-first_frame\n",
+    "    exp_tsa_seq                 = np.zeros(N_chr_beads)\n",
+    "    exp_damid                   = np.zeros(N_chr_beads)\n",
+    "    exp_tsa_seq_all_frames      = np.zeros((N_frame,N_chr_beads))\n",
+    "    exp_damid_all_frames        = np.zeros((N_frame,N_chr_beads))\n",
+    "    N_speckle                   = []\n",
+    "    \n",
+    "    for frame_number in range(first_frame,len(traj_data),1):\n",
+    "        chr_I_data          = traj_data.trajectory[frame_number].positions[:N_chr_beads]\n",
+    "        speckles_data       = traj_data.trajectory[frame_number].positions[(N_chr_beads+N_nucleolus_particles):\n",
+    "                                                                           (N_chr_beads+N_nucleolus_particles+N_speckles_particles)]\n",
+    "        lamina_data         = traj_data.trajectory[frame_number].positions[(N_chr_beads+N_nucleolus_particles+N_speckles_particles):]\n",
+    "\n",
+    "        #Following code identifies the speckles clusters in the system and calculates and saves center of mass positions\n",
+    "        \"\"\"Code Snippet from DBSCAN Python Page\"\"\"\n",
+    "        \"\"\"https://scikit-learn.org/stable/auto_examples/cluster/plot_dbscan.html#sphx-glr-auto-examples-cluster-plot-dbscan-py\"\"\"\n",
+    "        db = sk.DBSCAN(eps=1.0).fit(speckles_data)\n",
+    "        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)\n",
+    "        core_samples_mask[db.core_sample_indices_] = True\n",
+    "        labels = db.labels_\n",
+    "        # Number of clusters in labels, ignoring noise if present.\n",
+    "        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)\n",
+    "        n_noise_ = list(labels).count(-1)\n",
+    "        \n",
+    "        N_speckle.append(n_clusters)\n",
+    "\n",
+    "        cluster_com_master = np.zeros((n_clusters_,3))\n",
+    "        radius_cluster_master = np.zeros(n_clusters_)\n",
+    "        i = 0\n",
+    "        while(i<=max(labels)):\n",
+    "            #Go cluster by cluster\n",
+    "            r_points_cluster            = copy.deepcopy(speckles_data[labels==i])\n",
+    "            cluster_com                 = np.mean(r_points_cluster,axis=0)\n",
+    "            cluster_com_master[i]       = copy.deepcopy(cluster_com)\n",
+    "            radius_cluster_master[i]    = (np.sum((r_points_cluster-cluster_com)**2,axis=None)/len(r_points_cluster))**0.5\n",
+    "            i +=1\n",
+    "\n",
+    "        for i in range(len(chr_I_data)):\n",
+    "            ###Speckles\n",
+    "            distances_from_speckles = np.sum((cluster_com_master-chr_I_data[i])**2,axis=1)**0.5\n",
+    "            distances_from_speckles -= radius_cluster_master\n",
+    "\n",
+    "            #Do this only for speckles\n",
+    "            exp_tsa_seq_all_frames[frame_number-first_frame,i] = \n",
+    "            np.sum(0.5*(1.0+np.tanh(sigma_tanh*(rc_tanh-distances_from_speckles))),axis=None)\n",
+    "            #exp_tsa_seq will be normalized by total (itself) while getting enrichment and so it does not matter if I divide by number of speckles\n",
+    "            #But have to divide exp_tsa_seq_all_frames by number of speckles so that the values can be interpretted as probabilities\n",
+    "            #when calculating correlations\n",
+    "            exp_tsa_seq_all_frames[frame_number-first_frame,i] /= float(n_clusters_)\n",
+    "            exp_tsa_seq[i] += copy.deepcopy(exp_tsa_seq_all_frames[frame_number-first_frame,i])\n",
+    "\n",
+    "            ###Lamina\n",
+    "            distances_from_lamina = np.sum((lamina_data-chr_I_data[i])**2,axis=1)**0.5\n",
+    "\n",
+    "            #Do this only for lamina\n",
+    "            exp_damid_all_frames[frame_number-first_frame,i] = \n",
+    "            np.sum(0.5*(1.0+np.tanh(sigma_tanh*(rc_tanh-distances_from_lamina))),axis=None)\n",
+    "            exp_damid[i] += copy.deepcopy(exp_damid_all_frames[frame_number-first_frame,i])\n",
+    "\n",
+    "\n",
+    "    damid_all_frames_haploid = np.zeros((N_frame,int(N_chr_beads/2)))\n",
+    "    tsaseq_all_frames_haploid = np.zeros((N_frame,int(N_chr_beads/2)))\n",
+    "    for i in range(23):\n",
+    "        damid_all_frames_haploid[:,gLength[i]:gLength[i+1]] = \n",
+    "        0.5*(exp_damid_all_frames[:,maternalIdx[i][0]-1:maternalIdx[i][1]]\n",
+    "             +exp_damid_all_frames[:,paternalIdx[i][0]-1:paternalIdx[i][1]])\n",
+    "        tsaseq_all_frames_haploid[:,gLength[i]:gLength[i+1]] = \n",
+    "        0.5*(exp_tsa_seq_all_frames[:,maternalIdx[i][0]-1:maternalIdx[i][1]]\n",
+    "             +exp_tsa_seq_all_frames[:,paternalIdx[i][0]-1:paternalIdx[i][1]])\n",
+    "        \n",
+    "    \n",
+    "    return (np.sum(damid_all_frames_haploid,axis=0), np.sum(tsaseq_all_frames_haploid,axis=0), N_speckle)   "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f1056cbd",
+   "metadata": {},
+   "source": [
+    "In the next step, we run a small sample (10 frames from the simulation) with the function above and plotted the simulated DamID and TSASeq against experimental results."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd8eec86",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "damid_simulated, tsaseq_simulated, N_spec_clusters = DamID_TSASeq_calculation(\"DUMP_FILE.dcd\")\n",
+    "\n",
+    "damid_data_low_res  = np.loadtxt(\"mol_info/DAM-ID_HFF_100KB.txt\",usecols=[1])\n",
+    "tsa_data_low_res    = np.loadtxt(\"mol_info/TSA-Seq_SON_HFF_100KB.txt\",usecols=[1])\n",
+    "\n",
+    "\n",
+    "chr_choose      = 7\n",
+    "dpi_            = 3000\n",
+    "bin_width_      = 0.8\n",
+    "yaxis_labels    = [\"DamID\\n(Model)\",\"DamID\\n(Expt)\",\"TSA-Seq\\n(Model)\",\"TSA-Seq\\n(Expt)\"]\n",
+    "subplot_dict    = dict({\"L\":0,\"S\":2})\n",
+    "\n",
+    "x_axis    = np.linspace(0,gLength[chr_choose]-gLength[chr_choose-1]-1,\n",
+    "                        gLength[chr_choose]-gLength[chr_choose-1],dtype=int)\n",
+    "fig1, axs1 = plt.subplots(4,figsize=(9,6.3),sharex=True,gridspec_kw={'height_ratios':[1,1,1,1]},dpi=dpi_)\n",
+    "\n",
+    "plt.subplots_adjust(hspace=0)\n",
+    "axs1[0].set_title('Chromosome 7')\n",
+    "\n",
+    "damid_data_low_res_chr             = copy.deepcopy(damid_data_low_res[paternalIdx[chr_disp-1][0]-1\n",
+    "                                                                      :paternalIdx[chr_disp-1][1]])\n",
+    "tsa_data_low_res_chr               = copy.deepcopy(tsa_data_low_res[paternalIdx[chr_disp-1][0]-1\n",
+    "                                                                    :paternalIdx[chr_disp-1][1]])\n",
+    "damid_data_low_res_chr_positive    = damid_data_low_res_chr>=0.0\n",
+    "tsa_data_low_res_positive          = tsa_data_low_res_chr>=0.0\n",
+    "\n",
+    "for nuclear_body in [\"L\",\"S\"]:\n",
+    "\n",
+    "    if nuclear_body == 'L':\n",
+    "        laf_chr          = damid_simulated[gLength[chr_choose-1]:gLength[chr_choose]]\n",
+    "    else:\n",
+    "        laf_chr          = tsaseq_simulated[gLength[chr_choose-1]:gLength[chr_choose]]\n",
+    "\n",
+    "    laf_chr_OE   = (laf_chr/(np.mean(laf_chr[:])))\n",
+    "\n",
+    "    log2_laf_chr     = np.zeros(len(laf_chr_OE))\n",
+    "    non_zero_indices = (laf_chr_OE!=0.0)\n",
+    "    log2_laf_chr[non_zero_indices] = np.log2(laf_chr_OE[non_zero_indices])\n",
+    "\n",
+    "    enriched_indices = log2_laf_chr>=0.0\n",
+    "    axs1[subplot_dict[nuclear_body]].bar(x_axis[enriched_indices],log2_laf_chr[enriched_indices],\n",
+    "                                             width=bin_width_,color=\"orange\",alpha=0.8)\n",
+    "    axs1[subplot_dict[nuclear_body]].bar(x_axis[~enriched_indices],log2_laf_chr[~enriched_indices],\n",
+    "                                             width=bin_width_,color=\"blue\",alpha=0.8)\n",
+    "    axs1[subplot_dict[nuclear_body]].set_ylim([-2.0,2.0])\n",
+    "    axs1[subplot_dict[nuclear_body]].set_xlim([0,x_axis[-1]])\n",
+    "    axs1[subplot_dict[nuclear_body]].spines['top'].set_linewidth('2.0')\n",
+    "    axs1[subplot_dict[nuclear_body]].spines['right'].set_linewidth('2.0')\n",
+    "    axs1[subplot_dict[nuclear_body]].spines['left'].set_linewidth('2.0')\n",
+    "    axs1[subplot_dict[nuclear_body]].spines['bottom'].set_linewidth('2.0')\n",
+    "    \n",
+    "axs1[1].bar(x_axis[damid_data_low_res_chr_positive ],\n",
+    "            damid_data_low_res_chr[damid_data_low_res_chr_positive],color='orange',width=bin_width_,alpha=0.8)\n",
+    "axs1[1].bar(x_axis[~damid_data_low_res_chr_positive ],\n",
+    "            damid_data_low_res_chr[~damid_data_low_res_chr_positive],color='blue',width=bin_width_,alpha=0.8)\n",
+    "axs1[1].set_ylim([-2.0,2.0])\n",
+    "axs1[3].bar(x_axis[tsa_data_low_res_positive],\n",
+    "            tsa_data_low_res_chr[tsa_data_low_res_positive],color='orange',width=bin_width_, alpha=0.8)\n",
+    "axs1[3].bar(x_axis[~tsa_data_low_res_positive],\n",
+    "            tsa_data_low_res_chr[~tsa_data_low_res_positive],color='blue',width=bin_width_, alpha=0.8)\n",
+    "axs1[3].set_ylim([-2.0,2.0])\n",
+    "axs1[3].spines['top'].set_linewidth('2.0')\n",
+    "axs1[3].spines['right'].set_linewidth('2.0')\n",
+    "axs1[3].spines['left'].set_linewidth('2.0')\n",
+    "axs1[3].spines['bottom'].set_linewidth('2.0')\n",
+    "axs1[1].spines['top'].set_linewidth('2.0')\n",
+    "axs1[1].spines['right'].set_linewidth('2.0')\n",
+    "axs1[1].spines['left'].set_linewidth('2.0')\n",
+    "axs1[1].spines['bottom'].set_linewidth('2.0')\n",
+    "\n",
+    "for i in range(4):\n",
+    "    axs1[i].set_ylabel(yaxis_labels[i],fontsize=22)\n",
+    "axs1[3].set_xlabel(\"Genomic position (100KB)\",fontsize=24)\n",
+    "\n",
+    "location_array    = axs1[0].get_position().get_points()\n",
+    "x_legend,y_legend = location_array[0]+0.5*(location_array[1]-location_array[0])\n",
+    "\n",
+    "y_legend += 1.5 \n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "15f5efce-885f-4716-80fa-7e66a8333457",
+   "metadata": {},
+   "source": [
+    "## Compute the chromosome contact probabilities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7dd7e0df-e4aa-491f-842a-65f44879c955",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "subprocess.call([\"gfortran -o chromsome_contact_calculation ../../openNucleome/utility/chromsome_contact_calculation.f90\"],shell=True,stdout=subprocess.PIPE)\n",
+    "subprocess.call([\"./chromsome_contact_calculation DUMP_FILE.dcd 1 -1 ./contact_prob/ 1 counter.txt\"],shell=True,stdout=subprocess.PIPE)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}