From aa655e969233ab85242750a14e3a1c8ac38b4ae4 Mon Sep 17 00:00:00 2001 From: Lehotzky Date: Thu, 17 Oct 2024 16:26:45 -0400 Subject: [PATCH] account for cases when no indels are within maf --- assemble_interval_sets.ipynb | 5142 ------------------------------ generate_dig_report_coding.py | 94 +- generate_dig_report_combined.py | 65 +- generate_dig_report_noncoding.py | 25 +- run.py | 4 +- train_mutation_map.ipynb | 187 +- 6 files changed, 163 insertions(+), 5354 deletions(-) delete mode 100644 assemble_interval_sets.ipynb diff --git a/assemble_interval_sets.ipynb b/assemble_interval_sets.ipynb deleted file mode 100644 index 7498557..0000000 --- a/assemble_interval_sets.ipynb +++ /dev/null @@ -1,5142 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "e3dd6c6d-9bef-4b83-b1f0-6b888c8a2773", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "import os\n", - "import gcsfs\n", - "from google import auth" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8e8bbc6e-c3be-4d3e-8d72-d3c81c16e89b", - "metadata": {}, - "outputs": [], - "source": [ - "project_name = 'broad-getzlab-workflows'\n", - "credentials, _ = auth.default()\n", - "gcs = gcsfs.GCSFileSystem(project_name, token=credentials)" - ] - }, - { - "cell_type": "markdown", - "id": "c9285490-fff0-4448-91c8-26f0e568062c", - "metadata": {}, - "source": [ - "Used https://genome.ucsc.edu/cgi-bin/hgLiftOver to lift over from hg38 to hg19" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7b8b0296-d2f1-4e6a-b5ca-6ce39247bc77", - "metadata": {}, - "outputs": [], - "source": [ - "dir_interval_lists = './dlbcl_known_enhancers/'\n", - "genome_reference = 'hg19'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "f8f5857a-de75-4226-91c4-d6893d9866e9", - "metadata": {}, - "outputs": [], - "source": [ - "enhancer_lists = [f for f in os.listdir(dir_interval_lists) if (genome_reference in f) and ('promoter' not in f) and ('bed12' not in f)]\n", - "promoter_lists = [f for f in os.listdir(dir_interval_lists) if (genome_reference in f) and ('promoter' in f) and ('bed12' not in f)]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "fdb19876-5e71-4124-a6e5-9eddc044e312", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['Payton_DLBCL_superenhancers.hg19.sort.bed',\n", - " 'LY1.hg19.bed',\n", - " 'Immunoglobulin_functional_elements.sort.hg19.bed',\n", - " 'Pan_Hammarstrom_kategis.hg19.sort.bed',\n", - " 'all.sample_gt15.score_gt1.hg19.bed',\n", - " 'Bradner_superenhancers.hg19.sort.bed',\n", - " 'all.sample_gt5.score_gt1.hg19.bed']" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "enhancer_lists" - ] - }, - { - "cell_type": "markdown", - "id": "35e4d0bb-d161-4106-b385-46f8e6f92e8f", - "metadata": {}, - "source": [ - "# Promoters" - ] - }, - { - "cell_type": "markdown", - "id": "9ea6a04f-8571-4d32-826d-010609c2b849", - "metadata": {}, - "source": [ - "## Loading promoter tables" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "9ab4507d-4e72-41e8-a4bf-b52bd2b6a2db", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendgene
0chr1915901916367PERM1
1chr1917392918058PERM1
2chr116901791690620NADK
3chr117095551710848NADK
4chr118219051823065GNB1
...............
3161chrX154250890154251188F8
3162chrX154299263154300012BRCC3,MTCP1
3163chrX154841930154842809SPRY3
3164chrX154996925154997508SPRY3
3165chrY2190578021907500KDM5D
\n", - "

3166 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end gene\n", - "0 chr1 915901 916367 PERM1\n", - "1 chr1 917392 918058 PERM1\n", - "2 chr1 1690179 1690620 NADK\n", - "3 chr1 1709555 1710848 NADK\n", - "4 chr1 1821905 1823065 GNB1\n", - "... ... ... ... ...\n", - "3161 chrX 154250890 154251188 F8\n", - "3162 chrX 154299263 154300012 BRCC3,MTCP1\n", - "3163 chrX 154841930 154842809 SPRY3\n", - "3164 chrX 154996925 154997508 SPRY3\n", - "3165 chrY 21905780 21907500 KDM5D\n", - "\n", - "[3166 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "dict_promoters = {}\n", - "for f in promoter_lists:\n", - " df = pd.read_csv(dir_interval_lists + f, sep = '\\t', names=['chromosome', 'start', 'end', 'gene', 'drop'])\n", - " df = df[df.columns[:-1]]\n", - " dict_promoters[f] = df\n", - " display(df)" - ] - }, - { - "cell_type": "markdown", - "id": "e408ed8d-0f9f-4ca2-b26c-6546e229bbad", - "metadata": {}, - "source": [ - "## Checking whether genomic locations are local to chromosomes" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "44b29bd5-2e51-4b36-86a5-015ab642a7da", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "chr1 915901 248524780\n", - "chr2 1417958 242499125\n", - "chr3 1134442 197687216\n", - "chr4 52976 187645939\n", - "chr5 218165 180415934\n", - "chr6 391044 170863573\n", - "chr7 766136 158497912\n", - "chr8 1771788 145743470\n", - "chr9 214569 140445635\n", - "chr10 735220 135342980\n", - "chr11 535133 134094669\n", - "chr12 498371 133339056\n", - "chr13 19755907 114239632\n", - "chr14 20801125 107287760\n", - "chr15 22892423 102030108\n", - "chr16 402146 89883320\n", - "chr17 1303378 80798526\n", - "chr18 2570892 76740768\n", - "chr19 409070 58874427\n", - "chr20 524292 62796131\n", - "chr21 14981532 47573884\n", - "chr22 17073819 50946816\n" - ] - } - ], - "source": [ - "for c in sorted([int(ch[3:]) for ch in df.chromosome.unique() if ch[3:] not in ['X', 'Y']]):\n", - " idx = 'chr'+str(c)\n", - " print(idx, df.loc[df.chromosome==idx, 'start'].min(), df.loc[df.chromosome==idx, 'end'].max())" - ] - }, - { - "cell_type": "markdown", - "id": "4205b834-9705-4d55-b959-ad4a82e616fe", - "metadata": {}, - "source": [ - "## Converting intervals to interval sets with [bed12 format](https://github.com/maxwellsh/DIGDriver/wiki/06:-Analyzing-new-interval-sets#defining-sets-of-intervals)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "e074b070-f247-41a1-956b-48a6ad1a2176", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
01915901918058hg19promoters::dlbcl::PERM1::NA1000.91590191805802466,666,0,1491,
1116901791710848hg19promoters::dlbcl::NADK::NA1000.1690179171084802441,1293,0,19376,
2118219051823065hg19promoters::dlbcl::GNB1::NA1000.18219051823065011160,0,
3119351201935421hg19promoters::dlbcl::CFAP74::NA1000.1935120193542101301,0,
4119814862036709hg19promoters::dlbcl::PRKCZ::NA1000.1981486203670903466,469,637,0,23456,54586,
5121586292160105hg19promoters::dlbcl::SKI::NA1000.21586292160105011476,0,
6123986902406854hg19promoters::dlbcl::PLCH2::NA1000.2398690240685402300,429,0,7735,
7124870752488155hg19promoters::dlbcl::TNFRSF14::NA1000.24870752488155011080,0,
8129856012986987hg19promoters::dlbcl::PRDM16::NA1000.29856012986987011386,0,
9135680793615958hg19promoters::dlbcl::TP73::NA1000.35680793615958031167,255,563,0,38954,47316,
10147162914716702hg19promoters::dlbcl::AJAP1::NA1000.4716291471670201411,0,
11162398266241312hg19promoters::dlbcl::CHD5::NA1000.62398266241312011486,0,
12162595386259942hg19promoters::dlbcl::RPL22::NA1000.6259538625994201404,0,
13162652646266448hg19promoters::dlbcl::RNF207::NA1000.6265264626644802332,599,0,585,
14168449167765429hg19promoters::dlbcl::CAMTA1::NA1000.68449167765429041151,301,793,1366,0,884655,895283,919147,
15180862018086639hg19promoters::dlbcl::ERRFI1::NA1000.8086201808663901438,0,
16196485089649354hg19promoters::dlbcl::TMEM201::NA1000.9648508964935401846,0,
17197117159751464hg19promoters::dlbcl::PIK3CD::NA1000.9711715975146403725,832,311,0,37422,39438,
1811027023410290992hg19promoters::dlbcl::KIF1B::NA1000.102702341029099202487,240,0,20518,
1911085656310856950hg19promoters::dlbcl::CASZ1::NA1000.108565631085695001387,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end name score \\\n", - "0 1 915901 918058 hg19promoters::dlbcl::PERM1::NA 1000 \n", - "1 1 1690179 1710848 hg19promoters::dlbcl::NADK::NA 1000 \n", - "2 1 1821905 1823065 hg19promoters::dlbcl::GNB1::NA 1000 \n", - "3 1 1935120 1935421 hg19promoters::dlbcl::CFAP74::NA 1000 \n", - "4 1 1981486 2036709 hg19promoters::dlbcl::PRKCZ::NA 1000 \n", - "5 1 2158629 2160105 hg19promoters::dlbcl::SKI::NA 1000 \n", - "6 1 2398690 2406854 hg19promoters::dlbcl::PLCH2::NA 1000 \n", - "7 1 2487075 2488155 hg19promoters::dlbcl::TNFRSF14::NA 1000 \n", - "8 1 2985601 2986987 hg19promoters::dlbcl::PRDM16::NA 1000 \n", - "9 1 3568079 3615958 hg19promoters::dlbcl::TP73::NA 1000 \n", - "10 1 4716291 4716702 hg19promoters::dlbcl::AJAP1::NA 1000 \n", - "11 1 6239826 6241312 hg19promoters::dlbcl::CHD5::NA 1000 \n", - "12 1 6259538 6259942 hg19promoters::dlbcl::RPL22::NA 1000 \n", - "13 1 6265264 6266448 hg19promoters::dlbcl::RNF207::NA 1000 \n", - "14 1 6844916 7765429 hg19promoters::dlbcl::CAMTA1::NA 1000 \n", - "15 1 8086201 8086639 hg19promoters::dlbcl::ERRFI1::NA 1000 \n", - "16 1 9648508 9649354 hg19promoters::dlbcl::TMEM201::NA 1000 \n", - "17 1 9711715 9751464 hg19promoters::dlbcl::PIK3CD::NA 1000 \n", - "18 1 10270234 10290992 hg19promoters::dlbcl::KIF1B::NA 1000 \n", - "19 1 10856563 10856950 hg19promoters::dlbcl::CASZ1::NA 1000 \n", - "\n", - " strand thickStart thickEnd itemRgb blockCount blockSizes \\\n", - "0 . 915901 918058 0 2 466,666, \n", - "1 . 1690179 1710848 0 2 441,1293, \n", - "2 . 1821905 1823065 0 1 1160, \n", - "3 . 1935120 1935421 0 1 301, \n", - "4 . 1981486 2036709 0 3 466,469,637, \n", - "5 . 2158629 2160105 0 1 1476, \n", - "6 . 2398690 2406854 0 2 300,429, \n", - "7 . 2487075 2488155 0 1 1080, \n", - "8 . 2985601 2986987 0 1 1386, \n", - "9 . 3568079 3615958 0 3 1167,255,563, \n", - "10 . 4716291 4716702 0 1 411, \n", - "11 . 6239826 6241312 0 1 1486, \n", - "12 . 6259538 6259942 0 1 404, \n", - "13 . 6265264 6266448 0 2 332,599, \n", - "14 . 6844916 7765429 0 4 1151,301,793,1366, \n", - "15 . 8086201 8086639 0 1 438, \n", - "16 . 9648508 9649354 0 1 846, \n", - "17 . 9711715 9751464 0 3 725,832,311, \n", - "18 . 10270234 10290992 0 2 487,240, \n", - "19 . 10856563 10856950 0 1 387, \n", - "\n", - " blockStarts \n", - "0 0,1491, \n", - "1 0,19376, \n", - "2 0, \n", - "3 0, \n", - "4 0,23456,54586, \n", - "5 0, \n", - "6 0,7735, \n", - "7 0, \n", - "8 0, \n", - "9 0,38954,47316, \n", - "10 0, \n", - "11 0, \n", - "12 0, \n", - "13 0,585, \n", - "14 0,884655,895283,919147, \n", - "15 0, \n", - "16 0, \n", - "17 0,37422,39438, \n", - "18 0,20518, \n", - "19 0, " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "for key in dict_promoters:\n", - " df = dict_promoters[key]\n", - " il_name = key.split('.bed')[0].replace('.', '_')\n", - " # data frame containing genes associated with each interval\n", - " df_genes = df.gene.str.split(',', expand=True)\n", - " # unique list of all genes in interval list\n", - " genes = []\n", - " for col in df_genes.columns:\n", - " genes += df_genes[col][~df_genes[col].isna()].tolist()\n", - " genes = np.unique(genes)\n", - " # columns of bed12 file \n", - " cols_bed12 = ['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']\n", - " data = []\n", - " for g in genes:\n", - " dfi = df.loc[np.any(df_genes==g, axis=1)].copy().sort_values(['chromosome', 'start'], ignore_index=True)\n", - " # chromosome number\n", - " chrom = dfi.chromosome.unique()[0][3:]\n", - " # start position of first interval in set (zero-indexed)\n", - " start = dfi.start.min()\n", - " # end position of final interval in set (zero-indexed)\n", - " end = dfi.end.max()\n", - " # unique name for this set of annotations\n", - " name = '{}::dlbcl::{}::NA'.format(il_name.replace('_', ''), g)\n", - " # any non-negative number (we usually use 0)\n", - " score = 1000\n", - " # +, -, or . if no strand\n", - " strand = '.'\n", - " # same as start (for Dig)\n", - " thickStart = start\n", - " # same as end (for Dig)\n", - " thickEnd = end\n", - " # ignored (we usually set to 0)\n", - " itemRgb = 0\n", - " # number of intervals in the set\n", - " blockCount = len(dfi) \n", - " # # length of each interval in the set\n", - " blockSizes = ','.join((dfi.end - dfi.start).astype(str).tolist()) + ','\n", - " # # start position of each interval relative to start (so the first entry is always 0)\n", - " blockStarts = ','.join((dfi.start - dfi.start.min()).astype(str).tolist()) + ','\n", - " data.append([chrom, start, end, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts])\n", - " df_bed12 = pd.DataFrame(data=data, columns=cols_bed12).sort_values(['chrom', 'start'], ignore_index=True)\n", - " df_bed12.to_csv(dir_interval_lists + il_name + '_bed12.bed', sep='\\t', index=False, header=False, lineterminator='\\n')\n", - "df_bed12.head(20)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "e68947d8-93b1-4d4b-820f-fe7e33e7b04c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
124820772492453gc19_pc.prom::gencode::TNFRSF14::ENSG00000157873.131000+2482077.12492453.107104,200,117,98,262,68,228,0,4800,5035,5629,8902,9346,10148,
0129855313316553gc19_pc.prom::gencode::PRDM16::ENSG00000142611.121000+2985531331655308200,108,135,188,250,180,91,168,0,335,174964,175176,175539,327323,327632,330854,
1162576176272948gc19_pc.prom::gencode::RPL22::ENSG00000116251.51000-6257617627294801388,181,10,2,200,200,66,200,50,4,44,65,149,0,219,1796,1996,2055,3285,5737,5937,11487,1183...
2168451837806225gc19_pc.prom::gencode::CAMTA1::ENSG00000171735.141000+6845183780622509200,149,233,116,61,101,132,118,68,0,458,3971,951084,951423,959590,959905,960583,...
311117572011322764gc19_pc.prom::gencode::MTOR::ENSG00000198793.81000-111757201132276408200,200,668,57,264,239,131,200,0,15895,16630,18624,18823,19295,146644,146844,
411617035816207440gc19_pc.prom::gencode::SPEN::ENSG00000065526.61000+1617035816207440010234,860,200,164,256,251,200,362,218,215,0,828,3800,7479,8257,26581,29438,30610,32821,3...
.......................................
20033Y2776806327768263gc19_pc.prom::gencode::CDY1::ENSG00000172288.61000+277680632776826301200,0,
20034Y2811468928115089gc19_pc.prom::gencode::AC007965.1::ENSG0000026...1000-281146892811508902180,200,0,200,
20035Y5910027959100479gc19_pc.prom::gencode::SPRY3::ENSGR0000168939.61000+591002795910047901200,0,
20036Y5921376159214246gc19_pc.prom::gencode::VAMP7::ENSGR0000124333.101000+592137615921424602200,123,0,362,
20037Y5933005159330576gc19_pc.prom::gencode::IL9R::ENSGR0000124334.121000+593300515933057602200,112,0,413,
\n", - "

20038 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " 1 2482077 2492453 \\\n", - "0 1 2985531 3316553 \n", - "1 1 6257617 6272948 \n", - "2 1 6845183 7806225 \n", - "3 1 11175720 11322764 \n", - "4 1 16170358 16207440 \n", - "... .. ... ... \n", - "20033 Y 27768063 27768263 \n", - "20034 Y 28114689 28115089 \n", - "20035 Y 59100279 59100479 \n", - "20036 Y 59213761 59214246 \n", - "20037 Y 59330051 59330576 \n", - "\n", - " gc19_pc.prom::gencode::TNFRSF14::ENSG00000157873.13 1000 + 2482077.1 \\\n", - "0 gc19_pc.prom::gencode::PRDM16::ENSG00000142611.12 1000 + 2985531 \n", - "1 gc19_pc.prom::gencode::RPL22::ENSG00000116251.5 1000 - 6257617 \n", - "2 gc19_pc.prom::gencode::CAMTA1::ENSG00000171735.14 1000 + 6845183 \n", - "3 gc19_pc.prom::gencode::MTOR::ENSG00000198793.8 1000 - 11175720 \n", - "4 gc19_pc.prom::gencode::SPEN::ENSG00000065526.6 1000 + 16170358 \n", - "... ... ... .. ... \n", - "20033 gc19_pc.prom::gencode::CDY1::ENSG00000172288.6 1000 + 27768063 \n", - "20034 gc19_pc.prom::gencode::AC007965.1::ENSG0000026... 1000 - 28114689 \n", - "20035 gc19_pc.prom::gencode::SPRY3::ENSGR0000168939.6 1000 + 59100279 \n", - "20036 gc19_pc.prom::gencode::VAMP7::ENSGR0000124333.10 1000 + 59213761 \n", - "20037 gc19_pc.prom::gencode::IL9R::ENSGR0000124334.12 1000 + 59330051 \n", - "\n", - " 2492453.1 0 7 104,200,117,98,262,68,228, \\\n", - "0 3316553 0 8 200,108,135,188,250,180,91,168, \n", - "1 6272948 0 13 88,181,10,2,200,200,66,200,50,4,44,65,149, \n", - "2 7806225 0 9 200,149,233,116,61,101,132,118,68, \n", - "3 11322764 0 8 200,200,668,57,264,239,131,200, \n", - "4 16207440 0 10 234,860,200,164,256,251,200,362,218,215, \n", - "... ... .. .. ... \n", - "20033 27768263 0 1 200, \n", - "20034 28115089 0 2 180,200, \n", - "20035 59100479 0 1 200, \n", - "20036 59214246 0 2 200,123, \n", - "20037 59330576 0 2 200,112, \n", - "\n", - " 0,4800,5035,5629,8902,9346,10148, \n", - "0 0,335,174964,175176,175539,327323,327632,330854, \n", - "1 0,219,1796,1996,2055,3285,5737,5937,11487,1183... \n", - "2 0,458,3971,951084,951423,959590,959905,960583,... \n", - "3 0,15895,16630,18624,18823,19295,146644,146844, \n", - "4 0,828,3800,7479,8257,26581,29438,30610,32821,3... \n", - "... ... \n", - "20033 0, \n", - "20034 0,200, \n", - "20035 0, \n", - "20036 0,362, \n", - "20037 0,413, \n", - "\n", - "[20038 rows x 12 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.read_csv('./interval_lists/gc19_pc.prom.bed', sep='\\t')" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "3c295c02-ad73-4ef9-9b99-5c207b58fdaa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr117052101715089chr1:1773772-1783650
1chr117512241841253chr1:1819786-1909814
2chr122220432256622chr1:2290605-2325183
3chr124769852481874chr1:2545547-2550435
4chr135718793595579chr1:3655316-3679015
...............
1424chr9135245814135261884chr9:132370428-132386497
1425chr9135978382136023022chr9:133102996-133147635
1426chr9136809701136845830chr9:133944580-133980708
1427chr9139114103139137520chr9:136222258-136245674
1428chr9140519742140658171chr9:137625291-137763719
\n", - "

1429 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr1 1705210 1715089 chr1:1773772-1783650\n", - "1 chr1 1751224 1841253 chr1:1819786-1909814\n", - "2 chr1 2222043 2256622 chr1:2290605-2325183\n", - "3 chr1 2476985 2481874 chr1:2545547-2550435\n", - "4 chr1 3571879 3595579 chr1:3655316-3679015\n", - "... ... ... ... ...\n", - "1424 chr9 135245814 135261884 chr9:132370428-132386497\n", - "1425 chr9 135978382 136023022 chr9:133102996-133147635\n", - "1426 chr9 136809701 136845830 chr9:133944580-133980708\n", - "1427 chr9 139114103 139137520 chr9:136222258-136245674\n", - "1428 chr9 140519742 140658171 chr9:137625291-137763719\n", - "\n", - "[1429 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr14106032614106032974chr14:105566278-105566637
1chr14106041491106041971chr14:105575155-105575634
2chr14106048351106048676chr14:105582015-105582339
3chr14106050491106054731chr14:105584155-105588394
4chr14106054315106056795chr14:105587979-105590458
...............
90chr222102514921025185chr22:22026077-22881392
91chr134899656748996634chr22:22026077-22881392
92chr3186029623186029950chr22:22026077-22881392
93chr222323593323265082chr22:22893754-22922910
94chr222370039923700999chr22:23358213-23358812
\n", - "

95 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr14 106032614 106032974 chr14:105566278-105566637\n", - "1 chr14 106041491 106041971 chr14:105575155-105575634\n", - "2 chr14 106048351 106048676 chr14:105582015-105582339\n", - "3 chr14 106050491 106054731 chr14:105584155-105588394\n", - "4 chr14 106054315 106056795 chr14:105587979-105590458\n", - ".. ... ... ... ...\n", - "90 chr22 21025149 21025185 chr22:22026077-22881392\n", - "91 chr13 48996567 48996634 chr22:22026077-22881392\n", - "92 chr3 186029623 186029950 chr22:22026077-22881392\n", - "93 chr22 23235933 23265082 chr22:22893754-22922910\n", - "94 chr22 23700399 23700999 chr22:23358213-23358812\n", - "\n", - "[95 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr1150546616150552004chr1:150574141-150579528
1chr1203273727203276488chr1:203304600-203307360
2chr1226924119226926747chr1:226736419-226739046
3chr106365953163666480chr10:61899773-61906721
4chr11128389025128392119chr11:128519131-128522224
5chr1287620708765407chr12:8609475-8612811
6chr129253569092540320chr12:92141915-92146544
7chr12113493744113519862chr12:113055940-113082057
8chr12122454746122467180chr12:122016841-122029274
9chr137598117075984325chr13:75407035-75410189
10chr146925613269262207chr14:68789416-68795490
11chr14106066053106072343chr14:105599717-105606006
12chr14106091190106095703chr14:105624854-105629366
13chr14106110025106115350chr14:105643689-105649013
14chr14106172909106178778chr14:105706573-105712441
15chr14106208188106215693chr14:105741852-105749356
16chr14106235994106243520chr14:105769658-105777183
17chr14106317638106338808chr14:105851306-105872945
18chr14106333677106333727chr14:105851306-105872945
19chr14106333640106333677chr14:105851306-105872945
20chr14106333577106333627chr14:105851306-105872945
21chr14106333577106333698chr14:105851306-105872945
22chr14106347194106387925chr14:105881337-105922066
23chr152120743321207475chr14:106713675-106851398
24chr14106515683106515744chr14:106713675-106851398
25chr14106689843106689878chr14:106713675-106851398
26chr14106728345106728400chr14:106713675-106851398
27chr10127005812127005928chr14:106713675-106851398
28chr14107187023107187125chr14:106713675-106851398
29chrX8232551982325572chr14:106713675-106851398
30chr125564994055649976chr14:106713675-106851398
31chr1223477472223477546chr14:106713675-106851398
32chr19193268191932720chr14:106713675-106851398
33chr14106730649106730702chr14:106713675-106851398
34chr14106731182106731314chr14:106713675-106851398
35chr14107160361107160782chr14:106713675-106851398
36chr14107173399107173999chr14:106713675-106851398
37chr161096514810974698chr16:10871292-10880841
38chr168593186185942691chr16:85898256-85909085
39chr175640863956409500chr17:58331279-58332139
40chr183034673330351942chr18:32766771-32771979
41chr186098253560988825chr18:63315303-63321592
42chr191643585616438767chr19:16325046-16327956
43chr28912757489145342chr2:88828062-88845829
44chr28915093389165371chr2:88851421-88865859
45chr2136873437136876401chr2:136115868-136118831
46chr222272424422735821chr22:22369877-22381452
47chr222321397423246765chr22:22871796-22904585
48chr33202116832023964chr3:31979677-31982472
49chr3187455028187468250chr3:187737241-187750462
50chr44019363340209531chr4:40192014-40207911
51chr5158522416158528394chr5:159095409-159101386
52chr63713821937144149chr6:37170444-37176373
53chr69100307691010908chr6:90293358-90301189
54chr6134492661134496923chr6:134171524-134175785
55chr93702371637036421chr9:37023720-37036424
56chr93738346037385303chr9:37383464-37385306
57chrX1299150512995311chrX:12973387-12977192
\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr1 150546616 150552004 chr1:150574141-150579528\n", - "1 chr1 203273727 203276488 chr1:203304600-203307360\n", - "2 chr1 226924119 226926747 chr1:226736419-226739046\n", - "3 chr10 63659531 63666480 chr10:61899773-61906721\n", - "4 chr11 128389025 128392119 chr11:128519131-128522224\n", - "5 chr12 8762070 8765407 chr12:8609475-8612811\n", - "6 chr12 92535690 92540320 chr12:92141915-92146544\n", - "7 chr12 113493744 113519862 chr12:113055940-113082057\n", - "8 chr12 122454746 122467180 chr12:122016841-122029274\n", - "9 chr13 75981170 75984325 chr13:75407035-75410189\n", - "10 chr14 69256132 69262207 chr14:68789416-68795490\n", - "11 chr14 106066053 106072343 chr14:105599717-105606006\n", - "12 chr14 106091190 106095703 chr14:105624854-105629366\n", - "13 chr14 106110025 106115350 chr14:105643689-105649013\n", - "14 chr14 106172909 106178778 chr14:105706573-105712441\n", - "15 chr14 106208188 106215693 chr14:105741852-105749356\n", - "16 chr14 106235994 106243520 chr14:105769658-105777183\n", - "17 chr14 106317638 106338808 chr14:105851306-105872945\n", - "18 chr14 106333677 106333727 chr14:105851306-105872945\n", - "19 chr14 106333640 106333677 chr14:105851306-105872945\n", - "20 chr14 106333577 106333627 chr14:105851306-105872945\n", - "21 chr14 106333577 106333698 chr14:105851306-105872945\n", - "22 chr14 106347194 106387925 chr14:105881337-105922066\n", - "23 chr15 21207433 21207475 chr14:106713675-106851398\n", - "24 chr14 106515683 106515744 chr14:106713675-106851398\n", - "25 chr14 106689843 106689878 chr14:106713675-106851398\n", - "26 chr14 106728345 106728400 chr14:106713675-106851398\n", - "27 chr10 127005812 127005928 chr14:106713675-106851398\n", - "28 chr14 107187023 107187125 chr14:106713675-106851398\n", - "29 chrX 82325519 82325572 chr14:106713675-106851398\n", - "30 chr12 55649940 55649976 chr14:106713675-106851398\n", - "31 chr1 223477472 223477546 chr14:106713675-106851398\n", - "32 chr1 91932681 91932720 chr14:106713675-106851398\n", - "33 chr14 106730649 106730702 chr14:106713675-106851398\n", - "34 chr14 106731182 106731314 chr14:106713675-106851398\n", - "35 chr14 107160361 107160782 chr14:106713675-106851398\n", - "36 chr14 107173399 107173999 chr14:106713675-106851398\n", - "37 chr16 10965148 10974698 chr16:10871292-10880841\n", - "38 chr16 85931861 85942691 chr16:85898256-85909085\n", - "39 chr17 56408639 56409500 chr17:58331279-58332139\n", - "40 chr18 30346733 30351942 chr18:32766771-32771979\n", - "41 chr18 60982535 60988825 chr18:63315303-63321592\n", - "42 chr19 16435856 16438767 chr19:16325046-16327956\n", - "43 chr2 89127574 89145342 chr2:88828062-88845829\n", - "44 chr2 89150933 89165371 chr2:88851421-88865859\n", - "45 chr2 136873437 136876401 chr2:136115868-136118831\n", - "46 chr22 22724244 22735821 chr22:22369877-22381452\n", - "47 chr22 23213974 23246765 chr22:22871796-22904585\n", - "48 chr3 32021168 32023964 chr3:31979677-31982472\n", - "49 chr3 187455028 187468250 chr3:187737241-187750462\n", - "50 chr4 40193633 40209531 chr4:40192014-40207911\n", - "51 chr5 158522416 158528394 chr5:159095409-159101386\n", - "52 chr6 37138219 37144149 chr6:37170444-37176373\n", - "53 chr6 91003076 91010908 chr6:90293358-90301189\n", - "54 chr6 134492661 134496923 chr6:134171524-134175785\n", - "55 chr9 37023716 37036421 chr9:37023720-37036424\n", - "56 chr9 37383460 37385303 chr9:37383464-37385306\n", - "57 chrX 12991505 12995311 chrX:12973387-12977192" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr113424001342510chr1:1407021-1407130
1chr162584806259620chr1:6198421-6199560
2chr180218508022220chr1:7961791-7962160
3chr11107280011073390chr1:11012744-11013333
4chr11683960016841510chr1:16513106-16515015
...............
747chr9130834640130834790chr9:128072362-128072511
748chr9134553080134553280chr9:131677694-131677893
749chrX1297422012975860chrX:12956102-12957741
750chrX2407316024073260chrX:24055044-24055143
751chrX7040175070401910chrX:71181901-71182060
\n", - "

752 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr1 1342400 1342510 chr1:1407021-1407130\n", - "1 chr1 6258480 6259620 chr1:6198421-6199560\n", - "2 chr1 8021850 8022220 chr1:7961791-7962160\n", - "3 chr1 11072800 11073390 chr1:11012744-11013333\n", - "4 chr1 16839600 16841510 chr1:16513106-16515015\n", - ".. ... ... ... ...\n", - "747 chr9 130834640 130834790 chr9:128072362-128072511\n", - "748 chr9 134553080 134553280 chr9:131677694-131677893\n", - "749 chrX 12974220 12975860 chrX:12956102-12957741\n", - "750 chrX 24073160 24073260 chrX:24055044-24055143\n", - "751 chrX 70401750 70401910 chrX:71181901-71182060\n", - "\n", - "[752 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr117064121732343chr1:1774973-1800904
1chr117516681842316chr1:1820229-1910877
2chr140598314133780chr1:3999771-4073720
3chr166168866664806chr1:6556826-6604746
4chr173919317440079chr1:7331871-7380018
...............
2294chrX131613124131626145chrX:132479096-132492117
2295chrX135817164135864668chrX:136735005-136782509
2296chrX153229992153241022chrX:153964541-153975571
2297chrX153245353153256834chrX:153979902-153991383
2298chrX153271196153289362chrX:154005745-154023911
\n", - "

2299 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr1 1706412 1732343 chr1:1774973-1800904\n", - "1 chr1 1751668 1842316 chr1:1820229-1910877\n", - "2 chr1 4059831 4133780 chr1:3999771-4073720\n", - "3 chr1 6616886 6664806 chr1:6556826-6604746\n", - "4 chr1 7391931 7440079 chr1:7331871-7380018\n", - "... ... ... ... ...\n", - "2294 chrX 131613124 131626145 chrX:132479096-132492117\n", - "2295 chrX 135817164 135864668 chrX:136735005-136782509\n", - "2296 chrX 153229992 153241022 chrX:153964541-153975571\n", - "2297 chrX 153245353 153256834 chrX:153979902-153991383\n", - "2298 chrX 153271196 153289362 chrX:154005745-154023911\n", - "\n", - "[2299 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromosomestartendenhancer_hg38
0chr12761029320chr1:27611-29320
1chr1713220713660chr1:777841-778280
2chr1724060727180chr1:788681-791800
3chr1824880825300chr1:889501-889920
4chr1902180902660chr1:966801-967280
...............
18749chrY5882734058827540chrY:56763331-56763530
18750chrY5885589058856100chrY:56734771-56734980
18751chrY5897188058973420chrY:56825734-56827273
18752chrY5897446058974660chrY:56828314-56828513
18753chrY5897685058997920chrY:56830704-56851773
\n", - "

18754 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " chromosome start end enhancer_hg38\n", - "0 chr1 27610 29320 chr1:27611-29320\n", - "1 chr1 713220 713660 chr1:777841-778280\n", - "2 chr1 724060 727180 chr1:788681-791800\n", - "3 chr1 824880 825300 chr1:889501-889920\n", - "4 chr1 902180 902660 chr1:966801-967280\n", - "... ... ... ... ...\n", - "18749 chrY 58827340 58827540 chrY:56763331-56763530\n", - "18750 chrY 58855890 58856100 chrY:56734771-56734980\n", - "18751 chrY 58971880 58973420 chrY:56825734-56827273\n", - "18752 chrY 58974460 58974660 chrY:56828314-56828513\n", - "18753 chrY 58976850 58997920 chrY:56830704-56851773\n", - "\n", - "[18754 rows x 4 columns]" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "dict_enhancers = {}\n", - "for f in enhancer_lists:\n", - " df = pd.read_csv(dir_interval_lists + f, sep = '\\t', names=['chromosome', 'start', 'end', 'enhancer_hg38', 'drop'])\n", - " df = df[df.columns[:-1]]\n", - " if df.enhancer_hg38.astype(str).str[:3].unique()[0] == 'chr':\n", - " dict_enhancers[f] = df\n", - " display(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "bb94d92f-a8ff-4810-be87-ec0d91d6bb13", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
0117052101715089PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.17052101715089019879,0,
1117512241841253PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.175122418412530190029,0,
2122220432256622PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.222204322566220134579,0,
3124769852481874PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.24769852481874014889,0,
4135718793595579PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.357187935955790123700,0,
5191201009143791PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.912010091437910123691,0,
6194602309489886PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.946023094898860129656,0,
7197117739793450PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.971177397934500181677,0,
811186276011876548PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.11862760118765480113788,0,
911209903612112488PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.12099036121124880113452,0,
1011219231112233319PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.12192311122333190141008,0,
1111402597514095686PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.14025975140956860169711,0,
1211615009616179237PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.16150096161792370129141,0,
1311696944517034545PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.16969445170345450165100,0,
14116987768207808943PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.1698776820780894304426,231,96,46,49,39,30,32,66,49,44,78,34,56,58,...0,55503744,103554727,103568268,103575745,10358...
1511705911517086792PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.17059115170867920127677,0,
1611721525817241269PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.17215258172412690126011,0,
1711977201919807688PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.19772019198076880135669,0,
18121207433223477546PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.2120743322347754601439,74,116,36,61,35,55,53,132,421,600,102,42,53,70725248,202270039,105798379,34442507,85308250...
1912161105521635087PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1...1000.21611055216350870124032,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end \\\n", - "0 1 1705210 1715089 \n", - "1 1 1751224 1841253 \n", - "2 1 2222043 2256622 \n", - "3 1 2476985 2481874 \n", - "4 1 3571879 3595579 \n", - "5 1 9120100 9143791 \n", - "6 1 9460230 9489886 \n", - "7 1 9711773 9793450 \n", - "8 1 11862760 11876548 \n", - "9 1 12099036 12112488 \n", - "10 1 12192311 12233319 \n", - "11 1 14025975 14095686 \n", - "12 1 16150096 16179237 \n", - "13 1 16969445 17034545 \n", - "14 1 16987768 207808943 \n", - "15 1 17059115 17086792 \n", - "16 1 17215258 17241269 \n", - "17 1 19772019 19807688 \n", - "18 1 21207433 223477546 \n", - "19 1 21611055 21635087 \n", - "\n", - " name score strand \\\n", - "0 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "1 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "2 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "3 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "4 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "5 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "6 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "7 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "8 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "9 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "10 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "11 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "12 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "13 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "14 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "15 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "16 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "17 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "18 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "19 PaytonDLBCLsuperenhancershg19sort::dlbcl::chr1... 1000 . \n", - "\n", - " thickStart thickEnd itemRgb blockCount \\\n", - "0 1705210 1715089 0 1 \n", - "1 1751224 1841253 0 1 \n", - "2 2222043 2256622 0 1 \n", - "3 2476985 2481874 0 1 \n", - "4 3571879 3595579 0 1 \n", - "5 9120100 9143791 0 1 \n", - "6 9460230 9489886 0 1 \n", - "7 9711773 9793450 0 1 \n", - "8 11862760 11876548 0 1 \n", - "9 12099036 12112488 0 1 \n", - "10 12192311 12233319 0 1 \n", - "11 14025975 14095686 0 1 \n", - "12 16150096 16179237 0 1 \n", - "13 16969445 17034545 0 1 \n", - "14 16987768 207808943 0 44 \n", - "15 17059115 17086792 0 1 \n", - "16 17215258 17241269 0 1 \n", - "17 19772019 19807688 0 1 \n", - "18 21207433 223477546 0 14 \n", - "19 21611055 21635087 0 1 \n", - "\n", - " blockSizes \\\n", - "0 9879, \n", - "1 90029, \n", - "2 34579, \n", - "3 4889, \n", - "4 23700, \n", - "5 23691, \n", - "6 29656, \n", - "7 81677, \n", - "8 13788, \n", - "9 13452, \n", - "10 41008, \n", - "11 69711, \n", - "12 29141, \n", - "13 65100, \n", - "14 26,231,96,46,49,39,30,32,66,49,44,78,34,56,58,... \n", - "15 27677, \n", - "16 26011, \n", - "17 35669, \n", - "18 39,74,116,36,61,35,55,53,132,421,600,102,42,53, \n", - "19 24032, \n", - "\n", - " blockStarts \n", - "0 0, \n", - "1 0, \n", - "2 0, \n", - "3 0, \n", - "4 0, \n", - "5 0, \n", - "6 0, \n", - "7 0, \n", - "8 0, \n", - "9 0, \n", - "10 0, \n", - "11 0, \n", - "12 0, \n", - "13 0, \n", - "14 0,55503744,103554727,103568268,103575745,10358... \n", - "15 0, \n", - "16 0, \n", - "17 0, \n", - "18 70725248,202270039,105798379,34442507,85308250... \n", - "19 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
014591240148924163Immunoglobulinfunctionalelementssorthg19::dlbc...1000.459124014892416301231,75,62,35,51,90,102,51,34,27,57,42,112567135,144331824,144332172,144332888,342997...
1121207433223477546Immunoglobulinfunctionalelementssorthg19::dlbc...1000.2120743322347754604439,74,116,36,51,37,29,62,47,36,63,72,80,73,148...70725248,202270039,105798379,34442507,85245375...
21321025149186029950Immunoglobulinfunctionalelementssorthg19::dlbc...1000.210251491860299500467,36,843098,327,27971418,0,1355325,165004474,
314106032614106032974Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10603261410603297401360,0,
414106041491106041971Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10604149110604197101480,0,
514106048351106048676Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10604835110604867601325,0,
614106050491106054731Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106050491106054731014240,0,
714106054315106056795Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106054315106056795012480,0,
814106064028106068064Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106064028106068064014036,0,
914106069177106070455Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106069177106070455011278,0,
1014106088249106092402Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106088249106092402014153,0,
1114106093066106096258Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106093066106096258013192,0,
1214106107302106111126Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106107302106111126013824,0,
1314106111791106115058Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106111791106115058013267,0,
1414106152458106152818Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10615245810615281801360,0,
1514106162616106162991Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10616261610616299101375,0,
1614106167280106167601Immunoglobulinfunctionalelementssorthg19::dlbc...1000.10616728010616760101321,0,
1714106170725106175001Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106170725106175001014276,0,
1814106175047106178297Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106175047106178297013250,0,
1914106204099106209407Immunoglobulinfunctionalelementssorthg19::dlbc...1000.106204099106209407015308,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end \\\n", - "0 1 4591240 148924163 \n", - "1 1 21207433 223477546 \n", - "2 13 21025149 186029950 \n", - "3 14 106032614 106032974 \n", - "4 14 106041491 106041971 \n", - "5 14 106048351 106048676 \n", - "6 14 106050491 106054731 \n", - "7 14 106054315 106056795 \n", - "8 14 106064028 106068064 \n", - "9 14 106069177 106070455 \n", - "10 14 106088249 106092402 \n", - "11 14 106093066 106096258 \n", - "12 14 106107302 106111126 \n", - "13 14 106111791 106115058 \n", - "14 14 106152458 106152818 \n", - "15 14 106162616 106162991 \n", - "16 14 106167280 106167601 \n", - "17 14 106170725 106175001 \n", - "18 14 106175047 106178297 \n", - "19 14 106204099 106209407 \n", - "\n", - " name score strand \\\n", - "0 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "1 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "2 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "3 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "4 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "5 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "6 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "7 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "8 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "9 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "10 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "11 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "12 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "13 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "14 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "15 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "16 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "17 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "18 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "19 Immunoglobulinfunctionalelementssorthg19::dlbc... 1000 . \n", - "\n", - " thickStart thickEnd itemRgb blockCount \\\n", - "0 4591240 148924163 0 12 \n", - "1 21207433 223477546 0 44 \n", - "2 21025149 186029950 0 4 \n", - "3 106032614 106032974 0 1 \n", - "4 106041491 106041971 0 1 \n", - "5 106048351 106048676 0 1 \n", - "6 106050491 106054731 0 1 \n", - "7 106054315 106056795 0 1 \n", - "8 106064028 106068064 0 1 \n", - "9 106069177 106070455 0 1 \n", - "10 106088249 106092402 0 1 \n", - "11 106093066 106096258 0 1 \n", - "12 106107302 106111126 0 1 \n", - "13 106111791 106115058 0 1 \n", - "14 106152458 106152818 0 1 \n", - "15 106162616 106162991 0 1 \n", - "16 106167280 106167601 0 1 \n", - "17 106170725 106175001 0 1 \n", - "18 106175047 106178297 0 1 \n", - "19 106204099 106209407 0 1 \n", - "\n", - " blockSizes \\\n", - "0 31,75,62,35,51,90,102,51,34,27,57,42, \n", - "1 39,74,116,36,51,37,29,62,47,36,63,72,80,73,148... \n", - "2 67,36,843098,327, \n", - "3 360, \n", - "4 480, \n", - "5 325, \n", - "6 4240, \n", - "7 2480, \n", - "8 4036, \n", - "9 1278, \n", - "10 4153, \n", - "11 3192, \n", - "12 3824, \n", - "13 3267, \n", - "14 360, \n", - "15 375, \n", - "16 321, \n", - "17 4276, \n", - "18 3250, \n", - "19 5308, \n", - "\n", - " blockStarts \n", - "0 112567135,144331824,144332172,144332888,342997... \n", - "1 70725248,202270039,105798379,34442507,85245375... \n", - "2 27971418,0,1355325,165004474, \n", - "3 0, \n", - "4 0, \n", - "5 0, \n", - "6 0, \n", - "7 0, \n", - "8 0, \n", - "9 0, \n", - "10 0, \n", - "11 0, \n", - "12 0, \n", - "13 0, \n", - "14 0, \n", - "15 0, \n", - "16 0, \n", - "17 0, \n", - "18 0, \n", - "19 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
0121207433223477546PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.2120743322347754601439,74,116,36,61,35,55,53,132,421,600,102,42,53,70725248,202270039,105798379,34442507,85308250...
11150546616150552004PanHammarstromkategishg19sort::dlbcl::chr1:150...1000.150546616150552004015388,0,
21203273727203276488PanHammarstromkategishg19sort::dlbcl::chr1:203...1000.203273727203276488012761,0,
31226924119226926747PanHammarstromkategishg19sort::dlbcl::chr1:226...1000.226924119226926747012628,0,
4106365953163666480PanHammarstromkategishg19sort::dlbcl::chr10:61...1000.6365953163666480016949,0,
511128389025128392119PanHammarstromkategishg19sort::dlbcl::chr11:12...1000.128389025128392119013094,0,
61287620708765407PanHammarstromkategishg19sort::dlbcl::chr12:86...1000.87620708765407013337,0,
7129253569092540320PanHammarstromkategishg19sort::dlbcl::chr12:92...1000.9253569092540320014630,0,
812113493744113519862PanHammarstromkategishg19sort::dlbcl::chr12:11...1000.1134937441135198620126118,0,
912122454746122467180PanHammarstromkategishg19sort::dlbcl::chr12:12...1000.1224547461224671800112434,0,
10137598117075984325PanHammarstromkategishg19sort::dlbcl::chr13:75...1000.7598117075984325013155,0,
11146925613269262207PanHammarstromkategishg19sort::dlbcl::chr14:68...1000.6925613269262207016075,0,
1214106066053106072343PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106066053106072343016290,0,
1314106091190106095703PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106091190106095703014513,0,
1414106110025106115350PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106110025106115350015325,0,
1514106172909106178778PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106172909106178778015869,0,
1614106208188106215693PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106208188106215693017505,0,
1714106235994106243520PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.106235994106243520017526,0,
1814106317638106338808PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.1063176381063388080521170,50,121,37,50,0,15939,15939,16002,16039,
1914106347194106387925PanHammarstromkategishg19sort::dlbcl::chr14:10...1000.1063471941063879250140731,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end \\\n", - "0 1 21207433 223477546 \n", - "1 1 150546616 150552004 \n", - "2 1 203273727 203276488 \n", - "3 1 226924119 226926747 \n", - "4 10 63659531 63666480 \n", - "5 11 128389025 128392119 \n", - "6 12 8762070 8765407 \n", - "7 12 92535690 92540320 \n", - "8 12 113493744 113519862 \n", - "9 12 122454746 122467180 \n", - "10 13 75981170 75984325 \n", - "11 14 69256132 69262207 \n", - "12 14 106066053 106072343 \n", - "13 14 106091190 106095703 \n", - "14 14 106110025 106115350 \n", - "15 14 106172909 106178778 \n", - "16 14 106208188 106215693 \n", - "17 14 106235994 106243520 \n", - "18 14 106317638 106338808 \n", - "19 14 106347194 106387925 \n", - "\n", - " name score strand \\\n", - "0 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "1 PanHammarstromkategishg19sort::dlbcl::chr1:150... 1000 . \n", - "2 PanHammarstromkategishg19sort::dlbcl::chr1:203... 1000 . \n", - "3 PanHammarstromkategishg19sort::dlbcl::chr1:226... 1000 . \n", - "4 PanHammarstromkategishg19sort::dlbcl::chr10:61... 1000 . \n", - "5 PanHammarstromkategishg19sort::dlbcl::chr11:12... 1000 . \n", - "6 PanHammarstromkategishg19sort::dlbcl::chr12:86... 1000 . \n", - "7 PanHammarstromkategishg19sort::dlbcl::chr12:92... 1000 . \n", - "8 PanHammarstromkategishg19sort::dlbcl::chr12:11... 1000 . \n", - "9 PanHammarstromkategishg19sort::dlbcl::chr12:12... 1000 . \n", - "10 PanHammarstromkategishg19sort::dlbcl::chr13:75... 1000 . \n", - "11 PanHammarstromkategishg19sort::dlbcl::chr14:68... 1000 . \n", - "12 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "13 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "14 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "15 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "16 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "17 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "18 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "19 PanHammarstromkategishg19sort::dlbcl::chr14:10... 1000 . \n", - "\n", - " thickStart thickEnd itemRgb blockCount \\\n", - "0 21207433 223477546 0 14 \n", - "1 150546616 150552004 0 1 \n", - "2 203273727 203276488 0 1 \n", - "3 226924119 226926747 0 1 \n", - "4 63659531 63666480 0 1 \n", - "5 128389025 128392119 0 1 \n", - "6 8762070 8765407 0 1 \n", - "7 92535690 92540320 0 1 \n", - "8 113493744 113519862 0 1 \n", - "9 122454746 122467180 0 1 \n", - "10 75981170 75984325 0 1 \n", - "11 69256132 69262207 0 1 \n", - "12 106066053 106072343 0 1 \n", - "13 106091190 106095703 0 1 \n", - "14 106110025 106115350 0 1 \n", - "15 106172909 106178778 0 1 \n", - "16 106208188 106215693 0 1 \n", - "17 106235994 106243520 0 1 \n", - "18 106317638 106338808 0 5 \n", - "19 106347194 106387925 0 1 \n", - "\n", - " blockSizes \\\n", - "0 39,74,116,36,61,35,55,53,132,421,600,102,42,53, \n", - "1 5388, \n", - "2 2761, \n", - "3 2628, \n", - "4 6949, \n", - "5 3094, \n", - "6 3337, \n", - "7 4630, \n", - "8 26118, \n", - "9 12434, \n", - "10 3155, \n", - "11 6075, \n", - "12 6290, \n", - "13 4513, \n", - "14 5325, \n", - "15 5869, \n", - "16 7505, \n", - "17 7526, \n", - "18 21170,50,121,37,50, \n", - "19 40731, \n", - "\n", - " blockStarts \n", - "0 70725248,202270039,105798379,34442507,85308250... \n", - "1 0, \n", - "2 0, \n", - "3 0, \n", - "4 0, \n", - "5 0, \n", - "6 0, \n", - "7 0, \n", - "8 0, \n", - "9 0, \n", - "10 0, \n", - "11 0, \n", - "12 0, \n", - "13 0, \n", - "14 0, \n", - "15 0, \n", - "16 0, \n", - "17 0, \n", - "18 0,15939,15939,16002,16039, \n", - "19 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
0113424001342510allsamplegt15scoregt1hg19::dlbcl::chr1:1407021...1000.1342400134251001110,0,
1162584806259620allsamplegt15scoregt1hg19::dlbcl::chr1:6198421...1000.62584806259620011140,0,
2180218508022220allsamplegt15scoregt1hg19::dlbcl::chr1:7961791...1000.8021850802222001370,0,
311107280011073390allsamplegt15scoregt1hg19::dlbcl::chr1:1101274...1000.110728001107339001590,0,
411683960016841510allsamplegt15scoregt1hg19::dlbcl::chr1:1651310...1000.1683960016841510011910,0,
511722181017223710allsamplegt15scoregt1hg19::dlbcl::chr1:1689531...1000.1722181017223710011900,0,
611992360019924410allsamplegt15scoregt1hg19::dlbcl::chr1:1959710...1000.199236001992441001810,0,
712235214022352450allsamplegt15scoregt1hg19::dlbcl::chr1:2202564...1000.223521402235245001310,0,
812237924022379960allsamplegt15scoregt1hg19::dlbcl::chr1:2205274...1000.223792402237996001720,0,
912415134024151840allsamplegt15scoregt1hg19::dlbcl::chr1:2382485...1000.241513402415184001500,0,
1012428649024287080allsamplegt15scoregt1hg19::dlbcl::chr1:2396000...1000.242864902428708001590,0,
1112557352025573790allsamplegt15scoregt1hg19::dlbcl::chr1:2524703...1000.255735202557379001270,0,
1212632346026323700allsamplegt15scoregt1hg19::dlbcl::chr1:2599697...1000.263234602632370001240,0,
1312632436026324510allsamplegt15scoregt1hg19::dlbcl::chr1:2599787...1000.263243602632451001150,0,
1412675887026759310allsamplegt15scoregt1hg19::dlbcl::chr1:2643238...1000.267588702675931001440,0,
1512707039027071040allsamplegt15scoregt1hg19::dlbcl::chr1:2674390...1000.270703902707104001650,0,
1612865569028655790allsamplegt15scoregt1hg19::dlbcl::chr1:2832918...1000.286556902865579001100,0,
1712883260028832730allsamplegt15scoregt1hg19::dlbcl::chr1:2850608...1000.288326002883273001130,0,
1813122747031227860allsamplegt15scoregt1hg19::dlbcl::chr1:3075462...1000.312274703122786001390,0,
1913239416032394730allsamplegt15scoregt1hg19::dlbcl::chr1:3192856...1000.323941603239473001570,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end \\\n", - "0 1 1342400 1342510 \n", - "1 1 6258480 6259620 \n", - "2 1 8021850 8022220 \n", - "3 1 11072800 11073390 \n", - "4 1 16839600 16841510 \n", - "5 1 17221810 17223710 \n", - "6 1 19923600 19924410 \n", - "7 1 22352140 22352450 \n", - "8 1 22379240 22379960 \n", - "9 1 24151340 24151840 \n", - "10 1 24286490 24287080 \n", - "11 1 25573520 25573790 \n", - "12 1 26323460 26323700 \n", - "13 1 26324360 26324510 \n", - "14 1 26758870 26759310 \n", - "15 1 27070390 27071040 \n", - "16 1 28655690 28655790 \n", - "17 1 28832600 28832730 \n", - "18 1 31227470 31227860 \n", - "19 1 32394160 32394730 \n", - "\n", - " name score strand \\\n", - "0 allsamplegt15scoregt1hg19::dlbcl::chr1:1407021... 1000 . \n", - "1 allsamplegt15scoregt1hg19::dlbcl::chr1:6198421... 1000 . \n", - "2 allsamplegt15scoregt1hg19::dlbcl::chr1:7961791... 1000 . \n", - "3 allsamplegt15scoregt1hg19::dlbcl::chr1:1101274... 1000 . \n", - "4 allsamplegt15scoregt1hg19::dlbcl::chr1:1651310... 1000 . \n", - "5 allsamplegt15scoregt1hg19::dlbcl::chr1:1689531... 1000 . \n", - "6 allsamplegt15scoregt1hg19::dlbcl::chr1:1959710... 1000 . \n", - "7 allsamplegt15scoregt1hg19::dlbcl::chr1:2202564... 1000 . \n", - "8 allsamplegt15scoregt1hg19::dlbcl::chr1:2205274... 1000 . \n", - "9 allsamplegt15scoregt1hg19::dlbcl::chr1:2382485... 1000 . \n", - "10 allsamplegt15scoregt1hg19::dlbcl::chr1:2396000... 1000 . \n", - "11 allsamplegt15scoregt1hg19::dlbcl::chr1:2524703... 1000 . \n", - "12 allsamplegt15scoregt1hg19::dlbcl::chr1:2599697... 1000 . \n", - "13 allsamplegt15scoregt1hg19::dlbcl::chr1:2599787... 1000 . \n", - "14 allsamplegt15scoregt1hg19::dlbcl::chr1:2643238... 1000 . \n", - "15 allsamplegt15scoregt1hg19::dlbcl::chr1:2674390... 1000 . \n", - "16 allsamplegt15scoregt1hg19::dlbcl::chr1:2832918... 1000 . \n", - "17 allsamplegt15scoregt1hg19::dlbcl::chr1:2850608... 1000 . \n", - "18 allsamplegt15scoregt1hg19::dlbcl::chr1:3075462... 1000 . \n", - "19 allsamplegt15scoregt1hg19::dlbcl::chr1:3192856... 1000 . \n", - "\n", - " thickStart thickEnd itemRgb blockCount blockSizes blockStarts \n", - "0 1342400 1342510 0 1 110, 0, \n", - "1 6258480 6259620 0 1 1140, 0, \n", - "2 8021850 8022220 0 1 370, 0, \n", - "3 11072800 11073390 0 1 590, 0, \n", - "4 16839600 16841510 0 1 1910, 0, \n", - "5 17221810 17223710 0 1 1900, 0, \n", - "6 19923600 19924410 0 1 810, 0, \n", - "7 22352140 22352450 0 1 310, 0, \n", - "8 22379240 22379960 0 1 720, 0, \n", - "9 24151340 24151840 0 1 500, 0, \n", - "10 24286490 24287080 0 1 590, 0, \n", - "11 25573520 25573790 0 1 270, 0, \n", - "12 26323460 26323700 0 1 240, 0, \n", - "13 26324360 26324510 0 1 150, 0, \n", - "14 26758870 26759310 0 1 440, 0, \n", - "15 27070390 27071040 0 1 650, 0, \n", - "16 28655690 28655790 0 1 100, 0, \n", - "17 28832600 28832730 0 1 130, 0, \n", - "18 31227470 31227860 0 1 390, 0, \n", - "19 32394160 32394730 0 1 570, 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
0117064121732343Bradnersuperenhancershg19sort::dlbcl::chr1:177...1000.170641217323430125931,0,
1117516681842316Bradnersuperenhancershg19sort::dlbcl::chr1:182...1000.175166818423160190648,0,
2140598314133780Bradnersuperenhancershg19sort::dlbcl::chr1:399...1000.405983141337800173949,0,
3166168866664806Bradnersuperenhancershg19sort::dlbcl::chr1:655...1000.661688666648060147920,0,
4173919317440079Bradnersuperenhancershg19sort::dlbcl::chr1:733...1000.739193174400790148148,0,
5184548488501956Bradnersuperenhancershg19sort::dlbcl::chr1:839...1000.845484885019560147108,0,
6185523668594467Bradnersuperenhancershg19sort::dlbcl::chr1:849...1000.855236685944670142101,0,
7189330578944097Bradnersuperenhancershg19sort::dlbcl::chr1:887...1000.893305789440970111040,0,
8191200659146380Bradnersuperenhancershg19sort::dlbcl::chr1:906...1000.912006591463800126315,0,
9196856499719974Bradnersuperenhancershg19sort::dlbcl::chr1:962...1000.968564997199740134325,0,
10198399349935747Bradnersuperenhancershg19sort::dlbcl::chr1:977...1000.983993499357470195813,0,
1111081460010857062Bradnersuperenhancershg19sort::dlbcl::chr1:107...1000.10814600108570620142462,0,
1211161783911631808Bradnersuperenhancershg19sort::dlbcl::chr1:115...1000.11617839116318080113969,0,
1311218474612245090Bradnersuperenhancershg19sort::dlbcl::chr1:121...1000.12184746122450900160344,0,
1411399186914091409Bradnersuperenhancershg19sort::dlbcl::chr1:136...1000.13991869140914090199540,0,
1511615621616178526Bradnersuperenhancershg19sort::dlbcl::chr1:158...1000.16156216161785260122310,0,
1611627404316303645Bradnersuperenhancershg19sort::dlbcl::chr1:159...1000.16274043163036450129602,0,
1711722152817256732Bradnersuperenhancershg19sort::dlbcl::chr1:168...1000.17221528172567320135204,0,
1811825099418312398Bradnersuperenhancershg19sort::dlbcl::chr1:179...1000.18250994183123980161404,0,
1911836139518375787Bradnersuperenhancershg19sort::dlbcl::chr1:180...1000.18361395183757870114392,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end \\\n", - "0 1 1706412 1732343 \n", - "1 1 1751668 1842316 \n", - "2 1 4059831 4133780 \n", - "3 1 6616886 6664806 \n", - "4 1 7391931 7440079 \n", - "5 1 8454848 8501956 \n", - "6 1 8552366 8594467 \n", - "7 1 8933057 8944097 \n", - "8 1 9120065 9146380 \n", - "9 1 9685649 9719974 \n", - "10 1 9839934 9935747 \n", - "11 1 10814600 10857062 \n", - "12 1 11617839 11631808 \n", - "13 1 12184746 12245090 \n", - "14 1 13991869 14091409 \n", - "15 1 16156216 16178526 \n", - "16 1 16274043 16303645 \n", - "17 1 17221528 17256732 \n", - "18 1 18250994 18312398 \n", - "19 1 18361395 18375787 \n", - "\n", - " name score strand \\\n", - "0 Bradnersuperenhancershg19sort::dlbcl::chr1:177... 1000 . \n", - "1 Bradnersuperenhancershg19sort::dlbcl::chr1:182... 1000 . \n", - "2 Bradnersuperenhancershg19sort::dlbcl::chr1:399... 1000 . \n", - "3 Bradnersuperenhancershg19sort::dlbcl::chr1:655... 1000 . \n", - "4 Bradnersuperenhancershg19sort::dlbcl::chr1:733... 1000 . \n", - "5 Bradnersuperenhancershg19sort::dlbcl::chr1:839... 1000 . \n", - "6 Bradnersuperenhancershg19sort::dlbcl::chr1:849... 1000 . \n", - "7 Bradnersuperenhancershg19sort::dlbcl::chr1:887... 1000 . \n", - "8 Bradnersuperenhancershg19sort::dlbcl::chr1:906... 1000 . \n", - "9 Bradnersuperenhancershg19sort::dlbcl::chr1:962... 1000 . \n", - "10 Bradnersuperenhancershg19sort::dlbcl::chr1:977... 1000 . \n", - "11 Bradnersuperenhancershg19sort::dlbcl::chr1:107... 1000 . \n", - "12 Bradnersuperenhancershg19sort::dlbcl::chr1:115... 1000 . \n", - "13 Bradnersuperenhancershg19sort::dlbcl::chr1:121... 1000 . \n", - "14 Bradnersuperenhancershg19sort::dlbcl::chr1:136... 1000 . \n", - "15 Bradnersuperenhancershg19sort::dlbcl::chr1:158... 1000 . \n", - "16 Bradnersuperenhancershg19sort::dlbcl::chr1:159... 1000 . \n", - "17 Bradnersuperenhancershg19sort::dlbcl::chr1:168... 1000 . \n", - "18 Bradnersuperenhancershg19sort::dlbcl::chr1:179... 1000 . \n", - "19 Bradnersuperenhancershg19sort::dlbcl::chr1:180... 1000 . \n", - "\n", - " thickStart thickEnd itemRgb blockCount blockSizes blockStarts \n", - "0 1706412 1732343 0 1 25931, 0, \n", - "1 1751668 1842316 0 1 90648, 0, \n", - "2 4059831 4133780 0 1 73949, 0, \n", - "3 6616886 6664806 0 1 47920, 0, \n", - "4 7391931 7440079 0 1 48148, 0, \n", - "5 8454848 8501956 0 1 47108, 0, \n", - "6 8552366 8594467 0 1 42101, 0, \n", - "7 8933057 8944097 0 1 11040, 0, \n", - "8 9120065 9146380 0 1 26315, 0, \n", - "9 9685649 9719974 0 1 34325, 0, \n", - "10 9839934 9935747 0 1 95813, 0, \n", - "11 10814600 10857062 0 1 42462, 0, \n", - "12 11617839 11631808 0 1 13969, 0, \n", - "13 12184746 12245090 0 1 60344, 0, \n", - "14 13991869 14091409 0 1 99540, 0, \n", - "15 16156216 16178526 0 1 22310, 0, \n", - "16 16274043 16303645 0 1 29602, 0, \n", - "17 17221528 17256732 0 1 35204, 0, \n", - "18 18250994 18312398 0 1 61404, 0, \n", - "19 18361395 18375787 0 1 14392, 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
chromstartendnamescorestrandthickStartthickEnditemRgbblockCountblockSizesblockStarts
012761029320allsamplegt5scoregt1hg19::dlbcl::chr1:27611-29...1000.2761029320011710,0,
11713220713660allsamplegt5scoregt1hg19::dlbcl::chr1:777841-7...1000.71322071366001440,0,
21724060727180allsamplegt5scoregt1hg19::dlbcl::chr1:788681-7...1000.724060727180013120,0,
31824880825300allsamplegt5scoregt1hg19::dlbcl::chr1:889501-8...1000.82488082530001420,0,
41902180902660allsamplegt5scoregt1hg19::dlbcl::chr1:966801-9...1000.90218090266001480,0,
51948960949360allsamplegt5scoregt1hg19::dlbcl::chr1:1013581-...1000.94896094936001400,0,
61974090974710allsamplegt5scoregt1hg19::dlbcl::chr1:1038711-...1000.97409097471001620,0,
7110037201006540allsamplegt5scoregt1hg19::dlbcl::chr1:1068341-...1000.10037201006540012820,0,
8110503801051430allsamplegt5scoregt1hg19::dlbcl::chr1:1115001-...1000.10503801051430011050,0,
9110794401080580allsamplegt5scoregt1hg19::dlbcl::chr1:1144061-...1000.10794401080580011140,0,
10111356101137540allsamplegt5scoregt1hg19::dlbcl::chr1:1200231-...1000.11356101137540011930,0,
11111574101158620allsamplegt5scoregt1hg19::dlbcl::chr1:1222031-...1000.11574101158620011210,0,
12112076201209040allsamplegt5scoregt1hg19::dlbcl::chr1:1272241-...1000.12076201209040011420,0,
13113101701310370allsamplegt5scoregt1hg19::dlbcl::chr1:1374791-...1000.1310170131037001200,0,
14113242801325490allsamplegt5scoregt1hg19::dlbcl::chr1:1388901-...1000.13242801325490011210,0,
15113413901342510allsamplegt5scoregt1hg19::dlbcl::chr1:1406011-...1000.13413901342510011120,0,
16113974601398640allsamplegt5scoregt1hg19::dlbcl::chr1:1462081-...1000.13974601398640011180,0,
17114074701408360allsamplegt5scoregt1hg19::dlbcl::chr1:1472091-...1000.1407470140836001890,0,
18114480701448580allsamplegt5scoregt1hg19::dlbcl::chr1:1512691-...1000.1448070144858001510,0,
19115514801551880allsamplegt5scoregt1hg19::dlbcl::chr1:1616101-...1000.1551480155188001400,0,
\n", - "
" - ], - "text/plain": [ - " chrom start end name \\\n", - "0 1 27610 29320 allsamplegt5scoregt1hg19::dlbcl::chr1:27611-29... \n", - "1 1 713220 713660 allsamplegt5scoregt1hg19::dlbcl::chr1:777841-7... \n", - "2 1 724060 727180 allsamplegt5scoregt1hg19::dlbcl::chr1:788681-7... \n", - "3 1 824880 825300 allsamplegt5scoregt1hg19::dlbcl::chr1:889501-8... \n", - "4 1 902180 902660 allsamplegt5scoregt1hg19::dlbcl::chr1:966801-9... \n", - "5 1 948960 949360 allsamplegt5scoregt1hg19::dlbcl::chr1:1013581-... \n", - "6 1 974090 974710 allsamplegt5scoregt1hg19::dlbcl::chr1:1038711-... \n", - "7 1 1003720 1006540 allsamplegt5scoregt1hg19::dlbcl::chr1:1068341-... \n", - "8 1 1050380 1051430 allsamplegt5scoregt1hg19::dlbcl::chr1:1115001-... \n", - "9 1 1079440 1080580 allsamplegt5scoregt1hg19::dlbcl::chr1:1144061-... \n", - "10 1 1135610 1137540 allsamplegt5scoregt1hg19::dlbcl::chr1:1200231-... \n", - "11 1 1157410 1158620 allsamplegt5scoregt1hg19::dlbcl::chr1:1222031-... \n", - "12 1 1207620 1209040 allsamplegt5scoregt1hg19::dlbcl::chr1:1272241-... \n", - "13 1 1310170 1310370 allsamplegt5scoregt1hg19::dlbcl::chr1:1374791-... \n", - "14 1 1324280 1325490 allsamplegt5scoregt1hg19::dlbcl::chr1:1388901-... \n", - "15 1 1341390 1342510 allsamplegt5scoregt1hg19::dlbcl::chr1:1406011-... \n", - "16 1 1397460 1398640 allsamplegt5scoregt1hg19::dlbcl::chr1:1462081-... \n", - "17 1 1407470 1408360 allsamplegt5scoregt1hg19::dlbcl::chr1:1472091-... \n", - "18 1 1448070 1448580 allsamplegt5scoregt1hg19::dlbcl::chr1:1512691-... \n", - "19 1 1551480 1551880 allsamplegt5scoregt1hg19::dlbcl::chr1:1616101-... \n", - "\n", - " score strand thickStart thickEnd itemRgb blockCount blockSizes \\\n", - "0 1000 . 27610 29320 0 1 1710, \n", - "1 1000 . 713220 713660 0 1 440, \n", - "2 1000 . 724060 727180 0 1 3120, \n", - "3 1000 . 824880 825300 0 1 420, \n", - "4 1000 . 902180 902660 0 1 480, \n", - "5 1000 . 948960 949360 0 1 400, \n", - "6 1000 . 974090 974710 0 1 620, \n", - "7 1000 . 1003720 1006540 0 1 2820, \n", - "8 1000 . 1050380 1051430 0 1 1050, \n", - "9 1000 . 1079440 1080580 0 1 1140, \n", - "10 1000 . 1135610 1137540 0 1 1930, \n", - "11 1000 . 1157410 1158620 0 1 1210, \n", - "12 1000 . 1207620 1209040 0 1 1420, \n", - "13 1000 . 1310170 1310370 0 1 200, \n", - "14 1000 . 1324280 1325490 0 1 1210, \n", - "15 1000 . 1341390 1342510 0 1 1120, \n", - "16 1000 . 1397460 1398640 0 1 1180, \n", - "17 1000 . 1407470 1408360 0 1 890, \n", - "18 1000 . 1448070 1448580 0 1 510, \n", - "19 1000 . 1551480 1551880 0 1 400, \n", - "\n", - " blockStarts \n", - "0 0, \n", - "1 0, \n", - "2 0, \n", - "3 0, \n", - "4 0, \n", - "5 0, \n", - "6 0, \n", - "7 0, \n", - "8 0, \n", - "9 0, \n", - "10 0, \n", - "11 0, \n", - "12 0, \n", - "13 0, \n", - "14 0, \n", - "15 0, \n", - "16 0, \n", - "17 0, \n", - "18 0, \n", - "19 0, " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "for key in dict_enhancers:\n", - " df = dict_enhancers[key]\n", - " il_name = key.split('.bed')[0].replace('.', '_')\n", - " # data frame containing enhancers associated with each interval\n", - " df_enhancers = df.enhancer_hg38.str.split(',', expand=True)\n", - " # unique list of all enhancers in interval list\n", - " enhancers = []\n", - " for col in df_enhancers.columns:\n", - " enhancers += df_enhancers[col][~df_enhancers[col].isna()].tolist()\n", - " enhancers = np.unique(enhancers)\n", - " # columns of bed12 file \n", - " cols_bed12 = ['chrom', 'start', 'end', 'name', 'score', 'strand', 'thickStart', 'thickEnd', 'itemRgb', 'blockCount', 'blockSizes', 'blockStarts']\n", - " data = []\n", - " for e in enhancers:\n", - " dfi = df.loc[np.any(df_enhancers==e, axis=1)].copy().sort_values(['chromosome', 'start'], ignore_index=True)\n", - " # chromosome number\n", - " chrom = dfi.chromosome.unique()[0][3:]\n", - " # start position of first interval in set (zero-indexed)\n", - " start = dfi.start.min()\n", - " # end position of final interval in set (zero-indexed)\n", - " end = dfi.end.max()\n", - " # unique name for this set of annotations\n", - " name = '{}::dlbcl::{}::NA'.format(il_name.replace('_', ''), e)\n", - " # any non-negative number (we usually use 0)\n", - " score = 1000\n", - " # +, -, or . if no strand\n", - " strand = '.'\n", - " # same as start (for Dig)\n", - " thickStart = start\n", - " # same as end (for Dig)\n", - " thickEnd = end\n", - " # ignored (we usually set to 0)\n", - " itemRgb = 0\n", - " # number of intervals in the set\n", - " blockCount = len(dfi)\n", - " # # length of each interval in the set\n", - " blockSizes = ','.join((dfi.end - dfi.start).astype(str).tolist()) + ','\n", - " # # start position of each interval relative to start (so the first entry is always 0)\n", - " blockStarts = ','.join((dfi.start - dfi.start.min()).astype(str).tolist()) + ','\n", - " data.append([chrom, start, end, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts])\n", - " \n", - " df_bed12 = pd.DataFrame(data=data, columns=cols_bed12).sort_values(['chrom', 'start'], ignore_index=True)\n", - " df_bed12.to_csv(dir_interval_lists + il_name + '_bed12.bed', sep='\\t', index=False, header=False, lineterminator='\\n')\n", - " display(df_bed12.head(20))" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "b6668884-a6bb-4dfd-aadf-fa67c6e70cdc", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
122662882266598enhancers::chr1:2264800-2266800::NA::NA904.2266288.12266598.101.1310,0,
0130109853011105enhancers::chr1:3010200-3011600::NA::NA905.3010985301110501120,0,
111119775711200572enhancers::chr1:11197600-11200600::NA::NA909.111977571120057203668,181,58,0,1378,2757,
211123737511237715enhancers::chr1:11235600-11238000::NA::NA900.112373751123771501340,0,
311124668511248729enhancers::chr1:11244800-11250000::NA::NA911.112466851124872902669,151,0,1893,
411125706911257242enhancers::chr1:11257000-11258000::NA::NA900.112570691125724201173,0,
.......................................
30810X154417120154417479enhancers::chrX:154416000-154417800::NA::NA924.15441712015441747901359,0,
30811X154459477154459797enhancers::chrX:154459200-154460200::NA::NA905.15445947715445979701320,0,
30812X154543197154543363enhancers::chrX:154543000-154543600::NA::NA935.15454319715454336301166,0,
30813X154564200154564800enhancers::chrX:154564200-154564800::NA::NA908.1545642001545648000275,317,0,283,
30814X154975537154975664enhancers::chrX:154974800-154975800::NA::NA909.15497553715497566401127,0,
\n", - "

30815 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " 1 2266288 2266598 enhancers::chr1:2264800-2266800::NA::NA \\\n", - "0 1 3010985 3011105 enhancers::chr1:3010200-3011600::NA::NA \n", - "1 1 11197757 11200572 enhancers::chr1:11197600-11200600::NA::NA \n", - "2 1 11237375 11237715 enhancers::chr1:11235600-11238000::NA::NA \n", - "3 1 11246685 11248729 enhancers::chr1:11244800-11250000::NA::NA \n", - "4 1 11257069 11257242 enhancers::chr1:11257000-11258000::NA::NA \n", - "... .. ... ... ... \n", - "30810 X 154417120 154417479 enhancers::chrX:154416000-154417800::NA::NA \n", - "30811 X 154459477 154459797 enhancers::chrX:154459200-154460200::NA::NA \n", - "30812 X 154543197 154543363 enhancers::chrX:154543000-154543600::NA::NA \n", - "30813 X 154564200 154564800 enhancers::chrX:154564200-154564800::NA::NA \n", - "30814 X 154975537 154975664 enhancers::chrX:154974800-154975800::NA::NA \n", - "\n", - " 904 . 2266288.1 2266598.1 0 1.1 310, 0, \n", - "0 905 . 3010985 3011105 0 1 120, 0, \n", - "1 909 . 11197757 11200572 0 3 668,181,58, 0,1378,2757, \n", - "2 900 . 11237375 11237715 0 1 340, 0, \n", - "3 911 . 11246685 11248729 0 2 669,151, 0,1893, \n", - "4 900 . 11257069 11257242 0 1 173, 0, \n", - "... ... .. ... ... .. ... ... ... \n", - "30810 924 . 154417120 154417479 0 1 359, 0, \n", - "30811 905 . 154459477 154459797 0 1 320, 0, \n", - "30812 935 . 154543197 154543363 0 1 166, 0, \n", - "30813 908 . 154564200 154564800 0 2 75,317, 0,283, \n", - "30814 909 . 154975537 154975664 0 1 127, 0, \n", - "\n", - "[30815 rows x 12 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.read_csv('./interval_lists/enhancers.bed', sep='\\t')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/generate_dig_report_coding.py b/generate_dig_report_coding.py index 462133e..8cf544a 100644 --- a/generate_dig_report_coding.py +++ b/generate_dig_report_coding.py @@ -115,11 +115,16 @@ def generate_dig_report(path_to_dig_results, dir_output, cgc_list_path, pancan_l # Adding indicator of genes being part of the CGC or PanCan list df['CGC'] = df['GENE'].isin(cgc_list) df['PANCAN'] = df['GENE'].isin(pancan_list) - # Adding new columns for Non-synonymous SNVs + Indels - df['OBS_MUT'] = df['OBS_NONSYN'] + df['OBS_INDEL'] - df['EXP_MUT'] = df['EXP_NONSYN'] + df['EXP_INDEL'] - # Computing lower and upper bounds for the p-values muts_ts = list(mut_type.values()) + if 'EXP_INDEL' in df.columns: + # Adding new columns for Non-synonymous SNVs + Indels + df['OBS_MUT'] = df['OBS_NONSYN'] + df['OBS_INDEL'] + df['EXP_MUT'] = df['EXP_NONSYN'] + df['EXP_INDEL'] + else: + for key in list(mut_type.keys()): + if 'indel' in key.lower(): + del mut_type[key] + # Computing lower and upper bounds for the p-values muts_ts.remove('INDEL') muts_ts.remove('MUT') for m in muts_ts: @@ -165,46 +170,47 @@ def generate_dig_report(path_to_dig_results, dir_output, cgc_list_path, pancan_l df.ALPHA, 1 / (df.THETA * df['Pi_' + m] + 1) ) - # total indel burden - df['PVAL_INDEL_BURDEN_recalc'] = nb_pvalue_greater_midp( - df.OBS_INDEL, - df.ALPHA_INDEL, - 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) - ) - df['PVAL_INDEL_BURDEN_unif'] = nb_pvalue_uniform_midp( - df.OBS_INDEL, - df.ALPHA_INDEL, - 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) - ) - df['PVAL_INDEL_BURDEN_lower'] = nb_pvalue_lower( - df.OBS_INDEL, - df.ALPHA_INDEL, - 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) - ) - df['PVAL_INDEL_BURDEN_upper'] = nb_pvalue_upper( - df.OBS_INDEL, - df.ALPHA_INDEL, - 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) - ) - # p-values for nonsynonymous SNVs + indels - col_mut = 'PVAL_MUT_BURDEN' - df[col_mut + '_recalc'] = np.nan - df[col_mut + '_unif'] = np.nan - df[col_mut + '_lower'] = np.nan - df[col_mut + '_upper'] = np.nan - for idx in df.index: - df.at[idx, col_mut + '_recalc'] = sp.stats.combine_pvalues( - [df.at[idx, 'PVAL_NONSYN_BURDEN_recalc'], df.at[idx, 'PVAL_INDEL_BURDEN_recalc']], - method='fisher')[1] - df.at[idx, col_mut + '_unif'] = sp.stats.combine_pvalues( - [df.at[idx, 'PVAL_NONSYN_BURDEN_unif'], df.at[idx, 'PVAL_INDEL_BURDEN_unif']], - method='fisher')[1] - df.at[idx, col_mut + '_lower'] = sp.stats.combine_pvalues( - [df.at[idx, 'PVAL_NONSYN_BURDEN_lower'], df.at[idx, 'PVAL_INDEL_BURDEN_lower']], - method='fisher')[1] - df.at[idx, col_mut + '_upper'] = sp.stats.combine_pvalues( - [df.at[idx, 'PVAL_NONSYN_BURDEN_upper'], df.at[idx, 'PVAL_INDEL_BURDEN_upper']], - method='fisher')[1] + if 'EXP_INDEL' in df.columns: + # total indel burden + df['PVAL_INDEL_BURDEN_recalc'] = nb_pvalue_greater_midp( + df.OBS_INDEL, + df.ALPHA_INDEL, + 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) + ) + df['PVAL_INDEL_BURDEN_unif'] = nb_pvalue_uniform_midp( + df.OBS_INDEL, + df.ALPHA_INDEL, + 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) + ) + df['PVAL_INDEL_BURDEN_lower'] = nb_pvalue_lower( + df.OBS_INDEL, + df.ALPHA_INDEL, + 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) + ) + df['PVAL_INDEL_BURDEN_upper'] = nb_pvalue_upper( + df.OBS_INDEL, + df.ALPHA_INDEL, + 1 / (df.THETA_INDEL * df.Pi_INDEL + 1) + ) + # p-values for nonsynonymous SNVs + indels + col_mut = 'PVAL_MUT_BURDEN' + df[col_mut + '_recalc'] = np.nan + df[col_mut + '_unif'] = np.nan + df[col_mut + '_lower'] = np.nan + df[col_mut + '_upper'] = np.nan + for idx in df.index: + df.at[idx, col_mut + '_recalc'] = sp.stats.combine_pvalues( + [df.at[idx, 'PVAL_NONSYN_BURDEN_recalc'], df.at[idx, 'PVAL_INDEL_BURDEN_recalc']], + method='fisher')[1] + df.at[idx, col_mut + '_unif'] = sp.stats.combine_pvalues( + [df.at[idx, 'PVAL_NONSYN_BURDEN_unif'], df.at[idx, 'PVAL_INDEL_BURDEN_unif']], + method='fisher')[1] + df.at[idx, col_mut + '_lower'] = sp.stats.combine_pvalues( + [df.at[idx, 'PVAL_NONSYN_BURDEN_lower'], df.at[idx, 'PVAL_INDEL_BURDEN_lower']], + method='fisher')[1] + df.at[idx, col_mut + '_upper'] = sp.stats.combine_pvalues( + [df.at[idx, 'PVAL_NONSYN_BURDEN_upper'], df.at[idx, 'PVAL_INDEL_BURDEN_upper']], + method='fisher')[1] def generate_plot_data(mut, bur, display_bounds, scatterpoint): """ diff --git a/generate_dig_report_combined.py b/generate_dig_report_combined.py index 8b9b1a7..f842122 100644 --- a/generate_dig_report_combined.py +++ b/generate_dig_report_combined.py @@ -123,7 +123,7 @@ def generate_dig_report( '3utr': path_to_3utr_results } result_types = list(dig_outputs.keys()) - + cols_mt_set = set() for result_type in result_types: dfi = pd.read_csv(dig_outputs[result_type], sep='\t') @@ -131,23 +131,34 @@ def generate_dig_report( if result_type == 'coding': dfi = dfi.set_index('GENE') df_comb = pd.DataFrame(index=dfi.index.copy()) - col_obs = ['OBS_NONSYN', 'N_SAMP_NONSYN', 'OBS_INDEL'] - col_pi = ['Pi_NONSYN', 'Pi_NONSYN', 'Pi_INDEL'] + col_obs = ['OBS_NONSYN', 'N_SAMP_NONSYN'] + col_pi = ['Pi_NONSYN', 'Pi_NONSYN'] col_size = 'GENE_LENGTH' else: dfi['GENE'] = dfi.ELT.str.split('::', expand=True)[2] dfi = dfi.sort_values(['GENE', 'OBS_SNV']) dfi = dfi.set_index('GENE') dfi = dfi.loc[~dfi.index.duplicated()] - col_obs = ['OBS_SNV', 'OBS_SAMPLES', 'OBS_INDEL'] - col_pi = ['Pi_SUM', 'Pi_SUM', 'Pi_INDEL'] + col_obs = ['OBS_SNV', 'OBS_SAMPLES'] + col_pi = ['Pi_SUM', 'Pi_SUM'] col_size = 'ELT_SIZE' + cols_mt = ['SNV', 'SNV_SAMPLE'] + if 'EXP_INDEL' in dfi.columns: + col_obs += ['OBS_INDEL'] + col_pi += ['Pi_INDEL'] + cols_mt += ['INDEL', 'MUT'] + else: + for key in list(mut_type.keys()): + if 'indel' in key.lower(): + del mut_type[key] + + cols_mt_set.update(cols_mt) genes_in_coding = dfi.index[dfi.index.isin(df_comb.index)] dfi_comp = dfi.loc[genes_in_coding] df_comb.loc[genes_in_coding, 'SIZE_' + result_type] = dfi_comp[col_size].to_numpy().copy() - for j, mt in enumerate(['SNV', 'SNV_SAMPLE', 'INDEL']): + for j, mt in enumerate(cols_mt[:3]): if mt == 'INDEL': pfx_at = '_INDEL' @@ -173,29 +184,29 @@ def generate_dig_report( dfi_comp['ALPHA' + pfx_at], 1 / (dfi_comp['THETA' + pfx_at] * dfi_comp[col_pi[j]] + 1) ) - - # combining p-values with Fisher's method - col_i = 'PVAL_' + result_type + '_' + 'MUT' - df_comb[col_i + '_recalc'] = np.nan - df_comb[col_i + '_unif'] = np.nan - df_comb[col_i + '_lower'] = np.nan - df_comb[col_i + '_upper'] = np.nan - for idx in df_comb.index: - df_comb.at[idx, col_i + '_recalc'] = sp.stats.combine_pvalues( - [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_recalc'], - df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_recalc']], method='fisher')[1] - df_comb.at[idx, col_i + '_unif'] = sp.stats.combine_pvalues( - [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_unif'], - df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_unif']], method='fisher')[1] - df_comb.at[idx, col_i + '_lower'] = sp.stats.combine_pvalues( - [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_lower'], - df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_lower']], method='fisher')[1] - df_comb.at[idx, col_i + '_upper'] = sp.stats.combine_pvalues( - [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_upper'], - df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_upper']], method='fisher')[1] + if 'EXP_INDEL' in dfi.columns: + # combining p-values with Fisher's method + col_i = 'PVAL_' + result_type + '_' + 'MUT' + df_comb[col_i + '_recalc'] = np.nan + df_comb[col_i + '_unif'] = np.nan + df_comb[col_i + '_lower'] = np.nan + df_comb[col_i + '_upper'] = np.nan + for idx in df_comb.index: + df_comb.at[idx, col_i + '_recalc'] = sp.stats.combine_pvalues( + [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_recalc'], + df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_recalc']], method='fisher')[1] + df_comb.at[idx, col_i + '_unif'] = sp.stats.combine_pvalues( + [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_unif'], + df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_unif']], method='fisher')[1] + df_comb.at[idx, col_i + '_lower'] = sp.stats.combine_pvalues( + [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_lower'], + df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_lower']], method='fisher')[1] + df_comb.at[idx, col_i + '_upper'] = sp.stats.combine_pvalues( + [df_comb.at[idx, 'PVAL_' + result_type + '_SNV_upper'], + df_comb.at[idx, 'PVAL_' + result_type + '_INDEL_upper']], method='fisher')[1] # combining p-values across region types - for mt in ['SNV', 'SNV_SAMPLE', 'INDEL', 'MUT']: + for mt in list(cols_mt_set): for typ in ['recalc', 'unif', 'lower', 'upper']: for idx in df_comb.index: df_comb.at[idx, 'PVAL_' + mt + '_' + typ] = sp.stats.combine_pvalues( diff --git a/generate_dig_report_noncoding.py b/generate_dig_report_noncoding.py index 9cd155a..a3ea8d4 100644 --- a/generate_dig_report_noncoding.py +++ b/generate_dig_report_noncoding.py @@ -115,14 +115,23 @@ def generate_dig_report(path_to_dig_results, dir_output, cgc_list_path, pancan_l # Adding indicator of genes being part of the CGC or PanCan list df['CGC'] = df['GENE'].isin(cgc_list) df['PANCAN'] = df['GENE'].isin(pancan_list) - # Adding new columns for Non-synonymous SNVs + Indels - df['OBS_MUT'] = df['OBS_SNV'] + df['OBS_INDEL'] - df['EXP_MUT'] = df['EXP_SNV'] + df['EXP_INDEL'] - # Computing lower and upper bounds for the p-values - pfxs_obs = ['SNV', 'INDEL', 'SAMPLES', 'MUT'] - pfxs_pval = ['SNV', 'INDEL', 'SAMPLE', 'MUT'] - pfxs_pi = ['SUM', 'INDEL', 'SUM', 'MUT'] - pfxs_at = ['', '_INDEL', '', '', ''] + if 'EXP_INDEL' in df.columns: + # Adding new columns for Non-synonymous SNVs + Indels + df['OBS_MUT'] = df['OBS_SNV'] + df['OBS_INDEL'] + df['EXP_MUT'] = df['EXP_SNV'] + df['EXP_INDEL'] + # Computing lower and upper bounds for the p-values + pfxs_obs = ['SNV', 'INDEL', 'SAMPLES', 'MUT'] + pfxs_pval = ['SNV', 'INDEL', 'SAMPLE', 'MUT'] + pfxs_pi = ['SUM', 'INDEL', 'SUM', 'MUT'] + pfxs_at = ['', '_INDEL', '', '', ''] + else: + for key in list(mut_type.keys()): + if 'indel' in key.lower(): + del mut_type[key] + pfxs_obs = ['SNV', 'SAMPLES'] + pfxs_pval = ['SNV', 'SAMPLE'] + pfxs_pi = ['SUM', 'SUM'] + pfxs_at = ['', ''] for i in range(len(pfxs_obs)): if pfxs_obs[i] == 'MUT': diff --git a/run.py b/run.py index 44a588f..d97f67f 100644 --- a/run.py +++ b/run.py @@ -3,8 +3,8 @@ import generate_dig_report_noncoding import generate_dig_report_combined -cgc_list_path = 'gs://getzlab-workflows-reference_files-oa/hg38/dig/cancer_gene_census_2024_06_20.tsv' -pancan_list_path = 'gs://getzlab-workflows-reference_files-oa/hg38/dig/pancanatlas_genes.tsv' +cgc_list_path = './cancer_gene_census_2024_06_20.tsv' +pancan_list_path = './pancanatlas_genes.tsv' # noncoding regions generate_dig_report_noncoding.generate_dig_report( diff --git a/train_mutation_map.ipynb b/train_mutation_map.ipynb index 8852bbb..2fc3e3e 100644 --- a/train_mutation_map.ipynb +++ b/train_mutation_map.ipynb @@ -564,7 +564,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "fa0b0e9c-df8f-4222-9e89-cd00757af562", "metadata": {}, "outputs": [ @@ -572,148 +572,73 @@ "name": "stdout", "output_type": "stream", "text": [ - "[20241009-01:40:30] [prefect] Starting Slurm controller ...\n", - "[20241009-01:40:30] [prefect] Waiting up to 60 seconds for Slurm controller to start ...\n", - "[20241009-01:40:30] [prefect] Started Slurm controller.\n", - "[20241009-01:40:30] [prefect] Workflow results disk low on space (22 GB remaining)\n", - "[20241009-01:40:30] [prefect] Enqueued workflow DIG_burden_test\n", - "[20241009-01:40:30] [DIG_burden_test:DIG_convert_maf] Hashing file TCGA_WGS_UVM.maf; 100/190 MiB completed\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_tracks] Job avoidance disabled for this task; overwriting output.\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_tracks] Localizing inputs...\n", - "[20241009-01:40:32] [DIG_burden_test:DIG_convert_maf] Localizing inputs...\n", - "[20241009-01:40:32] [DIG_burden_test:DIG_convert_maf] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_convert_maf__2024-10-08--14-33-09_mblxxey_s2nsq0q_cvilttyadla1u\n", - "[20241009-01:40:32] [DIG_burden_test:DIG_convert_maf] 1/1 jobs avoided\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_tracks] Disk name is canine-a88d8673c93a0607e747fcb58805de1e\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_ref_fasta] Job avoidance disabled for this task; overwriting output.\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_ref_fasta] Localizing inputs...\n", - "[20241009-01:40:32] [DIG_burden_test:Localize_ref_fasta] Disk name is canine-5714e9942090819a76b081ec3fd090aa\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_ref_fasta] Found existing disk canine-5714e9942090819a76b081ec3fd090aa\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_ref_fasta] Task staged in /mnt/nfs/workspace/DIG_burden_test/Localize_ref_fasta__2024-10-08--14-33-09_mg4xwxy_tbhx1ki_ykhip0lgm0pxq\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_tracks] Found existing disk canine-a88d8673c93a0607e747fcb58805de1e\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_tracks] Task staged in /mnt/nfs/workspace/DIG_burden_test/Localize_tracks__2024-10-08--14-33-09_mg4xwxy_tbhx1ki_urlzoh5t1wzrw\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_ref_fasta] 1 job submitted.\n", - "[20241009-01:40:33] [DIG_burden_test:Localize_tracks] 1 job submitted.\n", - "[20241009-01:42:03] [DIG_burden_test:Localize_ref_fasta] Finished with status COMPLETED\n", - "[20241009-01:42:03] [DIG_burden_test:Localize_tracks] Finished with status COMPLETED\n", - "[20241009-01:42:03] [DIG_burden_test:DIG_annotate_maf] Localizing inputs...\n", - "[20241009-01:42:03] [DIG_burden_test:DIG_annotate_maf] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_annotate_maf__2024-10-08--14-37-41_exnnusq_s2nsq0q_o35wynerh5vvo\n", - "[20241009-01:42:04] [DIG_burden_test:DIG_annotate_maf] 1/1 jobs avoided\n", - "[20241009-01:42:04] [DIG_burden_test:DIG_unzip_h5] Localizing inputs...\n", - "[20241009-01:42:04] [DIG_burden_test:DIG_unzip_h5] Found existing disk canine-scratch-tracks-0\n", - "[20241009-01:42:04] [DIG_burden_test:DIG_unzip_h5] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_unzip_h5__2024-10-08--14-43-44_iui3eoy_s2nsq0q_3jhzsqsvabgqw\n", - "[20241009-01:42:04] [DIG_burden_test:DIG_unzip_h5] 1 job submitted.\n", - "[20241009-01:44:35] [DIG_burden_test:DIG_unzip_h5] Finished with status COMPLETED\n", - "[20241009-01:44:35] [DIG_burden_test:DIG_add_objectives] Localizing inputs...\n", - "[20241009-01:44:35] [DIG_burden_test:DIG_add_objectives] Found existing disk canine-scratch-tracks-with-objectives-tcga-wgs-uvm-0\n", - "[20241009-01:44:35] [DIG_burden_test:DIG_add_objectives] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_add_objectives__2024-10-08--16-03-18_vave3cy_s2nsq0q_for2pvrvuagyw\n", - "[20241009-01:44:35] [DIG_burden_test:DIG_add_objectives] 1 job submitted.\n", - "[20241009-01:45:05] [DIG_burden_test:DIG_add_objectives] Finished with status COMPLETED\n", - "[20241009-01:45:05] [DIG_burden_test:DIG_run_kfold_training] Localizing inputs...\n", - "[20241009-01:45:05] [DIG_burden_test:DIG_run_kfold_training] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_run_kfold_training__2024-10-08--17-15-49_wlngc2q_exikiti_jyycnxxv4capq\n", - "[20241009-01:45:06] [DIG_burden_test:DIG_run_kfold_training] 1/1 jobs avoided\n", - "[20241009-01:45:06] [DIG_burden_test:DIG_pretrain_region] Localizing inputs...\n", - "[20241009-01:45:06] [DIG_burden_test:DIG_pretrain_region] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_pretrain_region__2024-10-09--01-45-06_htcoyhi_s2nsq0q_bol444355s1tq\n", - "[20241009-01:45:06] [DIG_burden_test:DIG_pretrain_region] 1 job submitted.\n", - "[20241009-01:47:07] [DIG_burden_test:DIG_pretrain_region] Finished with status COMPLETED\n", - "[20241009-01:47:07] [DIG_burden_test:DIG_pretrain_sequence] Localizing inputs...\n", - "[20241009-01:47:07] [DIG_burden_test:DIG_pretrain_sequence] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_pretrain_sequence__2024-10-09--01-47-07_o0g1b3q_s2nsq0q_iavhpxnlwlyng\n", - "[20241009-01:47:07] [DIG_burden_test:DIG_pretrain_sequence] 1 job submitted.\n", - "[20241009-01:47:37] [DIG_burden_test:DIG_pretrain_sequence] Finished with status COMPLETED\n", - "[20241009-01:47:38] [DIG_burden_test:DIG_pretrain_genic] Localizing inputs...\n", - "[20241009-01:47:38] [DIG_burden_test:DIG_pretrain_genic] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_pretrain_genic__2024-10-09--01-47-38_155ewla_s2nsq0q_i3j01fj3tov0m\n", - "[20241009-01:47:38] [DIG_burden_test:DIG_pretrain_genic] 1 job submitted.\n", - "[20241009-01:48:38] [DIG_burden_test:DIG_pretrain_genic] Finished with status COMPLETED\n", - "[20241009-01:48:38] [DIG_burden_test:DIG_test_coding] Localizing inputs...\n", - "[20241009-01:48:38] [DIG_burden_test:DIG_test_coding] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_test_coding__2024-10-09--01-48-38_ajn1isa_s2nsq0q_xxcmtgiacwhfc\n", - "[20241009-01:48:39] [DIG_burden_test:DIG_test_coding] 1 job submitted.\n", - "[20241009-01:48:39] [DIG_burden_test:DIG_preprocess_element_model] Localizing inputs...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Copying gs://getzlab-workflows-reference_files-oa/hg38/dig/element_data.h5...\n", - "\\ [1 files][144.0 MiB/144.0 MiB] \n", - "Operation completed over 1 objects/144.0 MiB. \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[20241009-01:48:42] [DIG_burden_test:DIG_preprocess_element_model] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_preprocess_element_model__2024-10-09--01-48-39_42iihta_s2nsq0q_lv1fqminjd15o\n", - "[20241009-01:48:42] [DIG_burden_test:DIG_preprocess_element_model] 3 jobs submitted.\n", - "[20241009-01:49:09] [DIG_burden_test:DIG_test_coding] Finished with status COMPLETED\n", - "[20241009-01:49:09] [DIG_burden_test:DIG_report_coding] Localizing inputs...\n", - "[20241009-01:49:09] [DIG_burden_test:DIG_report_coding] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_report_coding__2024-10-09--01-49-09_2duowki_s2nsq0q_5pfx3cxg1ngke\n", - "[20241009-01:49:09] [DIG_burden_test:DIG_report_coding] 1 job submitted.\n", - "[20241009-01:50:40] [DIG_burden_test:DIG_report_coding] Finished with status COMPLETED\n", - "[20241009-01:59:47] [DIG_burden_test:DIG_preprocess_element_model] Finished with statuses COMPLETED: 3\n", - "[20241009-01:59:47] [DIG_burden_test:DIG_element_model] Localizing inputs...\n", - "[20241009-01:59:48] [DIG_burden_test:DIG_element_model] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_element_model__2024-10-09--01-59-47_ovsqvjy_s2nsq0q_zklufhcrwoanq\n", - "[20241009-01:59:48] [DIG_burden_test:DIG_element_model] 3 jobs submitted.\n", - "[20241009-02:00:18] [DIG_burden_test:DIG_element_model] Finished with statuses COMPLETED: 3\n", - "[20241009-02:00:18] [DIG_burden_test:DIG_test_noncoding] Localizing inputs...\n", - "[20241009-02:00:18] [DIG_burden_test:DIG_test_noncoding] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_test_noncoding__2024-10-09--02-00-18_nocgzsq_s2nsq0q_ixy3dknf03ii4\n", - "[20241009-02:00:19] [DIG_burden_test:DIG_test_noncoding] 3 jobs submitted.\n", - "[20241009-02:00:49] [DIG_burden_test:DIG_test_noncoding] Finished with statuses COMPLETED: 3\n", - "[20241009-02:00:49] [DIG_burden_test:Gather_noncoding] Localizing inputs...\n", - "[20241009-02:00:49] [DIG_burden_test:Gather_noncoding] Task staged in /mnt/nfs/workspace/DIG_burden_test/Gather_noncoding__2024-10-09--02-00-49_klkqtpy_tbhx1ki_dge4sulp1yeem\n", - "[20241009-02:00:49] [DIG_burden_test:DIG_report_noncoding] Localizing inputs...\n", - "[20241009-02:00:49] [DIG_burden_test:Gather_noncoding] 1 job submitted.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Copying gs://getzlab-workflows-reference_files-oa/hg38/dig/cancer_gene_census_2024_06_20.tsv...\n", - "/ [1 files][ 4.2 KiB/ 4.2 KiB] \n", - "Operation completed over 1 objects/4.2 KiB. \n", - "Copying gs://getzlab-workflows-reference_files-oa/hg38/dig/pancanatlas_genes.tsv...\n", - "/ [1 files][ 1.7 KiB/ 1.7 KiB] \n", - "Operation completed over 1 objects/1.7 KiB. \n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[20241009-02:00:53] [DIG_burden_test:DIG_report_noncoding] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_report_noncoding__2024-10-09--02-00-49_zkrm2ga_s2nsq0q_uwmb5ewgrv1r2\n", - "[20241009-02:00:53] [DIG_burden_test:DIG_report_noncoding] 3 jobs submitted.\n", - "[20241009-02:01:19] [DIG_burden_test:Gather_noncoding] Finished with status COMPLETED\n", - "[20241009-02:01:53] [DIG_burden_test:DIG_report_noncoding] Finished with statuses COMPLETED: 3\n", - "[20241009-02:01:53] [DIG_burden_test:Gather_noncoding] Localizing inputs...\n", - "[20241009-02:01:53] [DIG_burden_test:Gather_noncoding] Task staged in /mnt/nfs/workspace/DIG_burden_test/Gather_noncoding__2024-10-09--02-01-53_klkqtpy_tbhx1ki_az2aysxyy2csk\n", - "[20241009-02:01:53] [DIG_burden_test:Gather_noncoding] 1 job submitted.\n", - "[20241009-02:02:24] [DIG_burden_test:Gather_noncoding] Finished with status COMPLETED\n", - "[20241009-02:02:24] [DIG_burden_test:DIG_results] Localizing inputs...\n", - "[20241009-02:02:24] [DIG_burden_test:DIG_results] Task staged in /mnt/nfs/workspace/DIG_burden_test/DIG_results__2024-10-09--02-02-24_zs5sfpy_s2nsq0q_eghfiueojworg\n", - "[20241009-02:02:24] [DIG_burden_test:DIG_results] 1 job submitted.\n", - "[20241009-02:05:25] [DIG_burden_test:DIG_results] Finished with status COMPLETED\n", - "[20241009-02:05:26] [prefect] Collated results from workflow DIG_burden_test\n" + "[20241009-16:02:43] [prefect] Starting Slurm controller ...\n", + "[20241009-16:02:43] [prefect] Waiting up to 60 seconds for Slurm controller to start ...\n", + "[20241009-16:02:56] [prefect] Started Slurm controller.\n", + "[20241009-16:02:56] [prefect] Workflow results disk low on space (26 GB remaining)\n", + "[20241009-16:03:02] [prefect] Enqueued workflow uvm_tcga_dig\n", + "[20241009-16:03:03] [uvm_tcga_dig:DIG_convert_maf] Hashing file TCGA_WGS_UVM.maf; 100/190 MiB completed\n", + "[20241009-16:03:04] [uvm_tcga_dig:DIG_convert_maf] Localizing inputs...\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_tracks] Job avoidance disabled for this task; overwriting output.\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_tracks] Localizing inputs...\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_ref_fasta] Job avoidance disabled for this task; overwriting output.\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_ref_fasta] Localizing inputs...\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_tracks] Disk name is canine-a88d8673c93a0607e747fcb58805de1e\n", + "[20241009-16:03:04] [uvm_tcga_dig:DIG_convert_maf] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DIG_convert_maf__2024-10-09--16-03-04_mblxxey_s2nsq0q_cvilttyadla1u\n", + "[20241009-16:03:04] [uvm_tcga_dig:Localize_ref_fasta] Disk name is canine-5714e9942090819a76b081ec3fd090aa\n", + "[20241009-16:03:04] [uvm_tcga_dig:DIG_convert_maf] 1 job submitted.\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_tracks] Creating new persistent disk canine-a88d8673c93a0607e747fcb58805de1e\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_tracks] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/Localize_tracks__2024-10-09--16-03-04_mg4xwxy_tbhx1ki_urlzoh5t1wzrw\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_ref_fasta] Creating new persistent disk canine-5714e9942090819a76b081ec3fd090aa\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_ref_fasta] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/Localize_ref_fasta__2024-10-09--16-03-04_mg4xwxy_tbhx1ki_ykhip0lgm0pxq\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_tracks] 1 job submitted.\n", + "[20241009-16:03:05] [uvm_tcga_dig:Localize_ref_fasta] 1 job submitted.\n", + "[20241009-16:07:07] [uvm_tcga_dig:Localize_ref_fasta] Finished with status COMPLETED\n", + "[20241009-16:08:06] [uvm_tcga_dig:DIG_convert_maf] Finished with status COMPLETED\n", + "[20241009-16:08:06] [uvm_tcga_dig:DIG_annotate_maf] Localizing inputs...\n", + "[20241009-16:08:06] [uvm_tcga_dig:DIG_annotate_maf] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DIG_annotate_maf__2024-10-09--16-08-06_exnnusq_s2nsq0q_o35wynerh5vvo\n", + "[20241009-16:08:06] [uvm_tcga_dig:DIG_annotate_maf] 1 job submitted.\n", + "[20241009-16:09:37] [uvm_tcga_dig:DIG_annotate_maf] Finished with status COMPLETED\n", + "[20241009-16:13:39] [uvm_tcga_dig:Localize_tracks] Finished with status COMPLETED\n", + "[20241009-16:13:39] [uvm_tcga_dig:DIG_unzip_h5] Localizing inputs...\n", + "[20241009-16:13:39] [uvm_tcga_dig:DIG_unzip_h5] Creating new persistent disk canine-scratch-tracks-0\n", + "[20241009-16:13:39] [uvm_tcga_dig:DIG_unzip_h5] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DIG_unzip_h5__2024-10-09--16-13-39_iui3eoy_s2nsq0q_3jhzsqsvabgqw\n", + "[20241009-16:13:40] [uvm_tcga_dig:DIG_unzip_h5] 1 job submitted.\n", + "[20241009-17:44:47] [uvm_tcga_dig:DIG_unzip_h5] Finished with status COMPLETED\n", + "[20241009-17:44:47] [uvm_tcga_dig:DIG_add_objectives] Localizing inputs...\n", + "[20241009-17:44:48] [uvm_tcga_dig:DIG_add_objectives] Creating new persistent disk canine-scratch-tracks-with-objectives-tcga-wgs-uvm-0\n", + "[20241009-17:44:48] [uvm_tcga_dig:DIG_add_objectives] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DIG_add_objectives__2024-10-09--17-44-47_vave3cy_s2nsq0q_for2pvrvuagyw\n", + "[20241009-17:44:48] [uvm_tcga_dig:DIG_add_objectives] 1 job submitted.\n", + "[20241009-18:59:49] [uvm_tcga_dig:DIG_add_objectives] Finished with status COMPLETED\n", + "[20241009-18:59:49] [uvm_tcga_dig:DeleteDisk] Job avoidance disabled for this task; overwriting output.\n", + "[20241009-18:59:49] [uvm_tcga_dig:DeleteDisk] Localizing inputs...\n", + "[20241009-18:59:49] [uvm_tcga_dig:DeleteDisk] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DeleteDisk__2024-10-09--18-59-49_ask2hla_tbhx1ki_zdxl4tedwdsyu\n", + "[20241009-18:59:49] [uvm_tcga_dig:DIG_run_kfold_training] Localizing inputs...\n", + "[20241009-18:59:49] [uvm_tcga_dig:DIG_run_kfold_training] Task staged in /mnt/nfs/workspace/uvm_tcga_dig/DIG_run_kfold_training__2024-10-09--18-59-49_wlngc2q_exikiti_jyycnxxv4capq\n", + "[20241009-18:59:49] [uvm_tcga_dig:DeleteDisk] 1 job submitted.\n", + "[20241009-18:59:49] [uvm_tcga_dig:DIG_run_kfold_training] 1 job submitted.\n", + "[20241009-19:01:50] [uvm_tcga_dig:DeleteDisk] Finished with status COMPLETED\n" ] } ], "source": [ "with wolf.Workflow(workflow = dig_workflow) as w:\n", " w.run(\n", - " maf_file = 'TCGA_WGS_UVM.maf', # MAF from characterization pipeline \n", - " interval_set_name = [\n", + " maf_file = 'DLBCL_WGS_UVM_hg38.maf', # MAF from characterization pipeline \n", + " genewise_interval_set_name = [\n", " \"promoters\",\n", " \"3-prime_UTRs\",\n", " \"5-prime_UTRs\"\n", " ], # the first 3 interval sets must remain these, and in this order, additional sets can be added \n", - " interval_set_bed = [\n", - " \"gs://getzlab-workflows-reference_files-oa/hg38/dig/gc19_pc.prom.bed\",\n", - " \"gs://getzlab-workflows-reference_files-oa/hg38/dig/gc19_pc.3utr.bed\",\n", - " \"gs://getzlab-workflows-reference_files-oa/hg38/dig/gc19_pc.5utr.bed\"\n", + " genewise_interval_set_bed = [\n", + " \"gs://getzlab-workflows-reference_files-oa/hg19/dig/gc19_pc.prom.bed\",\n", + " \"gs://getzlab-workflows-reference_files-oa/hg19/dig/gc19_pc.3utr.bed\",\n", + " \"gs://getzlab-workflows-reference_files-oa/hg19/dig/gc19_pc.5utr.bed\"\n", " ], # must be consistent with list above\n", " # mutation_map = \"https://cb.csail.mit.edu/DIG/downloads/mutation_maps/Kidney-RCC_SNV_MNV_INDEL.Pretrained.h5\", # mutation map trained for the analyzed cohort\n", " ref_build = \"hg38\", # reference genome for MAF file\n", - " RUN_NAME = \"DIG_burden_test\"\n", - " ) " + " RUN_NAME = \"dlbcl_wgs\"\n", + " )" ] } ], @@ -733,7 +658,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.19" + "version": "3.10.8" } }, "nbformat": 4,