From 156a11d031a03e45ca3b2f5f7d930a6c41e538a1 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 21 Apr 2023 13:03:20 +0200
Subject: [PATCH 01/10] add hook and use of --max-size arg for prefetch; close
 #113

---
 geofetch/cli.py      |  5 +++++
 geofetch/geofetch.py | 26 ++++++++++++++++----------
 geofetch/utils.py    | 16 +++++++++++++---
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/geofetch/cli.py b/geofetch/cli.py
index b6a2d97..4cb068e 100644
--- a/geofetch/cli.py
+++ b/geofetch/cli.py
@@ -170,6 +170,11 @@ def _parse_cmdl(cmdl):
                 Supported input formats : 12B, 12KB, 12MB, 12GB. """,
     )
 
+    parser.add_argument(
+        "--max-prefetch-size",
+        help="Argument to pass to prefetch program's --max-size option, if prefetch will be used in this run of geofetch; for reference: https://github.com/ncbi/sra-tools/wiki/08.-prefetch-and-fasterq-dump#check-the-maximum-size-limit-of-the-prefetch-tool",
+    )
+
     processed_group.add_argument(
         "-p",
         "--processed",
diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py
index f54ce07..2c4b771 100755
--- a/geofetch/geofetch.py
+++ b/geofetch/geofetch.py
@@ -19,6 +19,7 @@
 from .const import *
 from .utils import (
     Accession,
+    build_prefetch_command,
     parse_accessions,
     parse_SOFT_line,
     convert_size,
@@ -90,10 +91,12 @@ def __init__(
         disable_progressbar: bool = False,
         add_convert_modifier: bool = False,
         opts=None,
+        max_prefetch_size=None,
         **kwargs,
     ):
         """
-        init function
+        Constructor
+
         :param input: GSEnumber or path to the input file
         :param name: Specify a project name. Defaults to GSE number or name of accessions file name
         :param metadata_root:  Specify a parent folder location to store metadata.
@@ -154,15 +157,16 @@ def __init__(
 
         :param skip: Skip some accessions. [Default: no skip].
         :param opts: opts object [Optional]
+        :param str | int max_prefetch_size: argument to prefetch command's --max-size option;
+            for reference: https://github.com/ncbi/sra-tools/wiki/08.-prefetch-and-fasterq-dump#check-the-maximum-size-limit-of-the-prefetch-tool
         :param kwargs: other values
         """
 
-        if opts is not None:
-            _LOGGER = logmuse.logger_via_cli(opts)
-        else:
-            _LOGGER = logging.getLogger(__name__)
-
-        self._LOGGER = _LOGGER
+        self._LOGGER = (
+            logmuse.logger_via_cli(opts)
+            if opts is not None
+            else logging.getLogger(__name__)
+        )
 
         if name:
             self.project_name = name
@@ -261,6 +265,7 @@ def __init__(
             raise SystemExit("For SAM/BAM processing, samtools should be on PATH.")
 
         self.just_object = False
+        self.max_prefetch_size = max_prefetch_size
 
     def get_projects(
         self, input: str, just_metadata: bool = True, discard_soft: bool = True
@@ -634,9 +639,9 @@ def _process_sra_meta(
 
     def _download_raw_data(self, run_name: str) -> NoReturn:
         """
-        Downloade raw data from SRA by providing run name
+        Download raw data from SRA by providing run name
+
         :param run_name: Run name from SRA
-        :return: NoReturn
         """
         bam_file = (
             ""
@@ -1376,6 +1381,7 @@ def _download_SRA_file(self, run_name: str):
         """
         Download SRA file by ising 'prefetch' utility from the SRA Toolkit
         more info: (http://www.ncbi.nlm.nih.gov/books/NBK242621/)
+
         :param str run_name: SRR number of the SRA file
         """
 
@@ -1384,7 +1390,7 @@ def _download_SRA_file(self, run_name: str):
         while True:
             t = t + 1
             subprocess_return = run_subprocess(
-                ["prefetch", run_name, "--max-size", "50000000"]
+                build_prefetch_command(run_id=run_name, max_size=self.max_prefetch_size)
             )
 
             if subprocess_return == 0:
diff --git a/geofetch/utils.py b/geofetch/utils.py
index b07091a..e16daaa 100644
--- a/geofetch/utils.py
+++ b/geofetch/utils.py
@@ -8,7 +8,7 @@
 import requests
 from io import StringIO
 import csv
-from typing import NoReturn, Dict, List, Union
+from typing import *
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -22,6 +22,15 @@
 }
 
 
+def build_prefetch_command(
+    run_id: str, prefetch_path: str = "prefetch", max_size: Union[str, int] = 50000000
+) -> List[str]:
+    cmd = [prefetch_path, run_id]
+    if max_size is not None:
+        cmd.extend(["--max-size", str(max_size)])
+    return cmd
+
+
 def is_known_type(accn: str = None, typename: str = None):
     """
     Determine if the given accession is of a known type.
@@ -43,7 +52,7 @@ def is_known_type(accn: str = None, typename: str = None):
         return False
 
 
-def parse_accessions(input_arg, metadata_folder, just_metadata=False):
+def parse_accessions(input_arg, metadata_folder, just_metadata=False, max_size=None):
     """
     Create a list of GSE accessions, either from file or a single value.
 
@@ -56,6 +65,7 @@ def parse_accessions(input_arg, metadata_folder, just_metadata=False):
     :param str metadata_folder: path to folder for accession metadata
     :param bool just_metadata: whether to only process metadata, not the
         actual data associated with the accession
+    :param str | int max_size: argument for prefetch command's --max-size option
     """
 
     acc_GSE_list = {}
@@ -81,7 +91,7 @@ def parse_accessions(input_arg, metadata_folder, just_metadata=False):
                         run_ids.append(r_id)
             _LOGGER.info("{} run(s)".format(len(run_ids)))
             for r_id in run_ids:
-                run_subprocess(["prefetch", r_id, "--max-size", "50000000"])
+                run_subprocess(build_prefetch_command(run_id=r_id, max_size=max_size))
             # Early return if we've just handled SRP accession directly.
             return
         else:

From a9b81074c35342d03576eedf290efbdee29b1d59 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 21 Apr 2023 13:03:32 +0200
Subject: [PATCH 02/10] ignore built package

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index d97b82c..a27a5b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 
 # Python
 *.pyc
+build/
 
 # ignore test results
 tests/test/*
@@ -94,4 +95,4 @@ docs_jupyter/*
 .env/
 env/
 .venv/
-venv/
\ No newline at end of file
+venv/

From 46d9c264d1c5a355a5fbe8cd04b41053f219b7d2 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 21 Apr 2023 13:34:26 +0200
Subject: [PATCH 03/10] set 50g as default for prefetch --max-size; add test;
 #113

---
 geofetch/geofetch.py   | 4 +++-
 tests/test_geofetch.py | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py
index 2c4b771..380b040 100755
--- a/geofetch/geofetch.py
+++ b/geofetch/geofetch.py
@@ -265,7 +265,9 @@ def __init__(
             raise SystemExit("For SAM/BAM processing, samtools should be on PATH.")
 
         self.just_object = False
-        self.max_prefetch_size = max_prefetch_size
+        self.max_prefetch_size = (
+            "50g" if max_prefetch_size is None else max_prefetch_size
+        )
 
     def get_projects(
         self, input: str, just_metadata: bool = True, discard_soft: bool = True
diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py
index 0c8bac7..ea4fae0 100644
--- a/tests/test_geofetch.py
+++ b/tests/test_geofetch.py
@@ -37,6 +37,11 @@ def get_soft_path(gse_numb, sample_len, series_len):
 ]
 
 
+def test_max_prefetch_size__default_is_50g():
+    fetcher = Geofetcher()
+    assert fetcher.max_prefetch_size == "50g"
+
+
 class TestAccParser:
     """
     Testing input parser

From 6027cc28691ea293b296d35944f69b5f0f2d4d3c Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Fri, 21 Apr 2023 12:53:36 -0400
Subject: [PATCH 04/10] Added codecov check

---
 .github/workflows/run-codecov.yml  | 25 +++++++++++++++++++++----
 .github/workflows/run-pytest.yml   |  5 +----
 requirements/requirements-dev.txt  |  1 -
 requirements/requirements-docs.txt |  2 --
 requirements/requirements-test.txt |  7 ++++---
 5 files changed, 26 insertions(+), 14 deletions(-)
 delete mode 100644 requirements/requirements-docs.txt

diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml
index 1db19ff..364eb68 100644
--- a/.github/workflows/run-codecov.yml
+++ b/.github/workflows/run-codecov.yml
@@ -1,21 +1,38 @@
 name: Run codecov
 
 on:
+  push:
+    branches: [dev]
   pull_request:
-    branches: [master, dev]
+    branches: [master]
 
 jobs:
   pytest:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.9]
+        python-version: [3.11]
         os: [ubuntu-latest]
 
     steps:
     - uses: actions/checkout@v2
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Install test dependencies
+      run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi
+
+    - name: Install package
+      run: python -m pip install .
+
+    - name: Run pytest tests
+      run: pytest tests --cov=./ --cov-report=xml
+
     - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v2
+      uses: codecov/codecov-action@v3
       with:
         file: ./coverage.xml
-        name: py-${{ matrix.python-version }}-${{ matrix.os }}
+        name: py-${{ matrix.python-version }}-${{ matrix.os }}
\ No newline at end of file
diff --git a/.github/workflows/run-pytest.yml b/.github/workflows/run-pytest.yml
index 3009acb..6184da9 100644
--- a/.github/workflows/run-pytest.yml
+++ b/.github/workflows/run-pytest.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.11"]
         os: [ubuntu-latest]
 
     steps:
@@ -22,9 +22,6 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
-    - name: Install dev dependencies
-      run: if [ -f requirements/requirements-dev.txt ]; then pip install -r requirements/requirements-dev.txt; fi
-
     - name: Install test dependencies
       run: if [ -f requirements/requirements-test.txt ]; then pip install -r requirements/requirements-test.txt; fi
 
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index b35218c..e69de29 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -1 +0,0 @@
-pytest==3.10.1
diff --git a/requirements/requirements-docs.txt b/requirements/requirements-docs.txt
deleted file mode 100644
index ac4df79..0000000
--- a/requirements/requirements-docs.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-geofetch
-https://github.com/databio/mkdocs-databio/archive/master.zip
diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt
index b7f7baf..aecaff0 100644
--- a/requirements/requirements-test.txt
+++ b/requirements/requirements-test.txt
@@ -1,3 +1,4 @@
-coveralls>=1.1
-pytest-cov>=2.4.0
-pytest>7.1
+black
+pytest
+coveralls
+pytest-cov

From 385c31b1e1f0aa6d416cf4b9bfce130facc4ad9e Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Fri, 21 Apr 2023 19:30:13 +0200
Subject: [PATCH 05/10] avoid having 2 defaults to manage, and let CLI one be
 used

---
 geofetch/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/geofetch/utils.py b/geofetch/utils.py
index e16daaa..f536948 100644
--- a/geofetch/utils.py
+++ b/geofetch/utils.py
@@ -23,7 +23,7 @@
 
 
 def build_prefetch_command(
-    run_id: str, prefetch_path: str = "prefetch", max_size: Union[str, int] = 50000000
+    run_id: str, prefetch_path: str = "prefetch", max_size: Union[str, int] = None
 ) -> List[str]:
     cmd = [prefetch_path, run_id]
     if max_size is not None:

From 0027165aa1028f223ba99e5224ae2ebd050f6d2c Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Fri, 21 Apr 2023 13:41:14 -0400
Subject: [PATCH 06/10] Logger refactoring + added main file

---
 geofetch/__init__.py |  11 ++-
 geofetch/__main__.py |  10 +++
 geofetch/cli.py      |   2 +-
 geofetch/finder.py   |   4 +-
 geofetch/geofetch.py | 209 +++++++++++++++++++------------------------
 setup.py             |   3 +-
 6 files changed, 117 insertions(+), 122 deletions(-)
 create mode 100644 geofetch/__main__.py

diff --git a/geofetch/__init__.py b/geofetch/__init__.py
index ef3887f..da89d27 100644
--- a/geofetch/__init__.py
+++ b/geofetch/__init__.py
@@ -1,7 +1,12 @@
 """ Package-level data """
-from .geofetch import *
-from .finder import *
-from ._version import __version__
 import logmuse
 
+from geofetch.geofetch import *
+from geofetch.finder import *
+from geofetch._version import __version__
+
+
+__author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"]
+__all__ = ["Finder", "Geofetcher"]
+
 logmuse.init_logger("geofetch")
diff --git a/geofetch/__main__.py b/geofetch/__main__.py
new file mode 100644
index 0000000..97e5466
--- /dev/null
+++ b/geofetch/__main__.py
@@ -0,0 +1,10 @@
+import sys
+from geofetch.geofetch import main
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())
+
+    except KeyboardInterrupt:
+        print("Pipeline aborted.")
+        sys.exit(1)
diff --git a/geofetch/cli.py b/geofetch/cli.py
index 4cb068e..fd1bfbd 100644
--- a/geofetch/cli.py
+++ b/geofetch/cli.py
@@ -1,7 +1,7 @@
 import argparse
 import os
 import logmuse
-from ._version import __version__
+from geofetch._version import __version__
 
 
 def _safe_echo(var):
diff --git a/geofetch/finder.py b/geofetch/finder.py
index c54b9be..11b3bfb 100644
--- a/geofetch/finder.py
+++ b/geofetch/finder.py
@@ -10,14 +10,14 @@
 import xmltodict
 import re
 import os
-import logmuse
+import logging
 import coloredlogs
 from datetime import datetime
 from datetime import timedelta
 
 __author__ = "Oleksandr Khoroshevskyi"
 
-_LOGGER = logmuse.init_logger("pepannot")
+_LOGGER = logging.getLogger("__name__")
 coloredlogs.install(
     logger=_LOGGER,
     datefmt="%H:%M:%S",
diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py
index 380b040..1a67e0f 100755
--- a/geofetch/geofetch.py
+++ b/geofetch/geofetch.py
@@ -1,23 +1,24 @@
-#!/usr/bin/env python3
-
-__author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"]
-
 import copy
 import csv
 import os
 import sys
-
-# from string import punctuation
-# import tarfile
 import requests
 import xmltodict
 import yaml
 import time
 import logging
 
-from .cli import _parse_cmdl
-from .const import *
-from .utils import (
+from rich.progress import track
+import re
+import logmuse
+from ubiquerg import expandpath, is_command_callable
+from typing import List, Union, Dict, Tuple, NoReturn
+import peppy
+import pandas as pd
+
+from geofetch.cli import _parse_cmdl
+from geofetch.const import *
+from geofetch.utils import (
     Accession,
     build_prefetch_command,
     parse_accessions,
@@ -43,13 +44,7 @@
     gse_content_to_dict,
 )
 
-from rich.progress import track
-import re
-import logmuse
-from ubiquerg import expandpath, is_command_callable
-from typing import List, Union, Dict, Tuple, NoReturn
-import peppy
-import pandas as pd
+_LOGGER = logging.getLogger(__name__)
 
 
 class Geofetcher:
@@ -162,7 +157,8 @@ def __init__(
         :param kwargs: other values
         """
 
-        self._LOGGER = (
+        global _LOGGER
+        _LOGGER = (
             logmuse.logger_via_cli(opts)
             if opts is not None
             else logging.getLogger(__name__)
@@ -236,7 +232,7 @@ def __init__(
             try:
                 self.filter_size = convert_size(filter_size.lower())
             except ValueError as message:
-                self._LOGGER.error(message)
+                _LOGGER.error(message)
                 raise SystemExit()
         else:
             self.filter_size = filter_size
@@ -258,7 +254,7 @@ def __init__(
         self.add_dotfile = add_dotfile
         self.disable_progressbar = disable_progressbar
         self.add_convert_modifier = add_convert_modifier
-        self._LOGGER.info(f"Metadata folder: {self.metadata_expanded}")
+        _LOGGER.info(f"Metadata folder: {self.metadata_expanded}")
 
         # Some sanity checks before proceeding
         if bam_conversion and not just_metadata and not _which("samtools"):
@@ -297,7 +293,7 @@ def get_projects(
                 self.acc_anno = False
                 for acc_GSE in acc_GSE_list.keys():
                     ncount += 1
-                    self._LOGGER.info(
+                    _LOGGER.info(
                         f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m"
                     )
                     project_dict.update(self.fetch_all(input=acc_GSE, name=acc_GSE))
@@ -318,7 +314,7 @@ def get_projects(
                 ncount = 0
                 for acc_GSE in acc_GSE_list.keys():
                     ncount += 1
-                    self._LOGGER.info(
+                    _LOGGER.info(
                         f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m"
                     )
                     project = self.fetch_all(input=acc_GSE)
@@ -387,22 +383,22 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
             if ncount <= self.skip:
                 continue
             elif ncount == self.skip + 1:
-                self._LOGGER.info(f"Skipped {self.skip} accessions. Starting now.")
+                _LOGGER.info(f"Skipped {self.skip} accessions. Starting now.")
 
             if not self.just_object or not self.acc_anno:
-                self._LOGGER.info(
+                _LOGGER.info(
                     f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m"
                 )
 
             if len(re.findall(GSE_PATTERN, acc_GSE)) != 1:
-                self._LOGGER.debug(len(re.findall(GSE_PATTERN, acc_GSE)))
-                self._LOGGER.warning(
+                _LOGGER.debug(len(re.findall(GSE_PATTERN, acc_GSE)))
+                _LOGGER.warning(
                     "This does not appear to be a correctly formatted GSE accession! "
                     "Continue anyway..."
                 )
 
             if len(acc_GSE_list[acc_GSE]) > 0:
-                self._LOGGER.info(
+                _LOGGER.info(
                     f"Limit to: {list(acc_GSE_list[acc_GSE])}"
                 )  # a list of GSM#s
 
@@ -418,7 +414,7 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     max_soft_size=self.max_soft_size,
                 )
             else:
-                self._LOGGER.info(f"Found previous GSE file: {file_gse}")
+                _LOGGER.info(f"Found previous GSE file: {file_gse}")
                 gse_file_obj = open(file_gse, "r")
                 file_gse_content = gse_file_obj.read().split("\n")
                 file_gse_content = [elem for elem in file_gse_content if len(elem) > 0]
@@ -433,7 +429,7 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     max_soft_size=self.max_soft_size,
                 )
             else:
-                self._LOGGER.info(f"Found previous GSM file: {file_gsm}")
+                _LOGGER.info(f"Found previous GSM file: {file_gsm}")
                 gsm_file_obj = open(file_gsm, "r")
                 file_gsm_content = gsm_file_obj.read().split("\n")
                 file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0]
@@ -484,13 +480,13 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     file_gse_content, gsm_metadata, file_sra
                 )
                 if not srp_list_result:
-                    self._LOGGER.info(f"No SRP data, continuing ....")
-                    self._LOGGER.warning(f"No raw pep will be created! ....")
+                    _LOGGER.info(f"No SRP data, continuing ....")
+                    _LOGGER.warning(f"No raw pep will be created! ....")
                     # delete current acc if no raw data was found
                     # del metadata_dict[acc_GSE]
                     pass
                 else:
-                    self._LOGGER.info("Parsing SRA file to download SRR records")
+                    _LOGGER.info("Parsing SRA file to download SRR records")
                 gsm_multi_table, gsm_metadata, runs = self._process_sra_meta(
                     srp_list_result, gsm_enter_dict, gsm_metadata
                 )
@@ -499,10 +495,10 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                 if not self.just_metadata:
                     for run in runs:
                         # download raw data
-                        self._LOGGER.info(f"Getting SRR: {run}  in ({acc_GSE})")
+                        _LOGGER.info(f"Getting SRR: {run}  in ({acc_GSE})")
                         self._download_raw_data(run)
                 else:
-                    self._LOGGER.info(f"Dry run, no data will be downloaded")
+                    _LOGGER.info(f"Dry run, no data will be downloaded")
 
                 # save one project
                 if self.acc_anno and nkeys > 1:
@@ -517,11 +513,11 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje
                     metadata_dict_combined.update(gsm_metadata)
                     subannotation_dict_combined.update(gsm_multi_table)
 
-        self._LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)")
+        _LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)")
 
         # Logging cleaning process:
         if self.discard_soft:
-            self._LOGGER.info(f"Cleaning soft files ...")
+            _LOGGER.info(f"Cleaning soft files ...")
             clean_soft_files(self.metadata_root_full)
 
         #######################################################################################
@@ -600,7 +596,7 @@ def _process_sra_meta(
             # Some experiments are flagged in SRA as having multiple runs.
             if gsm_metadata[experiment].get("SRR") is not None:
                 # This SRX number already has an entry in the table.
-                self._LOGGER.debug(f"Found additional run: {run_name} ({experiment})")
+                _LOGGER.debug(f"Found additional run: {run_name} ({experiment})")
                 if (
                     isinstance(gsm_metadata[experiment]["SRR"], str)
                     and experiment not in gsm_multi_table
@@ -657,16 +653,14 @@ def _download_raw_data(self, run_name: str) -> NoReturn:
         )
 
         if os.path.exists(bam_file):
-            self._LOGGER.info(f"BAM found: {bam_file} . Skipping...")
+            _LOGGER.info(f"BAM found: {bam_file} . Skipping...")
         elif os.path.exists(fq_file):
-            self._LOGGER.info(f"FQ found: {fq_file} .Skipping...")
+            _LOGGER.info(f"FQ found: {fq_file} .Skipping...")
         else:
             try:
                 self._download_SRA_file(run_name)
             except Exception as err:
-                self._LOGGER.warning(
-                    f"Error occurred while downloading SRA file: {err}"
-                )
+                _LOGGER.warning(f"Error occurred while downloading SRA file: {err}")
 
             if self.bam_conversion and self.bam_folder != "":
                 try:
@@ -678,7 +672,7 @@ def _download_raw_data(self, run_name: str) -> NoReturn:
                     # checking if bam_file converted correctly, if not --> use fastq-dump
                     st = os.stat(bam_file)
                     if st.st_size < 100:
-                        self._LOGGER.warning(
+                        _LOGGER.warning(
                             "Bam conversion failed with sam-dump. Trying fastq-dump..."
                         )
                         self._sra_to_bam_conversion_fastq_damp(
@@ -686,7 +680,7 @@ def _download_raw_data(self, run_name: str) -> NoReturn:
                         )
 
                 except FileNotFoundError as err:
-                    self._LOGGER.info(
+                    _LOGGER.info(
                         f"SRA file doesn't exist, please download it first: {err}"
                     )
 
@@ -805,7 +799,7 @@ def _download_processed_data(
         :return: Noreturn
         """
         data_geo_folder = os.path.join(self.geo_folder, acc_gse)
-        self._LOGGER.debug("Data folder: " + data_geo_folder)
+        _LOGGER.debug("Data folder: " + data_geo_folder)
 
         if self.supp_by == "all":
             processed_samples_files = [
@@ -852,7 +846,7 @@ def _expand_metadata_list(self, metadata_list: list) -> list:
         :param list metadata_list: list of dicts that store metadata
         :return list: expanded metadata list
         """
-        self._LOGGER.info("Expanding metadata list...")
+        _LOGGER.info("Expanding metadata list...")
         list_of_keys = _get_list_of_keys(metadata_list)
         for key_in_list in list_of_keys:
             metadata_list = self._expand_metadata_list_item(metadata_list, key_in_list)
@@ -937,22 +931,22 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str):
                         else:
                             del metadata_list[n_elem][dict_key]
                     except KeyError as err:
-                        # self._LOGGER.warning(
+                        # _LOGGER.warning(
                         #     f"expand_metadata_list: Key Error: {err}, continuing ..."
                         # )
                         pass
 
                 return metadata_list
             else:
-                self._LOGGER.debug(
+                _LOGGER.debug(
                     f"Metadata with {dict_key} was not expanded, as item is not list"
                 )
                 return metadata_list
         except KeyError as err:
-            self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}")
+            _LOGGER.warning(f"expand_metadata_list: Key Error: {err}")
             return metadata_list
         except ValueError as err:
-            self._LOGGER.warning(f"expand_metadata_list: Value Error: {err}")
+            _LOGGER.warning(f"expand_metadata_list: Value Error: {err}")
             return metadata_list
 
     def _write_gsm_annotation(self, gsm_metadata: dict, file_annotation: str) -> str:
@@ -971,10 +965,10 @@ def _write_gsm_annotation(self, gsm_metadata: dict, file_annotation: str) -> str
             w.writeheader()
             for item in gsm_metadata:
                 w.writerow(gsm_metadata[item])
-        self._LOGGER.info(
+        _LOGGER.info(
             f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m"
         )
-        self._LOGGER.info("\033[92mFile has been saved successfully\033[0m")
+        _LOGGER.info("\033[92mFile has been saved successfully\033[0m")
         return fp
 
     def _write_processed_annotation(
@@ -994,7 +988,7 @@ def _write_processed_annotation(
         :return: none, or peppy project
         """
         if len(processed_metadata) == 0:
-            self._LOGGER.info(
+            _LOGGER.info(
                 "No files found. No data to save. File %s won't be created"
                 % file_annotation_path
             )
@@ -1005,7 +999,7 @@ def _write_processed_annotation(
         if not os.path.exists(pep_file_folder) and not self.just_object:
             os.makedirs(pep_file_folder)
 
-        self._LOGGER.info("Unifying and saving of metadata... ")
+        _LOGGER.info("Unifying and saving of metadata... ")
         processed_metadata = _unify_list_keys(processed_metadata)
 
         # delete rare keys
@@ -1028,7 +1022,7 @@ def _write_processed_annotation(
                 dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys())
                 dict_writer.writeheader()
                 dict_writer.writerows(processed_metadata)
-            self._LOGGER.info(
+            _LOGGER.info(
                 "\033[92mFile %s has been saved successfully\033[0m"
                 % file_annotation_path
             )
@@ -1095,7 +1089,7 @@ def _write_raw_annotation_new(
         try:
             assert len(metadata_dict) > 0
         except AssertionError:
-            self._LOGGER.warning(
+            _LOGGER.warning(
                 "\033[33mNo PEP created, as no raw data was found!!!\033[0m"
             )
             return None
@@ -1103,9 +1097,7 @@ def _write_raw_annotation_new(
         if self.discard_soft:
             clean_soft_files(os.path.join(self.metadata_root_full))
 
-        self._LOGGER.info(
-            "Creating complete project annotation sheets and config file..."
-        )
+        _LOGGER.info("Creating complete project annotation sheets and config file...")
 
         proj_root = os.path.join(self.metadata_root_full, name)
         if not os.path.exists(proj_root) and not self.just_object:
@@ -1403,9 +1395,7 @@ def _download_SRA_file(self, run_name: str):
                     f"Prefetch retries of {run_name} failed. Try this sample later"
                 )
 
-            self._LOGGER.info(
-                "Prefetch attempt failed, wait a few seconds to try again"
-            )
+            _LOGGER.info("Prefetch attempt failed, wait a few seconds to try again")
             time.sleep(t * 2)
 
     def _sra_to_bam_conversion_sam_dump(self, bam_file: str, run_name: str) -> NoReturn:
@@ -1414,7 +1404,7 @@ def _sra_to_bam_conversion_sam_dump(self, bam_file: str, run_name: str) -> NoRet
         :param str bam_file: path to BAM file that has to be created
         :param str run_name: SRR number of the SRA file that has to be converted
         """
-        self._LOGGER.info("Converting to bam: " + run_name)
+        _LOGGER.info("Converting to bam: " + run_name)
         sra_file = os.path.join(self.sra_folder, run_name + ".sra")
         if not os.path.exists(sra_file):
             raise FileNotFoundError(sra_file)
@@ -1429,7 +1419,7 @@ def _sra_to_bam_conversion_sam_dump(self, bam_file: str, run_name: str) -> NoRet
         )
         # sam-dump -u SRR020515.sra | samtools view -bS - > test.bam
 
-        self._LOGGER.info(f"Conversion command: {cmd}")
+        _LOGGER.info(f"Conversion command: {cmd}")
         run_subprocess(cmd, shell=True)
 
     def _sra_to_bam_conversion_fastq_damp(
@@ -1450,10 +1440,10 @@ def _sra_to_bam_conversion_fastq_damp(
             + " "
             + os.path.join(self.sra_folder, run_name + ".sra")
         )
-        self._LOGGER.info(f"Command: {cmd}")
+        _LOGGER.info(f"Command: {cmd}")
         run_subprocess(cmd, shell=True)
         if not picard_path:
-            self._LOGGER.warning("Can't convert the fastq to bam without picard path")
+            _LOGGER.warning("Can't convert the fastq to bam without picard path")
         else:
             # was it paired data? you have to process it differently
             # so it knows it's paired end
@@ -1470,7 +1460,7 @@ def _sra_to_bam_conversion_fastq_damp(
             cmd += " OUTPUT=" + bam_file
             cmd += " SAMPLE_NAME=" + run_name
             cmd += " QUIET=true"
-            self._LOGGER.info(f"Conversion command: {cmd}")
+            _LOGGER.info(f"Conversion command: {cmd}")
             run_subprocess(cmd, shell=True)
 
     def _write_subannotation(
@@ -1487,9 +1477,9 @@ def _write_subannotation(
             write
         :return str: path to file written
         """
-        self._LOGGER.info(f"Sample subannotation sheet: {filepath}")
+        _LOGGER.info(f"Sample subannotation sheet: {filepath}")
         fp = expandpath(filepath)
-        self._LOGGER.info(f"Writing: {fp}")
+        _LOGGER.info(f"Writing: {fp}")
         with open(fp, "w") as openfile:
             writer = csv.writer(openfile, delimiter=",")
             # write header
@@ -1498,7 +1488,7 @@ def _write_subannotation(
                 tabular_data = [tabular_data]
             for table in tabular_data:
                 for key, values in table.items():
-                    self._LOGGER.debug(f"{key}: {values}")
+                    _LOGGER.debug(f"{key}: {values}")
                     writer.writerows(values)
         return fp
 
@@ -1519,18 +1509,18 @@ def _download_file(
             full_filepath = os.path.join(data_folder, new_name)
 
         if not os.path.exists(full_filepath):
-            self._LOGGER.info(f"\033[38;5;242m")  # set color to gray
+            _LOGGER.info(f"\033[38;5;242m")  # set color to gray
             # if dir does not exist:
             if not os.path.exists(data_folder):
                 os.makedirs(data_folder)
             ret = run_subprocess(
                 ["wget", "--no-clobber", file_url, "-O", full_filepath]
             )
-            self._LOGGER.info(f"\033[38;5;242m{ret}\033[0m")
+            _LOGGER.info(f"\033[38;5;242m{ret}\033[0m")
             time.sleep(sleep_after)
-            self._LOGGER.info(f"\033[0m")  # Reset to default terminal color
+            _LOGGER.info(f"\033[0m")  # Reset to default terminal color
         else:
-            self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m")
+            _LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m")
 
     def _get_list_of_processed_files(
         self, file_gse_content: list, file_gsm_content: list
@@ -1555,7 +1545,7 @@ def _get_list_of_processed_files(
                 pl = parse_SOFT_line(line)
                 file_url = pl[list(pl.keys())[0]].rstrip()
                 filename = os.path.basename(file_url)
-                self._LOGGER.debug(f"Processed GSE file found: %s" % str(file_url))
+                _LOGGER.debug(f"Processed GSE file found: %s" % str(file_url))
 
                 # search for tar file:
                 if tar_re.search(filename):
@@ -1579,14 +1569,14 @@ def _get_list_of_processed_files(
                                     with open(filelist_path, "w") as f:
                                         f.write(filelist_raw_text)
                                 except OSError:
-                                    self._LOGGER.warning(
+                                    _LOGGER.warning(
                                         f"{filelist_path} not found. File won't be saved.."
                                     )
 
                         else:
                             raise Exception(f"error in requesting tar_files_list")
                     else:
-                        self._LOGGER.info(f"Found previous GSM file: {filelist_path}")
+                        _LOGGER.info(f"Found previous GSM file: {filelist_path}")
                         filelist_obj = open(filelist_path, "r")
                         filelist_raw_text = filelist_obj.read()
 
@@ -1640,7 +1630,7 @@ def _get_list_of_processed_files(
                         if found_gsm:
                             pl = parse_SOFT_line(line_gsm)
                             file_url_gsm = pl[list(pl.keys())[0]].rstrip()
-                            self._LOGGER.debug(
+                            _LOGGER.debug(
                                 f"Processed GSM file found: %s" % str(file_url_gsm)
                             )
                             if file_url_gsm != "NONE":
@@ -1652,7 +1642,7 @@ def _get_list_of_processed_files(
                     )
                     meta_processed_samples = _separate_file_url(meta_processed_samples)
 
-                    self._LOGGER.info(
+                    _LOGGER.info(
                         f"\nTotal number of processed SAMPLES files found is: "
                         f"%s" % str(len(meta_processed_samples))
                     )
@@ -1693,13 +1683,13 @@ def _get_list_of_processed_files(
                     else:
                         meta_processed_series[bl_key].append(bl_value)
             except IndexError as ind_err:
-                self._LOGGER.debug(
+                _LOGGER.debug(
                     f"IndexError in adding value to meta_processed_series: %s" % ind_err
                 )
 
         meta_processed_series = _separate_list_of_files(meta_processed_series)
         meta_processed_series = _separate_file_url(meta_processed_series)
-        self._LOGGER.info(
+        _LOGGER.info(
             f"Total number of processed SERIES files found is: "
             f"%s" % str(len(meta_processed_series))
         )
@@ -1719,7 +1709,7 @@ def _run_filter(self, meta_list: list, col_name: str = "file") -> list:
         for meta_elem in meta_list:
             if self.filter_re.search(meta_elem[col_name].lower()):
                 filtered_list.append(meta_elem)
-        self._LOGGER.info(
+        _LOGGER.info(
             "\033[32mTotal number of files after filter is: %i \033[0m"
             % len(filtered_list)
         )
@@ -1739,11 +1729,11 @@ def _run_size_filter(self, meta_list, col_name="file_size"):
                 if int(meta_elem[col_name]) <= self.filter_size:
                     filtered_list.append(meta_elem)
         else:
-            self._LOGGER.info(
+            _LOGGER.info(
                 "\033[32mTotal number of files after size filter NONE?? \033[0m"
             )
             return meta_list
-        self._LOGGER.info(
+        _LOGGER.info(
             "\033[32mTotal number of files after size filter is: %i \033[0m"
             % len(filtered_list)
         )
@@ -1759,9 +1749,7 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool:
         """
 
         if not self.geo_folder:
-            self._LOGGER.error(
-                "You must provide a geo_folder to download processed data."
-            )
+            _LOGGER.error("You must provide a geo_folder to download processed data.")
             sys.exit(1)
 
         filename = os.path.basename(file_url)
@@ -1770,18 +1758,18 @@ def _download_processed_file(self, file_url: str, data_folder: str) -> bool:
         while ntry < 10:
             try:
                 self._download_file(file_url, data_folder)
-                self._LOGGER.info(
+                _LOGGER.info(
                     "\033[92mFile %s has been downloaded successfully\033[0m"
                     % f"{data_folder}/{filename}"
                 )
                 return True
 
             except IOError as e:
-                self._LOGGER.error(str(e))
+                _LOGGER.error(str(e))
                 # The server times out if we are hitting it too frequently,
                 # so we should sleep a bit to reduce frequency
                 sleeptime = (ntry + 1) ** 3
-                self._LOGGER.info(f"Sleeping for {sleeptime} seconds")
+                _LOGGER.info(f"Sleeping for {sleeptime} seconds")
                 time.sleep(sleeptime)
                 ntry += 1
                 if ntry > 4:
@@ -1800,13 +1788,13 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
             found = re.findall(PROJECT_PATTERN, line)
             if found:
                 acc_SRP = found[0]
-                self._LOGGER.info(f"Found SRA Project accession: {acc_SRP}")
+                _LOGGER.info(f"Found SRA Project accession: {acc_SRP}")
                 break
 
         if not acc_SRP:
             # If I can't get an SRA accession, maybe raw data wasn't submitted to SRA
             # as part of this GEO submission. Can't proceed.
-            self._LOGGER.warning(
+            _LOGGER.warning(
                 "Unable to get SRA accession (SRP#) from GEO GSE SOFT file. "
                 "No raw data detected! Continuing anyway..."
             )
@@ -1815,12 +1803,12 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
             if len(gsm_metadata) == 1:
                 try:
                     acc_SRP = list(gsm_metadata.keys())[0]
-                    self._LOGGER.warning(
+                    _LOGGER.warning(
                         "But the GSM has an SRX number; instead of an "
                         "SRP, using SRX identifier for this sample: " + acc_SRP
                     )
                 except TypeError:
-                    self._LOGGER.warning("Error in gsm_metadata")
+                    _LOGGER.warning("Error in gsm_metadata")
                     return []
 
             # else:
@@ -1844,14 +1832,14 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
                     return srp_list
 
                 except Exception as err:
-                    self._LOGGER.warning(
+                    _LOGGER.warning(
                         f"Warning: error, while downloading SRA Info Metadata of {acc_SRP}. "
                         f"Error: {err}. Probably no SRA metadata found"
                     )
                     return []
             else:
                 # open existing annotation
-                self._LOGGER.info(f"Found SRA metadata, opening..")
+                _LOGGER.info(f"Found SRA metadata, opening..")
                 with open(file_sra, "r") as m_file:
                     reader = csv.reader(m_file)
                     file_list = []
@@ -1868,7 +1856,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None):
                 return srp_list
 
             except Exception as err:
-                self._LOGGER.warning(
+                _LOGGER.warning(
                     f"\033[91mError occurred, while downloading SRA Info Metadata of {acc_SRP}. "
                     f"Error: {err}  \033[0m"
                 )
@@ -1881,9 +1869,9 @@ def _get_SRP_list(self, srp_number: str) -> list:
         :return: list of dicts of SRRs
         """
         if not srp_number:
-            self._LOGGER.info(f"No srp number in this accession found")
+            _LOGGER.info(f"No srp number in this accession found")
             return []
-        self._LOGGER.info(f"Downloading {srp_number} sra metadata")
+        _LOGGER.info(f"Downloading {srp_number} sra metadata")
         ncbi_esearch = NCBI_ESEARCH.format(SRP_NUMBER=srp_number)
 
         # searching ids responding to srp
@@ -1891,7 +1879,7 @@ def _get_SRP_list(self, srp_number: str) -> list:
 
         if x.status_code != 200:
             x.encoding = "UTF-8"
-            self._LOGGER.error(f"Error in ncbi esearch response: {x.status_code}")
+            _LOGGER.error(f"Error in ncbi esearch response: {x.status_code}")
             raise x.raise_for_status()
         id_results = x.json()["esearchresult"]["idlist"]
         if len(id_results) > 500:
@@ -1908,7 +1896,7 @@ def _get_SRP_list(self, srp_number: str) -> list:
 
             y = requests.get(id_api)
             if y.status_code != 200:
-                self._LOGGER.error(
+                _LOGGER.error(
                     f"Error in ncbi efetch response in SRA fetching: {x.status_code}"
                 )
                 raise y.raise_for_status()
@@ -1972,13 +1960,13 @@ def _read_gsm_metadata(
                     "SRX": None,
                 }
 
-                self._LOGGER.debug(f"Found sample: {current_sample_id}")
+                _LOGGER.debug(f"Found sample: {current_sample_id}")
                 samples_list.append(current_sample_id)
             elif current_sample_id is not None:
                 try:
                     pl = parse_SOFT_line(line)
                 except IndexError:
-                    self._LOGGER.debug(
+                    _LOGGER.debug(
                         f"Failed to parse alleged SOFT line for sample ID {current_sample_id}; "
                         f"line: {line}"
                     )
@@ -1999,7 +1987,7 @@ def _read_gsm_metadata(
                 if not current_sample_srx:
                     found = re.findall(EXPERIMENT_PATTERN, line)
                     if found:
-                        self._LOGGER.debug(f"(SRX accession: {found[0]})")
+                        _LOGGER.debug(f"(SRX accession: {found[0]})")
                         srx_id = found[0]
                         gsm_metadata[srx_id] = gsm_metadata.pop(current_sample_id)
                         gsm_metadata[srx_id][
@@ -2008,7 +1996,7 @@ def _read_gsm_metadata(
                         current_sample_id = srx_id
                         current_sample_srx = True
         # GSM SOFT file parsed, save it in a list
-        self._LOGGER.info(f"Processed {len(samples_list)} samples.")
+        _LOGGER.info(f"Processed {len(samples_list)} samples.")
         gsm_metadata = self._expand_metadata_dict(gsm_metadata)
         return gsm_metadata
 
@@ -2027,7 +2015,7 @@ def _write(
         :param omit_newline: omit new line
         """
         fp = expandpath(f_var_value)
-        self._LOGGER.info((msg_pre or "") + fp)
+        _LOGGER.info((msg_pre or "") + fp)
         with open(fp, "w") as f:
             f.write(content)
             if not omit_newline:
@@ -2040,12 +2028,3 @@ def main():
     args_dict = vars(args)
     args_dict["args"] = args
     Geofetcher(**args_dict).fetch_all(args_dict["input"])
-
-
-if __name__ == "__main__":
-    try:
-        sys.exit(main())
-
-    except KeyboardInterrupt:
-        print("Pipeline aborted.")
-        sys.exit(1)
diff --git a/setup.py b/setup.py
index 79cca69..fd88b93 100644
--- a/setup.py
+++ b/setup.py
@@ -48,6 +48,7 @@ def read_reqs(reqs_name):
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
         "Topic :: Scientific/Engineering :: Bio-Informatics",
     ],
     keywords="project, bioinformatics, sequencing, ngs, workflow, GUI",
@@ -56,7 +57,7 @@ def read_reqs(reqs_name):
     license="BSD2",
     entry_points={
         "console_scripts": [
-            "geofetch = geofetch.geofetch:main",
+            "geofetch = geofetch.__main__:main",
             "sraconvert = geofetch.sraconvert:main",
         ],
     },

From a45efb943533a8351221bd816b0bbed13bde7888 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Fri, 21 Apr 2023 13:49:03 -0400
Subject: [PATCH 07/10] Remove soft files from tests

---
 .../test_files/soft_files/GSE138657/GSE.soft  |   46 -
 .../test_files/soft_files/GSE138657/GSM.soft  |  660 ------
 .../test_files/soft_files/GSE146537/GSE.soft  |   36 -
 .../test_files/soft_files/GSE146537/GSM.soft  |  234 --
 .../test_files/soft_files/GSE146539/GSE.soft  |   56 -
 .../test_files/soft_files/GSE146539/GSM.soft  | 1066 ---------
 .../test_files/soft_files/GSE146540/GSE.soft  |   36 -
 .../test_files/soft_files/GSE146540/GSM.soft  |  252 --
 .../test_files/soft_files/GSE146583/GSE.soft  |   87 -
 .../test_files/soft_files/GSE146583/GSM.soft  | 2026 -----------------
 .../test_files/soft_files/GSE150868/GSE.soft  |   66 -
 .../test_files/soft_files/GSE150868/GSM.soft  | 1365 -----------
 12 files changed, 5930 deletions(-)
 delete mode 100644 tests/test_files/soft_files/GSE138657/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE138657/GSM.soft
 delete mode 100644 tests/test_files/soft_files/GSE146537/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE146537/GSM.soft
 delete mode 100644 tests/test_files/soft_files/GSE146539/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE146539/GSM.soft
 delete mode 100644 tests/test_files/soft_files/GSE146540/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE146540/GSM.soft
 delete mode 100644 tests/test_files/soft_files/GSE146583/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE146583/GSM.soft
 delete mode 100644 tests/test_files/soft_files/GSE150868/GSE.soft
 delete mode 100644 tests/test_files/soft_files/GSE150868/GSM.soft

diff --git a/tests/test_files/soft_files/GSE138657/GSE.soft b/tests/test_files/soft_files/GSE138657/GSE.soft
deleted file mode 100644
index 69cf109..0000000
--- a/tests/test_files/soft_files/GSE138657/GSE.soft
+++ /dev/null
@@ -1,46 +0,0 @@
-^SERIES = GSE138657
-!Series_title = Mapping the global chromatin connectivity network in colorectal cancer cell HCT116 and Oxaliplatin-resistant cell HCT116OxR
-!Series_geo_accession = GSE138657
-!Series_status = Public on Oct 01 2021
-!Series_submission_date = Oct 09 2019
-!Series_last_update_date = Oct 02 2021
-!Series_summary = This SuperSeries is composed of the SubSeries listed below.
-!Series_overall_design = Refer to individual Series
-!Series_type = Expression profiling by high throughput sequencing
-!Series_type = Genome binding/occupancy profiling by high throughput sequencing
-!Series_type = Other
-!Series_sample_id = GSM4114982
-!Series_sample_id = GSM4114983
-!Series_sample_id = GSM4115516
-!Series_sample_id = GSM4115517
-!Series_sample_id = GSM4115598
-!Series_sample_id = GSM4115599
-!Series_sample_id = GSM4115600
-!Series_sample_id = GSM4115601
-!Series_sample_id = GSM4115602
-!Series_sample_id = GSM4115603
-!Series_sample_id = GSM4115604
-!Series_sample_id = GSM4115605
-!Series_sample_id = GSM4115606
-!Series_sample_id = GSM4115607
-!Series_sample_id = GSM4115628
-!Series_sample_id = GSM4115629
-!Series_contact_name = lian,qing,jiao
-!Series_contact_email = wangyunshan135@126.com
-!Series_contact_institute = the Second Hospital of Shandong University
-!Series_contact_address = 247 Beiyuan Road
-!Series_contact_city = Ji'nan
-!Series_contact_state = Shandong Province
-!Series_contact_zip/postal_code = 250033
-!Series_contact_country = China
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE138nnn/GSE138657/suppl/GSE138657_RAW.tar
-!Series_platform_id = GPL11154
-!Series_platform_organism = Homo sapiens
-!Series_platform_taxid = 9606
-!Series_sample_organism = Homo sapiens
-!Series_sample_taxid = 9606
-!Series_relation = SuperSeries of: GSE138647
-!Series_relation = SuperSeries of: GSE138652
-!Series_relation = SuperSeries of: GSE138654
-!Series_relation = SuperSeries of: GSE138656
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA576681
diff --git a/tests/test_files/soft_files/GSE138657/GSM.soft b/tests/test_files/soft_files/GSE138657/GSM.soft
deleted file mode 100644
index 22d67e2..0000000
--- a/tests/test_files/soft_files/GSE138657/GSM.soft
+++ /dev/null
@@ -1,660 +0,0 @@
-^SAMPLE = GSM4114982
-!Sample_title = RNA-seq_HCT116
-!Sample_geo_accession = GSM4114982
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = polyA RNA
-!Sample_extract_protocol_ch1 = HCT116 cells were further purified from Pancreatic cancer tissue  .mRNAs were extracted from samples using TRIzol (Invitrogen) following the methods by Chomczynski
-!Sample_extract_protocol_ch1 = RNA libraries were prepared for sequencing using standard Illumina protocols
-!Sample_data_processing = Raw sequencing data was first filtered by Trimmomatic (version: 0.36), low-quality reads were discarded and adaptor sequences were trimmed
-!Sample_data_processing = Reads mapped to the exon regions of each gene were counted by featurecouts (Subread-1.5.1; Bioconductor) and FPKMs were calculated. Genes differential expressed between groups were identified using the edgeR package
-!Sample_data_processing = A corrected P-value cutoff of 0.05 and Fold-change cutoff of 2 were used to judge the statistical significance of gene expression differences.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: tab-delimited text files include FPKM values for each Sample
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001027
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970343
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4114nnn/GSM4114982/suppl/GSM4114982_HCT116-FPKM.txt.gz
-!Sample_series_id = GSE138647
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4114983
-!Sample_title = RNA-seq_HCT116OxR
-!Sample_geo_accession = GSM4114983
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = polyA RNA
-!Sample_extract_protocol_ch1 = HCT116 cells were further purified from Pancreatic cancer tissue  .mRNAs were extracted from samples using TRIzol (Invitrogen) following the methods by Chomczynski
-!Sample_extract_protocol_ch1 = RNA libraries were prepared for sequencing using standard Illumina protocols
-!Sample_data_processing = Raw sequencing data was first filtered by Trimmomatic (version: 0.36), low-quality reads were discarded and adaptor sequences were trimmed
-!Sample_data_processing = Reads mapped to the exon regions of each gene were counted by featurecouts (Subread-1.5.1; Bioconductor) and FPKMs were calculated. Genes differential expressed between groups were identified using the edgeR package
-!Sample_data_processing = A corrected P-value cutoff of 0.05 and Fold-change cutoff of 2 were used to judge the statistical significance of gene expression differences.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: tab-delimited text files include FPKM values for each Sample
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001026
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970344
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4114nnn/GSM4114983/suppl/GSM4114983_HCT116OxR-FPKM.txt.gz
-!Sample_series_id = GSE138647
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115516
-!Sample_title = Hi-C_HCT116
-!Sample_geo_accession = GSM4115516
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = The cross-linked cells were lysed in lysis buffer,fragmentation of the genome using restriction enzymes
-!Sample_extract_protocol_ch1 = Hi-C libraries were prepared for sequencing using standard Illumina protocols
-!Sample_data_processing = Raw sequencing data was first filtered by Trimmomatic (version: 0.36), low-quality reads were discarded and adaptor sequences were trimmed
-!Sample_data_processing = The BWA package was used to map the  sequences against the human reference genome with the parameter -n 0. Mapped  reads with MAPQ quality scores ≥20 were chosen for further analysis.
-!Sample_data_processing = We applied the ICE  method to normalize the interaction matrix for different resolutions
-!Sample_data_processing = If a region between two adjacent boundaries was less than 400 kb, the region was  marked as a TAD boundary
-!Sample_data_processing = The Juicer pipeline’s HiCCUPS was applied for discovery of locally enriched  peaks
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by HiCCUPS
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = Hi-C
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001580
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970812
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115516/suppl/GSM4115516_diffloopgene_HCT116.txt.gz
-!Sample_series_id = GSE138652
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115517
-!Sample_title = Hi-C_HCT116OxR
-!Sample_geo_accession = GSM4115517
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = The cross-linked cells were lysed in lysis buffer,fragmentation of the genome using restriction enzymes
-!Sample_extract_protocol_ch1 = Hi-C libraries were prepared for sequencing using standard Illumina protocols
-!Sample_data_processing = Raw sequencing data was first filtered by Trimmomatic (version: 0.36), low-quality reads were discarded and adaptor sequences were trimmed
-!Sample_data_processing = The BWA package was used to map the  sequences against the human reference genome with the parameter -n 0. Mapped  reads with MAPQ quality scores ≥20 were chosen for further analysis.
-!Sample_data_processing = We applied the ICE  method to normalize the interaction matrix for different resolutions
-!Sample_data_processing = If a region between two adjacent boundaries was less than 400 kb, the region was  marked as a TAD boundary
-!Sample_data_processing = The Juicer pipeline’s HiCCUPS was applied for discovery of locally enriched  peaks
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by HiCCUPS
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = Hi-C
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001579
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970813
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115517/suppl/GSM4115517_diffloopgene_HCT116OxR.txt.gz
-!Sample_series_id = GSE138652
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115598
-!Sample_title = H3K4me1_HCT116
-!Sample_geo_accession = GSM4115598
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K4me1(abcam,ab8895)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K4me1_HCT116
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001578
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970834
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115598/suppl/GSM4115598_H3K4me1_HCT116_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115599
-!Sample_title = H3K4me1_HCT116OxR
-!Sample_geo_accession = GSM4115599
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K4me1(abcam,ab8895)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K4me1_HCT116OxR
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001577
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970835
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115599/suppl/GSM4115599_H3K4me1_HCT116OxR_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115600
-!Sample_title = H3K4me3_HCT116
-!Sample_geo_accession = GSM4115600
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K4me3(abcam,ab8580)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K4me3_HCT116
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001576
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970836
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115600/suppl/GSM4115600_H3K4me3_HCT116_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115601
-!Sample_title = H3K4me3_HCT116OxR
-!Sample_geo_accession = GSM4115601
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K4me3(abcam,ab8580)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K4me3_HCT116OxR
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001575
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970837
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115601/suppl/GSM4115601_H3K4me3_HCT116OxR_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115602
-!Sample_title = H3K27ac_HCT116
-!Sample_geo_accession = GSM4115602
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K27ac(abcam,ab4729)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K27ac_HCT116
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001574
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970838
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115602/suppl/GSM4115602_H3K27ac_HCT116_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115603
-!Sample_title = H3K27ac_HCT116OxR
-!Sample_geo_accession = GSM4115603
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K27ac(abcam,ab4729)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K27ac_HCT116OxR
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001572
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970839
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115603/suppl/GSM4115603_H3K27ac_HCT116OxR_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115604
-!Sample_title = H3K27me3_HCT116
-!Sample_geo_accession = GSM4115604
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K27me3(abcam,ab6002)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K27me3_HCT116
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001570
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970840
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115604/suppl/GSM4115604_H3K27me3_HCT116_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115605
-!Sample_title = H3K27me3_HCT116OxR
-!Sample_geo_accession = GSM4115605
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: H3K27me3(abcam,ab6002)
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = H3K27me3_HCT116OxR
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001569
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970841
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115605/suppl/GSM4115605_H3K27me3_HCT116OxR_peaks.txt.gz
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115606
-!Sample_title = Input_HCT116
-!Sample_geo_accession = GSM4115606
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = Input_HCT116
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001567
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970842
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115607
-!Sample_title = Input_HCT116OxR
-!Sample_geo_accession = GSM4115607
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Lysates were clarified from sonicated nuclei and histone-DNA complexes were isolated with antibody.
-!Sample_extract_protocol_ch1 = ChIP-seq libraries were prepared for sequencing using standard Illumina protocols
-!Sample_description = Input_HCT116OxR
-!Sample_data_processing = Reads were aligned to the hg19 human genome with Bowtie2 2.1.0
-!Sample_data_processing = MACS2 2.1.1 was used for peak calling (p value threshold = 10-5) with default parameters
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: all the locally  enriched peaks were determined by MACS2 2.1.1
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001565
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6970843
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE138654
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115628
-!Sample_title = ATAC-seq_HCT116
-!Sample_geo_accession = GSM4115628
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-unresistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Around 50,000 living cells were taken for each library preparation. The cells were  lysed in 1 × Lysis Buffer to get the nuclei
-!Sample_extract_protocol_ch1 = TruePrep DNA Library Prep Kit for Illumina was used to construct the transposase-treated libraries
-!Sample_data_processing = Raw data were stored in FASTQ format, including the base sequence and  corresponding quality information. Adaptor-polluted or low-quality reads were then  filtered out to get the clean data
-!Sample_data_processing = Clean data were mapped to reference genome by Bowtie2, and visualized by IGV
-!Sample_data_processing = The enrichment analysis of  GO term (http://geneontology.org/) or KEGG pathway (http://www.kegg.jp/) was  based on hypergeometric test with the threshold q value < 0.05, to find the significant  enrichment of detected genes.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Peaks corresponding  to the open region in genome were detected by MACS2
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001581
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6971480
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115628/suppl/GSM4115628_HCT116_peaks.txt.gz
-!Sample_series_id = GSE138656
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
-^SAMPLE = GSM4115629
-!Sample_title = ATAC-seq_HCT116OxR
-!Sample_geo_accession = GSM4115629
-!Sample_status = Public on Oct 01 2021
-!Sample_submission_date = Oct 09 2019
-!Sample_last_update_date = Oct 01 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = HCT116 cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = tissue: Colorectal cancer
-!Sample_characteristics_ch1 = cell type: HCT116 cells
-!Sample_characteristics_ch1 = genotype: Oxaliplatin-resistant
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Around 50,000 living cells were taken for each library preparation. The cells were  lysed in 1 × Lysis Buffer to get the nuclei
-!Sample_extract_protocol_ch1 = TruePrep DNA Library Prep Kit for Illumina was used to construct the transposase-treated libraries
-!Sample_data_processing = Raw data were stored in FASTQ format, including the base sequence and  corresponding quality information. Adaptor-polluted or low-quality reads were then  filtered out to get the clean data
-!Sample_data_processing = Clean data were mapped to reference genome by Bowtie2, and visualized by IGV
-!Sample_data_processing = The enrichment analysis of  GO term (http://geneontology.org/) or KEGG pathway (http://www.kegg.jp/) was  based on hypergeometric test with the threshold q value < 0.05, to find the significant  enrichment of detected genes.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Peaks corresponding  to the open region in genome were detected by MACS2
-!Sample_platform_id = GPL11154
-!Sample_contact_name = lian,qing,jiao
-!Sample_contact_email = wangyunshan135@126.com
-!Sample_contact_institute = the Second Hospital of Shandong University
-!Sample_contact_address = 247 Beiyuan Road
-!Sample_contact_city = Ji'nan
-!Sample_contact_state = Shandong Province
-!Sample_contact_zip/postal_code = 250033
-!Sample_contact_country = China
-!Sample_instrument_model = Illumina HiSeq 2000
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN13001585
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX6971481
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4115nnn/GSM4115629/suppl/GSM4115629_HCT116OxR_peaks.txt.gz
-!Sample_series_id = GSE138656
-!Sample_series_id = GSE138657
-!Sample_data_row_count = 0
diff --git a/tests/test_files/soft_files/GSE146537/GSE.soft b/tests/test_files/soft_files/GSE146537/GSE.soft
deleted file mode 100644
index b77ee9b..0000000
--- a/tests/test_files/soft_files/GSE146537/GSE.soft
+++ /dev/null
@@ -1,36 +0,0 @@
-^SERIES = GSE146537
-!Series_title = Chromatin landscape analysis of ccRCC cell line upon SETD2 rescue [ATACseq_JHRCC12]
-!Series_geo_accession = GSE146537
-!Series_status = Public on Nov 15 2021
-!Series_submission_date = Mar 06 2020
-!Series_last_update_date = Mar 25 2022
-!Series_pubmed_id = 35115713
-!Series_summary = SETD2, a H3K36 trimethyltransferase, is frequently mutated in human cancers with the highest prevalence (13%) in clear cell renal cell carcinoma (ccRCC). Genomic profiling of primary ccRCC tumors reveals a positive correlation between SETD2 mutations and metastasis. However, whether and how SETD2-loss promotes metastasis remains unclear. Here, we detected SETD2 mutations in 24 of 51 (47%) metastatic ccRCC tumors. Using SETD2-mutant metastatic ccRCC patient-derived cell line and xenograft models, we showed that H3K36me3 restoration greatly reduced distant metastases of ccRCC in mice. An integrated ATAC-seq, ChIP-seq, and transcriptome analysis concluded a tumor suppressor model in which loss of SETD2-mediated H3K36me3 activates enhancers to drive oncogenic transcription through dysregulating histone chaperone recruitment, enhancing histone exchange, and expanding chromatin accessibility. Furthermore, we uncovered mechanism-based therapeutic strategies for SETD2-deficient cancer through inhibition of histone chaperones. Overall, SETD2-loss creates a permissive epigenetic landscape for cooperating oncogenic drivers to amplify transcriptional output, providing unique therapeutic opportunities.
-!Series_overall_design = ATAC-Seq was performed in SETD2-proficient and SETD2-deficient JHRCC12 cells in 3 replicates.
-!Series_type = Genome binding/occupancy profiling by high throughput sequencing
-!Series_contributor = Yuchen,,Xie
-!Series_contributor = Merve,,Sahin
-!Series_contributor = Emily,H,Cheng
-!Series_sample_id = GSM4391896
-!Series_sample_id = GSM4391897
-!Series_sample_id = GSM4391898
-!Series_sample_id = GSM4391899
-!Series_sample_id = GSM4391900
-!Series_sample_id = GSM4391901
-!Series_contact_name = Christina,S,Leslie
-!Series_contact_department = Computational Biology Program
-!Series_contact_institute = Memorial Sloan Kettering Cancer Center
-!Series_contact_address = 417 E 68th St
-!Series_contact_city = New York
-!Series_contact_state = NY
-!Series_contact_zip/postal_code = 10065
-!Series_contact_country = USA
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146537/suppl/GSE146537_RAW.tar
-!Series_platform_id = GPL16791
-!Series_platform_organism = Homo sapiens
-!Series_platform_taxid = 9606
-!Series_sample_organism = Homo sapiens
-!Series_sample_taxid = 9606
-!Series_relation = SubSeries of: GSE146583
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA610881
-!Series_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRP251915
diff --git a/tests/test_files/soft_files/GSE146537/GSM.soft b/tests/test_files/soft_files/GSE146537/GSM.soft
deleted file mode 100644
index 3769f84..0000000
--- a/tests/test_files/soft_files/GSE146537/GSM.soft
+++ /dev/null
@@ -1,234 +0,0 @@
-^SAMPLE = GSM4391896
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep1
-!Sample_geo_accession = GSM4391896
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322350
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866141
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391896/suppl/GSM4391896_JHRCC12_SETD2_mut_rep1.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391897
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep2
-!Sample_geo_accession = GSM4391897
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322349
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866142
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391897/suppl/GSM4391897_JHRCC12_SETD2_mut_rep2.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391898
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep3
-!Sample_geo_accession = GSM4391898
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322348
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866143
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391898/suppl/GSM4391898_JHRCC12_SETD2_mut_rep3.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391899
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep1
-!Sample_geo_accession = GSM4391899
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322347
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866144
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391899/suppl/GSM4391899_JHRCC12_SETD2_rescue_rep1.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391900
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep2
-!Sample_geo_accession = GSM4391900
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322346
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866145
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391900/suppl/GSM4391900_JHRCC12_SETD2_rescue_rep2.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391901
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep3
-!Sample_geo_accession = GSM4391901
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322345
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866146
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391901/suppl/GSM4391901_JHRCC12_SETD2_rescue_rep3.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
diff --git a/tests/test_files/soft_files/GSE146539/GSE.soft b/tests/test_files/soft_files/GSE146539/GSE.soft
deleted file mode 100644
index edfd5e9..0000000
--- a/tests/test_files/soft_files/GSE146539/GSE.soft
+++ /dev/null
@@ -1,56 +0,0 @@
-^SERIES = GSE146539
-!Series_title = Genome binding/occupancy profiling of ccRCC cell line upon SETD2 rescue [ChIPseq_JHRCC12]
-!Series_geo_accession = GSE146539
-!Series_status = Public on Nov 15 2021
-!Series_submission_date = Mar 06 2020
-!Series_last_update_date = Mar 25 2022
-!Series_pubmed_id = 35115713
-!Series_summary = SETD2, a H3K36 trimethyltransferase, is frequently mutated in human cancers with the highest prevalence (13%) in clear cell renal cell carcinoma (ccRCC). Genomic profiling of primary ccRCC tumors reveals a positive correlation between SETD2 mutations and metastasis. However, whether and how SETD2-loss promotes metastasis remains unclear. Here, we detected SETD2 mutations in 24 of 51 (47%) metastatic ccRCC tumors. Using SETD2-mutant metastatic ccRCC patient-derived cell line and xenograft models, we showed that H3K36me3 restoration greatly reduced distant metastases of ccRCC in mice. An integrated ATAC-seq, ChIP-seq, and transcriptome analysis concluded a tumor suppressor model in which loss of SETD2-mediated H3K36me3 activates enhancers to drive oncogenic transcription through dysregulating histone chaperone recruitment, enhancing histone exchange, and expanding chromatin accessibility. Furthermore, we uncovered mechanism-based therapeutic strategies for SETD2-deficient cancer through inhibition of histone chaperones. Overall, SETD2-loss creates a permissive epigenetic landscape for cooperating oncogenic drivers to amplify transcriptional output, providing unique therapeutic opportunities.
-!Series_overall_design = ChIP-Seq was performed in SETD2-proficient and SETD2-deficient JHRCC12 cells in 2 replicates.
-!Series_type = Genome binding/occupancy profiling by high throughput sequencing
-!Series_contributor = Yuchen,,Xie
-!Series_contributor = Merve,,Sahin
-!Series_contributor = Emily,H,Cheng
-!Series_sample_id = GSM4391908
-!Series_sample_id = GSM4391909
-!Series_sample_id = GSM4391910
-!Series_sample_id = GSM4391911
-!Series_sample_id = GSM4391912
-!Series_sample_id = GSM4391913
-!Series_sample_id = GSM4391914
-!Series_sample_id = GSM4391915
-!Series_sample_id = GSM4391916
-!Series_sample_id = GSM4391917
-!Series_sample_id = GSM4391918
-!Series_sample_id = GSM4391919
-!Series_sample_id = GSM4391920
-!Series_sample_id = GSM4391921
-!Series_sample_id = GSM4391922
-!Series_sample_id = GSM4391923
-!Series_sample_id = GSM4391924
-!Series_sample_id = GSM4391925
-!Series_sample_id = GSM4391926
-!Series_sample_id = GSM4391927
-!Series_sample_id = GSM4391928
-!Series_sample_id = GSM4391929
-!Series_sample_id = GSM4391930
-!Series_sample_id = GSM4391931
-!Series_sample_id = GSM4391932
-!Series_sample_id = GSM4391933
-!Series_contact_name = Christina,S,Leslie
-!Series_contact_department = Computational Biology Program
-!Series_contact_institute = Memorial Sloan Kettering Cancer Center
-!Series_contact_address = 417 E 68th St
-!Series_contact_city = New York
-!Series_contact_state = NY
-!Series_contact_zip/postal_code = 10065
-!Series_contact_country = USA
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146539/suppl/GSE146539_RAW.tar
-!Series_platform_id = GPL16791
-!Series_platform_organism = Homo sapiens
-!Series_platform_taxid = 9606
-!Series_sample_organism = Homo sapiens
-!Series_sample_taxid = 9606
-!Series_relation = SubSeries of: GSE146583
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA610879
-!Series_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRP251918
diff --git a/tests/test_files/soft_files/GSE146539/GSM.soft b/tests/test_files/soft_files/GSE146539/GSM.soft
deleted file mode 100644
index 9426624..0000000
--- a/tests/test_files/soft_files/GSE146539/GSM.soft
+++ /dev/null
@@ -1,1066 +0,0 @@
-^SAMPLE = GSM4391908
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me1_rep1
-!Sample_geo_accession = GSM4391908
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322338
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866160
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391908/suppl/GSM4391908_JHRCC12_SETD2_mut_H3K4me1_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391909
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me1_rep2
-!Sample_geo_accession = GSM4391909
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322237
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866161
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391909/suppl/GSM4391909_JHRCC12_SETD2_mut_H3K4me1_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391910
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me1_rep1
-!Sample_geo_accession = GSM4391910
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322333
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866162
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391910/suppl/GSM4391910_JHRCC12_SETD2_rescue_H3K4me1_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391911
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me1_rep2
-!Sample_geo_accession = GSM4391911
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322332
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866163
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391911/suppl/GSM4391911_JHRCC12_SETD2_rescue_H3K4me1_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391912
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me3_rep1
-!Sample_geo_accession = GSM4391912
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322331
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866164
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391912/suppl/GSM4391912_JHRCC12_SETD2_mut_H3K4me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391913
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me3_rep2
-!Sample_geo_accession = GSM4391913
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322330
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866165
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391913/suppl/GSM4391913_JHRCC12_SETD2_mut_H3K4me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391914
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me3_rep1
-!Sample_geo_accession = GSM4391914
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322329
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866166
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391914/suppl/GSM4391914_JHRCC12_SETD2_rescue_H3K4me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391915
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me3_rep2
-!Sample_geo_accession = GSM4391915
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322328
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866167
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391915/suppl/GSM4391915_JHRCC12_SETD2_rescue_H3K4me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391916
-!Sample_title = ChIP-Seq SETD2_mut_H3K27ac_rep1
-!Sample_geo_accession = GSM4391916
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322327
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866168
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391916/suppl/GSM4391916_JHRCC12_SETD2_mut_H3K27AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391917
-!Sample_title = ChIP-Seq SETD2_mut_H3K27ac_rep2
-!Sample_geo_accession = GSM4391917
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322326
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866169
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391917/suppl/GSM4391917_JHRCC12_SETD2_mut_H3K27AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391918
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27ac_rep1
-!Sample_geo_accession = GSM4391918
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322325
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866170
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391918/suppl/GSM4391918_JHRCC12_SETD2_rescue_H3K27AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391919
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27ac_rep2
-!Sample_geo_accession = GSM4391919
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322323
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866171
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391919/suppl/GSM4391919_JHRCC12_SETD2_rescue_H3K27AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391920
-!Sample_title = ChIP-Seq SETD2_mut_H3K56ac_rep1
-!Sample_geo_accession = GSM4391920
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322324
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866172
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391920/suppl/GSM4391920_JHRCC12_SETD2_mut_H3K56AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391921
-!Sample_title = ChIP-Seq SETD2_mut_H3K56ac_rep2
-!Sample_geo_accession = GSM4391921
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322322
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866173
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391921/suppl/GSM4391921_JHRCC12_SETD2_mut_H3K56AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391922
-!Sample_title = ChIP-Seq SETD2_rescue_H3K56ac_rep1
-!Sample_geo_accession = GSM4391922
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322321
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866174
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391922/suppl/GSM4391922_JHRCC12_SETD2_rescue_H3K56AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391923
-!Sample_title = ChIP-Seq SETD2_rescue_H3K56ac_rep2
-!Sample_geo_accession = GSM4391923
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322320
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866175
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391923/suppl/GSM4391923_JHRCC12_SETD2_rescue_H3K56AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391924
-!Sample_title = ChIP-Seq SETD2_mut_H3K27me3_rep1
-!Sample_geo_accession = GSM4391924
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322319
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866176
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391924/suppl/GSM4391924_JHRCC12_SETD2_mut_H3K27me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391925
-!Sample_title = ChIP-Seq SETD2_mut_H3K27me3_rep2
-!Sample_geo_accession = GSM4391925
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322318
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866177
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391925/suppl/GSM4391925_JHRCC12_SETD2_mut_H3K27me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391926
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27me3_rep1
-!Sample_geo_accession = GSM4391926
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322317
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866178
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391926/suppl/GSM4391926_JHRCC12_SETD2_rescue_H3K27me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391927
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27me3_rep2
-!Sample_geo_accession = GSM4391927
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322316
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866179
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391927/suppl/GSM4391927_JHRCC12_SETD2_rescue_H3K27me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391928
-!Sample_title = ChIP-Seq SETD2_mut_H3K36me3_rep1
-!Sample_geo_accession = GSM4391928
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322315
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866180
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391928/suppl/GSM4391928_JHRCC12_SETD2_mut_H3K36me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391929
-!Sample_title = ChIP-Seq SETD2_mut_H3K36me3_rep2
-!Sample_geo_accession = GSM4391929
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322314
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866181
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391929/suppl/GSM4391929_JHRCC12_SETD2_mut_H3K36me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391930
-!Sample_title = ChIP-Seq SETD2_rescue_H3K36me3_rep1
-!Sample_geo_accession = GSM4391930
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322313
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866182
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391930/suppl/GSM4391930_JHRCC12_SETD2_rescue_H3K36me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391931
-!Sample_title = ChIP-Seq SETD2_rescue_H3K36me3_rep2
-!Sample_geo_accession = GSM4391931
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322312
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866183
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391931/suppl/GSM4391931_JHRCC12_SETD2_rescue_H3K36me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391932
-!Sample_title = ChIP-Seq SETD2_mut_input
-!Sample_geo_accession = GSM4391932
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322311
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866184
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391933
-!Sample_title = ChIP-Seq SETD2_rescue_input
-!Sample_geo_accession = GSM4391933
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322310
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866185
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
diff --git a/tests/test_files/soft_files/GSE146540/GSE.soft b/tests/test_files/soft_files/GSE146540/GSE.soft
deleted file mode 100644
index 0b2829d..0000000
--- a/tests/test_files/soft_files/GSE146540/GSE.soft
+++ /dev/null
@@ -1,36 +0,0 @@
-^SERIES = GSE146540
-!Series_title = Gene expression analysis of ccRCC cell line upon SETD2 rescue [RNAseq_JHRCC12]
-!Series_geo_accession = GSE146540
-!Series_status = Public on Nov 15 2021
-!Series_submission_date = Mar 06 2020
-!Series_last_update_date = Mar 25 2022
-!Series_pubmed_id = 35115713
-!Series_summary = SETD2, a H3K36 trimethyltransferase, is frequently mutated in human cancers with the highest prevalence (13%) in clear cell renal cell carcinoma (ccRCC). Genomic profiling of primary ccRCC tumors reveals a positive correlation between SETD2 mutations and metastasis. However, whether and how SETD2-loss promotes metastasis remains unclear. Here, we detected SETD2 mutations in 24 of 51 (47%) metastatic ccRCC tumors. Using SETD2-mutant metastatic ccRCC patient-derived cell line and xenograft models, we showed that H3K36me3 restoration greatly reduced distant metastases of ccRCC in mice. An integrated ATAC-seq, ChIP-seq, and transcriptome analysis concluded a tumor suppressor model in which loss of SETD2-mediated H3K36me3 activates enhancers to drive oncogenic transcription through dysregulating histone chaperone recruitment, enhancing histone exchange, and expanding chromatin accessibility. Furthermore, we uncovered mechanism-based therapeutic strategies for SETD2-deficient cancer through inhibition of histone chaperones. Overall, SETD2-loss creates a permissive epigenetic landscape for cooperating oncogenic drivers to amplify transcriptional output, providing unique therapeutic opportunities.
-!Series_overall_design = RNA-Seq assay was perfomed in SETD2-proficient and SETD2-deficient JHRCC12 cells in 3 replicates
-!Series_type = Expression profiling by high throughput sequencing
-!Series_contributor = Yuchen,,Xie
-!Series_contributor = Merve,,Sahin
-!Series_contributor = Emily,H,Cheng
-!Series_sample_id = GSM4391934
-!Series_sample_id = GSM4391935
-!Series_sample_id = GSM4391936
-!Series_sample_id = GSM4391937
-!Series_sample_id = GSM4391938
-!Series_sample_id = GSM4391939
-!Series_contact_name = Christina,S,Leslie
-!Series_contact_department = Computational Biology Program
-!Series_contact_institute = Memorial Sloan Kettering Cancer Center
-!Series_contact_address = 417 E 68th St
-!Series_contact_city = New York
-!Series_contact_state = NY
-!Series_contact_zip/postal_code = 10065
-!Series_contact_country = USA
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146540/suppl/GSE146540_All_gene_counts.txt.gz
-!Series_platform_id = GPL20301
-!Series_platform_organism = Homo sapiens
-!Series_platform_taxid = 9606
-!Series_sample_organism = Homo sapiens
-!Series_sample_taxid = 9606
-!Series_relation = SubSeries of: GSE146583
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA610878
-!Series_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRP251919
diff --git a/tests/test_files/soft_files/GSE146540/GSM.soft b/tests/test_files/soft_files/GSE146540/GSM.soft
deleted file mode 100644
index 31ee5bc..0000000
--- a/tests/test_files/soft_files/GSE146540/GSM.soft
+++ /dev/null
@@ -1,252 +0,0 @@
-^SAMPLE = GSM4391934
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep1
-!Sample_geo_accession = GSM4391934
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_1
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322309
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866186
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391935
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep2
-!Sample_geo_accession = GSM4391935
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_2
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322308
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866187
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391936
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep3
-!Sample_geo_accession = GSM4391936
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_3
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322307
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866188
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391937
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep1
-!Sample_geo_accession = GSM4391937
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_1
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322306
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866189
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391938
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep2
-!Sample_geo_accession = GSM4391938
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_2
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322305
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866190
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391939
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep3
-!Sample_geo_accession = GSM4391939
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_3
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322304
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866191
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
diff --git a/tests/test_files/soft_files/GSE146583/GSE.soft b/tests/test_files/soft_files/GSE146583/GSE.soft
deleted file mode 100644
index cb28988..0000000
--- a/tests/test_files/soft_files/GSE146583/GSE.soft
+++ /dev/null
@@ -1,87 +0,0 @@
-^SERIES = GSE146583
-!Series_title = SETD2 Loss Creates A Permissive Epigenetic Landscape that Promotes Kidney Cancer Metastasis and Engenders Therapeutic Vulnerabilities
-!Series_geo_accession = GSE146583
-!Series_status = Public on Nov 15 2021
-!Series_submission_date = Mar 06 2020
-!Series_last_update_date = Mar 25 2022
-!Series_pubmed_id = 35115713
-!Series_summary = This SuperSeries is composed of the SubSeries listed below.
-!Series_overall_design = Refer to individual Series
-!Series_type = Genome binding/occupancy profiling by high throughput sequencing
-!Series_type = Expression profiling by high throughput sequencing
-!Series_sample_id = GSM4391896
-!Series_sample_id = GSM4391897
-!Series_sample_id = GSM4391898
-!Series_sample_id = GSM4391899
-!Series_sample_id = GSM4391900
-!Series_sample_id = GSM4391901
-!Series_sample_id = GSM4391902
-!Series_sample_id = GSM4391903
-!Series_sample_id = GSM4391904
-!Series_sample_id = GSM4391905
-!Series_sample_id = GSM4391906
-!Series_sample_id = GSM4391907
-!Series_sample_id = GSM4391908
-!Series_sample_id = GSM4391909
-!Series_sample_id = GSM4391910
-!Series_sample_id = GSM4391911
-!Series_sample_id = GSM4391912
-!Series_sample_id = GSM4391913
-!Series_sample_id = GSM4391914
-!Series_sample_id = GSM4391915
-!Series_sample_id = GSM4391916
-!Series_sample_id = GSM4391917
-!Series_sample_id = GSM4391918
-!Series_sample_id = GSM4391919
-!Series_sample_id = GSM4391920
-!Series_sample_id = GSM4391921
-!Series_sample_id = GSM4391922
-!Series_sample_id = GSM4391923
-!Series_sample_id = GSM4391924
-!Series_sample_id = GSM4391925
-!Series_sample_id = GSM4391926
-!Series_sample_id = GSM4391927
-!Series_sample_id = GSM4391928
-!Series_sample_id = GSM4391929
-!Series_sample_id = GSM4391930
-!Series_sample_id = GSM4391931
-!Series_sample_id = GSM4391932
-!Series_sample_id = GSM4391933
-!Series_sample_id = GSM4391934
-!Series_sample_id = GSM4391935
-!Series_sample_id = GSM4391936
-!Series_sample_id = GSM4391937
-!Series_sample_id = GSM4391938
-!Series_sample_id = GSM4391939
-!Series_sample_id = GSM4391940
-!Series_sample_id = GSM4391941
-!Series_sample_id = GSM4391942
-!Series_sample_id = GSM4391943
-!Series_sample_id = GSM4391944
-!Series_sample_id = GSM4391945
-!Series_contact_name = Christina,S,Leslie
-!Series_contact_department = Computational Biology Program
-!Series_contact_institute = Memorial Sloan Kettering Cancer Center
-!Series_contact_address = 417 E 68th St
-!Series_contact_city = New York
-!Series_contact_state = NY
-!Series_contact_zip/postal_code = 10065
-!Series_contact_country = USA
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE146nnn/GSE146583/suppl/GSE146583_RAW.tar
-!Series_platform_id = GPL16791
-!Series_platform_id = GPL17021
-!Series_platform_id = GPL20301
-!Series_platform_organism = Homo sapiens
-!Series_platform_organism = Mus musculus
-!Series_platform_taxid = 9606
-!Series_platform_taxid = 10090
-!Series_sample_organism = Homo sapiens
-!Series_sample_organism = Mus musculus
-!Series_sample_taxid = 9606
-!Series_sample_taxid = 10090
-!Series_relation = SuperSeries of: GSE146537
-!Series_relation = SuperSeries of: GSE146538
-!Series_relation = SuperSeries of: GSE146539
-!Series_relation = SuperSeries of: GSE146540
-!Series_relation = SuperSeries of: GSE146541
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA610874
diff --git a/tests/test_files/soft_files/GSE146583/GSM.soft b/tests/test_files/soft_files/GSE146583/GSM.soft
deleted file mode 100644
index cbc0332..0000000
--- a/tests/test_files/soft_files/GSE146583/GSM.soft
+++ /dev/null
@@ -1,2026 +0,0 @@
-^SAMPLE = GSM4391896
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep1
-!Sample_geo_accession = GSM4391896
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322350
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866141
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391896/suppl/GSM4391896_JHRCC12_SETD2_mut_rep1.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391897
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep2
-!Sample_geo_accession = GSM4391897
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322349
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866142
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391897/suppl/GSM4391897_JHRCC12_SETD2_mut_rep2.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391898
-!Sample_title = ATAC-Seq JHRCC12_SETD2_mut_rep3
-!Sample_geo_accession = GSM4391898
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322348
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866143
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391898/suppl/GSM4391898_JHRCC12_SETD2_mut_rep3.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391899
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep1
-!Sample_geo_accession = GSM4391899
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322347
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866144
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391899/suppl/GSM4391899_JHRCC12_SETD2_rescue_rep1.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391900
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep2
-!Sample_geo_accession = GSM4391900
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322346
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866145
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391900/suppl/GSM4391900_JHRCC12_SETD2_rescue_rep2.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391901
-!Sample_title = ATAC-Seq JHRCC12_SETD2_rescue_rep3
-!Sample_geo_accession = GSM4391901
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 JHRCC12 cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the hg19 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322345
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866146
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391901/suppl/GSM4391901_JHRCC12_SETD2_rescue_rep3.bw
-!Sample_series_id = GSE146537
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391902
-!Sample_title = ATAC-Seq RTE_SETD2_wt_rep1
-!Sample_geo_accession = GSM4391902
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322344
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866154
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391902/suppl/GSM4391902_wt_Rep1_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391903
-!Sample_title = ATAC-Seq RTE_SETD2_wt_rep2
-!Sample_geo_accession = GSM4391903
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322343
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866155
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391903/suppl/GSM4391903_wt_Rep2_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391904
-!Sample_title = ATAC-Seq RTE_SETD2_wt_rep3
-!Sample_geo_accession = GSM4391904
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322342
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866156
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391904/suppl/GSM4391904_wt_Rep3_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391905
-!Sample_title = ATAC-Seq RTE_SETD2_KO_rep1
-!Sample_geo_accession = GSM4391905
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322341
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866157
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391905/suppl/GSM4391905_ko_Rep1_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391906
-!Sample_title = ATAC-Seq RTE_SETD2_KO_rep2
-!Sample_geo_accession = GSM4391906
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322340
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866158
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391906/suppl/GSM4391906_ko_Rep2_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391907
-!Sample_title = ATAC-Seq RTE_SETD2_KO_rep3
-!Sample_geo_accession = GSM4391907
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 50,000 murine renal tubular epithelial cells were used for the transposition reaction at 37 °C for 30 min. After purification of the DNA with the MinElute PCR purification kit (Qiagen), material was amplified for 5 cycles as described previously (Buenrostro et al., 2013). Additional PCR cycles were evaluated by real time PCR. Final product was cleaned by AMPure Beads at a 1.5x ratio.
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were mapped to the mm10 genome assembly using Bowtie2 (v2.3.4.3), and non-uniquely mapping reads were removed.
-!Sample_data_processing = Peak calling was performed on each replicate using MACS2 (v2.1.2) with ‘-p 1e-2 --extsize 200 --shift -100 --nomodel’ parameters.
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322339
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866159
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391907/suppl/GSM4391907_ko_Rep3_Norm.bw
-!Sample_series_id = GSE146538
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391908
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me1_rep1
-!Sample_geo_accession = GSM4391908
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322338
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866160
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391908/suppl/GSM4391908_JHRCC12_SETD2_mut_H3K4me1_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391909
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me1_rep2
-!Sample_geo_accession = GSM4391909
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322237
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866161
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391909/suppl/GSM4391909_JHRCC12_SETD2_mut_H3K4me1_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391910
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me1_rep1
-!Sample_geo_accession = GSM4391910
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322333
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866162
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391910/suppl/GSM4391910_JHRCC12_SETD2_rescue_H3K4me1_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391911
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me1_rep2
-!Sample_geo_accession = GSM4391911
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me1 (Abcam, ab8895)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322332
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866163
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391911/suppl/GSM4391911_JHRCC12_SETD2_rescue_H3K4me1_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391912
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me3_rep1
-!Sample_geo_accession = GSM4391912
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322331
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866164
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391912/suppl/GSM4391912_JHRCC12_SETD2_mut_H3K4me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391913
-!Sample_title = ChIP-Seq SETD2_mut_H3K4me3_rep2
-!Sample_geo_accession = GSM4391913
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322330
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866165
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391913/suppl/GSM4391913_JHRCC12_SETD2_mut_H3K4me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391914
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me3_rep1
-!Sample_geo_accession = GSM4391914
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322329
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866166
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391914/suppl/GSM4391914_JHRCC12_SETD2_rescue_H3K4me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391915
-!Sample_title = ChIP-Seq SETD2_rescue_H3K4me3_rep2
-!Sample_geo_accession = GSM4391915
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K4me3 (Abcam, ab8580)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322328
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866167
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391915/suppl/GSM4391915_JHRCC12_SETD2_rescue_H3K4me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391916
-!Sample_title = ChIP-Seq SETD2_mut_H3K27ac_rep1
-!Sample_geo_accession = GSM4391916
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322327
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866168
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391916/suppl/GSM4391916_JHRCC12_SETD2_mut_H3K27AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391917
-!Sample_title = ChIP-Seq SETD2_mut_H3K27ac_rep2
-!Sample_geo_accession = GSM4391917
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322326
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866169
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391917/suppl/GSM4391917_JHRCC12_SETD2_mut_H3K27AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391918
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27ac_rep1
-!Sample_geo_accession = GSM4391918
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322325
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866170
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391918/suppl/GSM4391918_JHRCC12_SETD2_rescue_H3K27AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391919
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27ac_rep2
-!Sample_geo_accession = GSM4391919
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27ac (Abcam, ab4729)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322323
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866171
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391919/suppl/GSM4391919_JHRCC12_SETD2_rescue_H3K27AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391920
-!Sample_title = ChIP-Seq SETD2_mut_H3K56ac_rep1
-!Sample_geo_accession = GSM4391920
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322324
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866172
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391920/suppl/GSM4391920_JHRCC12_SETD2_mut_H3K56AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391921
-!Sample_title = ChIP-Seq SETD2_mut_H3K56ac_rep2
-!Sample_geo_accession = GSM4391921
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322322
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866173
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391921/suppl/GSM4391921_JHRCC12_SETD2_mut_H3K56AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391922
-!Sample_title = ChIP-Seq SETD2_rescue_H3K56ac_rep1
-!Sample_geo_accession = GSM4391922
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322321
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866174
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391922/suppl/GSM4391922_JHRCC12_SETD2_rescue_H3K56AC_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391923
-!Sample_title = ChIP-Seq SETD2_rescue_H3K56ac_rep2
-!Sample_geo_accession = GSM4391923
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K56ac (Millipore, 07-677)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322320
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866175
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391923/suppl/GSM4391923_JHRCC12_SETD2_rescue_H3K56AC_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391924
-!Sample_title = ChIP-Seq SETD2_mut_H3K27me3_rep1
-!Sample_geo_accession = GSM4391924
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322319
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866176
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391924/suppl/GSM4391924_JHRCC12_SETD2_mut_H3K27me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391925
-!Sample_title = ChIP-Seq SETD2_mut_H3K27me3_rep2
-!Sample_geo_accession = GSM4391925
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322318
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866177
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391925/suppl/GSM4391925_JHRCC12_SETD2_mut_H3K27me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391926
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27me3_rep1
-!Sample_geo_accession = GSM4391926
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322317
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866178
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391926/suppl/GSM4391926_JHRCC12_SETD2_rescue_H3K27me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391927
-!Sample_title = ChIP-Seq SETD2_rescue_H3K27me3_rep2
-!Sample_geo_accession = GSM4391927
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K27me3 (Cell Signaling Technology, 9733)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322316
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866179
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391927/suppl/GSM4391927_JHRCC12_SETD2_rescue_H3K27me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391928
-!Sample_title = ChIP-Seq SETD2_mut_H3K36me3_rep1
-!Sample_geo_accession = GSM4391928
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322315
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866180
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391928/suppl/GSM4391928_JHRCC12_SETD2_mut_H3K36me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391929
-!Sample_title = ChIP-Seq SETD2_mut_H3K36me3_rep2
-!Sample_geo_accession = GSM4391929
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322314
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866181
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391929/suppl/GSM4391929_JHRCC12_SETD2_mut_H3K36me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391930
-!Sample_title = ChIP-Seq SETD2_rescue_H3K36me3_rep1
-!Sample_geo_accession = GSM4391930
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322313
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866182
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391930/suppl/GSM4391930_JHRCC12_SETD2_rescue_H3K36me3_rep1.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391931
-!Sample_title = ChIP-Seq SETD2_rescue_H3K36me3_rep2
-!Sample_geo_accession = GSM4391931
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: H3K36me3 (Abcam, ab9050)
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322312
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866183
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391931/suppl/GSM4391931_JHRCC12_SETD2_rescue_H3K36me3_rep2.bw
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391932
-!Sample_title = ChIP-Seq SETD2_mut_input
-!Sample_geo_accession = GSM4391932
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322311
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866184
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391933
-!Sample_title = ChIP-Seq SETD2_rescue_input
-!Sample_geo_accession = GSM4391933
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = chip antibody: none
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = 2 x 10^6 JHRCC12 cells were cross-linked with 1% paraformaldehyde for 10 min at room temperature and quenched by glycine. Cells were washed with cold PBS, pelleted by centrifugation, and lysed. After sonication, samples were spun down and incubated with 1 μg primary antibody for each ChIP experiment at 4 °C overnight. Magnetic beads (Thermo Fisher Scientific) were added the next day and incubated at 4 °C for 2h. Samples were then washed and histone complexes were eluted. The eluted samples were treated with RNase A, proteinase K, reversed crosslink, and purified with Qiagen PCR purification kit.
-!Sample_extract_protocol_ch1 = Library preparation were performed by the Integrated Genomics Operation Core Facility at MSKCC.
-!Sample_data_processing = Reads were aligned to hg19 using BWA (v0.7.17-r1188) with parameters ‘-q 5 -l 32 -k 2’. Uniquely aligned paired reads were extracted using SAMtools. PCR duplicates were removed using Picard tools (v2.18.16).
-!Sample_data_processing = Peak calling was performed for each individual and pooled replicates of each cell type using MACS2 v2.1.2 with parameters ‘-g hs -p 0.01 --keep-dup all --no-model -c $input_dna_control’
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Normalized coverage track in bigWig format
-!Sample_platform_id = GPL16791
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = ChIP
-!Sample_library_source = genomic
-!Sample_library_strategy = ChIP-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322310
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866185
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146539
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391934
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep1
-!Sample_geo_accession = GSM4391934
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_1
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322309
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866186
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391935
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep2
-!Sample_geo_accession = GSM4391935
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_2
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322308
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866187
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391936
-!Sample_title = RNA-Seq JHRCC12_SETD2_mut_rep3
-!Sample_geo_accession = GSM4391936
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 1633_3
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322307
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866188
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391937
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep1
-!Sample_geo_accession = GSM4391937
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_1
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322306
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866189
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391938
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep2
-!Sample_geo_accession = GSM4391938
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_2
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322305
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866190
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391939
-!Sample_title = RNA-Seq JHRCC12_SETD2_rescue_rep3
-!Sample_geo_accession = GSM4391939
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Clear cell renal cell carcinoma cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell line: JHRCC12
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Cells were cultured in F10 medium (advanced DMEM/F12) with 10% FBS, 1× L-glutamine, 1× sodium pyruvate, 1× nonessential amino acid, and 1× penicillin/streptomycin).
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = After RiboGreen quantification and quality control of Agilent BioAnalyzer, 6-15 ng of total RNA underwent amplification (12 cycles) using the SMART-seq V4 (Clontech) Ultra Low Input RNA kit for sequencing. 10 ng of amplified cDNA was used to prepare Illumina HiSeq libraries with the Kapa DNA library preparation chemistry (Kapa Biosystems) using 8 cycles of PCR.
-!Sample_description = All_gene_counts.txt.gz
-!Sample_description = 2210_3
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the hg19 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL20301
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 4000
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322304
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866191
-!Sample_supplementary_file_1 = NONE
-!Sample_series_id = GSE146540
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391940
-!Sample_title = RNA-Seq RTE_SETD2_wt_rep1
-!Sample_geo_accession = GSM4391940
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322303
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866192
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391940/suppl/GSM4391940_wt_1.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391941
-!Sample_title = RNA-Seq RTE_SETD2_wt_rep2
-!Sample_geo_accession = GSM4391941
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322302
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866193
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391941/suppl/GSM4391941_wt_2.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391942
-!Sample_title = RNA-Seq RTE_SETD2_wt_rep3
-!Sample_geo_accession = GSM4391942
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-proficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322301
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866194
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391942/suppl/GSM4391942_wt_3.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391943
-!Sample_title = RNA-Seq RTE_SETD2_KO_rep1
-!Sample_geo_accession = GSM4391943
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322300
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866195
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391943/suppl/GSM4391943_ko_1.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391944
-!Sample_title = RNA-Seq RTE_SETD2_KO_rep2
-!Sample_geo_accession = GSM4391944
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322299
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866196
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391944/suppl/GSM4391944_ko_2.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
-^SAMPLE = GSM4391945
-!Sample_title = RNA-Seq RTE_SETD2_KO_rep3
-!Sample_geo_accession = GSM4391945
-!Sample_status = Public on Nov 15 2021
-!Sample_submission_date = Mar 06 2020
-!Sample_last_update_date = Nov 15 2021
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = Murine renal tubular epithelial cells
-!Sample_organism_ch1 = Mus musculus
-!Sample_taxid_ch1 = 10090
-!Sample_characteristics_ch1 = cell type: primary renal tubular epithelial (RTE) cells
-!Sample_characteristics_ch1 = genotype: SETD2-deficient
-!Sample_growth_protocol_ch1 = Kidneys were dissected from Setd2(F/F)Ksp-Cre+ and Setd2(F/F) mice at 4-5 weeks of age. Kidney cortices were minced and digested in advanced DMEM/F12 (Thermo Fisher Scientific) containing liberase (Sigma) for 1h at 37 °C. Digested samples were filtered through 40 μm strainers (BD Biosciences) and washed with cold Hank's Balanced Salt Solution (HBSS, Thermo Fisher Scientific) twice. Finally, samples were resuspended in advanced DMEM/F12 supplemented with penicillin/streptomycin (Thermo Fisher Scientific), non-essential amino acids (NEAA, Thermo Fisher Scientific), glutamine (Thermo Fisher Scientific), 10 mM HEPES (Thermo Fisher Scientific), Insulin-transferrin-selenium (Thermo Fisher Scientific), 20 ng/ml EGF (R & D Systems), and 100 nM hydrocortisone (Sigma), and plated in dishes.
-!Sample_molecule_ch1 = total RNA
-!Sample_extract_protocol_ch1 = Total RNA was extracted and cleaned up using RNeasy Mini Kit (Qiagen).
-!Sample_extract_protocol_ch1 = The isolated RNA samples were then processed using the TruSeq RNA Sample Prep kit (Illumina).
-!Sample_data_processing = Raw reads were trimmed and filtered for quality using Trimmomatic (v0.38). Trimmed reads were then aligned against the mm10 genome using STAR (v2.7.1a).
-!Sample_data_processing = For each RefSeq annotated gene, reads overlapping with exon regions were counted using HTSeq (v0.11.2).
-!Sample_data_processing = Genome_build: mm10
-!Sample_data_processing = Supplementary_files_format_and_content: Gene quantification count in txt format
-!Sample_platform_id = GPL17021
-!Sample_contact_name = Christina,S,Leslie
-!Sample_contact_department = Computational Biology Program
-!Sample_contact_institute = Memorial Sloan Kettering Cancer Center
-!Sample_contact_address = 417 E 68th St
-!Sample_contact_city = New York
-!Sample_contact_state = NY
-!Sample_contact_zip/postal_code = 10065
-!Sample_contact_country = USA
-!Sample_instrument_model = Illumina HiSeq 2500
-!Sample_library_selection = cDNA
-!Sample_library_source = transcriptomic
-!Sample_library_strategy = RNA-Seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14322298
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX7866197
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4391nnn/GSM4391945/suppl/GSM4391945_ko_3.gene.count.txt.gz
-!Sample_series_id = GSE146541
-!Sample_series_id = GSE146583
-!Sample_data_row_count = 0
diff --git a/tests/test_files/soft_files/GSE150868/GSE.soft b/tests/test_files/soft_files/GSE150868/GSE.soft
deleted file mode 100644
index d09f51c..0000000
--- a/tests/test_files/soft_files/GSE150868/GSE.soft
+++ /dev/null
@@ -1,66 +0,0 @@
-^SERIES = GSE150868
-!Series_title = Determine differences in chromatin accessibility in primary human AML samples by ATAC-seq.
-!Series_geo_accession = GSE150868
-!Series_status = Public on Feb 06 2022
-!Series_submission_date = May 19 2020
-!Series_last_update_date = Feb 08 2022
-!Series_summary = ATAC-Seq was performed for 35 AML primary specimens from primary AML cells, followed by a detailed ATAC-Seq pipeline for data processing. We provide both raw files as well as various processed files such as individual and consensus peaks.
-!Series_overall_design = Chromatin accessiblity was analyzed on 35 AML primary specimens using Assay for Transposase-Accessible Chromatin using sequencing (ATAC-seq) in order to analyze transcription factor activity in combination of mRNA seq data in GPR56 KD scenario.
-!Series_type = Genome binding/occupancy profiling by high throughput sequencing
-!Series_contributor = Lixiazi,,He
-!Series_contributor = Caroline,,Pabst
-!Series_contributor = Christian,,Arnold
-!Series_contributor = Judith,,Zaugg
-!Series_sample_id = GSM4559921
-!Series_sample_id = GSM4559922
-!Series_sample_id = GSM4559923
-!Series_sample_id = GSM4559924
-!Series_sample_id = GSM4559925
-!Series_sample_id = GSM4559926
-!Series_sample_id = GSM4559927
-!Series_sample_id = GSM4559928
-!Series_sample_id = GSM4559929
-!Series_sample_id = GSM4559930
-!Series_sample_id = GSM4559931
-!Series_sample_id = GSM4559932
-!Series_sample_id = GSM4559933
-!Series_sample_id = GSM4559934
-!Series_sample_id = GSM4559935
-!Series_sample_id = GSM4559936
-!Series_sample_id = GSM4559937
-!Series_sample_id = GSM4559938
-!Series_sample_id = GSM4559939
-!Series_sample_id = GSM4559940
-!Series_sample_id = GSM4559941
-!Series_sample_id = GSM4559942
-!Series_sample_id = GSM4559943
-!Series_sample_id = GSM4559944
-!Series_sample_id = GSM4559945
-!Series_sample_id = GSM4559946
-!Series_sample_id = GSM4559947
-!Series_sample_id = GSM4559948
-!Series_sample_id = GSM4559949
-!Series_sample_id = GSM4559950
-!Series_sample_id = GSM4559951
-!Series_sample_id = GSM4559952
-!Series_sample_id = GSM4559953
-!Series_sample_id = GSM4559954
-!Series_sample_id = GSM4559955
-!Series_contact_name = Christian,,Arnold
-!Series_contact_email = christian.arnold@embl.de
-!Series_contact_institute = EMBL
-!Series_contact_address = Meyerhofstraße 1
-!Series_contact_city = Heidelberg
-!Series_contact_state = Baden-Württemberg
-!Series_contact_zip/postal_code = 69117
-!Series_contact_country = Germany
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE150nnn/GSE150868/suppl/GSE150868_RAW.tar
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE150nnn/GSE150868/suppl/GSE150868_consensusPeaks_all_minOverlap10.bed.gz
-!Series_supplementary_file = ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE150nnn/GSE150868/suppl/GSE150868_consensusPeaks_all_minOverlap10.bed.rawCounts.txt.gz
-!Series_platform_id = GPL18573
-!Series_platform_organism = Homo sapiens
-!Series_platform_taxid = 9606
-!Series_sample_organism = Homo sapiens
-!Series_sample_taxid = 9606
-!Series_relation = BioProject: https://www.ncbi.nlm.nih.gov/bioproject/PRJNA633890
-!Series_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRP262351
diff --git a/tests/test_files/soft_files/GSE150868/GSM.soft b/tests/test_files/soft_files/GSE150868/GSM.soft
deleted file mode 100644
index d9e2c7b..0000000
--- a/tests/test_files/soft_files/GSE150868/GSM.soft
+++ /dev/null
@@ -1,1365 +0,0 @@
-^SAMPLE = GSM4559921
-!Sample_title = 11244
-!Sample_geo_accession = GSM4559921
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973883
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364446
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559921/suppl/GSM4559921_11244.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559922
-!Sample_title = 22412
-!Sample_geo_accession = GSM4559922
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973882
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364447
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559922/suppl/GSM4559922_22412.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559923
-!Sample_title = 23207
-!Sample_geo_accession = GSM4559923
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973881
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364448
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559923/suppl/GSM4559923_23207.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559924
-!Sample_title = 25963
-!Sample_geo_accession = GSM4559924
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973880
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364449
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559924/suppl/GSM4559924_25963.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559925
-!Sample_title = 26774
-!Sample_geo_accession = GSM4559925
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973879
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364450
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559925/suppl/GSM4559925_26774.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559926
-!Sample_title = 26996
-!Sample_geo_accession = GSM4559926
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973878
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364451
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559926/suppl/GSM4559926_26996.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559927
-!Sample_title = 04H055
-!Sample_geo_accession = GSM4559927
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973877
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364452
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559927/suppl/GSM4559927_04H055.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559928
-!Sample_title = 04H112
-!Sample_geo_accession = GSM4559928
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973876
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364453
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559928/suppl/GSM4559928_04H112.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559929
-!Sample_title = 05H111
-!Sample_geo_accession = GSM4559929
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973875
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364454
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559929/suppl/GSM4559929_05H111.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559930
-!Sample_title = 06H133
-!Sample_geo_accession = GSM4559930
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973898
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364455
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559930/suppl/GSM4559930_06H133.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559931
-!Sample_title = 06H143
-!Sample_geo_accession = GSM4559931
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973897
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364456
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559931/suppl/GSM4559931_06H143.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559932
-!Sample_title = 07H005
-!Sample_geo_accession = GSM4559932
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973896
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364457
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559932/suppl/GSM4559932_07H005.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559933
-!Sample_title = 07H009
-!Sample_geo_accession = GSM4559933
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973895
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364458
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559933/suppl/GSM4559933_07H009.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559934
-!Sample_title = 07H062
-!Sample_geo_accession = GSM4559934
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973894
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364459
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559934/suppl/GSM4559934_07H062.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559935
-!Sample_title = 07H158
-!Sample_geo_accession = GSM4559935
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973893
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364460
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559935/suppl/GSM4559935_07H158.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559936
-!Sample_title = 08H082
-!Sample_geo_accession = GSM4559936
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973892
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364461
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559936/suppl/GSM4559936_08H082.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559937
-!Sample_title = 08H138
-!Sample_geo_accession = GSM4559937
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973864
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364462
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559937/suppl/GSM4559937_08H138.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559938
-!Sample_title = 09H083
-!Sample_geo_accession = GSM4559938
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973891
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364463
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559938/suppl/GSM4559938_09H083.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559939
-!Sample_title = 09H115
-!Sample_geo_accession = GSM4559939
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973890
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364464
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559939/suppl/GSM4559939_09H115.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559940
-!Sample_title = 10H166
-!Sample_geo_accession = GSM4559940
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973889
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364465
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559940/suppl/GSM4559940_10H166.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559941
-!Sample_title = 11H008
-!Sample_geo_accession = GSM4559941
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973888
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364466
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559941/suppl/GSM4559941_11H008.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559942
-!Sample_title = 11H027
-!Sample_geo_accession = GSM4559942
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973887
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364467
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559942/suppl/GSM4559942_11H027.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559943
-!Sample_title = 11H142
-!Sample_geo_accession = GSM4559943
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973886
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364468
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559943/suppl/GSM4559943_11H142.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559944
-!Sample_title = 11H160
-!Sample_geo_accession = GSM4559944
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973885
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364469
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559944/suppl/GSM4559944_11H160.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559945
-!Sample_title = 12H010
-!Sample_geo_accession = GSM4559945
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973884
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364470
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559945/suppl/GSM4559945_12H010.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559946
-!Sample_title = 13H073
-!Sample_geo_accession = GSM4559946
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973853
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364471
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559946/suppl/GSM4559946_13H073.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559947
-!Sample_title = 13H110
-!Sample_geo_accession = GSM4559947
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973870
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364472
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559947/suppl/GSM4559947_13H110.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559948
-!Sample_title = 14H001
-!Sample_geo_accession = GSM4559948
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973869
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364473
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559948/suppl/GSM4559948_14H001.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559949
-!Sample_title = 14H007
-!Sample_geo_accession = GSM4559949
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973868
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364474
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559949/suppl/GSM4559949_14H007.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559950
-!Sample_title = E170001
-!Sample_geo_accession = GSM4559950
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973852
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364475
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559950/suppl/GSM4559950_E170001.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559951
-!Sample_title = E180007
-!Sample_geo_accession = GSM4559951
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973874
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364476
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559951/suppl/GSM4559951_E180007.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559952
-!Sample_title = E180013
-!Sample_geo_accession = GSM4559952
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973873
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364477
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559952/suppl/GSM4559952_E180013.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559953
-!Sample_title = E180015
-!Sample_geo_accession = GSM4559953
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973872
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364478
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559953/suppl/GSM4559953_E180015.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559954
-!Sample_title = E180029
-!Sample_geo_accession = GSM4559954
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973871
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364479
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559954/suppl/GSM4559954_E180029.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0
-^SAMPLE = GSM4559955
-!Sample_title = E180117
-!Sample_geo_accession = GSM4559955
-!Sample_status = Public on Feb 06 2022
-!Sample_submission_date = May 19 2020
-!Sample_last_update_date = Feb 07 2022
-!Sample_type = SRA
-!Sample_channel_count = 1
-!Sample_source_name_ch1 = AML cells
-!Sample_organism_ch1 = Homo sapiens
-!Sample_taxid_ch1 = 9606
-!Sample_characteristics_ch1 = cell type: primary AML cells
-!Sample_molecule_ch1 = genomic DNA
-!Sample_extract_protocol_ch1 = Briefly 50,000 cells from frozen samples were collected, then placed into lysis buffer (10 mM Tris·Cl, pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1% NP-40, 0.1% Tween 20), followed by Tn5 transposase-mediated tagmentation and adaptor incorporation at sites of accessible chromatin carried out using the Nextera DNA Library Prep kit (FC-121-1030, Illumina) at 37°C for 30 min.
-!Sample_extract_protocol_ch1 = Library amplification was performed using NEB Nex High-Fidelity 2x PCR Master Mix (M0541L, NEB) and primer combinations provided in Nextera DNA Library Prep kit.  The library was optimized for enrichment for 100-1000 bps fragments using SPRI beads based size-selection and the quality of the purified DNA library was analyzed on a Bioanalyzer (2100 Expert software, Agilent Technologies) using High Sensitivity DNA Chips (5067-4626; Agilent Technologies Inc.). The appropriate concentration of sample was determined using the Qubit Fluorometer (Molecular Probes). Ten 40nM samples were pooled and run on a NextSeq 500/550 High Output Kit (20024907; Illumina, Inc. San Diego, CA) and the NextSeq 500 Illumina Sequencer to obtain paired end reads of 75 bp.
-!Sample_data_processing = Adaptor trimming with Trimmomatic v0.38 (parameters: ILLUMINACLIP::1:30:4:1:true TRAILING:3 MINLEN:20)
-!Sample_data_processing = Alignment with Bowtie2 v2.3.4.3 (parameters: -X2000 --very-sensitive -t)
-!Sample_data_processing = Post-alignment processing (using samtools v1.9, Picard tools v2.18.16-SNAPSHOT): (0) cleaning the BAM file with Picard CleanSam, FixMateInformation, AddOrReplaceReadGroups and ReorderSam,  (1) removing mitochondrial reads and reads from non-assembled contigs or alternative haplotypes, (2) marking and removing duplicate reads with MarkDuplicates, (3) filtering reads with a mapping quality below 10 (-F 4 -q 10), (4) adjusting read start sites as described previously by 4 and -5 base pairs, and (5) removing reads with insertions or deletions using samtools, followed by final cleaning and sorting using Picard SortSam, CleanSam, and FixMateInformation
-!Sample_data_processing = Peak calling with MACS2 (parameters: -q 0.01 -g hg19 --nomodel --keep-dup all) followed by filtering using bedtools subtract v2.27.1 against the publicly available blacklist regions
-!Sample_data_processing = Genome_build: hg19
-!Sample_data_processing = Supplementary_files_format_and_content: We provide the individual peak files as well as the main consensus peak set along with raw read counts for each sample
-!Sample_platform_id = GPL18573
-!Sample_contact_name = Christian,,Arnold
-!Sample_contact_email = christian.arnold@embl.de
-!Sample_contact_institute = EMBL
-!Sample_contact_address = Meyerhofstraße 1
-!Sample_contact_city = Heidelberg
-!Sample_contact_state = Baden-Württemberg
-!Sample_contact_zip/postal_code = 69117
-!Sample_contact_country = Germany
-!Sample_instrument_model = Illumina NextSeq 500
-!Sample_library_selection = other
-!Sample_library_source = genomic
-!Sample_library_strategy = ATAC-seq
-!Sample_relation = BioSample: https://www.ncbi.nlm.nih.gov/biosample/SAMN14973867
-!Sample_relation = SRA: https://www.ncbi.nlm.nih.gov/sra?term=SRX8364480
-!Sample_supplementary_file_1 = ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM4559nnn/GSM4559955/suppl/GSM4559955_E180117.narrowPeak.gz
-!Sample_series_id = GSE150868
-!Sample_data_row_count = 0

From 0216942a2b805f7d035fe3ab5d8a80c72081b68a Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Fri, 21 Apr 2023 14:09:22 -0400
Subject: [PATCH 08/10] docs + version + changelog

---
 docs/changelog.md                             |   7 +
 docs/gse_finder.md                            |   2 +-
 docs/usage.md                                 | 158 ++++++++++--------
 .../build/processed-data-downloading.md       |   5 -
 geofetch/_version.py                          |   2 +-
 5 files changed, 99 insertions(+), 75 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 0441e16..841174f 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [0.12.2] -- 2023-03-27
+- Added `max-prefetch-size` argument.
+- Improved code and logger structure.
+
+## [0.12.1] -- 2023-03-27
+- Fixed Finder bug
+
 ## [0.12.0] -- 2023-03-27
 - Added functionality that saves gse metadata to config file
 - Fixed description in initialization of pepy object
diff --git a/docs/gse_finder.md b/docs/gse_finder.md
index 14a353f..5daa083 100644
--- a/docs/gse_finder.md
+++ b/docs/gse_finder.md
@@ -17,7 +17,7 @@ from geofetch import Finder
 gse_obj = Finder()
 
 # Optionally: provide filter string and max number of retrieve elements
-gse_obj = Finder(filter="((bed) OR narrow peak) AND Homo sapiens[Organism]", retmax=10)
+gse_obj = Finder(filters="((bed) OR narrow peak) AND Homo sapiens[Organism]", retmax=10)
 ```
 
 1) Get list of all GSE in GEO 
diff --git a/docs/usage.md b/docs/usage.md
index 29ba6b1..f00b69e 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -1,72 +1,86 @@
-# Usage reference
+# <img src="./img/geofetch_logo.svg" class="img-header">  usage reference
+
+`geofetch` command-line usage instructions:
 
-geofetch command-line usage instructions:
 
-`geofetch -V`
-```console
-geofetch 0.11.0
-```
 
 `geofetch --help`
-```console
-usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT] [-u METADATA_FOLDER]
-                [--just-metadata] [-r] [--config-template CONFIG_TEMPLATE]
-                [--pipeline-samples PIPELINE_SAMPLES] [--pipeline-project PIPELINE_PROJECT]
-                [--disable-progressbar] [-k SKIP] [--acc-anno] [--discard-soft]
-                [--const-limit-project CONST_LIMIT_PROJECT]
-                [--const-limit-discard CONST_LIMIT_DISCARD]
-                [--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [--add-dotfile] [-p]
-                [--data-source {all,samples,series}] [--filter FILTER]
-                [--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x] [-b BAM_FOLDER]
-                [-f FQ_FOLDER] [--use-key-subset] [--silent] [--verbosity V] [--logdev]
+```{console}
+usage: geofetch [<args>]
+
+The example how to use geofetch (to download GSE573030 just metadata):
+    geofetch -i GSE67303 -m `pwd` --just-metadata
+
+To download all processed data of GSE57303:
+    geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd`
+
+* where `pwd` is a current directory
 
 Automatic GEO and SRA data downloader
 
-optional arguments:
+options:
   -h, --help            show this help message and exit
   -V, --version         show program's version number and exit
   -i INPUT, --input INPUT
-                        required: a GEO (GSE) accession, or a file with a list of GSE
-                        numbers
+                        required: a GEO (GSE) accession, or a file with a list
+                        of GSE numbers
   -n NAME, --name NAME  Specify a project name. Defaults to GSE number
   -m METADATA_ROOT, --metadata-root METADATA_ROOT
-                        Specify a parent folder location to store metadata. The project name
-                        will be added as a subfolder [Default: $SRAMETA:]
+                        Specify a parent folder location to store metadata.
+                        The project name will be added as a subfolder
+                        [Default: $SRAMETA:]
   -u METADATA_FOLDER, --metadata-folder METADATA_FOLDER
-                        Specify an absolute folder location to store metadata. No subfolder
-                        will be added. Overrides value of --metadata-root [Default: Not used
-                        (--metadata-root is used by default)]
-  --just-metadata       If set, don't actually run downloads, just create metadata
+                        Specify an absolute folder location to store metadata.
+                        No subfolder will be added. Overrides value of
+                        --metadata-root [Default: Not used (--metadata-root is
+                        used by default)]
+  --just-metadata       If set, don't actually run downloads, just create
+                        metadata
   -r, --refresh-metadata
                         If set, re-download metadata even if it exists.
   --config-template CONFIG_TEMPLATE
                         Project config yaml file template.
   --pipeline-samples PIPELINE_SAMPLES
-                        Optional: Specify one or more filepaths to SAMPLES pipeline
-                        interface yaml files. These will be added to the project config file
-                        to make it immediately compatible with looper. [Default: null]
+                        Optional: Specify one or more filepaths to SAMPLES
+                        pipeline interface yaml files. These will be added to
+                        the project config file to make it immediately
+                        compatible with looper. [Default: null]
   --pipeline-project PIPELINE_PROJECT
-                        Optional: Specify one or more filepaths to PROJECT pipeline
-                        interface yaml files. These will be added to the project config file
-                        to make it immediately compatible with looper. [Default: null]
+                        Optional: Specify one or more filepaths to PROJECT
+                        pipeline interface yaml files. These will be added to
+                        the project config file to make it immediately
+                        compatible with looper. [Default: null]
   --disable-progressbar
                         Optional: Disable progressbar
   -k SKIP, --skip SKIP  Skip some accessions. [Default: no skip].
-  --acc-anno            Optional: Produce annotation sheets for each accession. Project
-                        combined PEP for the whole project won't be produced.
-  --discard-soft        Optional: After creation of PEP files, all soft and additional files
-                        will be deleted
+  --acc-anno            Optional: Produce annotation sheets for each
+                        accession. Project combined PEP for the whole project
+                        won't be produced.
+  --discard-soft        Optional: After creation of PEP files, all soft and
+                        additional files will be deleted
   --const-limit-project CONST_LIMIT_PROJECT
-                        Optional: Limit of the number of the constant sample characters that
-                        should not be in project yaml. [Default: 50]
+                        Optional: Limit of the number of the constant sample
+                        characters that should not be in project yaml.
+                        [Default: 50]
   --const-limit-discard CONST_LIMIT_DISCARD
-                        Optional: Limit of the number of the constant sample characters that
-                        should not be discarded [Default: 250]
+                        Optional: Limit of the number of the constant sample
+                        characters that should not be discarded [Default: 250]
   --attr-limit-truncate ATTR_LIMIT_TRUNCATE
-                        Optional: Limit of the number of sample characters.Any attribute
-                        with more than X characters will truncate to the first X, where X is
-                        a number of characters [Default: 500]
-  --add-dotfile         Optional: Add .pep.yaml file that points .yaml PEP file
+                        Optional: Limit of the number of sample characters.Any
+                        attribute with more than X characters will truncate to
+                        the first X, where X is a number of characters
+                        [Default: 500]
+  --add-dotfile         Optional: Add .pep.yaml file that points .yaml PEP
+                        file
+  --max-soft-size MAX_SOFT_SIZE
+                        Optional: Max size of soft file. [Default: 1GB].
+                        Supported input formats : 12B, 12KB, 12MB, 12GB.
+  --max-prefetch-size MAX_PREFETCH_SIZE
+                        Argument to pass to prefetch program's --max-size
+                        option, if prefetch will be used in this run of
+                        geofetch; for reference: https://github.com/ncbi/sra-
+                        tools/wiki/08.-prefetch-and-fasterq-dump#check-the-
+                        maximum-size-limit-of-the-prefetch-tool
   --silent              Silence logging. Overrides verbosity.
   --verbosity V         Set logging level (1-5 or logging module level name)
   --logdev              Expand content of logging message format.
@@ -74,35 +88,43 @@ optional arguments:
 processed:
   -p, --processed       Download processed data [Default: download raw data].
   --data-source {all,samples,series}
-                        Optional: Specifies the source of data on the GEO record to retrieve
-                        processed data, which may be attached to the collective series
-                        entity, or to individual samples. Allowable values are: samples,
-                        series or both (all). Ignored unless 'processed' flag is set.
-                        [Default: samples]
-  --filter FILTER       Optional: Filter regex for processed filenames [Default:
-                        None].Ignored unless 'processed' flag is set.
+                        Optional: Specifies the source of data on the GEO
+                        record to retrieve processed data, which may be
+                        attached to the collective series entity, or to
+                        individual samples. Allowable values are: samples,
+                        series or both (all). Ignored unless 'processed' flag
+                        is set. [Default: samples]
+  --filter FILTER       Optional: Filter regex for processed filenames
+                        [Default: None].Ignored unless 'processed' flag is
+                        set.
   --filter-size FILTER_SIZE
-                        Optional: Filter size for processed files that are stored as sample
-                        repository [Default: None]. Works only for sample data. Supported
-                        input formats : 12B, 12KB, 12MB, 12GB. Ignored unless 'processed'
-                        flag is set.
+                        Optional: Filter size for processed files that are
+                        stored as sample repository [Default: None]. Works
+                        only for sample data. Supported input formats : 12B,
+                        12KB, 12MB, 12GB. Ignored unless 'processed' flag is
+                        set.
   -g GEO_FOLDER, --geo-folder GEO_FOLDER
-                        Optional: Specify a location to store processed GEO files. Ignored
-                        unless 'processed' flag is set.[Default: $GEODATA:]
+                        Optional: Specify a location to store processed GEO
+                        files. Ignored unless 'processed' flag is
+                        set.[Default: $GEODATA:]
 
 raw:
   -x, --split-experiments
-                        Split SRR runs into individual samples. By default, SRX experiments
-                        with multiple SRR Runs will have a single entry in the annotation
-                        table, with each run as a separate row in the subannotation table.
-                        This setting instead treats each run as a separate sample
+                        Split SRR runs into individual samples. By default,
+                        SRX experiments with multiple SRR Runs will have a
+                        single entry in the annotation table, with each run as
+                        a separate row in the subannotation table. This
+                        setting instead treats each run as a separate sample
   -b BAM_FOLDER, --bam-folder BAM_FOLDER
-                        Optional: Specify folder of bam files. Geofetch will not download
-                        sra files when corresponding bam files already exist. [Default:
-                        $SRABAM:]
+                        Optional: Specify folder of bam files. Geofetch will
+                        not download sra files when corresponding bam files
+                        already exist. [Default: $SRABAM:]
   -f FQ_FOLDER, --fq-folder FQ_FOLDER
-                        Optional: Specify folder of fastq files. Geofetch will not download
-                        sra files when corresponding fastq files already exist. [Default:
-                        $SRAFQ:]
-  --use-key-subset      Use just the keys defined in this module when writing out metadata.
+                        Optional: Specify folder of fastq files. Geofetch will
+                        not download sra files when corresponding fastq files
+                        already exist. [Default: $SRAFQ:]
+  --use-key-subset      Use just the keys defined in this module when writing
+                        out metadata.
+  --add-convert-modifier
+                        Add looper SRA convert modifier to config file.
 ```
diff --git a/docs_jupyter/build/processed-data-downloading.md b/docs_jupyter/build/processed-data-downloading.md
index b851a61..cd080ee 100644
--- a/docs_jupyter/build/processed-data-downloading.md
+++ b/docs_jupyter/build/processed-data-downloading.md
@@ -24,11 +24,6 @@ Calling geofetch will do 4 tasks:
 
 Complete details about geofetch outputs is cataloged in the [metadata outputs reference](metadata_output.md).
 
-from IPython.core.display import SVG
-SVG(filename='logo.svg')
-
-![arguments_outputs.svg](attachment:arguments_outputs.svg)
-
 ## Download the data
 
 First, create the metadata for processed data (by adding --processed and --just-metadata):
diff --git a/geofetch/_version.py b/geofetch/_version.py
index def467e..76da4a9 100644
--- a/geofetch/_version.py
+++ b/geofetch/_version.py
@@ -1 +1 @@
-__version__ = "0.12.1"
+__version__ = "0.12.2"

From bf66f0744794e1b3ced517861e572ae82e917e8e Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Fri, 21 Apr 2023 14:22:52 -0400
Subject: [PATCH 09/10] fixed changelog data

---
 docs/changelog.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 841174f..2c31285 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,12 +1,9 @@
 # Changelog
 
-## [0.12.2] -- 2023-03-27
+## [0.12.2] -- 2023-04-32
 - Added `max-prefetch-size` argument.
 - Improved code and logger structure.
 
-## [0.12.1] -- 2023-03-27
-- Fixed Finder bug
-
 ## [0.12.0] -- 2023-03-27
 - Added functionality that saves gse metadata to config file
 - Fixed description in initialization of pepy object

From 3ad1dda7d6c813caeb1f915ec853b7f47b621c01 Mon Sep 17 00:00:00 2001
From: Khoroshevskyi <sasha99250@gmail.com>
Date: Tue, 25 Apr 2023 11:15:29 -0400
Subject: [PATCH 10/10] docs corrections

---
 docs/changelog.md |  4 ++--
 docs/usage.md     | 13 +++++--------
 geofetch/cli.py   | 14 ++++++--------
 3 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/docs/changelog.md b/docs/changelog.md
index 2c31285..652b6a2 100644
--- a/docs/changelog.md
+++ b/docs/changelog.md
@@ -1,7 +1,7 @@
 # Changelog
 
-## [0.12.2] -- 2023-04-32
-- Added `max-prefetch-size` argument.
+## [0.12.2] -- 2023-04-25
+- Added `max-prefetch-size` argument. #113
 - Improved code and logger structure.
 
 ## [0.12.0] -- 2023-03-27
diff --git a/docs/usage.md b/docs/usage.md
index f00b69e..63f1db7 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -9,12 +9,10 @@
 usage: geofetch [<args>]
 
 The example how to use geofetch (to download GSE573030 just metadata):
-    geofetch -i GSE67303 -m `pwd` --just-metadata
+    geofetch -i GSE67303 -m <folder> --just-metadata
 
 To download all processed data of GSE57303:
-    geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd`
-
-* where `pwd` is a current directory
+    geofetch -i GSE67303 --processed --geo-folder <folder> -m <folder>
 
 Automatic GEO and SRA data downloader
 
@@ -32,8 +30,7 @@ options:
   -u METADATA_FOLDER, --metadata-folder METADATA_FOLDER
                         Specify an absolute folder location to store metadata.
                         No subfolder will be added. Overrides value of
-                        --metadata-root [Default: Not used (--metadata-root is
-                        used by default)]
+                        --metadata-root.
   --just-metadata       If set, don't actually run downloads, just create
                         metadata
   -r, --refresh-metadata
@@ -56,8 +53,8 @@ options:
   --acc-anno            Optional: Produce annotation sheets for each
                         accession. Project combined PEP for the whole project
                         won't be produced.
-  --discard-soft        Optional: After creation of PEP files, all soft and
-                        additional files will be deleted
+  --discard-soft        Optional: After creation of PEP files, all .soft files
+                        will be deleted
   --const-limit-project CONST_LIMIT_PROJECT
                         Optional: Limit of the number of the constant sample
                         characters that should not be in project yaml.
diff --git a/geofetch/cli.py b/geofetch/cli.py
index fd1bfbd..6bb96b7 100644
--- a/geofetch/cli.py
+++ b/geofetch/cli.py
@@ -18,12 +18,10 @@ def _parse_cmdl(cmdl):
         usage="""geofetch [<args>]
 
 The example how to use geofetch (to download GSE573030 just metadata):
-    geofetch -i GSE67303 -m `pwd` --just-metadata
+    geofetch -i GSE67303 -m <folder> --just-metadata
 
 To download all processed data of GSE57303:
-    geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd`
-
-* where `pwd` is a current directory
+    geofetch -i GSE67303 --processed --geo-folder <folder> -m <folder>
 
 """,
     )
@@ -63,8 +61,7 @@ def _parse_cmdl(cmdl):
         "-u",
         "--metadata-folder",
         help="Specify an absolute folder location to store metadata. "
-        "No subfolder will be added. Overrides value of --metadata-root "
-        "[Default: Not used (--metadata-root is used by default)]",
+        "No subfolder will be added. Overrides value of --metadata-root.",
     )
 
     parser.add_argument(
@@ -127,7 +124,7 @@ def _parse_cmdl(cmdl):
     parser.add_argument(
         "--discard-soft",
         action="store_true",
-        help="Optional: After creation of PEP files, all soft and additional files will be deleted",
+        help="Optional: After creation of PEP files, all .soft files will be deleted",
     )
 
     parser.add_argument(
@@ -172,7 +169,8 @@ def _parse_cmdl(cmdl):
 
     parser.add_argument(
         "--max-prefetch-size",
-        help="Argument to pass to prefetch program's --max-size option, if prefetch will be used in this run of geofetch; for reference: https://github.com/ncbi/sra-tools/wiki/08.-prefetch-and-fasterq-dump#check-the-maximum-size-limit-of-the-prefetch-tool",
+        help="Argument to pass to prefetch program's --max-size option, if prefetch will be used in this run of geofetch; "
+        "for reference: https://github.com/ncbi/sra-tools/wiki/08.-prefetch-and-fasterq-dump#check-the-maximum-size-limit-of-the-prefetch-tool",
     )
 
     processed_group.add_argument(