From 6074339bf8a7d7bbc83795b0cd43cdb43ddcdffa Mon Sep 17 00:00:00 2001
From: Chris Moth
Date: Wed, 10 Jul 2024 16:24:20 -0500
Subject: [PATCH] Create MANE database table with schema #48

---
 lib/create_schema_MANE.sql | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/lib/create_schema_MANE.sql b/lib/create_schema_MANE.sql
index 61355a2..92bab81 100644
--- a/lib/create_schema_MANE.sql
+++ b/lib/create_schema_MANE.sql
@@ -34,5 +34,28 @@ CREATE TABLE MANE_xref (
   optional_uniprot_id VARCHAR(40) NOT NULL
     COMMENT 'the bracketed uniprot ID that appears in some rows of raw data',
   PRIMARY KEY (ensembl_transcript_id)
 ) ENGINE=InnoDB
 COMMENT 'Lookup by ENST000 ID verifies MANE cross reference, and returns refseq IDs'
+CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
+
+-- Another important source of MANE information is the summary file that is downloaded from NCBI:
+-- https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/
+CREATE TABLE MANE_v13_summary (
+  NCBI_GeneID VARCHAR(40),
+  Ensembl_Gene VARCHAR(40),
+  HGNC_ID VARCHAR(40),
+  symbol VARCHAR(40),
+  name VARCHAR(255), -- wider than the other columns because full gene names can exceed 40 characters
+  RefSeq_nuc VARCHAR(40), -- first column of the composite primary key
+  RefSeq_prot VARCHAR(40),
+  Ensembl_nuc VARCHAR(40), -- second column of the composite primary key, also indexed on its own below
+  Ensembl_prot VARCHAR(40),
+  MANE_status VARCHAR(40),
+  GRCh38_chr VARCHAR(40),
+  chr_start VARCHAR(40), -- NOTE(review): chr_start and chr_end are stored as text - confirm nothing needs numeric range queries
+  chr_end VARCHAR(40),
+  chr_strand VARCHAR(40),
+  PRIMARY KEY (RefSeq_nuc, Ensembl_nuc),
+  KEY (Ensembl_nuc) -- lets lookups by Ensembl_nuc alone use an index, since it is not the leading primary key column
+) ENGINE=InnoDB CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci;
+