From 9dafd639143ed347f4fe18507cc036808fc2c954 Mon Sep 17 00:00:00 2001 From: Usman Jamshed Date: Fri, 13 Oct 2023 23:58:47 -0400 Subject: [PATCH 1/3] Initial investigation completed and handling molecules without PCA or UMAP data --- .../DB_investigation-checkpoint.ipynb | 798 ++++++++++++++++++ DB_investigation.ipynb | 798 ++++++++++++++++++ backend/app/app/api/v2/endpoints/molecule.py | 9 +- duplicates.pkl | Bin 0 -> 9364 bytes frontend/src/pages/Molecule.jsx | 26 +- 5 files changed, 1617 insertions(+), 14 deletions(-) create mode 100644 .ipynb_checkpoints/DB_investigation-checkpoint.ipynb create mode 100644 DB_investigation.ipynb create mode 100644 duplicates.pkl diff --git a/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb b/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb new file mode 100644 index 0000000..025dc89 --- /dev/null +++ b/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9368bb63", + "metadata": {}, + "outputs": [], + "source": [ + "# Want to start by checking which molecules are duplicates.\n", + "# For examples we have 241905 and 1497" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e28921c8", + "metadata": {}, + "outputs": [], + "source": [ + "import psycopg2\n", + "import pandas as pd\n", + "from rdkit import Chem" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "214277f9", + "metadata": {}, + "outputs": [], + "source": [ + "from tqdm import tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a128c9e3", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_556/3175014960.py:16: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", + " df = pd.read_sql_query(query, connection)\n" + ] + } + ], + "source": [ + "# Establish a connection\n", + "# You must have the DB container running to run this cell successfully.\n", + "# Connection parameters\n", + "db_params = {\n", + " 'dbname': 'postgres',\n", + " 'user': 'postgres',\n", + " 'password': '',\n", + " 'host': '127.0.0.1',\n", + " 'port': '5432'\n", + "}\n", + "\n", + "# Establish a connection to the PostgreSQL database\n", + "connection = psycopg2.connect(**db_params)\n", + "\n", + "# Execute an SQL statement\n", + "query = \"SELECT molecule_id, smiles, molecular_weight FROM molecule\"\n", + "df = pd.read_sql_query(query, connection)\n", + "\n", + "# Close the connection\n", + "connection.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "686a1a35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
molecule_idsmilesmolecular_weight
0331406COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC398.441986
1140360COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1398.532990
2331409C1CCC(CC1)P(c1ccccc1)C1CCCCC1274.388000
32027CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C...497.707001
42036CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C...1049.558960
............
330962608Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1346.410004
330963461CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC376.509003
3309641064Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C...462.666992
330965523CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C...490.664001
3309661817COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC442.447998
\n", + "

330967 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " molecule_id smiles \\\n", + "0 331406 COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC \n", + "1 140360 COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1 \n", + "2 331409 C1CCC(CC1)P(c1ccccc1)C1CCCCC1 \n", + "3 2027 CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C... \n", + "4 2036 CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C... \n", + "... ... ... \n", + "330962 608 Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1 \n", + "330963 461 CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC \n", + "330964 1064 Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C... \n", + "330965 523 CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C... \n", + "330966 1817 COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC \n", + "\n", + " molecular_weight \n", + "0 398.441986 \n", + "1 398.532990 \n", + "2 274.388000 \n", + "3 497.707001 \n", + "4 1049.558960 \n", + "... ... \n", + "330962 346.410004 \n", + "330963 376.509003 \n", + "330964 462.666992 \n", + "330965 490.664001 \n", + "330966 442.447998 \n", + "\n", + "[330967 rows x 3 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "149e8d9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
molecule_idsmilesmolecular_weight
143223241905[H]P([H])C48.025002
3298681497CP48.025002
\n", + "
" + ], + "text/plain": [ + " molecule_id smiles molecular_weight\n", + "143223 241905 [H]P([H])C 48.025002\n", + "329868 1497 CP 48.025002" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sanity check to see if the data is correct\n", + "df[(df[\"molecule_id\"]==241905) | (df[\"molecule_id\"]==1497)]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "9596cb12", + "metadata": {}, + "outputs": [], + "source": [ + "# These two molecules are the same so lets check if rdkit will return the same smiles string when canonicalizing them\n", + "mol_241905 = df[df[\"molecule_id\"]==241905][\"smiles\"].to_list()[0]\n", + "mol_1497 = df[df[\"molecule_id\"]==1497][\"smiles\"].to_list()[0]\n", + "\n", + "# Double check with the molecular weight. Use difference is less than some tolerance 1e-6.\n", + "mol_241905_weight = df[df[\"molecule_id\"]==241905][\"molecular_weight\"].to_list()[0]\n", + "mol_1497_weight = df[df[\"molecule_id\"]==1497][\"molecular_weight\"].to_list()[0]\n", + "\n", + "a = Chem.CanonSmiles(mol_241905)\n", + "b = Chem.CanonSmiles(mol_241905)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "9c94d293", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CP 48.025001525878906\n", + "CP 48.025001525878906\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "print(a, mol_241905_weight)\n", + "print(b, mol_1497_weight)\n", + "print(a == b)\n", + "print(abs(mol_241905_weight - mol_1497_weight) < 0.000001)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "5cca5c81", + "metadata": {}, + "outputs": [], + "source": [ + "all_data_dict = df.to_dict(orient=\"records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7ea5a56b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/330967 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
molecule_idsmilesmolecular_weight
0331406COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC398.441986
1140360COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1398.532990
2331409C1CCC(CC1)P(c1ccccc1)C1CCCCC1274.388000
32027CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C...497.707001
42036CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C...1049.558960
............
330962608Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1346.410004
330963461CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC376.509003
3309641064Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C...462.666992
330965523CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C...490.664001
3309661817COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC442.447998
\n", + "

330967 rows × 3 columns

\n", + "" + ], + "text/plain": [ + " molecule_id smiles \\\n", + "0 331406 COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC \n", + "1 140360 COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1 \n", + "2 331409 C1CCC(CC1)P(c1ccccc1)C1CCCCC1 \n", + "3 2027 CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C... \n", + "4 2036 CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C... \n", + "... ... ... \n", + "330962 608 Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1 \n", + "330963 461 CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC \n", + "330964 1064 Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C... \n", + "330965 523 CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C... \n", + "330966 1817 COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC \n", + "\n", + " molecular_weight \n", + "0 398.441986 \n", + "1 398.532990 \n", + "2 274.388000 \n", + "3 497.707001 \n", + "4 1049.558960 \n", + "... ... \n", + "330962 346.410004 \n", + "330963 376.509003 \n", + "330964 462.666992 \n", + "330965 490.664001 \n", + "330966 442.447998 \n", + "\n", + "[330967 rows x 3 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "149e8d9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
molecule_idsmilesmolecular_weight
143223241905[H]P([H])C48.025002
3298681497CP48.025002
\n", + "
" + ], + "text/plain": [ + " molecule_id smiles molecular_weight\n", + "143223 241905 [H]P([H])C 48.025002\n", + "329868 1497 CP 48.025002" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Sanity check to see if the data is correct\n", + "df[(df[\"molecule_id\"]==241905) | (df[\"molecule_id\"]==1497)]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "9596cb12", + "metadata": {}, + "outputs": [], + "source": [ + "# These two molecules are the same so lets check if rdkit will return the same smiles string when canonicalizing them\n", + "mol_241905 = df[df[\"molecule_id\"]==241905][\"smiles\"].to_list()[0]\n", + "mol_1497 = df[df[\"molecule_id\"]==1497][\"smiles\"].to_list()[0]\n", + "\n", + "# Double check with the molecular weight. Use difference is less than some tolerance 1e-6.\n", + "mol_241905_weight = df[df[\"molecule_id\"]==241905][\"molecular_weight\"].to_list()[0]\n", + "mol_1497_weight = df[df[\"molecule_id\"]==1497][\"molecular_weight\"].to_list()[0]\n", + "\n", + "a = Chem.CanonSmiles(mol_241905)\n", + "b = Chem.CanonSmiles(mol_241905)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "9c94d293", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CP 48.025001525878906\n", + "CP 48.025001525878906\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "print(a, mol_241905_weight)\n", + "print(b, mol_1497_weight)\n", + "print(a == b)\n", + "print(abs(mol_241905_weight - mol_1497_weight) < 0.000001)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "5cca5c81", + "metadata": {}, + "outputs": [], + "source": [ + "all_data_dict = df.to_dict(orient=\"records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7ea5a56b", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/330967 [00:001ss8YEUer@)%JW{6t52GuNkb^#X@np^BhU zwIW6Zeg^tIn>)IMclEBc?pHMtm&ie;E)YV8)hf>stgUKPsv8R}+aMg|$f*GqLjNhH z=Nogor$e9a8#PRyI;t0mr{odU5pur(JNzp2UV#wB-Vr3@SDe9E-SNzK2-#OOc%5AR;a@!J;vS{e2_s%t@YE!M3C_j|v0=@BDiPg39gf8`xzUK*O2J9JNVYS@I)4ND?A zX-wK_I_vIu=AvOE`NC8aB}j@&^$-KUJ!0x2z~CMF#Zw|3pHN80PH}R7wjU4QckuA2 zfxQqZ&|hzu1Hsx#dz)Tr@^{mn@R7uBb}$?R*oGSww!9nmy3xS-M0N#Gs$U{A^L%_sCagVv48AcX-?d6*WK5wjrAXb=34nS=;Y zxUq?CtI8dxVXCW5K6v)PvY{N2DVzjI#M;#-Qo6G)VtttSHz%!`%EiZkZE_6^jP{L7 zV4<^VeHTj?05bh30sO;H4vr&52C2@E;EqxtT==mC3nEoalKWI2-+O4(7<79=k2Eu< zX{VH#Om|SAy`@YcGgYB67GW-!K5jCwRZ%p{G4MQlXumk67t?i&x4M024Bgg$s>_J@ z>VSTWagU42xMhI)Z(6Sa6lC)Yn-ULB#UsLRAA5Py!2cjAR7L=VrA<{yD|~`p7A;DJ zj5)j@)uxw$nSBA-jobhfdsb}*8|L5}6g1DXMNVq_hhuJAh0I(pX+xyA$y|3*)Vd(ZeiU6Kx z$^rtI&hrmJ@lsm9(_1WB+z_CWongjw)s2+h~gp6CKzW@+GHopDQt7hezYKFQ)IbwW7!8**ru)WOMN-3 z3bR%L7`Be_$_}?2ws?j9lnjbh-X_A-9QuAz=v)rDb)mJ8L$;nmJD+>t_=l8EACg6grS0vHi@PE)&Pc8OK>460OjJ z-=a?47j}Ts{IR6M=rBBc82qwMVAA*or4cXGLu}S~^+1+`55F=gvQ+O*>ou#r+Xs8gLcrh|O3?k#RjEt;>3g}$x-WW%lj(x9dE-Gog$`g{m^p=d2 zX`;ka!W_R&>4h!>NrDy~l5OdD4f^q{u!#A$#2Z_ra_xM>N?@I2p{qBPvhpKK(2JPn z1y%-hw90jK>h%awqblB7NL0580`b6e*9{wQobM1knt-~veXjvm`RIi!T!#`)M)9E1 zKhk<#4X1)uBs0t-XPpD%KOHp!egG{G;oK5RnagLR=%Fan%IDx9-}-hv*SHTrkbQ7$ z`J}KaUrxs@ZggDJ8qhzR#(79(Ov}fe2K7MR%sECvC^XY7#Z&xyPp|8FhZ2IVOyd~~ zIE%7Q7!e8Dd2BR$UF9FK4lMH;hGK@OpoyMc+B)@WR_3U^XVD1=yFYbHmvP^&j;Zt^ zv+4Zh>0$3ABtQU&|IrNMfNWwx#;UId3=|<19xvoHu*Wwzryzl+xBD#)S~V0MD3T|k zG|#AD_*BF+qr%feg1@KrW>>KSGPhl6V4G60wau;;>||=veQDclR0k9KCdY;~skJS1 zw$ria1BG5s>mFDBS}Gwh%)W7VrLnzZI+WQR>`@@iWLg)SeaMP4oheNv5HIZ=x0zEZ z=`|)o7^!c(@n}SWFn>$yn+y;`Ey#_GWa#pd+z9K=bRzkx7)cE$rmd{P>AuI$44b^T zF`)+yAMAl(VviRA_rJ(E-#wBi=X~p0d0x=!&uLWorZkz!#4-B7_4~_TKOxdr<}|_4 zE{*3Q?AqA><0j=~!A8x)LBJQ&dM#Q|XR}u9%pLNhH~(bR*mYsM03m&RGZ0thAX@B3 z=7Av7@BOiMCo95GCpgFk-xG7JZCrxxLWhDFJCSk4J+kSOItX!Elm3;QH<;fwy;!f-eIe!e{ zUVwHuql%{Yv8^v|>?oze)t&UoND6z)*NbMkS2}0h*}2KJn556{owM|aiH4E1evc^> zECGTg+ZIPK3yKBKcB~jh2_^hDg93LlO!LLAnj!2_A^vk|U5aF!3&~b9@8g%%CXJxC zCiJ@)Ek5&jUCUQP)r6wl9yQ4C1(4C>!ZlK5&B z32`SFpwc}g&4~ucj_tXDZf{Zp5Qsu+Tv2n+7PX%6E^*Yx{ zfxzSl0q`LZBo5rY9OKi0y8)#5SfyfIT>b*3zzNtGgY7^J1?7^v${?mIq*fje^6k=B zgsB4MLi^ec!5Vt!B|kf2Eb#zkAdwX!?1N38Vlqw4=ZV1RCEY7>LqBM47;!9TYz|OD zz_NJ?IzZz68$Z!yxPOt{uQP6AKLjb{dl?>nC{WYf)Tt;M#Clfu^wVDMzPHf=e%e0H z+jqG<-G9`!eB}ozl)-TV@XlDIGy*|3&X^xD9F2vK6vyEp(zhh^rD`>5@oILDFb00{ zV+UA#=iFbMFqU|TOqWJhMb;aH=yGOuLhtEUPzk@yZ%SmE;(xjAG87=QL{q5+5~iUK zq1Kdz)KZ_j;fN9ZD-?WsNn+R;;Rp?raw9=_^NQF z0PVhG4_OO*hWz6%+1&c9AIR6@o)&zs*RVP3gUZ_K!;yRp@)*H@4WIdmjmqAQZU^`r z`CsEfpeKw$eT5FnD;D*p&x`>Nfi~hSzd8u7@%eaTvQTPcH(x!4Yk&Ew!B4?&$)xU~ z;4L1)Mfg3(j!qir|DeBsaT;zXW;k{UL#PDtJO*T$pLAa#;xOog$6Z?}bes%bwpDTG zUW2#)hfhKzs{3+UUvD(Slk5m}Z0yh?9Y)x=gp&}?gF(1c?cASH55)@t)(ub_K_jYQ6KHl z8x&EQFwzK-d61WD5-BNA7545HvGQ`7z)D$;RiMEi-8XvZ>0xnSHNmd~93&G`ga$^A zp!gti%PkpbS(zkBkqCUDT1U|!;$ysmc6d!9f;)cRh - + + { Object.keys(molData).length > 0 ? + 1366) ? 6 : 12} sx={{mt: 3}}> {Object.keys(svg).length > 0 && 1366) ? 6 : 12}> {Object.keys(molData).length > 0 && } - {(width > 768) && allConformers.length > 0 && conformer.length > 0 && 1366) ? 6 : 12}> + {(width > 768) && allConformers.length > 0 && conformer.length > 0 && 1366) && Object.keys(neighborData).length > 0 ? 6 : 12}> Conformer @@ -219,8 +224,8 @@ export default function MoleculeInfo() { } - {(width > 768) && 1366) && allConformers.length > 0 && conformer.length > 0 ? 6 : 12}> - {Object.keys(neighborData).length > 0 ? + {(width > 768) && Object.keys(neighborData).length > 0 && 1366) && allConformers.length > 0 && conformer.length > 0 ? 6 : 12}> + {Object.keys(neighborData).length > 0 && - : - - - } } - {Object.keys(molData).length > 0 && (width > 768) && - } - + : + + + } ) } From 1773ec75297f6dcc3313aee2a2532f88ffa71d26 Mon Sep 17 00:00:00 2001 From: Usman Jamshed Date: Sat, 14 Oct 2023 16:45:11 -0400 Subject: [PATCH 2/3] Removing ipynb checkpoints --- .../DB_investigation-checkpoint.ipynb | 798 ------------------ 1 file changed, 798 deletions(-) delete mode 100644 .ipynb_checkpoints/DB_investigation-checkpoint.ipynb diff --git a/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb b/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb deleted file mode 100644 index 025dc89..0000000 --- a/.ipynb_checkpoints/DB_investigation-checkpoint.ipynb +++ /dev/null @@ -1,798 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "9368bb63", - "metadata": {}, - "outputs": [], - "source": [ - "# Want to start by checking which molecules are duplicates.\n", - "# For examples we have 241905 and 1497" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e28921c8", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "import pandas as pd\n", - "from rdkit import Chem" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "214277f9", - "metadata": {}, - "outputs": [], - "source": [ - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "a128c9e3", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_556/3175014960.py:16: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", - " df = pd.read_sql_query(query, connection)\n" - ] - } - ], - "source": [ - "# Establish a connection\n", - "# You must have the DB container running to run this cell successfully.\n", - "# Connection parameters\n", - "db_params = {\n", - " 'dbname': 'postgres',\n", - " 'user': 'postgres',\n", - " 'password': '',\n", - " 'host': '127.0.0.1',\n", - " 'port': '5432'\n", - "}\n", - "\n", - "# Establish a connection to the PostgreSQL database\n", - "connection = psycopg2.connect(**db_params)\n", - "\n", - "# Execute an SQL statement\n", - "query = \"SELECT molecule_id, smiles, molecular_weight FROM molecule\"\n", - "df = pd.read_sql_query(query, connection)\n", - "\n", - "# Close the connection\n", - "connection.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "686a1a35", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
molecule_idsmilesmolecular_weight
0331406COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC398.441986
1140360COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1398.532990
2331409C1CCC(CC1)P(c1ccccc1)C1CCCCC1274.388000
32027CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C...497.707001
42036CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C...1049.558960
............
330962608Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1346.410004
330963461CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC376.509003
3309641064Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C...462.666992
330965523CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C...490.664001
3309661817COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC442.447998
\n", - "

330967 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " molecule_id smiles \\\n", - "0 331406 COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC \n", - "1 140360 COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1 \n", - "2 331409 C1CCC(CC1)P(c1ccccc1)C1CCCCC1 \n", - "3 2027 CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C... \n", - "4 2036 CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C... \n", - "... ... ... \n", - "330962 608 Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1 \n", - "330963 461 CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC \n", - "330964 1064 Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C... \n", - "330965 523 CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C... \n", - "330966 1817 COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC \n", - "\n", - " molecular_weight \n", - "0 398.441986 \n", - "1 398.532990 \n", - "2 274.388000 \n", - "3 497.707001 \n", - "4 1049.558960 \n", - "... ... \n", - "330962 346.410004 \n", - "330963 376.509003 \n", - "330964 462.666992 \n", - "330965 490.664001 \n", - "330966 442.447998 \n", - "\n", - "[330967 rows x 3 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "149e8d9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
molecule_idsmilesmolecular_weight
143223241905[H]P([H])C48.025002
3298681497CP48.025002
\n", - "
" - ], - "text/plain": [ - " molecule_id smiles molecular_weight\n", - "143223 241905 [H]P([H])C 48.025002\n", - "329868 1497 CP 48.025002" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Sanity check to see if the data is correct\n", - "df[(df[\"molecule_id\"]==241905) | (df[\"molecule_id\"]==1497)]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "9596cb12", - "metadata": {}, - "outputs": [], - "source": [ - "# These two molecules are the same so lets check if rdkit will return the same smiles string when canonicalizing them\n", - "mol_241905 = df[df[\"molecule_id\"]==241905][\"smiles\"].to_list()[0]\n", - "mol_1497 = df[df[\"molecule_id\"]==1497][\"smiles\"].to_list()[0]\n", - "\n", - "# Double check with the molecular weight. Use difference is less than some tolerance 1e-6.\n", - "mol_241905_weight = df[df[\"molecule_id\"]==241905][\"molecular_weight\"].to_list()[0]\n", - "mol_1497_weight = df[df[\"molecule_id\"]==1497][\"molecular_weight\"].to_list()[0]\n", - "\n", - "a = Chem.CanonSmiles(mol_241905)\n", - "b = Chem.CanonSmiles(mol_241905)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9c94d293", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CP 48.025001525878906\n", - "CP 48.025001525878906\n", - "True\n", - "True\n" - ] - } - ], - "source": [ - "print(a, mol_241905_weight)\n", - "print(b, mol_1497_weight)\n", - "print(a == b)\n", - "print(abs(mol_241905_weight - mol_1497_weight) < 0.000001)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "5cca5c81", - "metadata": {}, - "outputs": [], - "source": [ - "all_data_dict = df.to_dict(orient=\"records\")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "7ea5a56b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/330967 [00:00 Date: Sun, 19 Nov 2023 00:04:41 -0500 Subject: [PATCH 3/3] removing db_investigation and duplicates.pkl --- DB_investigation.ipynb | 798 ----------------------------------------- duplicates.pkl | Bin 9364 -> 0 bytes 2 files changed, 798 deletions(-) delete mode 100644 DB_investigation.ipynb delete mode 100644 duplicates.pkl diff --git a/DB_investigation.ipynb b/DB_investigation.ipynb deleted file mode 100644 index 025dc89..0000000 --- a/DB_investigation.ipynb +++ /dev/null @@ -1,798 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "9368bb63", - "metadata": {}, - "outputs": [], - "source": [ - "# Want to start by checking which molecules are duplicates.\n", - "# For examples we have 241905 and 1497" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e28921c8", - "metadata": {}, - "outputs": [], - "source": [ - "import psycopg2\n", - "import pandas as pd\n", - "from rdkit import Chem" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "214277f9", - "metadata": {}, - "outputs": [], - "source": [ - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "a128c9e3", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_556/3175014960.py:16: UserWarning: pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.\n", - " df = pd.read_sql_query(query, connection)\n" - ] - } - ], - "source": [ - "# Establish a connection\n", - "# You must have the DB container running to run this cell successfully.\n", - "# Connection parameters\n", - "db_params = {\n", - " 'dbname': 'postgres',\n", - " 'user': 'postgres',\n", - " 'password': '',\n", - " 'host': '127.0.0.1',\n", - " 'port': '5432'\n", - "}\n", - "\n", - "# Establish a connection to the PostgreSQL database\n", - "connection = psycopg2.connect(**db_params)\n", - "\n", - "# Execute an SQL statement\n", - "query = \"SELECT molecule_id, smiles, molecular_weight FROM molecule\"\n", - "df = pd.read_sql_query(query, connection)\n", - "\n", - "# Close the connection\n", - "connection.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "686a1a35", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
molecule_idsmilesmolecular_weight
0331406COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC398.441986
1140360COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1398.532990
2331409C1CCC(CC1)P(c1ccccc1)C1CCCCC1274.388000
32027CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C...497.707001
42036CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C...1049.558960
............
330962608Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1346.410004
330963461CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC376.509003
3309641064Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C...462.666992
330965523CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C...490.664001
3309661817COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC442.447998
\n", - "

330967 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " molecule_id smiles \\\n", - "0 331406 COc1cccc(c1c1ccccc1P(c1ccccc1)c1ccccc1)OC \n", - "1 140360 COc1ccc(P(c2ccccc2SC)c2ccccc2SC)c(C)c1 \n", - "2 331409 C1CCC(CC1)P(c1ccccc1)C1CCCCC1 \n", - "3 2027 CN(c1ccccc1c1ccccc1P(C12CC3CC(C2)CC(C1)C3)C12C... \n", - "4 2036 CCC1(CC)O[C@@H]2[C@@H](O1)C(c1cc(C(C)(C)C)cc(C... \n", - "... ... ... \n", - "330962 608 Cc1cc(C)cc(P(c2cc(C)cc(C)c2)c2ccccc2C=O)c1 \n", - "330963 461 CCO[Si](CCP(c1ccccc1)c1ccccc1)(OCC)OCC \n", - "330964 1064 Cc1c(C)n(C(C)C)c(=NP(N=c2n(C(C)C)c(C)c(C)n2C(C... \n", - "330965 523 CN(C)/N=C/c1ccc(P(c2ccc(/C=N/N(C)C)s2)c2ccc(/C... \n", - "330966 1817 COc1cccc(P(c2cccc(OC)c2OC)c2cccc(OC)c2OC)c1OC \n", - "\n", - " molecular_weight \n", - "0 398.441986 \n", - "1 398.532990 \n", - "2 274.388000 \n", - "3 497.707001 \n", - "4 1049.558960 \n", - "... ... \n", - "330962 346.410004 \n", - "330963 376.509003 \n", - "330964 462.666992 \n", - "330965 490.664001 \n", - "330966 442.447998 \n", - "\n", - "[330967 rows x 3 columns]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "149e8d9b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
molecule_idsmilesmolecular_weight
143223241905[H]P([H])C48.025002
3298681497CP48.025002
\n", - "
" - ], - "text/plain": [ - " molecule_id smiles molecular_weight\n", - "143223 241905 [H]P([H])C 48.025002\n", - "329868 1497 CP 48.025002" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Sanity check to see if the data is correct\n", - "df[(df[\"molecule_id\"]==241905) | (df[\"molecule_id\"]==1497)]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "9596cb12", - "metadata": {}, - "outputs": [], - "source": [ - "# These two molecules are the same so lets check if rdkit will return the same smiles string when canonicalizing them\n", - "mol_241905 = df[df[\"molecule_id\"]==241905][\"smiles\"].to_list()[0]\n", - "mol_1497 = df[df[\"molecule_id\"]==1497][\"smiles\"].to_list()[0]\n", - "\n", - "# Double check with the molecular weight. Use difference is less than some tolerance 1e-6.\n", - "mol_241905_weight = df[df[\"molecule_id\"]==241905][\"molecular_weight\"].to_list()[0]\n", - "mol_1497_weight = df[df[\"molecule_id\"]==1497][\"molecular_weight\"].to_list()[0]\n", - "\n", - "a = Chem.CanonSmiles(mol_241905)\n", - "b = Chem.CanonSmiles(mol_241905)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "9c94d293", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "CP 48.025001525878906\n", - "CP 48.025001525878906\n", - "True\n", - "True\n" - ] - } - ], - "source": [ - "print(a, mol_241905_weight)\n", - "print(b, mol_1497_weight)\n", - "print(a == b)\n", - "print(abs(mol_241905_weight - mol_1497_weight) < 0.000001)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "5cca5c81", - "metadata": {}, - "outputs": [], - "source": [ - "all_data_dict = df.to_dict(orient=\"records\")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "7ea5a56b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/330967 [00:001ss8YEUer@)%JW{6t52GuNkb^#X@np^BhU zwIW6Zeg^tIn>)IMclEBc?pHMtm&ie;E)YV8)hf>stgUKPsv8R}+aMg|$f*GqLjNhH z=Nogor$e9a8#PRyI;t0mr{odU5pur(JNzp2UV#wB-Vr3@SDe9E-SNzK2-#OOc%5AR;a@!J;vS{e2_s%t@YE!M3C_j|v0=@BDiPg39gf8`xzUK*O2J9JNVYS@I)4ND?A zX-wK_I_vIu=AvOE`NC8aB}j@&^$-KUJ!0x2z~CMF#Zw|3pHN80PH}R7wjU4QckuA2 zfxQqZ&|hzu1Hsx#dz)Tr@^{mn@R7uBb}$?R*oGSww!9nmy3xS-M0N#Gs$U{A^L%_sCagVv48AcX-?d6*WK5wjrAXb=34nS=;Y zxUq?CtI8dxVXCW5K6v)PvY{N2DVzjI#M;#-Qo6G)VtttSHz%!`%EiZkZE_6^jP{L7 zV4<^VeHTj?05bh30sO;H4vr&52C2@E;EqxtT==mC3nEoalKWI2-+O4(7<79=k2Eu< zX{VH#Om|SAy`@YcGgYB67GW-!K5jCwRZ%p{G4MQlXumk67t?i&x4M024Bgg$s>_J@ z>VSTWagU42xMhI)Z(6Sa6lC)Yn-ULB#UsLRAA5Py!2cjAR7L=VrA<{yD|~`p7A;DJ zj5)j@)uxw$nSBA-jobhfdsb}*8|L5}6g1DXMNVq_hhuJAh0I(pX+xyA$y|3*)Vd(ZeiU6Kx z$^rtI&hrmJ@lsm9(_1WB+z_CWongjw)s2+h~gp6CKzW@+GHopDQt7hezYKFQ)IbwW7!8**ru)WOMN-3 z3bR%L7`Be_$_}?2ws?j9lnjbh-X_A-9QuAz=v)rDb)mJ8L$;nmJD+>t_=l8EACg6grS0vHi@PE)&Pc8OK>460OjJ z-=a?47j}Ts{IR6M=rBBc82qwMVAA*or4cXGLu}S~^+1+`55F=gvQ+O*>ou#r+Xs8gLcrh|O3?k#RjEt;>3g}$x-WW%lj(x9dE-Gog$`g{m^p=d2 zX`;ka!W_R&>4h!>NrDy~l5OdD4f^q{u!#A$#2Z_ra_xM>N?@I2p{qBPvhpKK(2JPn z1y%-hw90jK>h%awqblB7NL0580`b6e*9{wQobM1knt-~veXjvm`RIi!T!#`)M)9E1 zKhk<#4X1)uBs0t-XPpD%KOHp!egG{G;oK5RnagLR=%Fan%IDx9-}-hv*SHTrkbQ7$ z`J}KaUrxs@ZggDJ8qhzR#(79(Ov}fe2K7MR%sECvC^XY7#Z&xyPp|8FhZ2IVOyd~~ zIE%7Q7!e8Dd2BR$UF9FK4lMH;hGK@OpoyMc+B)@WR_3U^XVD1=yFYbHmvP^&j;Zt^ zv+4Zh>0$3ABtQU&|IrNMfNWwx#;UId3=|<19xvoHu*Wwzryzl+xBD#)S~V0MD3T|k zG|#AD_*BF+qr%feg1@KrW>>KSGPhl6V4G60wau;;>||=veQDclR0k9KCdY;~skJS1 zw$ria1BG5s>mFDBS}Gwh%)W7VrLnzZI+WQR>`@@iWLg)SeaMP4oheNv5HIZ=x0zEZ z=`|)o7^!c(@n}SWFn>$yn+y;`Ey#_GWa#pd+z9K=bRzkx7)cE$rmd{P>AuI$44b^T zF`)+yAMAl(VviRA_rJ(E-#wBi=X~p0d0x=!&uLWorZkz!#4-B7_4~_TKOxdr<}|_4 zE{*3Q?AqA><0j=~!A8x)LBJQ&dM#Q|XR}u9%pLNhH~(bR*mYsM03m&RGZ0thAX@B3 z=7Av7@BOiMCo95GCpgFk-xG7JZCrxxLWhDFJCSk4J+kSOItX!Elm3;QH<;fwy;!f-eIe!e{ zUVwHuql%{Yv8^v|>?oze)t&UoND6z)*NbMkS2}0h*}2KJn556{owM|aiH4E1evc^> zECGTg+ZIPK3yKBKcB~jh2_^hDg93LlO!LLAnj!2_A^vk|U5aF!3&~b9@8g%%CXJxC zCiJ@)Ek5&jUCUQP)r6wl9yQ4C1(4C>!ZlK5&B z32`SFpwc}g&4~ucj_tXDZf{Zp5Qsu+Tv2n+7PX%6E^*Yx{ zfxzSl0q`LZBo5rY9OKi0y8)#5SfyfIT>b*3zzNtGgY7^J1?7^v${?mIq*fje^6k=B zgsB4MLi^ec!5Vt!B|kf2Eb#zkAdwX!?1N38Vlqw4=ZV1RCEY7>LqBM47;!9TYz|OD zz_NJ?IzZz68$Z!yxPOt{uQP6AKLjb{dl?>nC{WYf)Tt;M#Clfu^wVDMzPHf=e%e0H z+jqG<-G9`!eB}ozl)-TV@XlDIGy*|3&X^xD9F2vK6vyEp(zhh^rD`>5@oILDFb00{ zV+UA#=iFbMFqU|TOqWJhMb;aH=yGOuLhtEUPzk@yZ%SmE;(xjAG87=QL{q5+5~iUK zq1Kdz)KZ_j;fN9ZD-?WsNn+R;;Rp?raw9=_^NQF z0PVhG4_OO*hWz6%+1&c9AIR6@o)&zs*RVP3gUZ_K!;yRp@)*H@4WIdmjmqAQZU^`r z`CsEfpeKw$eT5FnD;D*p&x`>Nfi~hSzd8u7@%eaTvQTPcH(x!4Yk&Ew!B4?&$)xU~ z;4L1)Mfg3(j!qir|DeBsaT;zXW;k{UL#PDtJO*T$pLAa#;xOog$6Z?}bes%bwpDTG zUW2#)hfhKzs{3+UUvD(Slk5m}Z0yh?9Y)x=gp&}?gF(1c?cASH55)@t)(ub_K_jYQ6KHl z8x&EQFwzK-d61WD5-BNA7545HvGQ`7z)D$;RiMEi-8XvZ>0xnSHNmd~93&G`ga$^A zp!gti%PkpbS(zkBkqCUDT1U|!;$ysmc6d!9f;)cRh