From 4a96c8789e012cae194eb35f6a94a7e6395cfe58 Mon Sep 17 00:00:00 2001 From: Abhishek Dasgupta Date: Tue, 18 Jun 2024 13:21:52 +0100 Subject: [PATCH 1/3] Create fhirflat.toml, fixes #47 --- fhirflat/__init__.py | 2 + fhirflat/ingest.py | 69 ++++++++++++++++++++++++++++++++- pyproject.toml | 8 +++- tests/bundle/condition.parquet | Bin 0 -> 14975 bytes tests/bundle/encounter.parquet | Bin 0 -> 26792 bytes tests/bundle/patient.parquet | Bin 0 -> 5217 bytes tests/test_ingest.py | 33 ++++++++++++++++ 7 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 tests/bundle/condition.parquet create mode 100644 tests/bundle/encounter.parquet create mode 100644 tests/bundle/patient.parquet diff --git a/fhirflat/__init__.py b/fhirflat/__init__.py index f730b6b..3b9678b 100644 --- a/fhirflat/__init__.py +++ b/fhirflat/__init__.py @@ -18,4 +18,6 @@ ) from .ingest import convert_data_to_flat +# Update this when bumping version in pyproject.toml! +__version__ = "0.1.0" __all__ = ["convert_data_to_flat"] \ No newline at end of file diff --git a/fhirflat/ingest.py b/fhirflat/ingest.py index e55e4d1..66bf683 100644 --- a/fhirflat/ingest.py +++ b/fhirflat/ingest.py @@ -4,17 +4,22 @@ """ import argparse +import hashlib import os import timeit import warnings from datetime import datetime +from glob import glob from math import isnan +from pathlib import Path +from typing import Literal, TypedDict from zoneinfo import ZoneInfo import dateutil.parser import numpy as np import pandas as pd +import fhirflat from fhirflat.util import get_local_resource, group_keys # 1:1 (single row, single resource) mapping: Patient, Encounter @@ -31,6 +36,13 @@ """ +class FlatMetadata(TypedDict): + N: int | Literal["NA"] + generator: str + checksum: str + checksum_file: str + + def find_field_value( row, response, fhir_attr, mapp, date_format, timezone, raw_data=None ): @@ -357,6 +369,60 @@ def condense(x): return melted_data["flat_dict"].to_frame() +def checksum(file: str) -> str: + "Calculate the SHA-256 checksum of a file" + h = hashlib.sha256() + with open(file, "rb") as fp: + while True: + data = fp.read(4096) + if len(data) == 0: + break + h.update(data) + return h.hexdigest() + + +def checksum_text(checksums: dict[str, str]) -> str: + return "\n".join(f"{checksums[k]} {k}" for k in sorted(checksums)) + "\n" + + +def generate_metadata(folder_name: str) -> tuple[FlatMetadata, dict[str, str]]: + "Generate metadata for a FHIRFlat folder" + + patient_file = os.path.join(folder_name, "patient.parquet") + if not os.path.exists(patient_file): + N = "NA" + else: + N = len(pd.read_parquet(patient_file).id.unique()) + if isinstance(N, int): + assert N > 0, "patient.parquet file is empty" + checksums = { + os.path.basename(f): checksum(f) for f in glob(f"{folder_name}/*.parquet") + } + m = hashlib.sha256() + m.update(checksum_text(checksums).encode("utf-8")) + + # write checksums file + return { + "N": N, + "generator": f"fhirflat/{fhirflat.__version__}", + "checksum": m.hexdigest(), + "checksum_file": "sha256sums.txt", + }, checksums + + +def write_metadata( + metadata: FlatMetadata, checksums: dict[str, str], metadata_path: Path +): + metadata_text = f"""[metadata] +N = {metadata['N']} +generator = "{metadata['generator']}" +checksum = "{metadata['checksum']}" +checksum_file = "{metadata['checksum_file']}" +""" + metadata_path.write_text(metadata_text) + (metadata_path.parent / "sha256sums.txt").write_text(checksum_text(checksums)) + + def convert_data_to_flat( data: str, date_format: str, @@ -458,7 +524,6 @@ def convert_data_to_flat( f"{resource.__name__} took {total_time:.2f} seconds to convert" f" {len(df)} rows. " ) - if errors is not None: errors.to_csv( os.path.join(folder_name, f"{resource.__name__.lower()}_errors.csv"), @@ -470,6 +535,8 @@ def convert_data_to_flat( f"Errors saved to {resource.__name__.lower()}_errors.csv" ) + write_metadata(*generate_metadata(folder_name), Path(folder_name) / "fhirflat.toml") + def main(): parser = argparse.ArgumentParser( diff --git a/pyproject.toml b/pyproject.toml index 48ba1af..3795a93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "pydantic==2.6.1", "pydantic_core==2.16.2", "tzdata", - "python-dateutil" + "python-dateutil", ] [project.optional-dependencies] @@ -35,6 +35,7 @@ dev = [ "pytest-cov", "pytest-unordered", "ruff", + "tomli==2.*; python_version < '3.11'", "pre-commit" ] docs = [ @@ -73,4 +74,7 @@ extend-select = [ "RUF", # Ruff-specific "YTT", # flake8-2020 ] -ignore = ["C901"] +ignore = [ + "C901", # function is too complex + "C408", # unnecessary `dict` call (rewrite as a literal) +] diff --git a/tests/bundle/condition.parquet b/tests/bundle/condition.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f213484721ce977ee26bc82d686915dabc7ac32c GIT binary patch literal 14975 zcmdU0TWs6r6{Zx`Nm?h(vQU##H}#x^^%}>z*m9&SeW2t>mTD`uZ0pt)hIp|}o4QmN z%W_*_7=~dOilG>aVc5eE48<@E!_bGWD~2N2!ybmBk3*mOPz*(}hdmU-Fcd{m?3|w> zMN-tojg@9fLXrH>fBx@W|8ssjD{RtBJ7^C*w?x}%>O4iczITD5sC=bVFA5WExgyCE z!RZN~$2&_?^m*Dj4Rq(|5qe~No|?K~Mdz}73rtT>*x_oo+3XbascoDd9rIHo=f+%* z=ec}MEQrP0TCE}#w`sf6?)=E+{HKk{FsF{X&e${vMA3E%F6ZEjqTo3~$5jC@|M)3* zw^OT?Z@S&pVyPesGg5J@NCaFj{sJEJU}!+k?RT6 z!}mutJp|5hC84<{)x;UTB!~`Ks@6`5GEods$sg>_x9!ZE4(7cRsFZ%|^#^3Bs&RTk zRn^(KM*(zWrzDFLh|R(S9&L}hboVe#Z|(O_Yf`$}Qr5(S8WHRboAdWJ=EGCWU1Dt= z9|KQIxc{)xAKI4Cr|8R{hH7G^AQek;X}dYIBj1=QRkqz> zQ2Sc53cl#(OGQDdNu}a-9(;4VRB66ku9P^i#&xvIb+A%FbrMHAAmS}2IR@@s&8Nzt z`O^tI`jg{??S!4V`?#j*z!?>~r9U~GZ#bO4b}*lhGG9K5{`5exTiVh|UsPT1v8j_F z(~6>UQ1k?S1>?GvdF)dsTSG4O2B^>7LOqZ6=I7_V;C`yt=+fv!zd5eH6tHk-a{2SO+R~rK zoMPz+cIR7m=eu_16Po$*0y;w9#Rh<5sVmq%R9&5(cmyCv#C@@1kyRcc11{wY==8bd zY z;~>16g`ObMkRi-C_*S_BBHa@((1-L-o*Ad@tzz&@saO?j z3$W>~O9hdDdClhhg^l^;cbI=qjrZVbp&S#)KUsprw=M@>5oG(x1oH3GZIGiCkuOyQ zQ9ztJj2~ZP-n|AG+e}O>EiqoCFR6+=<2en>hSlQ3tkHRD>a;fKYTufm{?4sAp$ni^ zOZ5sbt~bjHRK?5g%`iWlK`(1_(KECJI%vA_VTm$lT}F3HH`<)8p{!AWw4LeZXS15j zfiqRySEO3AXLh;gWj^rq$}ZZ113fb4uHKZ$pYBaoPr(r-bJpKvb{HN zC7X2)dDZ58-Nt<4XWsUsYVB9Zbu7$K z+6(5PE8v--mEFv6N@M0hmafkKJn6JEr=0)21V1ktn|6(q^F<+Fb^YzaOOr~?%$MYP zp;(=~Ir02tM;$%+!t}(XLZOt{A|ZTfvY0Q3Fr`?RWuSml30c@QzEc+3tU*5ugp>86 zgthP_P#5cka#I^9k&Xl37sOg#$k*~f{?a7Yivs+V3FmICN3I7ZUwNes#)x9l^ph!M zW`JZxa9{XP?pLM(W=rNt!_R~|*Y6lMt&Tsec)q2g7Y5W(cMMn)_Q0SHn=l;BLLIAX zZH8)QqXjyKO{*V^2DC*~5xZbehlheeJ;3j@0BVMub>Ps@)>efx$&PEQL#DOw)ACwt z*D0}K=$KdSc~)%!r7yh+-?4b4@dOvtp`EC!wlJEhKmG7g8qS&fDYApugpVtgWaME? z`s!O<%I`=hh7;ya7#+i=g<&TdbhqhPT7Pcbxw3imSQl=e0$VL?95pR`pJ8k`jqO~# zW7xF%v4o4Zh;?*y3=ai^at3Olf%brD-TTu#x?e}d(pidi?io_CHa~|rjlre8Hb)n! zpDJR$R;rL1OP07wzS6`JtK7_2Dy0T8gEM*j5g@+x`e!^do=K>5)a^<)=$rB3808ve zlgT^jo66y+>tD~f@zdB|Wmy*g;paJ)onu4WY=mXE+3+@b7YZ518~F0JHXg${_zWMy zg^b5ow!y+Y_(mq7Eb@Y9pxg%f(2ZC@uIHkuxr{Hd&n>2ETya^tBZat_9NfyM{d>90 z&K4I<`Eu!lt>s8?D<*rYv2c8s^Bs7(v|pu>`DScB7O4S0{|=YlKpu4=eNbHrvr?`g z%Um?M#T6p8+#1kD{a!BGKt7EutdY$mwuD8wLB_&yFISL(e^QP~jcwrP)$i`^#Bha1Ufexj{GH zhL{R^#7sy=9I--B$`?|*LYR!#av%@z@MJZ89R!!cXfIX2tduPT_rz49EJQczI^KnR zI#ta@g7qxOq419H=N9Gld?qO`5#4MMnXGVfQTb4a)HEAphJ;rDpKHn58*ecwi;JsW z@eW0=KJ)=$pD*t9(Q9G{tpU$_< zG{w*kS4i&gUVY3Ql=;~tr};q2L;Qp*rfU46tj{U?*+N;)&MJPyrBj}4ItjMBt>vk$ zwFc%6UCdw(*{Ju?riIPKXiE4h`(!u25*L%O_a(}lZ?4WS#$|psBxijx=IFhR zOiG5F*wx<#oonTXu$S#~%HC(OH^%olUqgxQfnqlZ_DOk?uz%)K$uia>UH0&OkX06L zV=hQ-};kEqc6P<6h0rru22G zg?}cz`n~nR*u6(@5sq(Vd}>c~fHtpY4ntd+CaJ5f;-<_MSAB`Dxks)G3t7uvLx~%& zfN_zXg?P>+_jw6=4&1Awvcwl6O%dyN=ySw$q{$b8vr2x+K^`SJmh>~r>l^ju@LVHS zB>hPnBlxSb&mfOjb7?mQbnzgzZ!DXIf1wubGTcIo2TfiGdcP_*fQt?61$pfC_E{K@ zvI}tAfIsko=QtaNn{8|h=df*jZWTyFWa=G!PLO_a4yo7iqBNCr*PvB}hY-6%URLo+ z`bqN0Ch;rd4MN`pb$cXO&1G^j7YQ^n-T)4feiRm7RQgeT#Uf;Qb9lq;Mw;6Z(!@gZ ziw}5FK0JJd4u!PR`?1k=pue`K;#p9*>>;g!hfKvu>;f*ZsEdCkp5SFTi}QUe`_O;G z7SgN%fV5-JshYoO5p-#a^Lpq%R#O9;JVWcwGlP?!BYoSy)E;g%xl; zoxejc!l$ioVoK08*xw81ATp1kxPtNr=G+h}@DytKD~T^%{Kt_K@$oh6O?F7Y#Bn{} zI)oaD?^e7ZrS7~+iFf^c(jOvG3g`Q^`Cak@;t#=DNUDZ&AfLc`*8(cuzWE{A(HjGk za0BNDO#14VAACrEunXqV%S-fYnIE#c{$0xtEX24wxC|G)zDdyN^li=$9;6o_#`X5? zh;M`v)tw2cKF)^raDG{vufz{AiY|T>e$hTodJd q<;A*KyUv%(*U2I7MC*`tLOH&L-{ON`OZ^Iesn4E)-!q2)dip;D@h>v~ literal 0 HcmV?d00001 diff --git a/tests/bundle/encounter.parquet b/tests/bundle/encounter.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a1cdf7a9f8f827680d85e0cfc67d93aefe31d848 GIT binary patch literal 26792 zcmeHQU2Gdyb|z_6v0d3w5|g1_HOmUlGz;6}k3^fyx?2xPiK3z(ind6R+0-RDBXXpf zA;TX@WD;NzEEdKh2=Y)EMiFcmi^U=j!6FYq5EOYRieM2GMIRPLk%yot7C|41pa_C( zABsHmoIC%+ONz2Z$%~eV$DFz6p7Wh^&pqefbB9iv%>>LYpAAH#flwe20yUp=PaF2W?s&m>P#^Ds6d?G?2pJm(xt_ui`tpe>_k=P+ zkxNBR5}Hk+CePrv*6mumQRZjLHI5$>g=XssUgRZSRt)l?%kzngd1st??+>Pq2p`pW zx7-QMc)r)CK!>;}$U?a&`qt!fO{`Tqz64)~VIs(S4L3E2-aD|BYOr5V9B8l>zu)RL z*t;&zFI>!jKj)APQQ`QS@V}t$nGY0HQ19q(;R0P?C z_@_@ZzkTY^h#?RygGDhsVnDj&e$JSw@aSB?G{>9{<~_F_UCcAA!L#~oqtCD(cuLHd z7nomP_%1E{^$d4|CzH0tbCmFL)@b#bz60Feknj_)ZoPhh=^p-p)AO;DdHY%BHy&ct zJ~F9paMOKD^WNuLmjKsozOgHm`3(PR8@$agbsBRa<(iMo6SCn z>;F#D71%jZl&wUvKk+K8?*^_o+?|I`1_5psAH&4O!)EHTDjGcC#axtHM zS6B7klblej$hD@>yvy-&)4I$!Q`W#(lIfWSzr{CTD0t%=#Pl)Ge>**2Ihl`MV7~ZM zG|j>As3!5cr!C)bqhLUC;Lh>UpYdQ+yoq0R%gX z)?=6>h?;LZJ@=i=-(F?jyozdS7^uqp!2K7RGBd=N^_=6=E$@7GF(EX|)ncQ<8&4_P zK0_h%K11P1x5vl9qqN5poY1U`#SSsaeV6B57jr+ryz3{EVrZaLqdjCq)(v{CQ4@KJ z{f3NeRzAZJWz9*R>da z@h{zAjrqFJgbB2^6)!e;Kfhm;>ST=U%VIZ`&`W;|e9g;;|GBA~*L#jP3j`avJWAyL z*Y7cJeGhF!XW35@Rm(~D1x?G-xHE#!g$MFK?DbShKd2nmWeOBGO zUxpnG#AnW1U{iWJP4TAR=@r}c+OiXytaEA#?RLcjN@3qS=%CUQnB9WY38reh||WThba~Nu=8p* zIv1IX9vS=K%JiP^tPfaFn@p&O+d|-cr^6C{I(HK7sz;>G=mI^Q%qfs~oCr7Rp?8n z`#+qTN{z2yl>yV8D(};Fa!sC>n!Z-e*KBXWZo;?Ks7bz(0I7{{Kxwfq^KGdn3q@a1 z=6tZ(fMi%<$`!s21KD7G}e z#mtZjv?njDH7Z3}==QHQpSMzYZTB|Z_qGk(Pd1e*V7gJ;6*#E7Aao@`+!L5js|el3 zt%o~RiJrAXS}E~hYQtKGWeE*JCd}<#%bgAWk0k^C6U}yMn=dO)u2x4y=AU*DwuOg{ zBNg(nIV~8Ojo@1GFsDd*pS}r1rvP<(bK2K+UFpb~7Wmk}eD-dQ`A|k1^jjM0!iIoD zwN=ldQA)4j!I56aw^eKSWI={{N3qkSq_?R-^xlE3RQr6;w%doE_>?oEubRwTO|;KI zK?lH~+QqW61f=9+cXxds4xJ%IMWZ?R5^sA z=Uj^l!hXs`RRs;FsUm*TqllTup%u~2Q4DmZ!vHE8x?$B`hmjA%VXwQ=1Ol0+yn77C z(5O2fMD8NwdOccm3!^S=XpKeQq1dU#--Bq!6%Qn3VY6Fkby4#(wNht(WUWtWEoZ?I zhAQ!ciO`o<3SAb|7QSwPKO-56)on$ zC<^w>=*#KMXiDD9V^vgU9*Yt#bDSks&-*WWT+C_DJ2x*lz+bUUB6G#2_f6kx(@Jd@ zet*%Hb=viT=h*W6iO+WCxov}%SDNYE^=EGfnrPGMeeTknIh;e9wY$a zbQ^x}0WWV&1GU_i>K(nIMow4oxx}}MT(MOI^4F%}fR?;KiG4TjEL@9Bzww3v#)@Lv z@_QfnRT6TuV4_mk5Q6Ao;i1}ZTm_V>%2xK|2S*1$&}ohmg!bM{&o6%{U-Vy)P^K8G#VL||$#Hl^K_F?gHeg3G z{S0%OCDNzkVA@fj6DW%@<(W;|BhCQj<pgHMCunZQh78aAn#cbghRGa+nqcqiCQW-a%k^7@tc z-;#d(Tw#kW%dW#8{Jg}nb8M`_F0gEcjaSICSj^JChY#oV_Cy8y!aaNl7t)?$**zBe z!QX_*IugJ=d{RIayOEN_b|IOa+YGJlmKL+ElDs0U39(X2jBXWk;hn-}b*q%jh6=g; zt(Ar7R!R&sQ}LzkQfNO|%7rx=vD`^rPc5{7U$|PzZ6J>}m)mbH$628uiDD_4*(ym3 zt-@`fONN7`I@H`^3+ zT=Au9el`=#=l0hNn@jalXuVwke2Sbx9c-|(8PfD)g*;&3atqCkWVTbxMR%5^Y`YLo z_0tP6K9o)+Iom8P0`|qhSj)4SYFSHY^6V;%$y__1Lq3^1rP*vJAKGZ=L(v@-C(WOlATyAg&9;j<^ouO_ zFn-l<3_OFjME`C%B<_}kFxW0w=N49Xxy_~R!tHPX=AGi- za;#dCU|fS{+wX%dG9@k-t`ps}0b)16*eWlI=DD<+hhJ;vXBD3)<+1^=U6o7D>+^H# z_8!ieez;)%uLd9vKp(_OeBmHnXnkno@<{W1nDHphF4YSmvE6_En*A{fbC|2EI-UsL zvXqTk{K@d^gKXX@Y*_rsh@S)ervP&r;_k@igI%9P%m=$ZCpaILs^w62=T2y@Uu+T} ze~E?psN!cdd&^qvQR3Ut%+u8-$n72wH@8a=)61RkQ2sEQc%tHzGa|p(H#ccGPjH+w z^?Z(NmmtSLtlfdU-&J!8W$yK*rNs>;|Af?hxp0)RI=ctC4Rcz$u(Mh%OMUaFSWb#v zTvOc%6%zT)C1o9QY`zM4Fqv(uxvm52D9k6JLN+FrC2_Y94;#-m^feU4?hnO>HYQRj z8Le`Oyt;<$U1Juc{oVZLdT8|!>l@73=r?42Bf~lr)7I5kA0Y_b^Vup8$XR1aqkw@8EoD#(Q!(72+Ml1Ki()U{4){xd7{s@OEJ{ z7FZsPAM(KhwQ#PitRybHCR6cR<{S#IMliyteIfl z8SSWb!e%kI)Pc2dnAEf6SQx%{O0#7$XJD<>QP!(iLrm?j-k#fA-ev`?mzXeyV9t^C z9^tW_YOe@$dnq{p_0>H5iK#YU!LL|!jbJS|!tP*2b_wn`;!uSRVQuy$tkJT8I9xhx znPr#u7O_%`-NMLgsRE_j*bdfi7qGy)vYvqZJd}6Y8`yPa9lFNz$Z8GmS1UjY_dr^} zvT%ZwedqE1Rp7zS6Fj6|oJp7be!N}BnlQ^Q<3%YVlW+6*6l%Y;dN2}zU{J7vSWyS% zvw?rbQ_)?&AHKvAF~XV@p_TeG5sgM-(FW|I$*1vQ`c4Rn;&WR6SOy;wqpJ05a!;5* z{uWDwJzI`KDSb}YXOEyzKoJ5WI!U_17_c!?&s1|i-mhT)`Sg}*j}6jEp;rW8SA~fP zYkeGNmgSJXyFKgoN8oOLfq;VIJ>K9&`M{X6u`ZWWYS-*i7wDrEl!pzfekS{nUTM5S z{hi&l;GYnWpx(@`VE-%Y2lR(=f&NmqtJF%f8(6=^XKWh#`&nE6CN?SKgWte) z;Dhr9@?U#dE>z)+fUNJK#x9l@*OeHAM2YP=;1|*OspE}CR{0TApb^*vnsU*!_EOdY zNZGuS_WM`T+lR8hi1a~}Pwc+}hbF31na)F$`L~KqR>uKIBmG_? zY|#&9KYSE=?5Z3R74e~UA8Om1X|`VM0>3DG1zZ=p45@E?2kDCh5A<4B<`;1?C0-Cy z!QmBftl?$go{H420ecwvUEkL9>ob2)$ygn7UJKc?Z1+0&+q#CI-6s#{@M;<_3Gc1; z7;0Y*@w24jNwJIRhU$+iWRTX7sVP2UAZM}Gf0!Bv_F-A!iV`2=Y;qg)F~tuEs_-%S z1I8Mtm)Pn!NEAeTVZHw#|0nST&c>>RP&NUBDY%$NoU92tN{vlj@lu$Z4azS?o{jy-MVfB&6CKu_W=LaH7gz z!~R$HH2HnDnIELi zi!#~d9i*?JJh;>BV;WClA0z*Qec+r4<`P{rO2JR24&)aJ)=_wFOL9{L``^&}tMLGhVxA8qf5Sep05!xc#BnmzWk1BH zIbtuMR^t;r|C9J+sXK$2RexS|_a83)t#m=&m371ma{9$@;@Wu$>QFgYmKxukaP3U;=f zPm%ox2wzQO=r|<)g)U5;NX4(jKg~bMAe-hd4icM0e}prBs4;5irS|h7)$ikFWe`><&`1$Cq3YwK^onE Oj{izI9BKG}6#Xy5HKd~e literal 0 HcmV?d00001 diff --git a/tests/bundle/patient.parquet b/tests/bundle/patient.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0a8280224d50f5010bf2d17112e82ec0654221c1 GIT binary patch literal 5217 zcmcgw&2QUQ5~pNUb~bM64N5^ObTEXP$RUX(#mYuSx4@%p$x;<3R%}w@Vqx%066Hrq z)u-h&m!6BoVh@XAPm7{h^t8vK$D)5hZUuViq38Vr_EdD{k@~WzB52&w_9HnnZ{{~& zZ)80sl@uT1Bm6sMKFD*oIqv42b&lhns(s6J`aH+q<|B7OVv7&+;rK)D&br5nP98w| z#&!_aP#_TEgwF$UelwQg!dtQEX4mYs2VLDf8Fx%R7zsuG5s3UNAbcQf_^XPcDUJ_v zux!DNxLm zWKH-sh?wR#`=WW5zcWE|dmmBQDmBB1%ETu>JV=j*UH%nBKfN(S6mFP~VRjj+&jOKu z2ZYbx5Pk?TR0}G6ac%IMlN5$F|GNW`yOO(T2&wKG0Y_(V^Ozum|JjK1K`(J^(rX7Ju8#E*4TWz`<`Mc!%8qzXrV46ojp_%G=p2Q{2Y^^=d^x#(}z& zn8}t9Fqb{&BB5A3_VINeRj+@%2-kER{3b2@`i=QpHk0ss7JKB!^+-tAi2QToZH|lG zjfOj_W2n98%Z=v=x6AZ49oSAU@!s}d5=)(z`0L&6ge&1TrO5=JCmhu_VT&`M6eQ}F zNsY5bnrrB+5AzfR69dQ6+lH9{X=h+}##2UHYngf<>9*Nd4YjX=`16G6fVkx}aPR3! z{@$a+ix)F6izpKF!y7d%;926n7Bcks#a#gCd7H3|Ws5HDdhNI)ciGmAe!cdl<8;~N z0_%!ZNZ_j`c`1~Wq2?k#&gCC@b$bDH&0*HruMF6%XFXsHc4j0qH_(eiS1opulm52>^#&-gmL00@z(WV2XAnjGtM?>vU>TAwp>xm_5 zMVhUva^_sAHtSkJN-Oe3{c%29FVbYMm@Bok^hHXOGZPuAkBbkB`9A2&G&T7Ybqoyo zqF2rlOR*`{3YEHM=ljYtkSkCUrMWr znd*Z|MlYaVVrl8pM~ZxubonW26!M^>(k!;x4Rc(gnte1>3g8!F%9SMPpw@qqD`o4_ z0`^Okmml=3DXv^;tW=MNMzz#ZB-frV{uOe1ArG<4U(RDSfQ&YpvB=r6=1H8Q1a z(yWRU?HBE=rP@-<$T5Cj0Y5-TvNny=MYf#7JWb+CELA?XR^aL=uAXa;tL8{O?GWO3 zxDYQ#VKq`*g=c~Pd&Q?_)3H%#9zacaHKcT)e$T7vSqztC9I>g{m8PDW&rPLiK+R#S z!T-K~NayQxShG8{_Q1`BCQHehjPeK5+OIzwVLc9Na>~nfh1SwiQ%_6h$98rIebv+~ zmUml~rJj<{zKX10ZlNc#yaXf;U#zpUWI2Yb&M@@DH{Wz%?AN?=W%=BJ^DueT`h|RZ=ZkJj5VP#q zJCAy*TpHJ^r3~wLN6f(USbLzex-x8Otf%P^>HyBF;>FRky-~SEEUcY5jA^A>x>V)t zfaz!z2am12QPD}_Nse=ZkVCJ2;n)}P=+CpGzJaqKi$Y~2=3ovGa)JvR68opS!`)qY z5lNE~d$_Xsz+OoOZ`D+`1HD1be72`l6{_VQjjE|f$YS#)2iD#Bf+Qlc@gufisG-b3 zkqxCBqq_5nNwLMZ&X7YsEPMM2IeiL`2>-aolrSfzqmqz)%HoRG)+Ao=JRqX$H=tCM|Ccp9|hQ-;5U^m+gB^{_8-`hm>vi<4f#g_@93Hm)qKcFA?F(#h(H5d{3eP literal 0 HcmV?d00001 diff --git a/tests/test_ingest.py b/tests/test_ingest.py index b027ac9..e85a073 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -5,17 +5,28 @@ format_dates, create_dict_wide, create_dict_long, + generate_metadata, + write_metadata, + checksum, ) from fhirflat.resources.encounter import Encounter from fhirflat.resources.observation import Observation import pandas as pd from pandas.testing import assert_frame_equal import os +import sys import shutil +from pathlib import Path from decimal import Decimal import numpy as np import pytest +if sys.version_info < (3, 11): # tomllib was introduced in 3.11 + import tomli +else: + import tomllib as tomli + +METADATA_CHECKSUM = "64c9da8c3bef0174342de0aeb335282ac05bfa6b4ef94ce4a523fb446858a6bf" FIELD_VAL_ROW_WIDE = pd.Series( { "dates_enrolment": "2021-04-02", @@ -925,6 +936,28 @@ def test_convert_data_to_flat_wrong_mapping_type_error(): ) +def test_generate_metadata(): + meta = generate_metadata("tests/bundle") + assert meta[0]["checksum"] == METADATA_CHECKSUM + assert meta[0]["N"] == 2 + assert meta[1] == { + "condition.parquet": "cfe0c5f792e47facffda38d0ddaf2f8b87613998eb99aefa56d99551dcb7bf80", + "encounter.parquet": "165f2295e6419f9674c909b53553006e9715691c6a3f1a739f4313c08fde0747", + "patient.parquet": "710c73bf95f3b5d95bfdcfed18c9c41609a3fe00dd32590d43372e3220b8e971", + } + + +def test_write_metadata(): + meta = generate_metadata("tests/bundle") + write_metadata(*meta, Path("tests/bundle/fhirflat.toml")) + metadata = tomli.loads(Path("tests/bundle/fhirflat.toml").read_text())["metadata"] + assert metadata["checksum"] == METADATA_CHECKSUM + assert metadata["N"] == 2 + assert checksum("tests/bundle/sha256sums.txt") == METADATA_CHECKSUM + os.remove("tests/bundle/fhirflat.toml") + os.remove("tests/bundle/sha256sums.txt") + + def test_convert_data_to_flat_local_mapping(): output_folder = "tests/ingestion_output" mappings = { From b75e0541bf05add89c47b38114ca31de8736bc83 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:12:01 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .readthedocs.yaml | 2 +- README.md | 6 +++--- docs/conf.py | 4 ++-- docs/howto/conversion-data.md | 6 +++--- docs/howto/conversion-fhir.md | 4 ++-- docs/index.md | 2 +- docs/resources.rst | 2 +- docs/resources_base.rst | 2 +- docs/spec/fhirflat.md | 6 +++--- docs/spec/isaric-fhir-extensions.rst | 2 +- docs/spec/mapping.md | 2 +- fhirflat/__init__.py | 2 +- fhirflat/fhir2flat.py | 1 - fhirflat/resources/extension_validators.py | 6 +++--- tests/data/patient.ndjson | 2 +- tests/dummy_data/combined_dummy_data.csv | 2 +- tests/dummy_data/encounter_dummy_data_multi_patient.csv | 2 +- tests/dummy_data/encounter_dummy_data_single.csv | 2 +- tests/dummy_data/observation_dummy_mapping.csv | 2 +- tests/dummy_data/vital_signs_dummy_data.csv | 2 +- 20 files changed, 29 insertions(+), 30 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 69d03e6..3d77cb8 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -22,4 +22,4 @@ sphinx: python: install: - method: pip - path: .[docs] \ No newline at end of file + path: .[docs] diff --git a/README.md b/README.md index c601973..c472ab1 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ for the [ISARIC](https://isaric.org) 3.0 project and data pipelines. Portions of the code are specific to the ISARIC project, such as ISARIC specific FHIR extensions. -The FHIRflat FHIR resources are derived from the +The FHIRflat FHIR resources are derived from the [fhir.resources](https://github.com/nazrulworld/fhir.resources) package. For more information, howtos and tutorials, see the @@ -35,8 +35,8 @@ https://github.com/globaldothealth/fhirflat/main.tar.gz ## Development To test and develop fhirflat, from a cloned version of fhirflat use an editable install -including the development dependencies(`pip install -e ".[dev]"`). This will allow you +including the development dependencies(`pip install -e ".[dev]"`). This will allow you to test the packages, and installs formatting and linting tools, and [pre-commit](https://pre-commit.com). -Setup pre-commit hooks (`pre-commit install`) which will do linting checks before commit. \ No newline at end of file +Setup pre-commit hooks (`pre-commit install`) which will do linting checks before commit. diff --git a/docs/conf.py b/docs/conf.py index 2cb2a65..ffc7e46 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -37,7 +37,7 @@ html_theme = "better" html_static_path = ["_static"] -html_theme_path=[better.better_theme_path] +html_theme_path = [better.better_theme_path] html_short_title = "Home" html_theme_options = { @@ -45,4 +45,4 @@ "sidebarwidth": "25rem", "cssfiles": ["_static/style.css"], "showheader": False, -} \ No newline at end of file +} diff --git a/docs/howto/conversion-data.md b/docs/howto/conversion-data.md index 2343d83..b3ed308 100644 --- a/docs/howto/conversion-data.md +++ b/docs/howto/conversion-data.md @@ -11,14 +11,14 @@ fhirflat transform data-file google-sheet-id date-format timezone-name Here *data-file* data file that fhirflat will transform, and *google-sheet-id* is the unique ID of the google sheet containing the mapping information (found in the url; the format -if usually https://docs.google.com/spreadsheets/d/{spreadsheet_id}/edit#gid={sheet_id}, +if usually https://docs.google.com/spreadsheets/d/{spreadsheet_id}/edit#gid={sheet_id}, you want the spreadsheet_id. The sheet has to be public, i.e. share settings must be set to 'Anyone with the link' for this to work). *date-format* is the format dates follow in the raw data, e.g. a "2020-04-20" date has a date format of "%Y-%m-%d", and *timezone* -is the time zone the data was recorded in, e.g. "America/New_York". A full list of +is the time zone the data was recorded in, e.g. "America/New_York". A full list of timezones can be found [here](https://nodatime.org/timezones). -Further information on the structure of the mapping file can be found +Further information on the structure of the mapping file can be found [in the specification](../spec/mapping.md) ## Library diff --git a/docs/howto/conversion-fhir.md b/docs/howto/conversion-fhir.md index 2095a72..c0be300 100644 --- a/docs/howto/conversion-fhir.md +++ b/docs/howto/conversion-fhir.md @@ -38,7 +38,7 @@ from fhirflat import Patient Patient.fhir_file_to_flat("patient_export.ndjson") ``` creates a "patient_export.parquet" FHIRflat file. -This first initialises a `Patient` data class for each row to make use of the Pydantic +This first initialises a `Patient` data class for each row to make use of the Pydantic data validation, then creates a FHIRflat file. ## From FHIRflat @@ -47,4 +47,4 @@ FHIR resources can also be created directly from FHIRflat files Patient.from_flat("patient_flat.parquet") ``` which returns either a single Patient resource, or a list of Patient resources if -the Parquet file contains multiple rows of data. \ No newline at end of file +the Parquet file contains multiple rows of data. diff --git a/docs/index.md b/docs/index.md index 5f99bda..a8f00fc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -100,4 +100,4 @@ maxdepth: 1 fhirflat resources_base resources -``` \ No newline at end of file +``` diff --git a/docs/resources.rst b/docs/resources.rst index 623aafc..89ea4fc 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -1,2 +1,2 @@ .. automodule:: fhirflat.resources - :members: \ No newline at end of file + :members: diff --git a/docs/resources_base.rst b/docs/resources_base.rst index 5abe80d..ae58981 100644 --- a/docs/resources_base.rst +++ b/docs/resources_base.rst @@ -2,4 +2,4 @@ FHIRFlat Base Class =================== .. autoclass:: fhirflat.resources.base.FHIRFlatBase - :members: \ No newline at end of file + :members: diff --git a/docs/spec/fhirflat.md b/docs/spec/fhirflat.md index c84e10d..709c07c 100644 --- a/docs/spec/fhirflat.md +++ b/docs/spec/fhirflat.md @@ -51,7 +51,7 @@ normalising a FHIR structure, noted below. "display": "Donald Duck" } ``` - becomes + becomes | subject.reference | |-------------------| |"Patient/f001" | @@ -154,7 +154,7 @@ normalising a FHIR structure, noted below. }, ] ``` - becomes + becomes | encounter.diagnosis_dense | |--------------------------------------| - |"[{"condition": [{"reference"...}]}]" | \ No newline at end of file + |"[{"condition": [{"reference"...}]}]" | diff --git a/docs/spec/isaric-fhir-extensions.rst b/docs/spec/isaric-fhir-extensions.rst index ecd125c..75f37fe 100644 --- a/docs/spec/isaric-fhir-extensions.rst +++ b/docs/spec/isaric-fhir-extensions.rst @@ -1,3 +1,3 @@ .. automodule:: fhirflat.resources.extensions :members: - :exclude-members: elements_sequence \ No newline at end of file + :exclude-members: elements_sequence diff --git a/docs/spec/mapping.md b/docs/spec/mapping.md index fa80a4e..52b405c 100644 --- a/docs/spec/mapping.md +++ b/docs/spec/mapping.md @@ -114,4 +114,4 @@ Conditional assignments are made using the **`if not`** statement. A value of FHIR resource entries can be lists, such as a set of codes. This is handled in the mapping specification by allowing multiple matches for a (`raw_variable`, `raw_response`) tuple. The corresponding resource assignments (such as -`coding.code`) are then collected into lists. \ No newline at end of file +`coding.code`) are then collected into lists. diff --git a/fhirflat/__init__.py b/fhirflat/__init__.py index 3b9678b..d886e4c 100644 --- a/fhirflat/__init__.py +++ b/fhirflat/__init__.py @@ -20,4 +20,4 @@ # Update this when bumping version in pyproject.toml! __version__ = "0.1.0" -__all__ = ["convert_data_to_flat"] \ No newline at end of file +__all__ = ["convert_data_to_flat"] diff --git a/fhirflat/fhir2flat.py b/fhirflat/fhir2flat.py index dd1dd43..8b83d89 100644 --- a/fhirflat/fhir2flat.py +++ b/fhirflat/fhir2flat.py @@ -213,7 +213,6 @@ def flattenExtensions(df: pd.DataFrame, extension: str) -> pd.DataFrame: """ def expand_and_redefine(df, extension): - def redefine( row: pd.Series | pd.DataFrame, extension: str ) -> pd.Series | pd.DataFrame: diff --git a/fhirflat/resources/extension_validators.py b/fhirflat/resources/extension_validators.py index 322f0b8..ec2fe24 100644 --- a/fhirflat/resources/extension_validators.py +++ b/fhirflat/resources/extension_validators.py @@ -96,9 +96,9 @@ def fhir_model_validator( self, model_name: str, v: Union[StrBytes, dict, Path, FHIRAbstractModel] ): """ """ - model_class: Type[BaseModel] | Type[FHIRAbstractModel] = ( - self.get_fhir_model_class(model_name) - ) + model_class: Type[BaseModel] | Type[ + FHIRAbstractModel + ] = self.get_fhir_model_class(model_name) if isinstance(v, (str, bytes)): try: diff --git a/tests/data/patient.ndjson b/tests/data/patient.ndjson index d030f0c..1f9dea0 100644 --- a/tests/data/patient.ndjson +++ b/tests/data/patient.ndjson @@ -1,3 +1,3 @@ {"resourceType":"Patient","id":"ewnMwMK-UNvVvM.bakFSlkw3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9254"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11363"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TYO3ktvhYAUhbae7JuBwDdpyIbUZc8kZG.bMW2ZwVnwgB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ewnMwMK-UNvVvM.bakFSlkw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005294"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11363"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Test ADVANCEPREPTWO","family":"ADVANCEPREPTWO","given":["Test"],"_family":{"extension":[{"valueString":"Advancepreptwo","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2006-10-07","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} {"resourceType":"Patient","id":"exU8JSL0p8npSw5g1QYAyOw3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9340"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11449"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"TwzgMHMPt5OhnHkFA2H2DudL3FU8qcRZkZyn0F3extkAB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"exU8JSL0p8npSw5g1QYAyOw3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005380"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11449"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"BcnScTenAugTwentyOne UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"BcnScTenAugTwentyOne UPGRADETEST","family":"UPGRADETEST","given":["BcnScTenAugTwentyOne"],"_family":{"extension":[{"valueString":"UpgradeTest","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"gender":"female","birthDate":"2019-09-21","deceasedBoolean":false,"maritalStatus":{"text":"Single"}} -{"resourceType":"Patient","id":"ezER-U3fAMP-WvI-Fc8V9wQ3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9411"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11522"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"T2oDrcOUw0w.1ZUdthu24c7V95lTOTEOUPRqBqGn99KEB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ezER-U3fAMP-WvI-Fc8V9wQ3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005452"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11522"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"telecom":[{"system":"phone","value":"07594 832748","use":"mobile"},{"system":"email","value":"fred@email.com"}],"gender":"male","birthDate":"1967-01-19","deceasedBoolean":false,"address":[{"use":"old","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG"},{"use":"home","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG","period":{"start":"2022-01-19"}}],"maritalStatus":{"text":"Single"},"communication":[{"language":{"coding":[{"system":"urn:ietf:bcp:47","code":"en","display":"English"}],"text":"English"},"preferred":true}]} \ No newline at end of file +{"resourceType":"Patient","id":"ezER-U3fAMP-WvI-Fc8V9wQ3","identifier":[{"use":"usual","type":{"text":"EPIC"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.5.737384.0","value":"E9411"},{"use":"usual","type":{"text":"EXTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":"Z11522"},{"use":"usual","type":{"text":"FHIR"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-dstu2-fhir-id","value":"T2oDrcOUw0w.1ZUdthu24c7V95lTOTEOUPRqBqGn99KEB"},{"use":"usual","type":{"text":"FHIR STU3"},"system":"http://open.epic.com/FHIR/StructureDefinition/patient-fhir-id","value":"ezER-U3fAMP-WvI-Fc8V9wQ3"},{"use":"usual","type":{"text":"PAS"},"system":"urn:oid:2.16.840.1.113883.2.1.3.12.1.1","value":"2005452"},{"use":"usual","type":{"text":"INTERNAL"},"system":"urn:oid:1.2.840.114350.1.13.520.3.7.2.698084","value":" Z11522"}],"active":true,"name":[{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"official","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}},{"extension":[{"valueCode":"NL4","url":"http://hl7.org/fhir/StructureDefinition/humanname-assembly-order"}],"use":"usual","text":"Fred TEST","family":"TEST","given":["Fred"],"_family":{"extension":[{"valueString":"Test","url":"http://hl7.org/fhir/StructureDefinition/humanname-own-name"}]}}],"telecom":[{"system":"phone","value":"07594 832748","use":"mobile"},{"system":"email","value":"fred@email.com"}],"gender":"male","birthDate":"1967-01-19","deceasedBoolean":false,"address":[{"use":"old","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG"},{"use":"home","line":["123 Anywhere"],"city":"Bristol","postalCode":"BS1 6JY","country":"ENG","period":{"start":"2022-01-19"}}],"maritalStatus":{"text":"Single"},"communication":[{"language":{"coding":[{"system":"urn:ietf:bcp:47","code":"en","display":"English"}],"text":"English"},"preferred":true}]} diff --git a/tests/dummy_data/combined_dummy_data.csv b/tests/dummy_data/combined_dummy_data.csv index eb4d59d..cc6eb3c 100644 --- a/tests/dummy_data/combined_dummy_data.csv +++ b/tests/dummy_data/combined_dummy_data.csv @@ -2,4 +2,4 @@ 1,10,2020-05-01,0,,,,,,,cough,,,7,2020-01-01,36.2,120,30,70,120,5,,75,1,1,1,150 2,11,,1,2021-04-01,18:00,fish,1,,2,,,2021-04-10,1,2021-02-02,37,100,40,80,130,6,10,85,0,2,1,200 3,12,,1,2021-05-10,17:30,,1,,1,flu,,2021-05-15,4,2022-03-03,35.5,70,50,90,140,7,,95,0,3,1, -4,13,,1,2022-06-15,21:00,dolphin,0,Malaria,,,,2022-06-20,2,,,,,,,,,,,,, \ No newline at end of file +4,13,,1,2022-06-15,21:00,dolphin,0,Malaria,,,,2022-06-20,2,,,,,,,,,,,,, diff --git a/tests/dummy_data/encounter_dummy_data_multi_patient.csv b/tests/dummy_data/encounter_dummy_data_multi_patient.csv index eedb6fc..768b02f 100644 --- a/tests/dummy_data/encounter_dummy_data_multi_patient.csv +++ b/tests/dummy_data/encounter_dummy_data_multi_patient.csv @@ -2,4 +2,4 @@ 1,10,2020-05-01,0,,,,,,,cough,,,7 2,11,,1,2021-04-01,18:00,fish,1,,2,,,2021-04-10,1 3,12,,1,2021-05-10,17:30,,1,,1,flu,,2021-05-15,4 -4,13,,1,2022-06-15,21:00,dolphin,0,Malaria,,,,2022-06-20,2 \ No newline at end of file +4,13,,1,2022-06-15,21:00,dolphin,0,Malaria,,,,2022-06-20,2 diff --git a/tests/dummy_data/encounter_dummy_data_single.csv b/tests/dummy_data/encounter_dummy_data_single.csv index d0c908d..8cf2bef 100644 --- a/tests/dummy_data/encounter_dummy_data_single.csv +++ b/tests/dummy_data/encounter_dummy_data_single.csv @@ -1,2 +1,2 @@ subjid,visitid,dates_enrolment,dates_adm,dates_admdate,dates_admtime,non_encounter_field,outco_denguediag,outco_denguediag_main,outco_denguediag_class,outco_not_dengue,outco_secondiag_oth,outco_date,outco_outcome -2,11,2021-04-02,1,2021-04-01,18:00,fish,1,,2,,,2021-04-10,1 \ No newline at end of file +2,11,2021-04-02,1,2021-04-01,18:00,fish,1,,2,,,2021-04-10,1 diff --git a/tests/dummy_data/observation_dummy_mapping.csv b/tests/dummy_data/observation_dummy_mapping.csv index 6e39236..1d0ed8c 100644 --- a/tests/dummy_data/observation_dummy_mapping.csv +++ b/tests/dummy_data/observation_dummy_mapping.csv @@ -17,4 +17,4 @@ vital_avpu,"1, Alert",,http://terminology.hl7.org/CodeSystem/observation-categor ,"3, Pain",,http://terminology.hl7.org/CodeSystem/observation-category,vital-signs,Vital Signs,,https://snomed.info/sct,1.10444E+15,Alert Confusion Voice Pain Unresponsive scale score (observable entity),Patient/+,Encounter/+,,,,,https://snomed.info/sct,450847001,Responds to pain (finding),, ,"4, Unresponsive",,http://terminology.hl7.org/CodeSystem/observation-category,vital-signs,Vital Signs,,https://snomed.info/sct,1.10444E+15,Alert Confusion Voice Pain Unresponsive scale score (observable entity),Patient/+,Encounter/+,,,,,https://snomed.info/sct,422768004,Unresponsive (finding),, vital_gcs,,,http://terminology.hl7.org/CodeSystem/observation-category,vital-signs,Vital Signs,,https://snomed.info/sct,9269-2,Glasgow coma score total,Patient/+,Encounter/+,,,,,,,,, -vital_urineflow,,,http://terminology.hl7.org/CodeSystem/observation-category,vital-signs,Vital Signs,,https://loinc.org,9192-6,Urine output 24 hour,Patient/+,Encounter/+,,https://snomed.info/sct,258861009,Millilitre/24 hours (qualifier value),,,,, \ No newline at end of file +vital_urineflow,,,http://terminology.hl7.org/CodeSystem/observation-category,vital-signs,Vital Signs,,https://loinc.org,9192-6,Urine output 24 hour,Patient/+,Encounter/+,,https://snomed.info/sct,258861009,Millilitre/24 hours (qualifier value),,,,, diff --git a/tests/dummy_data/vital_signs_dummy_data.csv b/tests/dummy_data/vital_signs_dummy_data.csv index b03903b..2e83665 100644 --- a/tests/dummy_data/vital_signs_dummy_data.csv +++ b/tests/dummy_data/vital_signs_dummy_data.csv @@ -1,4 +1,4 @@ subjid,visitid,daily_date,vital_highesttem_c,vital_hr,vital_rr,vital_systolicbp,vital_diastolicbp,vital_spo2,vital_fio2spo2_02110,vital_fio2spo2_pcnt,vital_capillaryr,vital_avpu,vital_gcs,vital_urineflow 1,10,2020-01-01,36.2,120,30,70,120,5,,75,1,1,1,150 2,11,2021-02-02,37,100,40,80,130,6,10,85,0,2,1,200 -3,12,2022-03-03,35.5,70,50,90,140,7,,95,0,3,1, \ No newline at end of file +3,12,2022-03-03,35.5,70,50,90,140,7,,95,0,3,1, From 69b2799a57c56f37c9fe6259c5f745ef16a95f2c Mon Sep 17 00:00:00 2001 From: Pip Liggins Date: Mon, 24 Jun 2024 12:29:49 +0100 Subject: [PATCH 3/3] Fix codecov complaint --- tests/test_ingest.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_ingest.py b/tests/test_ingest.py index e85a073..1a05993 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -22,7 +22,7 @@ import pytest if sys.version_info < (3, 11): # tomllib was introduced in 3.11 - import tomli + import tomli # pragma: no cover else: import tomllib as tomli @@ -941,9 +941,9 @@ def test_generate_metadata(): assert meta[0]["checksum"] == METADATA_CHECKSUM assert meta[0]["N"] == 2 assert meta[1] == { - "condition.parquet": "cfe0c5f792e47facffda38d0ddaf2f8b87613998eb99aefa56d99551dcb7bf80", - "encounter.parquet": "165f2295e6419f9674c909b53553006e9715691c6a3f1a739f4313c08fde0747", - "patient.parquet": "710c73bf95f3b5d95bfdcfed18c9c41609a3fe00dd32590d43372e3220b8e971", + "condition.parquet": "cfe0c5f792e47facffda38d0ddaf2f8b87613998eb99aefa56d99551dcb7bf80", # noqa: E501 + "encounter.parquet": "165f2295e6419f9674c909b53553006e9715691c6a3f1a739f4313c08fde0747", # noqa: E501 + "patient.parquet": "710c73bf95f3b5d95bfdcfed18c9c41609a3fe00dd32590d43372e3220b8e971", # noqa: E501 }