diff --git a/testing/regress/ecl/key/parquet_compress.xml b/testing/regress/ecl/key/parquet_compress.xml index c22e668b5f2..7497771fa7d 100644 --- a/testing/regress/ecl/key/parquet_compress.xml +++ b/testing/regress/ecl/key/parquet_compress.xml @@ -1,4 +1,9 @@ + 0aaatrue + 1aabfalse + 2aactrue + 3aadfalse + 4aaetrue 0aaafalse 1aabfalse 2aactrue @@ -7,18 +12,52 @@ 10aai123 11aaj-987 12aak0 + 300afa32767 + 301afb2147483647 + 302afc9223372036854775807 + 10aai123 + 11aaj-987 + 12aak456 + 13aal789 + 14aam-321 + 340afp127 + 341afq-128 + 342afr0 20aas3.14 21aat-0.5 22aau123.456 + 170adk1.23 + 171adl-9.869999999999999 + 172adm3.14159265358979 + 173adn2.71828182845904 + 174ado-1.41421356237309 + 320afg1.230000019073486 + 321afh-9.869999885559082 + 322afi3.141590118408203 + 30aas1.23 + 31aat-9.869999999999999 + 32aau45.67 + 33aav78.90000000000001 + 34aaw-32.1 30abc123.456789 31abd-987.6543209999999 32abe0.000001 + 40aax12.34 + 41aay-56.78 + 42aaz90.12 + 43aba34.56 + 44abb-78.9 + 50abcHello + 51abdWorld + 52abeTest + 53abfString + 54abgTypes 40abmHello, World! 41abnData Science 42abo12345 @@ -27,13 +66,28 @@ 50abwTHIS IS A "Q" STRING. 51abxANOTHER "EXAMPLE" HERE. 52abyQSTRINGS ARE USEFUL! + 80abrQSTR1 + 81absQSTR2 + 82abtQSTR3 + 83abuQSTR4 + 84abvQSTR5 60acgこんにちは、世界! 61achUnicode characters: ḸḹḾ 62aciṎ Beautiful Unicode Ṙ + 100acbUnicode1 + 101accUnicode2 + 102acdUnicode3 + 103aceUnicode4 + 104acfUnicode5 + 90abwUTF8_1 + 91abxUTF8_2 + 92abyUTF8_3 + 93abzUTF8_4 + 94acaUTF8_5 70acqCafé au lait ☕ 71acr🎉 UTF-8 Characters 🎉 72acsSpecial characters: ©®™ @@ -42,6 +96,11 @@ 80ada01A48D8414D848E900 81adb01F48AB446A76F8923 82adc01A48EC793A76F9400 + 60abh01234567C289C2ABC38DC3AF + 61abiC3BEC39CC2BAC29876543210 + 62abj00C3BF00C3BF00C3BF00C3BF + 63abkC3BF00C3BF00C3BF00C3BF00 + 64abl12345678C290C2ABC38DC3AF 90adkShort text diff --git a/testing/regress/ecl/parquet_compress.ecl b/testing/regress/ecl/parquet_compress.ecl index 1d417f0ff86..987e6e4db5d 100644 --- a/testing/regress/ecl/parquet_compress.ecl +++ b/testing/regress/ecl/parquet_compress.ecl @@ -24,67 +24,33 @@ compressionType := #IFDEFINED(root.compressionType, 'Snappy'); IMPORT Parquet; -// Define datasets -BooleanData := DATASET([{000, 'aaa', 0}, - {001, 'aab', false}, - {002, 'aac', 1}], {UNSIGNED testid, STRING3 testname, BOOLEAN value}); - -IntegerData := DATASET([{010, 'aai', 123}, - {011, 'aaj', -987}, - {012, 'aak', 0}], {UNSIGNED testid, STRING3 testname, INTEGER value}); - -RealData := DATASET([{020, 'aas', 3.14}, - {021, 'aat', -0.5}, - {022, 'aau', 123.456}], {UNSIGNED testid, STRING3 testname, REAL value}); - -DecimalData := DATASET([{030, 'abc', 123.456789}, - {031, 'abd', -987.654321}, - {032, 'abe', 0.000001}], {UNSIGNED testid, STRING3 testname, DECIMAL value}); - -StringData := DATASET([{040, 'abm', 'Hello, World!'}, - {041, 'abn', 'Data Science'}, - {042, 'abo', '12345'}], {UNSIGNED testid, STRING3 testname, STRING value}); - -QStringData := DATASET([{050, 'abw', 'This is a "Q" string.'}, - {051, 'abx', 'Another "example" here.'}, - {052, 'aby', 'Qstrings are useful!'}], {UNSIGNED testid, STRING3 testname, QSTRING value}); - -UnicodeData := DATASET([{060, 'acg', U'こんにちは、世界!'}, - {061, 'ach', U'Unicode characters: ḸḹḾ'}, - {062, 'aci', U'Ṏ Beautiful Unicode Ṙ'}], {UNSIGNED testid, STRING3 testname, UNICODE value}); - -UTF8Data := DATASET([{070, 'acq', U'Café au lait ☕'}, - {071, 'acr', U'🎉 UTF-8 Characters 🎉'}, - {072, 'acs', U'Special characters: ©®™'}], {UNSIGNED testid, STRING3 testname, UTF8 value}); - -DataData := DATASET([{080, 'ada', x'01a48d8414d848e900'}, - {081, 'adb', x'01f48ab446a76f8923'}, - {082, 'adc', x'01a48ec793a76f9400'}], {UNSIGNED testid, STRING3 testname, DATA value}); - -VarstringData := DATASET([{090, 'adk', U'Short text'}, - {091, 'adl', U'A longer variable-length string'}, - {092, 'adm', U'Strings are flexible!'}], {UNSIGNED testid, STRING3 testname, VARSTRING value}); - -VarunicodeData := DATASET([{100, 'adu', U'Variable-length Unicode: こんにちは、世界!'}, - {101, 'adv', U'🌟 Variable-length Unicode Symbols 🌟'}, - {102, 'adw', U'Unicode flexibility is awesome!'}], {UNSIGNED testid, STRING3 testname, VARUNICODE value}); - -// Write datasets to Parquet files -PARALLEL( - ParquetIO.write(BooleanData, '/var/lib/HPCCSystems/mydropzone/Boolean.parquet', TRUE, compressionType), - ParquetIO.write(IntegerData, '/var/lib/HPCCSystems/mydropzone/Integer.parquet', TRUE, compressionType), - ParquetIO.write(RealData, '/var/lib/HPCCSystems/mydropzone/Real.parquet', TRUE, compressionType), - ParquetIO.write(DecimalData, '/var/lib/HPCCSystems/mydropzone/Decimal.parquet', TRUE, compressionType), - ParquetIO.write(StringData, '/var/lib/HPCCSystems/mydropzone/String.parquet', TRUE, compressionType), - ParquetIO.write(QStringData, '/var/lib/HPCCSystems/mydropzone/QString.parquet', TRUE, compressionType), - ParquetIO.write(UnicodeData, '/var/lib/HPCCSystems/mydropzone/Unicode.parquet', TRUE, compressionType), - ParquetIO.write(UTF8Data, '/var/lib/HPCCSystems/mydropzone/UTF8.parquet', TRUE, compressionType), - ParquetIO.write(DataData, '/var/lib/HPCCSystems/mydropzone/Data.parquet', TRUE, compressionType), - ParquetIO.write(VarstringData, '/var/lib/HPCCSystems/mydropzone/Varstring.parquet', TRUE, compressionType), - ParquetIO.write(VarunicodeData, '/var/lib/HPCCSystems/mydropzone/Varunicode.parquet', TRUE, compressionType) -); - -// Output datasets +// Define record structures +BooleanRec := RECORD UNSIGNED testid; STRING3 testname; BOOLEAN value; END; +IntegerRec := RECORD UNSIGNED testid; STRING3 testname; INTEGER value; END; +RealRec := RECORD UNSIGNED testid; STRING3 testname; REAL value; END; +DecimalRec := RECORD UNSIGNED testid; STRING3 testname; DECIMAL value; END; +StringRec := RECORD UNSIGNED testid; STRING3 testname; STRING value; END; +QStringRec := RECORD UNSIGNED testid; STRING3 testname; QSTRING value; END; +UnicodeRec := RECORD UNSIGNED testid; STRING3 testname; UNICODE value; END; +UTF8Rec := RECORD UNSIGNED testid; STRING3 testname; UTF8 value; END; +DataRec := RECORD UNSIGNED testid; STRING3 testname; DATA value; END; +VarstringRec := RECORD UNSIGNED testid; STRING3 testname; VARSTRING value; END; +VarunicodeRec := RECORD UNSIGNED testid; STRING3 testname; VARUNICODE value; END; + +// Read datasets from Parquet files +BooleanData := ParquetIO.Read(BooleanRec, '/var/lib/HPCCSystems/mydropzone/Boolean.parquet'); +IntegerData := ParquetIO.Read(IntegerRec, '/var/lib/HPCCSystems/mydropzone/Integer.parquet'); +RealData := ParquetIO.Read(RealRec, '/var/lib/HPCCSystems/mydropzone/Real.parquet'); +DecimalData := ParquetIO.Read(DecimalRec, '/var/lib/HPCCSystems/mydropzone/Decimal.parquet'); +StringData := ParquetIO.Read(StringRec, '/var/lib/HPCCSystems/mydropzone/String.parquet'); +QStringData := ParquetIO.Read(QStringRec, '/var/lib/HPCCSystems/mydropzone/QString.parquet'); +UnicodeData := ParquetIO.Read(UnicodeRec, '/var/lib/HPCCSystems/mydropzone/Unicode.parquet'); +UTF8Data := ParquetIO.Read(UTF8Rec, '/var/lib/HPCCSystems/mydropzone/UTF8.parquet'); +DataData := ParquetIO.Read(DataRec, '/var/lib/HPCCSystems/mydropzone/Data.parquet'); +VarstringData := ParquetIO.Read(VarstringRec, '/var/lib/HPCCSystems/mydropzone/Varstring.parquet'); +VarunicodeData := ParquetIO.Read(VarunicodeRec, '/var/lib/HPCCSystems/mydropzone/Varunicode.parquet'); + +// Output datasets read from Parquet files OUTPUT(BooleanData, NAMED('BooleanData')); OUTPUT(IntegerData, NAMED('IntegerData')); OUTPUT(RealData, NAMED('RealData')); diff --git a/testing/regress/ecl/parquet_overwrite.ecl b/testing/regress/ecl/parquet_overwrite.ecl index 32b019d1c3b..856c24f77ec 100644 --- a/testing/regress/ecl/parquet_overwrite.ecl +++ b/testing/regress/ecl/parquet_overwrite.ecl @@ -18,8 +18,6 @@ IMPORT Parquet; SingleRowDataset := DATASET([{1, 'SingleRow', TRUE}], {UNSIGNED id, STRING name, BOOLEAN flag}); -ParquetIO.write(SingleRowDataset, '/var/lib/HPCCSystems/mydropzone/SingleRowTest.parquet'); +writeParquetFile := ParquetIO.write(SingleRowDataset, '/var/lib/HPCCSystems/mydropzone/SingleRowTest.parquet'); -ConflictingDataset := DATASET([{2, 'OverwrittenRow', 123}], {UNSIGNED id, STRING name, INTEGER conflict}); // Schema conflict - -ParquetIO.write(ConflictingDataset, '/var/lib/HPCCSystems/mydropzone/SingleRowTest.parquet'); // Assuming TRUE should force overwrite if the parameter is valid +SEQUENTIAL(writeParquetFile, writeParquetFile);