-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_1_FEATURE_SELECTION.flw
executable file
·1 lines (1 loc) · 13.1 KB
/
2_1_FEATURE_SELECTION.flw
1
{"creationTimeStamp":null,"modifiedTimeStamp":null,"createdBy":null,"modifiedBy":null,"version":2,"id":null,"name":"2_1_FEATURE_SELECTION.flw","description":null,"properties":{"UI_PROP_DF_OPTIMIZE":"false","UI_PROP_DF_ID":"d1bde3e0-7811-4516-a939-2d4bbb1572d0","UI_PROP_DF_EXECUTION_ORDERED":"false"},"links":[],"nodes":{"id-1732739458804-2673":{"nodeType":"step","version":1,"id":"id-1732739458804-2673","name":"Logistic regression","note":{"version":1,"id":"id-1732739458804-2677","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":0,"properties":{"UI_PROP_XPOS":"226","UI_PROP_YPOS":"79","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"5daba458-2310-41d9-b9ef-23e5fe3aec06|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"87100189-5423-496e-ab1f-2adecedf247a|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc logselect data=MYCAS.DATA_FS_PS_CORR_5A noclprint;\n\tpartition role=Selected (test='0');\n\tmodel y(event='1')=A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 \n\t\tA43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / link=logit;\n\tselection method=stepwise\n (select=aic stop=aic choose=aic) hierarchy=none;\n\toutput out=MYCAS.OUTPUT_RES xbeta predicted lower upper copyvars=(_all_);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\twhere selected eq 0;\n\tif _pred_ ge 0.5 then y_pred = 1;\n\telse y_pred = 0;\nrun;\n\n%score(output_res, y, y_pred);\n\n/* proc hpbin data=MYCAS.DATA_FS_PS_CORR_5A numbin=10 bucket woe */\n/* \t\toutput=MYCAS.DATA_FS_PS_CORR_5A_WOE noprint; */\n/* \tinput A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 A43 A45 A46 A49 */\n/* \t\tA51 A54 A55 A57 A58 A59 A60 A61 A63 A64; */\n/* \ttarget y / level=binary; */\n/* \tid _all_; */\n/* run; */\n/* */\n/* proc logselect data=MYCAS.DATA_FS_PS_CORR_5A_WOE noclprint; */\n/* \tpartition role=Selected (test='0'); */\n/* \tmodel y(event='1')=A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 */\n/* \t\tA43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / link=logit; */\n/* \tselection method=stepwise */\n/* (select=aic stop=aic choose=aic) hierarchy=none; */\n/* \toutput out=MYCAS.OUTPUT_RES xbeta predicted lower upper copyvars=(_all_); */\n/* run; */\n/* */\n/* data output_res; */\n/* \tset mycas.output_res; */\n/* \twhere selected eq 0; */\n/* \tif _pred_ ge 0.5 then y_pred = 1; */\n/* \telse y_pred = 0; */\n/* run; */\n/* */\n/* %score(output_res, y, y_pred); */","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733645604833-15132":{"nodeType":"step","version":1,"id":"id-1733645604833-15132","name":"Gradient boosting","note":{"version":1,"id":"id-1733645604834-15136","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":1,"properties":{"UI_PROP_XPOS":"837","UI_PROP_YPOS":"79","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"acd9ac55-4907-48e6-bc67-b17497e9d8bc|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"56e34288-7825-43c8-b978-5fc9a08ddb71|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc gradboost data=MYCAS.DATA_FS_PS_CORR_5A;\n\tpartition role=Selected (test='0');\n\ttarget y / level=nominal;\n\tinput A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 A43 A45 A46 A49 \n\t\tA51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / level=interval;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 \n\t\tA30 A34 A36 A37 A41 A43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1731527809428-2463":{"nodeType":"step","version":1,"id":"id-1731527809428-2463","name":"Train-test split","note":{"version":1,"id":"id-1731527809429-2467","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":2,"properties":{"UI_PROP_XPOS":"56","UI_PROP_YPOS":"82","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"49c0baf4-96a1-4295-a9ed-9c4a2e3c6830|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"170827b8-bdf6-4615-a676-e4fbed466e4c|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc surveyselect data=default.data_fs_ps_corr_5a rate=0.7 outall out=data_fs_ps_corr_5a seed=1234 noprint;\nrun;\n\nproc sql noprint;\n\tselect name\n\tinto :list separated by ' '\n\tfrom sashelp.vcolumn\n\twhere libname eq 'WORK' and memname eq 'DATA_FS_PS_CORR_5A' and name ne 'year' and name ne 'y' and name ne 'Selected'\n\t;\nquit;\n\n%macro check_stability();\n\t%let i=1;\n\t%let var = %scan(&list., &i., ' ');\n\t%do %while(&var ne);\n\t\tproc ttest data=data_fs_ps_corr_5a sides=2 h0=0 plots=none;\n\t\t\tclass Selected;\n\t\t\tvar &var.;\n\t\trun;\n\t\t%let i=%eval(&i+1);\n\t\t%let var = %scan(&list., &i., ' ');\n\t%end;\n%mend;\n\n%check_stability();\n\nproc casutil;\n load data=data_fs_ps_corr_5a casout=\"data_fs_ps_corr_5a\" replace;\nrun;","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733644074164-10157":{"nodeType":"step","version":1,"id":"id-1733644074164-10157","name":"Random forest","note":{"version":1,"id":"id-1733644074165-10161","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":3,"properties":{"UI_PROP_XPOS":"644","UI_PROP_YPOS":"92","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"24d6b40c-526b-4033-a5ac-51b1192b186d|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"0b89fcbb-a99e-4cda-87f4-0c6e2f0d41e0|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc forest data=MYCAS.DATA_FS_PS_CORR_5A;\n\tpartition role=Selected (test='0');\n\ttarget y / level=nominal;\n\tinput A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 A43 A45 A46 A49 \n\t\tA51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / level=interval;\n\tgrow gini;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 \n\t\tA30 A34 A36 A37 A41 A43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733645902837-15992":{"nodeType":"step","version":1,"id":"id-1733645902837-15992","name":"Neural network","note":{"version":1,"id":"id-1733645902839-15996","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":4,"properties":{"UI_PROP_XPOS":"957","UI_PROP_YPOS":"103","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"506fafde-bbec-4a3f-9133-abcc17bd2e67|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"8ddf63ca-d682-4e2f-89c8-99d0281976b0|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"libname _tmpcas_ cas caslib=\"CASUSER\";\n\nproc nnet data=MYCAS.DATA_FS_PS_CORR_5A;\n\tpartition role=Selected (validate='0');\n\ttarget y / level=nominal;\n\tinput A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 A43 A45 A46 A49 \n\t\tA51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / level=interval;\n\thidden 50;\n\thidden 50 / act=logistic;\n\ttrain stagnation=5 outmodel=_tmpcas_._Nnet_model_;\n\toptimization algorithm=sgd regL2=0.1 minibatchsize=50;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 \n\t\tA30 A34 A36 A37 A41 A43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64 \n\t\tSelected);\nrun;\n\nproc delete data=_tmpcas_._Nnet_model_;\nrun;\n\nlibname _tmpcas_;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733641065313-3697":{"nodeType":"step","version":1,"id":"id-1733641065313-3697","name":"Decision tree","note":{"version":1,"id":"id-1733641065315-3701","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":5,"properties":{"UI_PROP_XPOS":"442","UI_PROP_YPOS":"120","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"f4bd34ce-7bc8-48e7-a0a7-13ae11f4bed6|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"9cb74cc5-4d5d-44f4-a1a9-dd80ca013b50|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc treesplit data=MYCAS.DATA_FS_PS_CORR_5A maxdepth=10;\n\tpartition role=Selected (test='0');\n\tinput A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 A30 A34 A36 A37 A41 A43 A45 A46 A49 \n\t\tA51 A54 A55 A57 A58 A59 A60 A61 A63 A64 / level=interval;\n\ttarget y / level=nominal;\n\tgrow gini;\n\tprune c45;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A1 A5 A6 A13 A15 A21 A24 A25 A27 A29 \n\t\tA30 A34 A36 A37 A41 A43 A45 A46 A49 A51 A54 A55 A57 A58 A59 A60 A61 A63 A64);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}}},"parameters":{},"connections":[],"extendedProperties":{},"stickyNotes":[]}