-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2_2_FEATURE_SELECTION.flw
executable file
·1 lines (1 loc) · 11.3 KB
/
2_2_FEATURE_SELECTION.flw
1
{"creationTimeStamp":null,"modifiedTimeStamp":null,"createdBy":null,"modifiedBy":null,"version":2,"id":null,"name":"2_2_FEATURE_SELECTION.flw","description":null,"properties":{"UI_PROP_DF_OPTIMIZE":"false","UI_PROP_DF_ID":"c86924be-934c-45b7-aa8a-0b1f85704888","UI_PROP_DF_EXECUTION_ORDERED":"false"},"links":[],"nodes":{"id-1733646707376-18473":{"nodeType":"step","version":1,"id":"id-1733646707376-18473","name":"Gradient boosting","note":{"version":1,"id":"id-1733646707380-18481","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":0,"properties":{"UI_PROP_XPOS":"827","UI_PROP_YPOS":"50","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"535e3328-3435-4a03-8941-b119ed9eec21|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"d58ed4a3-7941-4c4e-92af-5c6da9a417ef|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc gradboost data=MYCAS.DATA_FS_DT_5B;\n\tpartition role=Selected (test='0');\n\ttarget y / level=nominal;\n\tinput A5 A24 A27 A34 A39 A44 A46 A56 A58 / level=interval;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A5 A24 A27 A34 A39 A44 A46 A56 A58);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1731616871109-744":{"nodeType":"step","version":1,"id":"id-1731616871109-744","name":"Train-test split","note":{"version":1,"id":"id-1731616871110-748","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":1,"properties":{"UI_PROP_XPOS":"147","UI_PROP_YPOS":"56","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"29e4850d-bfb1-4ba9-80e6-fdeb29cd25cb|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"2b6eff19-8695-4567-bb83-a7d47329974c|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc surveyselect data=default.DATA_FS_DT_5B rate=0.7 outall out=DATA_FS_DT_5B seed=1234 noprint;\nrun;\n\nproc sql noprint;\n\tselect name\n\tinto :list separated by ' '\n\tfrom sashelp.vcolumn\n\twhere libname eq 'WORK' and memname eq 'DATA_FS_DT_5B' and name ne 'year' and name ne 'y' and name ne 'Selected'\n\t;\nquit;\n\n%macro check_stability();\n\t%let i=1;\n\t%let var = %scan(&list., &i., ' ');\n\t%do %while(&var ne);\n\t\tproc ttest data=DATA_FS_DT_5B sides=2 h0=0 plots=none;\n\t\t\tclass Selected;\n\t\t\tvar &var.;\n\t\trun;\n\t\t%let i=%eval(&i+1);\n\t\t%let var = %scan(&list., &i., ' ');\n\t%end;\n%mend;\n\n%check_stability();\n\nproc casutil;\n load data=DATA_FS_DT_5B casout=\"DATA_FS_DT_5B\" replace;\nrun;","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733593852976-18283":{"nodeType":"step","version":1,"id":"id-1733593852976-18283","name":"Logistic regression","note":{"version":1,"id":"id-1733593852977-18287","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":2,"properties":{"UI_PROP_XPOS":"305","UI_PROP_YPOS":"63","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"91294597-fbb6-4ce0-b5b7-7e5b42b945b5|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"d93f789c-bdb3-4b46-9f02-00f22b7e6848|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc logselect data=MYCAS.DATA_FS_DT_5B;\n\tpartition role=Selected (test='0');\n\tmodel y(event='1')=A5 A24 A27 A34 A39 A44 A46 A56 A58 / link=logit;\n\tselection method=stepwise\n (select=aic stop=aic choose=aic) hierarchy=none;\n\toutput out=MYCAS.OUTPUT_RES xbeta predicted lower upper copyvars=(_all_);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\twhere selected eq 0;\n\tif _pred_ ge 0.5 then y_pred = 1;\n\telse y_pred = 0;\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733646707345-18442":{"nodeType":"step","version":1,"id":"id-1733646707345-18442","name":"Random forest","note":{"version":1,"id":"id-1733646707351-18450","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":3,"properties":{"UI_PROP_XPOS":"634","UI_PROP_YPOS":"63","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"01617e80-1ba3-4d22-9cfb-b77f81a1f48f|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"5f99843d-576e-4398-af13-5c3204e8d4c3|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc forest data=MYCAS.DATA_FS_DT_5B;\n\tpartition role=Selected (test='0');\n\ttarget y / level=nominal;\n\tinput A5 A24 A27 A34 A39 A44 A46 A56 A58 / level=interval;\n\tgrow gini;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A5 A24 A27 A34 A39 A44 A46 A56 A58);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733646707401-18504":{"nodeType":"step","version":1,"id":"id-1733646707401-18504","name":"Neural network","note":{"version":1,"id":"id-1733646707405-18512","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":4,"properties":{"UI_PROP_XPOS":"947","UI_PROP_YPOS":"74","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"582c4a08-f130-4e67-8af1-364f8c041a0d|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"fb080171-788f-4f1f-b908-f0334d69fa06|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"libname _tmpcas_ cas caslib=\"CASUSER\";\n\nproc nnet data=MYCAS.DATA_FS_DT_5B;\n\tpartition role=Selected (validate='0');\n\ttarget y / level=nominal;\n\tinput A5 A24 A27 A34 A39 A44 A46 A56 A58 / level=interval;\n\thidden 50;\n\thidden 50 / act=logistic;\n\ttrain stagnation=5 outmodel=_tmpcas_._Nnet_model_;\n\toptimization algorithm=sgd regL2=0.1 minibatchsize=50;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A5 A24 A27 A34 A39 A44 A46 A56 A58 \n\t\tSelected);\nrun;\n\nproc delete data=_tmpcas_._Nnet_model_;\nrun;\n\nlibname _tmpcas_;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}},"id-1733646707315-18411":{"nodeType":"step","version":1,"id":"id-1733646707315-18411","name":"Decision tree","note":{"version":1,"id":"id-1733646707319-18419","name":null,"description":null,"properties":{"UI_NOTE_PROP_IS_EXPANDED":"false","UI_NOTE_PROP_HEIGHT":"0","UI_NOTE_PROP_WIDTH":"0","UI_NOTE_PROP_IS_STICKYNOTE":"false"}},"priority":5,"properties":{"UI_PROP_XPOS":"432","UI_PROP_YPOS":"91","UI_PROP_COLORGRP":"0","UI_PROP_IS_INPUT_EXPANDED":"false","UI_PROP_IS_OUTPUT_EXPANDED":"false","UI_PROP_INPUT_PORT|inTables|0":"8762d692-38d5-4580-8719-0502ed8c7e92|Input table 1|Input tables","UI_PROP_OUTPUT_PORT|outTables|0":"a4627e2e-be84-4cdc-83a9-07ee0c3051ff|Output table 1|Output tables","UI_PROP_NODE_DATA_ID":"a7190700-f59c-4a94-afe2-214ce639fcde","UI_PROP_NODE_DATA_MODIFIED_DATE":"1721912191502"},"portMappings":[{"mappingType":"tableStructure","portName":"outTables","portIndex":0,"tableStructure":{"columnDefinitions":[]}}],"stepReference":{"type":"uri","path":"/dataFlows/steps/a7190700-f59c-4a94-afe2-214ce639fcde"},"arguments":{"codeOptions":{"code":"proc treesplit data=MYCAS.DATA_FS_DT_5B maxdepth=10;\n\tpartition role=Selected (test='0');\n\tinput A5 A24 A27 A34 A39 A44 A46 A56 A58 / level=interval;\n\ttarget y / level=nominal;\n\tgrow gini;\n\tprune c45;\n\tscore out=MYCAS.OUTPUT_RES copyvars=(y A5 A24 A27 A34 A39 A44 A46 A56 A58);\nrun;\n\ndata output_res;\n\tset mycas.output_res;\n\ty_pred = input(compress(I_y), 3.);\nrun;\n\n%score(output_res, y, y_pred);","logHTML":"","resultsHTML":"","variables":[{"name":"_output1","value":{"referenceType":"outputPort","portName":"outTables","portIndex":0,"arguments":{}}},{"name":"_input1","value":{"referenceType":"inputPort","portName":"inTables","portIndex":0}}],"contentType":"embedded"}}}},"parameters":{},"connections":[],"extendedProperties":{},"stickyNotes":[]}