ceub47798680t 发表于 2017-7-11 11:39:16

怎么用SAS删除重复的变量?

数据中有一些变量,其某个取值的重复占比达到 90% 以上,想把这些多余的变量从数据集中删掉,应该怎么操作?

15829783809 发表于 2017-7-11 14:48:03

/*
 * var_namelist
 *   Writes the column names of a data set, minus one excluded variable,
 *   into an output table with a single column NAME.
 *
 * Parameters
 *   data=    Data set to inspect, as LIBREF.MEMBER. A one-level name is
 *            looked up in WORK (assumption: no USER libref is assigned).
 *   tarvar=  Variable to leave out of the list (compared case-insensitively).
 *   dsor=    Name of the output table that receives the NAME column.
 *
 * Side effects
 *   Declares the global macro variables VAR_LIST and VAR_NUM. This macro
 *   does not assign them; callers may fill them afterwards.
 */
%macro var_namelist(data=, tarvar=, dsor=);
    /* Keep the parsed pieces out of the caller's symbol tables. */
    %local lib dname;

    /* Dictionary views store LIBNAME and MEMNAME in upper case. */
    %if %index(&data., %str(.)) > 0 %then %do;
        %let lib   = %upcase(%scan(&data., 1, %str(.)));
        %let dname = %upcase(%scan(&data., 2, %str(.)));
    %end;
    %else %do;
        /* One-level name: no libref was given, so look in WORK. */
        %let lib   = WORK;
        %let dname = %upcase(&data.);
    %end;

    %global var_list var_num;

    proc sql;
        create table &dsor. as
        select name
        from sashelp.VCOLUMN
        where left(libname) = "&lib."
          and left(memname) = "&dname."
          and lowcase(name) ^= lowcase("&tarvar.");
    quit;
%mend var_namelist;

/*
 * test
 *   Finds variables in a data set whose most frequent value accounts for
 *   more than a given percentage of the rows counted by PROC FREQ, then
 *   writes two data sets: one without those variables and one holding
 *   only those variables plus the target.
 *
 * Parameters (positional)
 *   data         Input data set.
 *   tarvar       Target variable; never screened and always kept in the
 *                data_drop output.
 *   data_result  Output data set: input with the flagged variables dropped.
 *   data_drop    Output data set: the target plus the flagged variables.
 *   rate         Threshold on PROC FREQ's PERCENT column (0-100 scale),
 *                e.g. 90. May be fractional.
 *
 * Side effects
 *   Creates WORK.AA (candidate names) and WORK.BASE (flagged names), and
 *   assigns the flagged names to the macro variable VAR_LIST (declared
 *   global by var_namelist). Requires the var_namelist macro.
 */
%macro test(data, tarvar, data_result, data_drop, rate);
    %local i n max_percent;
    %let n = 0;  /* stays 0 when the input has no candidate variables */

    /* Empty accumulator for the names of the flagged variables. */
    data base;
        length variable $100;
        call missing(variable);
        stop;
    run;

    %var_namelist(data=&data., /*coltype=num,*/ tarvar=&tarvar., dsor=aa);

    /* Load the candidate names into local macro variables var1..varN. */
    data _null_;
        set aa;
        call symputx(cats("var", _n_), name, 'L');
        call symputx("n", _n_, 'L');
    run;

    %put &n.;

    %do i = 1 %to &n.;
        %put &&var&i.;

        /* Fixed scratch name: PERCENT_ plus a long variable name could
           exceed the 32-character limit for data set names. */
        proc freq data=&data.(keep=&&var&i.) noprint;
            tables &&var&i. / out=_freq_out;
        run;

        /* Reset first so an empty result cannot reuse the previous value. */
        %let max_percent = 0;
        proc sql noprint;
            select max(PERCENT) into :max_percent
            from _freq_out;
        quit;

        /* PERCENT is fractional; the implicit integer evaluation of a plain
           macro IF would compare it as text, so evaluate as floating point. */
        %if %sysevalf(&max_percent. > &rate.) %then %do;
            data next;
                length variable $100;
                variable = "&&var&i.";
            run;

            proc append base=base data=next force;
            run;
        %end;
    %end;

    proc datasets lib=work nolist nowarn;
        delete _freq_out;
    quit;

    /* This step collects the variables whose single most frequent value
       exceeds the threshold; a missing-value screen can be written the
       same way. */
    %let var_list = ;
    proc sql noprint;
        select variable into :var_list separated by ' '
        from base;
    quit;

    %put &var_list.;

    data &data_result.;
        set &data.;
        /* Skip the DROP statement entirely when nothing was flagged. */
        %if %length(&var_list.) > 0 %then %do;
            drop &var_list.;
        %end;
    run;

    data &data_drop.;
        set &data.;
        /* The blank keeps the target and the first flagged name apart. */
        keep &tarvar. &var_list.;
    run;
%mend test;
页: [1]
查看完整版本: 怎么用SAS删除重复的变量?