数据挖掘：数据挖掘Apriori算法-数据挖掘-EPS数据狗论坛

那一抹辉煌 发表于 2019-6-14 15:30:19

数据挖掘：数据挖掘Apriori算法

代码:
% This script generates the frequent itemsets (Apriori).
clear all;
clc;
TotalSheet=[];      % filled later via brace indexing: TotalSheet{k} = frequent k-itemsets
dbstop if error;    % enter the debugger at the point where an error is raised

% Unique values of column 2 of data.csv. Not used in this section.
% NOTE(review): presumably brand identifiers used by a later stage -- confirm.
brand_name=csvread('./data/data.csv');
brand_name=brand_name(:,2);
brand_name=unique(brand_name);

thconf=0.6; % minimum confidence threshold (not used in this section)
load('./data/table');   % expected to put a variable named "table" into the workspace
X=table;
% The output list of this size() call was lost in the original text; m is
% read below and by the counting code further down, which scans the rows of
% D as transactions and indexes its columns by item id.
[m,n]=size(X);
D=X;
th=ceil(0.1*m); % minimum support count: 10% of the rows, rounded up
%%
% Find the frequent 1-itemsets

% Column totals of D, one per item. The explicit dimension keeps a
% single-row D from being collapsed to a scalar.
C1=sum(D,1);
C1=C1';
% Prepend the column index of each item: column 1 = item id, column 2 =
% support count. This is the layout read back below (support = column 2)
% and by the 2-itemset stage (items = column 1).
C1=[(1:numel(C1))' C1];

L1=C1;
support=L1(:,2);
L1(support<th,:)=[];    % drop items below the minimum support count

TotalSheet{1}=L1;

%%
% Generate the frequent 2-itemsets

% Candidate pairs: every 2-combination of the frequent single items.
items1=L1(:,1);
if numel(items1)<2
    % With a scalar first argument nchoosek returns a binomial coefficient
    % (or errors), not a list of combinations, so handle this case here.
    C2=zeros(0,2);
else
    C2=nchoosek(items1.',2);
end

nCand=size(C2,1);
temp=cell(nCand,2);     % column 1: itemset (row vector), column 2: support count
for i=1:nCand
    % itemset
    temp{i,1}=C2(i,:);

    % support count: number of rows of D in which every item of the pair is
    % nonzero. nnz is used instead of an accumulator named "sum" so the
    % built-in sum function is not shadowed.
    temp{i,2}=nnz(all(D(:,C2(i,:)),2));
end
C2=temp;

L2=C2;
support=cell2mat(L2(:,2));
L2(support<th,:)=[];    % drop pairs below the minimum support count

TotalSheet{2}=L2;

%%
for k=3:6   % itemset sizes 3..6; the upper bound is hard-coded

    % Generate the frequent k-itemsets (k>=3) from the itemsets held in L2
    if isempty(L2)
        break;
    end
    C3=cell2mat(L2(:,1));   % one previous-level itemset per row

    item_num=size(C3,1);
    if item_num<2
        break;
    end
    comb_index=nchoosek(1:item_num,2);
    len2=size(C3,2);        % number of items in each previous-level itemset

    % Join the previous-level itemsets pairwise
    cand=zeros(0,len2+1);
    for i=1:size(comb_index,1)
        comb_temp=union(C3(comb_index(i,1),:),C3(comb_index(i,2),:));
        % Itemsets are row vectors, so their length must be taken with
        % numel; size(...,1) is always 1 and would reject every candidate.
        len=numel(comb_temp);
        % Keep the union only if the two itemsets differ by exactly one item
        if len~=len2+1
            continue;
        end
        % Keep it only if every (len-1)-subset is itself a previous-level itemset
        sub_comb=nchoosek(comb_temp,len-1);
        if size(intersect(sub_comb,C3,'rows'),1)~=size(sub_comb,1)
            continue;
        end
        cand(end+1,:)=comb_temp;
    end
    % Different pairs can produce the same union; keep each candidate once.
    C3=unique(cand,'rows');

    nCand=size(C3,1);
    temp=cell(nCand,2);     % column 1: itemset, column 2: support count
    for i=1:nCand
        % itemset
        temp{i,1}=C3(i,:);

        % support count: rows of D in which every item of the candidate is
        % nonzero (nnz avoids shadowing the built-in sum function)
        temp{i,2}=nnz(all(D(:,C3(i,:)),2));
    end
    L3=temp;
    if size(L3,1)==0
        break;
    end

    support=cell2mat(L3(:,2));
    L3(support<th,:)=[];    % drop candidates below the minimum support count

    TotalSheet{k}=L3;
    L2=L3;  % this level becomes the input of the next iteration
    k       % no semicolon: echoes the level just finished (presumably intentional progress output)
end

页: [1]

EPS数据狗论坛's Archiver

数据挖掘：数据挖掘Apriori算法