CUDA Parallel Programming (6): A Parallel Implementation of the KNN Algorithm


I previously wrote two articles: one on a serial C++ implementation of the KNN algorithm, and one on computing Euclidean distances between vectors with CUDA. This article is essentially a simple combination of the two, so you may want to read those first.

1. Generating the Dataset

We need to generate N data points of D dimensions each. Every point carries a class label, determined by the sign of its first dimension: Positive or Negative.


```python
#!/usr/bin/python
import sys
import random
import os

filename = "input.txt"
if os.path.exists(filename):
    print("%s exists and del" % filename)
    os.remove(filename)
fout = open(filename, "w")
for i in range(0, int(sys.argv[1])):        # number of samples (str to int)
    x = []
    for j in range(0, int(sys.argv[2])):    # number of dimensions
        # generate a random value in [-1, 1] and keep 4 decimal places
        x.append("%.4f" % random.uniform(-1, 1))
        fout.write("%s\t" % x[j])
    # fout.write(x) : TypeError: expected a character buffer object
    if x[0][0] == '-':
        fout.write("Negative" + "\n")
    else:
        fout.write("Positive" + "\n")
fout.close()
```

Run the program to generate 4000 data points of dimension 8.
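If the script is saved as, say, gen_data.py (the post never names the file, so that name is only illustrative), the run above corresponds to `python gen_data.py 4000 8`.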

[screenshot: generator run]

This produces the file "input.txt":

[screenshot: contents of input.txt]

2. The Serial Code

This code is the same as in the earlier article; we use the first 400 points as test data and the remaining 3600 as training data.

KNN_2.cc:


```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <cmath>
#include <cstdlib>
using namespace std;

typedef string tLabel;
typedef double tData;
typedef pair<int,double> PAIR;
const int MaxColLen = 10;
const int MaxRowLen = 10000;
ifstream fin;

class KNN
{
private:
    tData dataSet[MaxRowLen][MaxColLen];
    tLabel labels[MaxRowLen];
    tData testData[MaxColLen];
    int rowLen;
    int colLen;
    int k;
    int test_data_num;
    map<int,double> map_index_dis;
    map<tLabel,int> map_label_freq;
    double get_distance(tData *d1,tData *d2);
public:
    KNN(int k , int rowLen , int colLen , char *filename);
    void get_all_distance();
    tLabel get_max_freq_label();
    void auto_norm_data();
    void get_error_rate();
    //sort the (index,distance) pairs by distance
    struct CmpByValue
    {
        bool operator() (const PAIR& lhs,const PAIR& rhs)
        {
            return lhs.second < rhs.second;
        }
    };
    ~KNN();
};

KNN::~KNN()
{
    fin.close();
    map_index_dis.clear();
    map_label_freq.clear();
}

KNN::KNN(int k , int row ,int col , char *filename)
{
    this->rowLen = row;
    this->colLen = col;
    this->k = k;
    test_data_num = 0;
    fin.open(filename);
    if( !fin )
    {
        cout<<"can not open the file"<<endl;
        exit(0);
    }
    //read data from file
    for(int i=0;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            fin>>dataSet[i][j];
        }
        fin>>labels[i];
    }
}

//use the first test_data_num rows as test data, the rest as training data
void KNN::get_error_rate()
{
    int i,j,count = 0;
    tLabel label;
    cout<<"please input the number of test data : "<<endl;
    cin>>test_data_num;
    for(i=0;i<test_data_num;i++)
    {
        for(j=0;j<colLen;j++)
        {
            testData[j] = dataSet[i][j];
        }
        get_all_distance();
        label = get_max_freq_label();
        if( label!=labels[i] )
            count++;
        map_index_dis.clear();
        map_label_freq.clear();
    }
    cout<<"the error rate is = "<<(double)count/(double)test_data_num<<endl;
}

double KNN::get_distance(tData *d1,tData *d2)
{
    double sum = 0;
    for(int i=0;i<colLen;i++)
    {
        sum += pow( (d1[i]-d2[i]) , 2 );
    }
    //cout<<"the sum is = "<<sum<<endl;
    return sqrt(sum);
}

//get distance between testData and all dataSet
void KNN::get_all_distance()
{
    double distance;
    int i;
    for(i=test_data_num;i<rowLen;i++)
    {
        distance = get_distance(dataSet[i],testData);
        map_index_dis[i] = distance;
    }
}

tLabel KNN::get_max_freq_label()
{
    //copy the (index,distance) pairs into a vector so they can be sorted
    vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
    sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());
    for(int i=0;i<k;i++)
    {
        /*
        cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label = "<<labels[ vec_index_dis[i].first ]<<" the coordinate ( ";
        int j;
        for(j=0;j<colLen-1;j++)
        {
            cout<<dataSet[ vec_index_dis[i].first ][j]<<",";
        }
        cout<<dataSet[ vec_index_dis[i].first ][j]<<" )"<<endl;
        */
        map_label_freq[ labels[ vec_index_dis[i].first ] ]++;
    }
    //find the label that occurs most often among the k nearest neighbours
    map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
    tLabel label;
    int max_freq = 0;
    while( map_it != map_label_freq.end() )
    {
        if( map_it->second > max_freq )
        {
            max_freq = map_it->second;
            label = map_it->first;
        }
        map_it++;
    }
    //cout<<"The test data belongs to the "<<label<<" label"<<endl;
    return label;
}

void KNN::auto_norm_data()
{
    tData maxa[colLen];
    tData mina[colLen];
    tData range[colLen];
    int i,j;
    for(i=0;i<colLen;i++)
    {
        maxa[i] = max(dataSet[0][i],dataSet[1][i]);
        mina[i] = min(dataSet[0][i],dataSet[1][i]);
    }
    for(i=2;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            if( dataSet[i][j]>maxa[j] )
            {
                maxa[j] = dataSet[i][j];
            }
            else if( dataSet[i][j]<mina[j] )
            {
                mina[j] = dataSet[i][j];
            }
        }
    }
    for(i=0;i<colLen;i++)
    {
        range[i] = maxa[i] - mina[i];
        //normalize the test data set
        testData[i] = ( testData[i] - mina[i] )/range[i];
    }
    //normalize the training data set
    for(i=0;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            dataSet[i][j] = ( dataSet[i][j] - mina[j] )/range[j];
        }
    }
}

int main(int argc , char** argv)
{
    int k,row,col;
    char *filename;
    if( argc!=5 )
    {
        cout<<"The input should be like this : ./a.out k row col filename"<<endl;
        exit(1);
    }
    k = atoi(argv[1]);
    row = atoi(argv[2]);
    col = atoi(argv[3]);
    filename = argv[4];
    KNN knn(k,row,col,filename);
    knn.auto_norm_data();
    knn.get_error_rate();
    return 0;
}
```

makefile:


```makefile
target:
	g++ KNN_2.cc
	./a.out 7 4000 8 input.txt
cu:
	nvcc KNN.cu
	./a.out 7 4000 8 input.txt
```
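With this makefile, `make` (the default target) builds and runs the serial version and `make cu` builds and runs the CUDA version; in both cases the arguments are k = 7, 4000 rows, 8 columns, and the data file input.txt.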

Run result:

[screenshot: output of the serial run]

3. The Parallel Implementation

The parallelization consists of computing the distances from each test sample to the N training samples in parallel. Done serially, this step has time complexity O(N*D); done in parallel, with one thread handling each training sample, it is O(D), where D is the dimensionality of the data.
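Concretely, with the 3600 training samples and D = 8 used here, classifying one test sample serially takes 3600 × 8 = 28,800 difference-square-accumulate steps, whereas in the parallel version each of the 3600 threads performs only the 8 steps for its own training sample.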

KNN.cu:


```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <cmath>
#include <cstdlib>
using namespace std;

typedef string tLabel;
typedef float tData;
typedef pair<int,double> PAIR;
const int MaxColLen = 10;
const int MaxRowLen = 10010;
const int test_data_num = 400;
ifstream fin;

class KNN
{
private:
    tData dataSet[MaxRowLen][MaxColLen];
    tLabel labels[MaxRowLen];
    tData testData[MaxColLen];
    //the training rows must be contiguous so cudaMemcpy2D can copy them
    tData trainingData[3600][8];
    int rowLen;
    int colLen;
    int k;
    map<int,double> map_index_dis;
    map<tLabel,int> map_label_freq;
    double get_distance(tData *d1,tData *d2);
public:
    KNN(int k , int rowLen , int colLen , char *filename);
    void get_all_distance();
    tLabel get_max_freq_label();
    void auto_norm_data();
    void get_error_rate();
    void get_training_data();
    //sort the (index,distance) pairs by distance
    struct CmpByValue
    {
        bool operator() (const PAIR& lhs,const PAIR& rhs)
        {
            return lhs.second < rhs.second;
        }
    };
    ~KNN();
};

KNN::~KNN()
{
    fin.close();
    map_index_dis.clear();
    map_label_freq.clear();
}

KNN::KNN(int k , int row ,int col , char *filename)
{
    this->rowLen = row;
    this->colLen = col;
    this->k = k;
    fin.open(filename);
    if( !fin )
    {
        cout<<"can not open the file"<<endl;
        exit(0);
    }
    for(int i=0;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            fin>>dataSet[i][j];
        }
        fin>>labels[i];
    }
}

//copy the rows after the test rows into the contiguous trainingData array
void KNN::get_training_data()
{
    for(int i=test_data_num;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            trainingData[i-test_data_num][j] = dataSet[i][j];
        }
    }
}

void KNN::get_error_rate()
{
    int i,j,count = 0;
    tLabel label;
    cout<<"the test data number is : "<<test_data_num<<endl;
    get_training_data();
    //get testing data and calculate
    for(i=0;i<test_data_num;i++)
    {
        for(j=0;j<colLen;j++)
        {
            testData[j] = dataSet[i][j];
        }
        get_all_distance();
        label = get_max_freq_label();
        if( label!=labels[i] )
            count++;
        map_index_dis.clear();
        map_label_freq.clear();
    }
    cout<<"the error rate is = "<<(double)count/(double)test_data_num<<endl;
}

//kernel: block tid computes the squared distance between training row tid
//and the current test sample
__global__ void cal_dis(tData *train_data,tData *test_data,tData *dis,size_t pitch,int N,int D)
{
    int tid = blockIdx.x;
    if(tid<N)
    {
        tData temp = 0;
        tData sum = 0;
        for(int i=0;i<D;i++)
        {
            //walk along row tid of the pitched training array
            temp = *( (tData*)( (char*)train_data+tid*pitch )+i ) - test_data[i];
            sum += temp * temp;
        }
        dis[tid] = sum;
    }
}

//calculate the distances to all training samples in parallel
void KNN::get_all_distance()
{
    int height = rowLen - test_data_num;
    tData *distance = new tData[height];
    tData *d_train_data,*d_test_data,*d_dis;
    size_t pitch_d;
    size_t pitch_h = colLen * sizeof(tData);
    //allocate memory on GPU
    cudaMallocPitch( (void**)&d_train_data,&pitch_d,colLen*sizeof(tData),height );
    cudaMalloc( (void**)&d_test_data,colLen*sizeof(tData) );
    cudaMalloc( (void**)&d_dis,height*sizeof(tData) );
    cudaMemset( d_train_data,0,height*colLen*sizeof(tData) );
    cudaMemset( d_test_data,0,colLen*sizeof(tData) );
    cudaMemset( d_dis,0,height*sizeof(tData) );
    //copy training and testing data from host to device
    cudaMemcpy2D( d_train_data,pitch_d,trainingData,pitch_h,colLen*sizeof(tData),height,cudaMemcpyHostToDevice );
    cudaMemcpy( d_test_data,testData,colLen*sizeof(tData),cudaMemcpyHostToDevice );
    //calculate the distance: one block per training sample
    cal_dis<<<height,1>>>( d_train_data,d_test_data,d_dis,pitch_d,height,colLen );
    //copy distance data from device to host
    cudaMemcpy( distance,d_dis,height*sizeof(tData),cudaMemcpyDeviceToHost );
    int i;
    for( i=0;i<rowLen-test_data_num;i++ )
    {
        map_index_dis[i+test_data_num] = distance[i];
    }
    //release host and device buffers
    delete [] distance;
    cudaFree(d_train_data);
    cudaFree(d_test_data);
    cudaFree(d_dis);
}

tLabel KNN::get_max_freq_label()
{
    //copy the (index,distance) pairs into a vector so they can be sorted
    vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
    sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());
    for(int i=0;i<k;i++)
    {
        /*
        cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label = "<<labels[ vec_index_dis[i].first ]<<" the coordinate ( ";
        int j;
        for(j=0;j<colLen-1;j++)
        {
            cout<<dataSet[ vec_index_dis[i].first ][j]<<",";
        }
        cout<<dataSet[ vec_index_dis[i].first ][j]<<" )"<<endl;
        */
        map_label_freq[ labels[ vec_index_dis[i].first ] ]++;
    }
    //find the label that occurs most often among the k nearest neighbours
    map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
    tLabel label;
    int max_freq = 0;
    while( map_it != map_label_freq.end() )
    {
        if( map_it->second > max_freq )
        {
            max_freq = map_it->second;
            label = map_it->first;
        }
        map_it++;
    }
    cout<<"The test data belongs to the "<<label<<" label"<<endl;
    return label;
}

void KNN::auto_norm_data()
{
    tData maxa[colLen];
    tData mina[colLen];
    tData range[colLen];
    int i,j;
    for(i=0;i<colLen;i++)
    {
        maxa[i] = max(dataSet[0][i],dataSet[1][i]);
        mina[i] = min(dataSet[0][i],dataSet[1][i]);
    }
    for(i=2;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            if( dataSet[i][j]>maxa[j] )
            {
                maxa[j] = dataSet[i][j];
            }
            else if( dataSet[i][j]<mina[j] )
            {
                mina[j] = dataSet[i][j];
            }
        }
    }
    for(i=0;i<colLen;i++)
    {
        range[i] = maxa[i] - mina[i];
        //normalize the test data set
        testData[i] = ( testData[i] - mina[i] )/range[i];
    }
    //normalize the training data set
    for(i=0;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            dataSet[i][j] = ( dataSet[i][j] - mina[j] )/range[j];
        }
    }
}

int main(int argc , char** argv)
{
    int k,row,col;
    char *filename;
    if( argc!=5 )
    {
        cout<<"The input should be like this : ./a.out k row col filename"<<endl;
        exit(1);
    }
    k = atoi(argv[1]);
    row = atoi(argv[2]);
    col = atoi(argv[3]);
    filename = argv[4];
    KNN knn(k,row,col,filename);
    knn.auto_norm_data();
    knn.get_error_rate();
    return 0;
}
```
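The listing above ignores the return codes of the CUDA API calls, so a failed allocation or copy would go unnoticed. A minimal error-checking helper, given here as a sketch rather than as part of the original code, could look like this:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Sketch only: wrap each CUDA runtime call to catch failures early.
#define CUDA_CHECK(call)                                              \
    do {                                                              \
        cudaError_t err = (call);                                     \
        if (err != cudaSuccess) {                                     \
            fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                    cudaGetErrorString(err), __FILE__, __LINE__);     \
            exit(1);                                                  \
        }                                                             \
    } while (0)

// usage: CUDA_CHECK( cudaMalloc((void**)&d_dis, height*sizeof(tData)) );
```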

Run result:

[screenshot: output of the CUDA run]

Because of the memory-allocation issue mentioned in the earlier article, the training data trainingData has to be allocated as a statically sized array, which is not very convenient.
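One possible way around the fixed-size array, sketched here as an assumption rather than taken from the post, is a contiguous heap buffer sized at run time; it still satisfies cudaMemcpy2D's requirement of a contiguous row-major source:

```cpp
#include <vector>
#include <cuda_runtime.h>

typedef float tData;

// Sketch: replace the static trainingData[3600][8] with a heap buffer;
// element (i, j) lives at buf[i * colLen + j].
void copy_training_data(tData *d_train_data, size_t pitch_d,
                        const std::vector<tData> &buf,
                        int height, int colLen)
{
    cudaMemcpy2D(d_train_data, pitch_d,
                 buf.data(), colLen * sizeof(tData),  // host pitch
                 colLen * sizeof(tData), height,      // width in bytes, rows
                 cudaMemcpyHostToDevice);
}
```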

As you can see, with identical test and training data sets the serial and parallel versions produce exactly the same results. The data set is small, so no timing comparison was made. One remaining improvement: load all of testData into device memory at once instead of one sample at a time, which would cut down the number of times the training data is copied to the device and improve efficiency.
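As a sketch of that improvement (one possible shape, assumed rather than taken from the post), the whole test set can be copied to the device once and a 2D grid can compute every test-to-training distance in a single launch:

```cpp
typedef float tData;

// Sketch: one launch computes the squared distance between every
// (test, training) pair. train is N x D and test is M x D, both flat
// row-major device arrays; dis is the M x N result matrix.
__global__ void cal_dis_all(const tData *train, const tData *test,
                            tData *dis, int N, int M, int D)
{
    int train_id = blockIdx.x;   // which training sample
    int test_id  = blockIdx.y;   // which test sample
    if (train_id < N && test_id < M)
    {
        tData sum = 0;
        for (int i = 0; i < D; i++)
        {
            tData diff = train[train_id * D + i] - test[test_id * D + i];
            sum += diff * diff;
        }
        dis[test_id * N + train_id] = sum;  // squared distance, as in cal_dis
    }
}
// launch: cal_dis_all<<<dim3(N, M), 1>>>(d_train, d_test, d_dis, N, M, D);
```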

Author: 忆之独秀

Email: leaguenew@qq.com

Original post: http://blog.csdn.net/lavorange/article/details/42172451
