CUDA Parallel Programming (6): A Parallel Implementation of the KNN Algorithm


I previously wrote two articles: one on a serial C++ implementation of the KNN algorithm, and one on computing Euclidean distances between vectors with CUDA. This article is essentially a simple combination of the two, so you may want to read those first.

1. Generating the Dataset

We need to generate N data points of D dimensions each. Every point carries a class label, determined by the sign of its first dimension: Positive or Negative.


```python
#!/usr/bin/python
import sys
import random
import os

filename = "input.txt"
if os.path.exists(filename):
    print("%s exists and del" % filename)
    os.remove(filename)
fout = open(filename, "w")
for i in range(0, int(sys.argv[1])):        # number of samples (str to int)
    x = []
    for j in range(0, int(sys.argv[2])):    # number of dimensions
        # generate a random value in [-1, 1] and keep 4 decimal places
        x.append("%.4f" % random.uniform(-1, 1))
        fout.write("%s\t" % x[j])
    # fout.write(x) : TypeError: expected a character buffer object
    if x[0][0] == '-':
        fout.write("Negative" + "\n")
    else:
        fout.write("Positive" + "\n")
fout.close()
```

Run the program to generate 4000 data points of dimension 8.
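If the script is saved as, say, gen_data.py (the post never names the file, so that name is only illustrative), the run above corresponds to `python gen_data.py 4000 8`.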

[screenshot: generator run]

This produces the file "input.txt":

[screenshot: contents of input.txt]

2. The Serial Code

This code is the same as in the earlier article; we use the first 400 points as test data and the remaining 3600 as training data.

KNN_2.cc:


```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <cmath>
#include <cstdlib>
using namespace std;

typedef string tLabel;
typedef double tData;
typedef pair<int,double> PAIR;
const int MaxColLen = 10;
const int MaxRowLen = 10000;
ifstream fin;

class KNN
{
private:
    tData dataSet[MaxRowLen][MaxColLen];
    tLabel labels[MaxRowLen];
    tData testData[MaxColLen];
    int rowLen;
    int colLen;
    int k;
    int test_data_num;
    map<int,double> map_index_dis;
    map<tLabel,int> map_label_freq;
    double get_distance(tData *d1,tData *d2);
public:
    KNN(int k , int rowLen , int colLen , char *filename);
    void get_all_distance();
    tLabel get_max_freq_label();
    void auto_norm_data();
    void get_error_rate();
    //sort the (index,distance) pairs by distance
    struct CmpByValue
    {
        bool operator() (const PAIR& lhs,const PAIR& rhs)
        {
            return lhs.second < rhs.second;
        }
    };
    ~KNN();
};

KNN::~KNN()
{
    fin.close();
    map_index_dis.clear();
    map_label_freq.clear();
}

KNN::KNN(int k , int row ,int col , char *filename)
{
    this->rowLen = row;
    this->colLen = col;
    this->k = k;
    test_data_num = 0;
    fin.open(filename);
    if( !fin )
    {
        cout<<"can not open the file"<<endl;
        exit(0);
    }
    //read data from file
    for(int i=0;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            fin>>dataSet[i][j];
        }
        fin>>labels[i];
    }
}

//use the first test_data_num rows as test data, the rest as training data
void KNN::get_error_rate()
{
    int i,j,count = 0;
    tLabel label;
    cout<<"please input the number of test data : "<<endl;
    cin>>test_data_num;
    for(i=0;i<test_data_num;i++)
    {
        for(j=0;j<colLen;j++)
        {
            testData[j] = dataSet[i][j];
        }
        get_all_distance();
        label = get_max_freq_label();
        if( label!=labels[i] )
            count++;
        map_index_dis.clear();
        map_label_freq.clear();
    }
    cout<<"the error rate is = "<<(double)count/(double)test_data_num<<endl;
}

double KNN::get_distance(tData *d1,tData *d2)
{
    double sum = 0;
    for(int i=0;i<colLen;i++)
    {
        sum += pow( (d1[i]-d2[i]) , 2 );
    }
    //cout<<"the sum is = "<<sum<<endl;
    return sqrt(sum);
}

//get distance between testData and all dataSet
void KNN::get_all_distance()
{
    double distance;
    int i;
    for(i=test_data_num;i<rowLen;i++)
    {
        distance = get_distance(dataSet[i],testData);
        map_index_dis[i] = distance;
    }
}

tLabel KNN::get_max_freq_label()
{
    //copy the (index,distance) pairs into a vector so they can be sorted
    vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
    sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());
    for(int i=0;i<k;i++)
    {
        /*
        cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label = "<<labels[ vec_index_dis[i].first ]<<" the coordinate ( ";
        int j;
        for(j=0;j<colLen-1;j++)
        {
            cout<<dataSet[ vec_index_dis[i].first ][j]<<",";
        }
        cout<<dataSet[ vec_index_dis[i].first ][j]<<" )"<<endl;
        */
        map_label_freq[ labels[ vec_index_dis[i].first ] ]++;
    }
    //find the label that occurs most often among the k nearest neighbours
    map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
    tLabel label;
    int max_freq = 0;
    while( map_it != map_label_freq.end() )
    {
        if( map_it->second > max_freq )
        {
            max_freq = map_it->second;
            label = map_it->first;
        }
        map_it++;
    }
    //cout<<"The test data belongs to the "<<label<<" label"<<endl;
    return label;
}

void KNN::auto_norm_data()
{
    tData maxa[colLen];
    tData mina[colLen];
    tData range[colLen];
    int i,j;
    for(i=0;i<colLen;i++)
    {
        maxa[i] = max(dataSet[0][i],dataSet[1][i]);
        mina[i] = min(dataSet[0][i],dataSet[1][i]);
    }
    for(i=2;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            if( dataSet[i][j]>maxa[j] )
            {
                maxa[j] = dataSet[i][j];
            }
            else if( dataSet[i][j]<mina[j] )
            {
                mina[j] = dataSet[i][j];
            }
        }
    }
    for(i=0;i<colLen;i++)
    {
        range[i] = maxa[i] - mina[i];
        //normalize the test data set
        testData[i] = ( testData[i] - mina[i] )/range[i];
    }
    //normalize the training data set
    for(i=0;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            dataSet[i][j] = ( dataSet[i][j] - mina[j] )/range[j];
        }
    }
}

int main(int argc , char** argv)
{
    int k,row,col;
    char *filename;
    if( argc!=5 )
    {
        cout<<"The input should be like this : ./a.out k row col filename"<<endl;
        exit(1);
    }
    k = atoi(argv[1]);
    row = atoi(argv[2]);
    col = atoi(argv[3]);
    filename = argv[4];
    KNN knn(k,row,col,filename);
    knn.auto_norm_data();
    knn.get_error_rate();
    return 0;
}
```

makefile:


```makefile
target:
	g++ KNN_2.cc
	./a.out 7 4000 8 input.txt
cu:
	nvcc KNN.cu
	./a.out 7 4000 8 input.txt
```
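With this makefile, `make` (the default target) builds and runs the serial version and `make cu` builds and runs the CUDA version; in both cases the arguments are k = 7, 4000 rows, 8 columns, and the data file input.txt.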

Run result:

[screenshot: output of the serial run]

3. The Parallel Implementation

The parallelization consists of computing the distances from each test sample to the N training samples in parallel. Done serially, this step has time complexity O(N*D); done in parallel, with one thread handling each training sample, it is O(D), where D is the dimensionality of the data.
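Concretely, with the 3600 training samples and D = 8 used here, classifying one test sample serially takes 3600 × 8 = 28,800 difference-square-accumulate steps, whereas in the parallel version each of the 3600 threads performs only the 8 steps for its own training sample.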

KNN.cu:


```cpp
#include <iostream>
#include <fstream>
#include <string>
#include <map>
#include <vector>
#include <algorithm>
#include <cmath>
#include <cstdlib>
using namespace std;

typedef string tLabel;
typedef float tData;
typedef pair<int,double> PAIR;
const int MaxColLen = 10;
const int MaxRowLen = 10010;
const int test_data_num = 400;
ifstream fin;

class KNN
{
private:
    tData dataSet[MaxRowLen][MaxColLen];
    tLabel labels[MaxRowLen];
    tData testData[MaxColLen];
    //the training rows must be contiguous so cudaMemcpy2D can copy them
    tData trainingData[3600][8];
    int rowLen;
    int colLen;
    int k;
    map<int,double> map_index_dis;
    map<tLabel,int> map_label_freq;
    double get_distance(tData *d1,tData *d2);
public:
    KNN(int k , int rowLen , int colLen , char *filename);
    void get_all_distance();
    tLabel get_max_freq_label();
    void auto_norm_data();
    void get_error_rate();
    void get_training_data();
    //sort the (index,distance) pairs by distance
    struct CmpByValue
    {
        bool operator() (const PAIR& lhs,const PAIR& rhs)
        {
            return lhs.second < rhs.second;
        }
    };
    ~KNN();
};

KNN::~KNN()
{
    fin.close();
    map_index_dis.clear();
    map_label_freq.clear();
}

KNN::KNN(int k , int row ,int col , char *filename)
{
    this->rowLen = row;
    this->colLen = col;
    this->k = k;
    fin.open(filename);
    if( !fin )
    {
        cout<<"can not open the file"<<endl;
        exit(0);
    }
    for(int i=0;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            fin>>dataSet[i][j];
        }
        fin>>labels[i];
    }
}

//copy the rows after the test rows into the contiguous trainingData array
void KNN::get_training_data()
{
    for(int i=test_data_num;i<rowLen;i++)
    {
        for(int j=0;j<colLen;j++)
        {
            trainingData[i-test_data_num][j] = dataSet[i][j];
        }
    }
}

void KNN::get_error_rate()
{
    int i,j,count = 0;
    tLabel label;
    cout<<"the test data number is : "<<test_data_num<<endl;
    get_training_data();
    //get testing data and calculate
    for(i=0;i<test_data_num;i++)
    {
        for(j=0;j<colLen;j++)
        {
            testData[j] = dataSet[i][j];
        }
        get_all_distance();
        label = get_max_freq_label();
        if( label!=labels[i] )
            count++;
        map_index_dis.clear();
        map_label_freq.clear();
    }
    cout<<"the error rate is = "<<(double)count/(double)test_data_num<<endl;
}

//kernel: block tid computes the squared distance between training row tid
//and the current test sample
__global__ void cal_dis(tData *train_data,tData *test_data,tData *dis,size_t pitch,int N,int D)
{
    int tid = blockIdx.x;
    if(tid<N)
    {
        tData temp = 0;
        tData sum = 0;
        for(int i=0;i<D;i++)
        {
            //walk along row tid of the pitched training array
            temp = *( (tData*)( (char*)train_data+tid*pitch )+i ) - test_data[i];
            sum += temp * temp;
        }
        dis[tid] = sum;
    }
}

//calculate the distances to all training samples in parallel
void KNN::get_all_distance()
{
    int height = rowLen - test_data_num;
    tData *distance = new tData[height];
    tData *d_train_data,*d_test_data,*d_dis;
    size_t pitch_d;
    size_t pitch_h = colLen * sizeof(tData);
    //allocate memory on GPU
    cudaMallocPitch( (void**)&d_train_data,&pitch_d,colLen*sizeof(tData),height );
    cudaMalloc( (void**)&d_test_data,colLen*sizeof(tData) );
    cudaMalloc( (void**)&d_dis,height*sizeof(tData) );
    cudaMemset( d_train_data,0,height*colLen*sizeof(tData) );
    cudaMemset( d_test_data,0,colLen*sizeof(tData) );
    cudaMemset( d_dis,0,height*sizeof(tData) );
    //copy training and testing data from host to device
    cudaMemcpy2D( d_train_data,pitch_d,trainingData,pitch_h,colLen*sizeof(tData),height,cudaMemcpyHostToDevice );
    cudaMemcpy( d_test_data,testData,colLen*sizeof(tData),cudaMemcpyHostToDevice );
    //calculate the distance: one block per training sample
    cal_dis<<<height,1>>>( d_train_data,d_test_data,d_dis,pitch_d,height,colLen );
    //copy distance data from device to host
    cudaMemcpy( distance,d_dis,height*sizeof(tData),cudaMemcpyDeviceToHost );
    int i;
    for( i=0;i<rowLen-test_data_num;i++ )
    {
        map_index_dis[i+test_data_num] = distance[i];
    }
    //release host and device buffers
    delete [] distance;
    cudaFree(d_train_data);
    cudaFree(d_test_data);
    cudaFree(d_dis);
}

tLabel KNN::get_max_freq_label()
{
    //copy the (index,distance) pairs into a vector so they can be sorted
    vector<PAIR> vec_index_dis( map_index_dis.begin(),map_index_dis.end() );
    sort(vec_index_dis.begin(),vec_index_dis.end(),CmpByValue());
    for(int i=0;i<k;i++)
    {
        /*
        cout<<"the index = "<<vec_index_dis[i].first<<" the distance = "<<vec_index_dis[i].second<<" the label = "<<labels[ vec_index_dis[i].first ]<<" the coordinate ( ";
        int j;
        for(j=0;j<colLen-1;j++)
        {
            cout<<dataSet[ vec_index_dis[i].first ][j]<<",";
        }
        cout<<dataSet[ vec_index_dis[i].first ][j]<<" )"<<endl;
        */
        map_label_freq[ labels[ vec_index_dis[i].first ] ]++;
    }
    //find the label that occurs most often among the k nearest neighbours
    map<tLabel,int>::const_iterator map_it = map_label_freq.begin();
    tLabel label;
    int max_freq = 0;
    while( map_it != map_label_freq.end() )
    {
        if( map_it->second > max_freq )
        {
            max_freq = map_it->second;
            label = map_it->first;
        }
        map_it++;
    }
    cout<<"The test data belongs to the "<<label<<" label"<<endl;
    return label;
}

void KNN::auto_norm_data()
{
    tData maxa[colLen];
    tData mina[colLen];
    tData range[colLen];
    int i,j;
    for(i=0;i<colLen;i++)
    {
        maxa[i] = max(dataSet[0][i],dataSet[1][i]);
        mina[i] = min(dataSet[0][i],dataSet[1][i]);
    }
    for(i=2;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            if( dataSet[i][j]>maxa[j] )
            {
                maxa[j] = dataSet[i][j];
            }
            else if( dataSet[i][j]<mina[j] )
            {
                mina[j] = dataSet[i][j];
            }
        }
    }
    for(i=0;i<colLen;i++)
    {
        range[i] = maxa[i] - mina[i];
        //normalize the test data set
        testData[i] = ( testData[i] - mina[i] )/range[i];
    }
    //normalize the training data set
    for(i=0;i<rowLen;i++)
    {
        for(j=0;j<colLen;j++)
        {
            dataSet[i][j] = ( dataSet[i][j] - mina[j] )/range[j];
        }
    }
}

int main(int argc , char** argv)
{
    int k,row,col;
    char *filename;
    if( argc!=5 )
    {
        cout<<"The input should be like this : ./a.out k row col filename"<<endl;
        exit(1);
    }
    k = atoi(argv[1]);
    row = atoi(argv[2]);
    col = atoi(argv[3]);
    filename = argv[4];
    KNN knn(k,row,col,filename);
    knn.auto_norm_data();
    knn.get_error_rate();
    return 0;
}
```
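The listing above ignores the return codes of the CUDA API calls, so a failed allocation or copy would go unnoticed. A minimal error-checking helper, given here as a sketch rather than as part of the original code, could look like this:

```cpp
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Sketch only: wrap each CUDA runtime call to catch failures early.
#define CUDA_CHECK(call)                                              \
    do {                                                              \
        cudaError_t err = (call);                                     \
        if (err != cudaSuccess) {                                     \
            fprintf(stderr, "CUDA error %s at %s:%d\n",               \
                    cudaGetErrorString(err), __FILE__, __LINE__);     \
            exit(1);                                                  \
        }                                                             \
    } while (0)

// usage: CUDA_CHECK( cudaMalloc((void**)&d_dis, height*sizeof(tData)) );
```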

Run result:

[screenshot: output of the CUDA run]

Because of the memory-allocation issue mentioned in the earlier article, the training data trainingData has to be allocated as a statically sized array, which is not very convenient.
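One possible way around the fixed-size array, sketched here as an assumption rather than taken from the post, is a contiguous heap buffer sized at run time; it still satisfies cudaMemcpy2D's requirement of a contiguous row-major source:

```cpp
#include <vector>
#include <cuda_runtime.h>

typedef float tData;

// Sketch: replace the static trainingData[3600][8] with a heap buffer;
// element (i, j) lives at buf[i * colLen + j].
void copy_training_data(tData *d_train_data, size_t pitch_d,
                        const std::vector<tData> &buf,
                        int height, int colLen)
{
    cudaMemcpy2D(d_train_data, pitch_d,
                 buf.data(), colLen * sizeof(tData),  // host pitch
                 colLen * sizeof(tData), height,      // width in bytes, rows
                 cudaMemcpyHostToDevice);
}
```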

As you can see, with identical test and training data sets the serial and parallel versions produce exactly the same results. The data set is small, so no timing comparison was made. One remaining improvement: load all of testData into device memory at once instead of one sample at a time, which would cut down the number of times the training data is copied to the device and improve efficiency.
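As a sketch of that improvement (one possible shape, assumed rather than taken from the post), the whole test set can be copied to the device once and a 2D grid can compute every test-to-training distance in a single launch:

```cpp
typedef float tData;

// Sketch: one launch computes the squared distance between every
// (test, training) pair. train is N x D and test is M x D, both flat
// row-major device arrays; dis is the M x N result matrix.
__global__ void cal_dis_all(const tData *train, const tData *test,
                            tData *dis, int N, int M, int D)
{
    int train_id = blockIdx.x;   // which training sample
    int test_id  = blockIdx.y;   // which test sample
    if (train_id < N && test_id < M)
    {
        tData sum = 0;
        for (int i = 0; i < D; i++)
        {
            tData diff = train[train_id * D + i] - test[test_id * D + i];
            sum += diff * diff;
        }
        dis[test_id * N + train_id] = sum;  // squared distance, as in cal_dis
    }
}
// launch: cal_dis_all<<<dim3(N, M), 1>>>(d_train, d_test, d_dis, N, M, D);
```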

Author: 忆之独秀

Email: leaguenew@qq.com

Original post: http://blog.csdn.net/lavorange/article/details/42172451
