SIde-Channel Analysis toolKit (SICAK)
Software toolkit for side-channel analysis
All Classes Files Functions Variables Enumerations Modules Pages
oclcpaengine.hpp
Go to the documentation of this file.
1 /*
2 * SICAK - SIde-Channel Analysis toolKit
3 * Copyright (C) 2018 Petr Socha, FIT, CTU in Prague
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18 
29 #ifndef OCLCPAENGINE_HPP
30 #define OCLCPAENGINE_HPP
31 
#include <cstddef>
#include <string>

#include "types_power.hpp"
#include "types_stat.hpp"
#include "exceptions.hpp"
#include "oclengine.hpp"
36 
43 template<class Tc, class Tt, class Tp>
44 class OclCpaEngine : public OclEngine<Tc> {
45 
46 protected:
47 
48  static const char * m_programCode;
49  unsigned int m_samplesPerTrace;
50  unsigned int m_noOfCandidates;
51  unsigned int m_noOfTraces;
52  bool m_compiled;
53 
54  /* ocl device buffers */
55  // input data
56  cl_mem m_predictions_mem;
57  cl_mem m_traces_mem;
58  // CPA context variables
59  cl_mem m_predsAvg_mem;
60  cl_mem m_predsMSum_mem;
61  cl_mem m_tracesAvg_mem;
62  cl_mem m_tracesMSum_mem;
63  cl_mem m_predsTracesCSum_mem;
64 
65  /* ocl program and kernels */
66  cl_program m_program;
67  cl_kernel m_kernel_computeTracesAvgMSum;
68  cl_kernel m_kernel_computePredsAvgMSum;
69  cl_kernel m_kernel_computeCSum;
70 
71 public:
72 
74  OclCpaEngine(unsigned int platform, unsigned int device, unsigned int samplesPerTrace, unsigned int noOfCandidates, unsigned int noOfTraces);
75 
76  virtual ~OclCpaEngine();
77 
79  void buildProgram();
81  void loadPredictionsToDevice(const PowerPredictions<Tp> & pp, bool blocking = false);
83  void loadTracesToDevice(const PowerTraces<Tt> & pt, bool blocking = false);
85  void compute(UnivariateContext<Tc> & context, unsigned int sliceSize);
86 
87 };
88 
89 
90 template<class Tc, class Tt, class Tp>
91 OclCpaEngine<Tc, Tt, Tp>::OclCpaEngine(unsigned int platform, unsigned int device, unsigned int samplesPerTrace, unsigned int noOfCandidates, unsigned int noOfTraces)
92  : OclEngine<Tc>(platform, device), m_samplesPerTrace(samplesPerTrace), m_noOfCandidates(noOfCandidates), m_noOfTraces(noOfTraces), m_compiled(false) {
93 
94  cl_int ret;
95 
96  // OclEngine provides with a working context and queue; so just acquire the memory buffers
97  m_predictions_mem = clCreateBuffer(this->m_context, CL_MEM_READ_ONLY, noOfCandidates * noOfTraces * sizeof(Tp), NULL, &ret);
98  if (ret) {
99  throw RuntimeException("Couldn't allocate a data buffer on the device", ret);
100  }
101 
102  m_traces_mem = clCreateBuffer(this->m_context, CL_MEM_READ_ONLY, samplesPerTrace * noOfTraces * sizeof(Tt), NULL, &ret);
103  if (ret) {
104  clReleaseMemObject(m_predictions_mem);
105  throw RuntimeException("Couldn't allocate a data buffer on the device", ret);
106  }
107 
108  m_predsAvg_mem = clCreateBuffer(this->m_context, CL_MEM_READ_WRITE, noOfCandidates * sizeof(Tc), NULL, &ret);
109  if (ret) {
110  clReleaseMemObject(m_predictions_mem);
111  clReleaseMemObject(m_traces_mem);
112  throw RuntimeException("Couldn't allocate a working context buffer on the device", ret);
113  }
114 
115  m_predsMSum_mem = clCreateBuffer(this->m_context, CL_MEM_READ_WRITE, noOfCandidates * sizeof(Tc), NULL, &ret);
116  if (ret) {
117  clReleaseMemObject(m_predictions_mem);
118  clReleaseMemObject(m_traces_mem);
119  clReleaseMemObject(m_predsAvg_mem);
120  throw RuntimeException("Couldn't allocate a working context buffer on the device", ret);
121  }
122 
123  m_tracesAvg_mem = clCreateBuffer(this->m_context, CL_MEM_READ_WRITE, samplesPerTrace * sizeof(Tc), NULL, &ret);
124  if (ret) {
125  clReleaseMemObject(m_predictions_mem);
126  clReleaseMemObject(m_traces_mem);
127  clReleaseMemObject(m_predsAvg_mem);
128  clReleaseMemObject(m_predsMSum_mem);
129  throw RuntimeException("Couldn't allocate a working context buffer on the device", ret);
130  }
131 
132  m_tracesMSum_mem = clCreateBuffer(this->m_context, CL_MEM_READ_WRITE, samplesPerTrace * sizeof(Tc), NULL, &ret);
133  if (ret) {
134  clReleaseMemObject(m_predictions_mem);
135  clReleaseMemObject(m_traces_mem);
136  clReleaseMemObject(m_predsAvg_mem);
137  clReleaseMemObject(m_predsMSum_mem);
138  clReleaseMemObject(m_tracesAvg_mem);
139  throw RuntimeException("Couldn't allocate a working context buffer on the device", ret);
140  }
141 
142  m_predsTracesCSum_mem = clCreateBuffer(this->m_context, CL_MEM_READ_WRITE, noOfCandidates * samplesPerTrace * sizeof(Tc), NULL, &ret);
143  if (ret) {
144  clReleaseMemObject(m_predictions_mem);
145  clReleaseMemObject(m_traces_mem);
146  clReleaseMemObject(m_predsAvg_mem);
147  clReleaseMemObject(m_predsMSum_mem);
148  clReleaseMemObject(m_tracesAvg_mem);
149  clReleaseMemObject(m_tracesMSum_mem);
150  throw RuntimeException("Couldn't allocate a working context buffer on the device", ret);
151  }
152 
153 }
154 
155 
156 template<class Tc, class Tt, class Tp>
158 
159  if (m_compiled) {
160  clReleaseKernel(m_kernel_computeCSum);
161  clReleaseKernel(m_kernel_computePredsAvgMSum);
162  clReleaseKernel(m_kernel_computeTracesAvgMSum);
163  clReleaseProgram(m_program);
164  }
165 
166  clReleaseMemObject(m_predictions_mem);
167  clReleaseMemObject(m_traces_mem);
168  clReleaseMemObject(m_predsAvg_mem);
169  clReleaseMemObject(m_predsMSum_mem);
170  clReleaseMemObject(m_tracesAvg_mem);
171  clReleaseMemObject(m_tracesMSum_mem);
172  clReleaseMemObject(m_predsTracesCSum_mem);
173 
174 }
175 
176 
177 template<class Tc, class Tt, class Tp>
179 
180  if(m_compiled) return;
181 
182  cl_int ret;
183 
184  Tc dummyTc;
185  Tt dummyTt;
186  Tp dummyTp;
187 
188  std::string code("");
189 
190  // get the program together first
191  if (!(this->getTypeName(dummyTc)).compare("double") || !(this->getTypeName(dummyTt)).compare("double") || !(this->getTypeName(dummyTp)).compare("double")) {
192  code.append("#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
193  }
194 
195  code.append("typedef ").append(this->getTypeName(dummyTc)).append(" Tc;\n");
196  code.append("typedef ").append(this->getTypeName(dummyTt)).append(" Tt;\n");
197  code.append("typedef ").append(this->getTypeName(dummyTp)).append(" Tp;\n");
198 
199  code.append(m_programCode);
200 
201  // no changes to code from now on
202  const char * codePtr = code.c_str(); // clCreateProgramWithSource requires **; & operator requires l-value
203  const size_t codeSize = code.size();
204 
205  m_program = clCreateProgramWithSource(this->m_context, 1, (const char **)&codePtr, (const size_t *)&codeSize, &ret);
206  if (ret) throw RuntimeException("Couldn't create the ocl program from source", ret);
207 
208  // build the ocl program
209  ret = clBuildProgram(m_program, 1, &((this->m_devices)[this->m_device]), NULL, NULL, NULL);
210  if (ret) {
211  clReleaseProgram(m_program);
212  throw RuntimeException("Couldn't build the ocl program", ret);
213  }
214 
215  // create the kernels
216  m_kernel_computeTracesAvgMSum = clCreateKernel(m_program, "computeTracesAvgMSum", &ret);
217  if (ret) {
218  clReleaseProgram(m_program);
219  throw RuntimeException("Couldn't create a kernel", ret);
220  }
221 
222  m_kernel_computePredsAvgMSum = clCreateKernel(m_program, "computePredsAvgMSum", &ret);
223  if (ret) {
224  clReleaseKernel(m_kernel_computeTracesAvgMSum);
225  clReleaseProgram(m_program);
226  throw RuntimeException("Couldn't create a kernel", ret);
227  }
228 
229  m_kernel_computeCSum = clCreateKernel(m_program, "computeCSum", &ret);
230  if (ret) {
231  clReleaseKernel(m_kernel_computePredsAvgMSum);
232  clReleaseKernel(m_kernel_computeTracesAvgMSum);
233  clReleaseProgram(m_program);
234  throw RuntimeException("Couldn't create a kernel", ret);
235  }
236 
237  m_compiled = true; // since now, the destructor takes care of the program and kernels structs
238 
239  // set kernel args
240  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 0, sizeof(cl_mem), (void *)&m_traces_mem);
241  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
242 
243  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 1, sizeof(cl_mem), (void *)&m_tracesAvg_mem);
244  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
245 
246  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 2, sizeof(cl_mem), (void *)&m_tracesMSum_mem);
247  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
248 
249  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 3, sizeof(unsigned int), (void *)&m_samplesPerTrace);
250  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
251 
252  // set kernel args
253  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 0, sizeof(cl_mem), (void *)&m_predictions_mem);
254  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
255 
256  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 1, sizeof(cl_mem), (void *)&m_predsAvg_mem);
257  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
258 
259  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 2, sizeof(cl_mem), (void *)&m_predsMSum_mem);
260  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
261 
262  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 3, sizeof(unsigned int), (void *)&m_noOfCandidates);
263  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
264 
265  // set kernel args
266  ret = clSetKernelArg(m_kernel_computeCSum, 0, sizeof(cl_mem), (void *)&m_traces_mem);
267  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
268 
269  ret = clSetKernelArg(m_kernel_computeCSum, 1, sizeof(cl_mem), (void *)&m_predictions_mem);
270  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
271 
272  ret = clSetKernelArg(m_kernel_computeCSum, 2, sizeof(cl_mem), (void *)&m_tracesAvg_mem);
273  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
274 
275  ret = clSetKernelArg(m_kernel_computeCSum, 3, sizeof(cl_mem), (void *)&m_predsAvg_mem);
276  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
277 
278  ret = clSetKernelArg(m_kernel_computeCSum, 4, sizeof(cl_mem), (void *)&m_predsTracesCSum_mem);
279  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
280 
281  ret = clSetKernelArg(m_kernel_computeCSum, 5, sizeof(unsigned int), (void *)&m_samplesPerTrace);
282  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
283 
284  ret = clSetKernelArg(m_kernel_computeCSum, 6, sizeof(unsigned int), (void *)&m_noOfCandidates);
285  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
286 
287 }
288 
289 
290 template<class Tc, class Tt, class Tp>
292 
293  if (m_noOfTraces * m_noOfCandidates * sizeof(Tp) != pp.size())
294  throw RuntimeException("Number of traces and/or number of candidates conflicts with values set within construction of the ocl engine");
295 
296  cl_int ret = clEnqueueWriteBuffer(this->m_command_queue, m_predictions_mem, blocking ? CL_TRUE : CL_FALSE, 0, pp.size(), pp.data(), 0, NULL, NULL);
297  if (ret) throw RuntimeException("Couldn't enqueue a data transmit to the device", ret);
298 
299 }
300 
301 
302 template<class Tc, class Tt, class Tp>
304 
305  if (m_noOfTraces * m_samplesPerTrace * sizeof(Tt) != pt.size())
306  throw RuntimeException("Number of traces and/or number of samples per trace conflicts with values set within construction of the ocl engine");
307 
308  cl_int ret = clEnqueueWriteBuffer(this->m_command_queue, m_traces_mem, blocking ? CL_TRUE : CL_FALSE, 0, pt.size(), pt.data(), 0, NULL, NULL);
309  if (ret) throw RuntimeException("Couldn't enqueue a data transmit to the device", ret);
310 
311 }
312 
313 
314 template<class Tc, class Tt, class Tp>
315 void OclCpaEngine<Tc, Tt, Tp>::compute(UnivariateContext<Tc> & corrContext, unsigned int sliceSize) {
316 
317  corrContext.init(m_samplesPerTrace, m_noOfCandidates, 1, 2, 1);
318  cl_int ret;
319  unsigned int noOfSlices = m_noOfTraces / sliceSize;
320  unsigned int remaindingSliceSize = m_noOfTraces - noOfSlices * sliceSize;
321  unsigned int offset;
322 
323  // first, compute the Avgs and MSums (variance) of the traces
324  size_t traces_global_item_size = (((m_samplesPerTrace - 1) / 64) + 1) * 64; // divide the work by 64
325  size_t traces_local_item_size = 64;
326 
327  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 4, sizeof(unsigned int), (void *)&sliceSize);
328  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
329 
330  for (unsigned int i = 0; i < noOfSlices; i++) {
331 
332  offset = i * sliceSize;
333 
334  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 5, sizeof(unsigned int), (void *)&offset);
335  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
336 
337  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computeTracesAvgMSum, 1, NULL, &traces_global_item_size, &traces_local_item_size, 0, NULL, NULL);
338  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
339 
340  ret = clFinish(this->m_command_queue);
341  if (ret) throw RuntimeException("Error while processing the queue", ret);
342 
343  }
344 
345  offset = noOfSlices * sliceSize;
346 
347  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 4, sizeof(unsigned int), (void *)&remaindingSliceSize);
348  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
349 
350  ret = clSetKernelArg(m_kernel_computeTracesAvgMSum, 5, sizeof(unsigned int), (void *)&offset);
351  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
352 
353  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computeTracesAvgMSum, 1, NULL, &traces_global_item_size, &traces_local_item_size, 0, NULL, NULL);
354  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
355 
356  ret = clFinish(this->m_command_queue);
357  if (ret) throw RuntimeException("Error while processing the queue", ret);
358 
359 
360  // also Avgs and MSums (variance) of the predictions
361  size_t preds_global_item_size = (((m_noOfCandidates - 1) / 64) + 1) * 64; // divide the work by 64
362  size_t preds_local_item_size = 64;
363 
364  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 4, sizeof(unsigned int), (void *)&sliceSize);
365  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
366 
367  for (unsigned int i = 0; i < noOfSlices; i++) {
368 
369  offset = i * sliceSize;
370 
371  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 5, sizeof(unsigned int), (void *)&offset);
372  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
373 
374  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computePredsAvgMSum, 1, NULL, &preds_global_item_size, &preds_local_item_size, 0, NULL, NULL);
375  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
376 
377  ret = clFinish(this->m_command_queue);
378  if (ret) throw RuntimeException("Error while processing the queue", ret);
379 
380  }
381 
382  offset = noOfSlices * sliceSize;
383 
384  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 4, sizeof(unsigned int), (void *)&remaindingSliceSize);
385  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
386 
387  ret = clSetKernelArg(m_kernel_computePredsAvgMSum, 5, sizeof(unsigned int), (void *)&offset);
388  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
389 
390  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computePredsAvgMSum, 1, NULL, &preds_global_item_size, &preds_local_item_size, 0, NULL, NULL);
391  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
392 
393  ret = clFinish(this->m_command_queue);
394  if (ret) throw RuntimeException("Error while processing the queue", ret);
395 
396 
397  // finally, compute the CSums (covariance) matrix
398  size_t csum_global_item_size[2];
399  csum_global_item_size[0] = (((m_samplesPerTrace - 1) / 16) + 1) * 16; // divide the work by 16x16
400  csum_global_item_size[1] = (((m_noOfCandidates - 1) / 16) + 1) * 16;
401  size_t csum_local_item_size[] = { 16, 16 };
402 
403  ret = clSetKernelArg(m_kernel_computeCSum, 7, sizeof(unsigned int), (void *)&sliceSize);
404  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
405 
406  for (unsigned int i = 0; i < noOfSlices; i++) {
407 
408  offset = i * sliceSize;
409 
410  ret = clSetKernelArg(m_kernel_computeCSum, 8, sizeof(unsigned int), (void *)&offset);
411  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
412 
413  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computeCSum, 2, NULL, csum_global_item_size, csum_local_item_size, 0, NULL, NULL);
414  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
415 
416  ret = clFinish(this->m_command_queue);
417  if (ret) throw RuntimeException("Error while processing the queue", ret);
418 
419  }
420 
421  offset = noOfSlices * sliceSize;
422 
423  ret = clSetKernelArg(m_kernel_computeCSum, 7, sizeof(unsigned int), (void *)&remaindingSliceSize);
424  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
425 
426  ret = clSetKernelArg(m_kernel_computeCSum, 8, sizeof(unsigned int), (void *)&offset);
427  if (ret) throw RuntimeException("Couldn't set kernel argument", ret);
428 
429  ret = clEnqueueNDRangeKernel(this->m_command_queue, m_kernel_computeCSum, 2, NULL, csum_global_item_size, csum_local_item_size, 0, NULL, NULL);
430  if (ret) throw RuntimeException("Couldn't enqueue a kernel to the device", ret);
431 
432  ret = clFinish(this->m_command_queue);
433  if (ret) throw RuntimeException("Error while processing the queue", ret);
434 
435 
436  // read the data back from the device
437  ret = clEnqueueReadBuffer(this->m_command_queue, m_predsAvg_mem, CL_TRUE, 0, corrContext.p2M(1).size(), corrContext.p2M(1).data(), 0, NULL, NULL);
438  if (ret) throw RuntimeException("Couldn't enqueue a data transmit from the device", ret);
439 
440  ret = clEnqueueReadBuffer(this->m_command_queue, m_predsMSum_mem, CL_TRUE, 0, corrContext.p2CS(2).size(), corrContext.p2CS(2).data(), 0, NULL, NULL);
441  if (ret) throw RuntimeException("Couldn't enqueue a data transmit from the device", ret);
442 
443  ret = clEnqueueReadBuffer(this->m_command_queue, m_tracesAvg_mem, CL_TRUE, 0, corrContext.p1M(1).size(), corrContext.p1M(1).data(), 0, NULL, NULL);
444  if (ret) throw RuntimeException("Couldn't enqueue a data transmit from the device", ret);
445 
446  ret = clEnqueueReadBuffer(this->m_command_queue, m_tracesMSum_mem, CL_TRUE, 0, corrContext.p1CS(2).size(), corrContext.p1CS(2).data(), 0, NULL, NULL);
447  if (ret) throw RuntimeException("Couldn't enqueue a data transmit from the device", ret);
448 
449  ret = clEnqueueReadBuffer(this->m_command_queue, m_predsTracesCSum_mem, CL_TRUE, 0, corrContext.p12ACS(1).size(), corrContext.p12ACS(1).data(), 0, NULL, NULL);
450  if (ret) throw RuntimeException("Couldn't enqueue a data transmit from the device", ret);
451 
452  ret = clFinish(this->m_command_queue);
453  if (ret) throw RuntimeException("Error while processing the queue", ret);
454 
455  corrContext.p1Card() = m_noOfTraces;
456  corrContext.p2Card() = corrContext.p1Card();
457 
458 }
459 
460 
461 template<class Tc, class Tt, class Tp>
462 const char * OclCpaEngine<Tc, Tt, Tp>::m_programCode = "\n\
463 \
464  __kernel void computeCSum(__global const Tt *traces, __global const Tp *predictions, __global const Tc *tracesAvg, __global const Tc *predsAvg, __global Tc *CSums, unsigned int samplesPerTrace, unsigned int noOfCandidates, unsigned int noOfTraces, unsigned int traceOffset) { \
465 \
466  unsigned int sample = get_global_id(0); \
467  unsigned int candidate = get_global_id(1);\
468 \
469  Tc localTracesAvg = (sample < samplesPerTrace) ? tracesAvg[sample] : (Tc)0;\
470  Tc localPredsAvg = (candidate < noOfCandidates) ? predsAvg[candidate] : (Tc)0;\
471  Tc localCSum = ((traceOffset > 0) && (sample < samplesPerTrace) && (candidate < noOfCandidates)) ? CSums[candidate * samplesPerTrace + sample] : (Tc)0;\
472 \
473  Tc val1;\
474  Tc val2;\
475 \
476  for (int trace = traceOffset; trace < (traceOffset + noOfTraces); trace++) {\
477 \
478  val1 = (sample < samplesPerTrace) ? traces[trace * samplesPerTrace + sample] : (Tc)0;\
479  val2 = (candidate < noOfCandidates) ? predictions[trace * noOfCandidates + candidate] : (Tc)0;\
480 \
481  localCSum += (val1 - localTracesAvg) * (val2 - localPredsAvg);\
482 \
483  barrier(CLK_LOCAL_MEM_FENCE);\
484 \
485  }\
486 \
487  if ((sample < samplesPerTrace) && (candidate < noOfCandidates)) {\
488 \
489  CSums[candidate * samplesPerTrace + sample] = localCSum;\
490 \
491  }\
492 \
493  }\
494 \
495 \
496  __kernel void computeTracesAvgMSum(__global const Tt *traces, __global Tc *avgs, __global Tc *msums, unsigned int samplesPerTrace, unsigned int noOfTraces, unsigned int traceOffset) {\
497 \
498  unsigned int sample = get_global_id(0);\
499 \
500  if (sample >= samplesPerTrace)\
501  return;\
502 \
503  unsigned int trace = traceOffset;\
504  Tc ctrace = (Tc)traceOffset;\
505 \
506  Tc localAvg = (traceOffset > 0) ? avgs[sample] : (Tc)0;\
507  Tc localMSum = (traceOffset > 0) ? msums[sample] : (Tc)0;\
508  Tc val;\
509 \
510  Tc temp;\
511 \
512  for (; trace < (traceOffset + noOfTraces); trace++) {\
513 \
514  val = traces[trace * samplesPerTrace + sample];\
515 \
516  temp = val - localAvg;\
517  ctrace = ctrace + (Tc)1;\
518  localAvg += temp / ctrace;\
519  localMSum += temp * (val - localAvg);\
520 \
521  }\
522 \
523  avgs[sample] = localAvg;\
524  msums[sample] = localMSum;\
525 \
526  }\
527 \
528 \
529  __kernel void computePredsAvgMSum(__global const Tp *preds, __global Tc *avgs, __global Tc *msums, unsigned int noOfCandidates, unsigned int noOfTraces, unsigned int traceOffset) {\
530 \
531  unsigned int candidate = get_global_id(0);\
532 \
533  if (candidate >= noOfCandidates)\
534  return;\
535 \
536  unsigned int trace = traceOffset;\
537  Tc ctrace = (Tc)traceOffset;\
538 \
539  Tc localAvg = (traceOffset > 0) ? avgs[candidate] : (Tc)0;\
540  Tc localMSum = (traceOffset > 0) ? msums[candidate] : (Tc)0;\
541  Tc val;\
542 \
543  Tc temp;\
544 \
545  for (; trace < (traceOffset + noOfTraces); trace++) {\
546 \
547  val = preds[trace * noOfCandidates + candidate];\
548 \
549  temp = val - localAvg;\
550  ctrace = ctrace + (Tc)1;\
551  localAvg += temp / ctrace;\
552  localMSum += temp * (val - localAvg);\
553 \
554  }\
555 \
556  avgs[candidate] = localAvg;\
557  msums[candidate] = localMSum;\
558 \
559  } \n";
560 
561 
562 
563 #endif /* OCLCPAENGINE_HPP */
OpenCL base class template for SICAK plugins.
virtual T * data()
Returns a pointer to the contained data.
Definition: types_basic.hpp:343
A class representing a Two-population Univariate Moment-based statistical context.
Definition: types_stat.hpp:43
virtual Matrix< T > & p12ACS(size_t order)
Adjusted central moment sum both populations, order 1 upto acsOrder.
Definition: types_stat.hpp:220
This header file contains exceptions.
OclCpaEngine(unsigned int platform, unsigned int device, unsigned int samplesPerTrace, unsigned int noOfCandidates, unsigned int noOfTraces)
Initialize given platform and device, create command queue and allocate device memory buffers.
Definition: oclcpaengine.hpp:91
OpenCL base class template used in other SICAK plugins.
Definition: oclengine.hpp:51
virtual size_t & p1Card()
Cardinality of the first population.
Definition: types_stat.hpp:190
Definition: oclcpaengine.hpp:44
virtual Vector< T > & p2CS(size_t order)
Central moment sum of the second population, order 2 upto csOrder.
Definition: types_stat.hpp:215
This header file contains class templates of power traces and power consumption containers.
A class representing a Matrix with 'noOfTraces' power predictions, with 'noOfCandidates' key candidates each.
Definition: types_power.hpp:82
void loadPredictionsToDevice(const PowerPredictions< Tp > &pp, bool blocking=false)
Load power predictions from local memory to device buffers.
Definition: oclcpaengine.hpp:291
virtual size_t & p2Card()
Cardinality of the second population.
Definition: types_stat.hpp:195
void loadTracesToDevice(const PowerTraces< Tt > &pt, bool blocking=false)
Load power traces from local memory to device buffers.
Definition: oclcpaengine.hpp:303
virtual Vector< T > & p1CS(size_t order)
Central moment sum of the first population, order 2 upto csOrder.
Definition: types_stat.hpp:210
void buildProgram()
Build the OpenCL kernels.
Definition: oclcpaengine.hpp:178
virtual Vector< T > & p2M(size_t order)
Raw moment of the second population, order 1 upto mOrder.
Definition: types_stat.hpp:205
An exception which cannot be directly influenced by the user, or predicted beforehand.
Definition: exceptions.hpp:76
void compute(UnivariateContext< Tc > &context, unsigned int sliceSize)
Launch the computation kernels, dividing the work into slices of sliceSize traces (a long-running GPU kernel is undesirable), and read the results back into the context.
Definition: oclcpaengine.hpp:315
This header file contains class templates of statistical computational contexts.
virtual size_t size() const
Returns the size of the contained data (i.e. length * sizeof(T))
Definition: types_basic.hpp:347
A class representing a Matrix with 'noOfTraces' power traces, with 'samplesPerTrace' samples per power trace.
Definition: types_power.hpp:44
virtual Vector< T > & p1M(size_t order)
Raw moment of the first population, order 1 upto mOrder.
Definition: types_stat.hpp:200