Em cần trợ giúp viết code ở những phần để trống ạ. Em cảm ơn các anh chị đã giúp đỡ, em chẳng hiểu phải làm như thế nào!
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX_MASK_SIZE 10
__constant__ float MASK [MAX_MASK_SIZE];
/*
 * 1D convolution of `input` with the coefficients stored in the constant
 * array MASK, written to `output`.
 *
 * For every element i in [0, input_size):
 *     output[i] = sum over j in [0, mask_size) of input[start + j] * MASK[j]
 * where start = i - mask_size / 2. Neighbours that fall outside the input
 * array are skipped, i.e. they contribute 0 to the sum.
 *
 * output      device buffer receiving input_size results
 * input       device buffer holding input_size values
 * input_size  number of elements in input and output
 * mask_size   number of leading MASK coefficients to use
 *             (the caller must keep this <= MAX_MASK_SIZE)
 *
 * The kernel uses a grid-stride loop, so it produces every output element
 * regardless of how many threads it is launched with.
 */
__global__ void average_kernel (float *output, float *input, int input_size, int mask_size)
{
    /* 1. calculate thread id, and use it as index to output */
    int index = blockIdx.x * blockDim.x + threadIdx.x;

    /* 2. calculate number of threads */
    int num_threads = blockDim.x * gridDim.x;

    /* 3. while index < input_size */
    while (index < input_size)
    {
        /* 4. initialize a running total */
        float total = 0.0f;

        /* 5. calculate start index (window is centred on index) */
        int start = index - mask_size / 2;
        int j;

        /* 6. perform convolution with a for loop */
        for (j = 0; j < mask_size; j++)
        {
            int k = start + j;

            /* Elements beyond either end of the input are treated as 0. */
            if (k >= 0 && k < input_size)
            {
                total += input [k] * MASK [j];
            }
        }

        /* 7. write running total to output */
        output [index] = total;

        /* 8. increment index appropriately: advance by the whole grid */
        index += num_threads;
    }
}
/* Print a diagnostic and terminate the program if a CUDA runtime call failed. */
static void check_cuda (cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf (stderr, "Error: %s failed: %s.\n", what, cudaGetErrorString (status));
        /* Device allocations are released when the process exits. */
        exit (1);
    }
}

/*
 * Run average_kernel on the GPU for a host-side input array.
 *
 * output      host buffer receiving input_size results
 * input       host buffer holding input_size values
 * mask        host array of mask_size coefficients, copied into MASK
 * input_size  number of elements in input and output; nothing is done
 *             when it is <= 0
 * mask_size   number of coefficients; must be in [1, MAX_MASK_SIZE]
 *
 * Any CUDA failure or an invalid mask_size is reported on stderr and
 * terminates the program with exit status 1.
 */
void process_data (float *output, float *input, float *mask, int input_size, int mask_size)
{
    const int threads_per_block = 256;
    size_t bytes;
    int blocks;

    /* 1. declare device memory */
    float *d_input = NULL;
    float *d_output = NULL;

    if (input_size <= 0)
    {
        return;
    }
    /* MASK holds only MAX_MASK_SIZE coefficients; a larger mask would overflow it. */
    if (mask_size < 1 || mask_size > MAX_MASK_SIZE)
    {
        fprintf (stderr, "Error: mask size %d is outside [1, %d].\n", mask_size, MAX_MASK_SIZE);
        exit (1);
    }

    bytes = (size_t) input_size * sizeof (float);

    /* 2. allocate device memory */
    check_cuda (cudaMalloc ((void **) &d_input, bytes), "cudaMalloc (input)");
    check_cuda (cudaMalloc ((void **) &d_output, bytes), "cudaMalloc (output)");

    /* 3. copy input data into device memory */
    check_cuda (cudaMemcpy (d_input, input, bytes, cudaMemcpyHostToDevice),
                "cudaMemcpy (host to device)");

    /* 4. copy mask into constant memory */
    check_cuda (cudaMemcpyToSymbol (MASK, mask, (size_t) mask_size * sizeof (float)),
                "cudaMemcpyToSymbol (MASK)");

    /* 5. invoke kernel: one thread per element, rounded up to whole blocks */
    blocks = (input_size - 1) / threads_per_block + 1;
    average_kernel <<<blocks, threads_per_block>>> (d_output, d_input, input_size, mask_size);
    /* A launch does not return a status; launch errors are fetched explicitly. */
    check_cuda (cudaGetLastError (), "average_kernel launch");

    /* 6. copy output from device memory (waits for the kernel to finish) */
    check_cuda (cudaMemcpy (output, d_output, bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy (device to host)");

    /* 7. deallocate device memory */
    check_cuda (cudaFree (d_input), "cudaFree (input)");
    check_cuda (cudaFree (d_output), "cudaFree (output)");
}
/*
 * Entry point: read an array of floats from <infile>, filter it with a fixed
 * four-coefficient mask via process_data, and write the result to <outfile>.
 *
 * Input format:  an integer count n followed by n floating-point values.
 * Output format: n on the first line, then one result per line with three
 *                decimals.
 *
 * Returns 0 on success and 1 on a usage error, a malformed input file, an
 * allocation failure, or an I/O error.
 */
int main (int argc, char **argv)
{
    FILE *infile = NULL;
    FILE *outfile = NULL;
    float *input = NULL;
    float *output = NULL;
    float mask [] = {0.1f, 0.2f, 0.3f, 0.4f};
    /* Derive the length from the array so it cannot drift from the initializer. */
    int mask_size = (int) (sizeof (mask) / sizeof (mask [0]));
    int status = 1;
    int i;
    int n;

    if (argc < 3)
    {
        fprintf (stderr, "Usage: %s <infile> <outfile>\n", argv [0]);
        return 1;
    }

    infile = fopen (argv [1], "r");
    if (infile == NULL)
    {
        fprintf (stderr, "Error: cannot open input file [%s].\n", argv [1]);
        return 1;
    }

    /* Without this check n would be indeterminate on a malformed file. */
    if (fscanf (infile, "%d", &n) != 1 || n < 0)
    {
        fprintf (stderr, "Error: invalid element count in input file [%s].\n", argv [1]);
        goto cleanup;
    }

    /* The casts are required because nvcc compiles this file as C++. */
    input = (float *) malloc ((size_t) n * sizeof (float));
    output = (float *) malloc ((size_t) n * sizeof (float));
    /* malloc (0) may legitimately return NULL, so only n > 0 is an error. */
    if (n > 0 && (input == NULL || output == NULL))
    {
        fprintf (stderr, "Error: cannot allocate memory for %d elements.\n", n);
        goto cleanup;
    }

    for (i = 0; i < n; i++)
    {
        if (fscanf (infile, "%f", &(input [i])) != 1)
        {
            fprintf (stderr, "Error: cannot read element %d of %d from input file [%s].\n",
                     i, n, argv [1]);
            goto cleanup;
        }
    }
    fclose (infile);
    infile = NULL;

    process_data (output, input, mask, n, mask_size);

    outfile = fopen (argv [2], "w");
    if (outfile == NULL)
    {
        fprintf (stderr, "Error: cannot open output file [%s].\n", argv [2]);
        goto cleanup;
    }
    fprintf (outfile, "%d\n", n);
    for (i = 0; i < n; i++)
    {
        fprintf (outfile, "%.3f\n", output [i]);
    }
    /* Buffered write errors surface at close time, so check it. */
    if (ferror (outfile) || fclose (outfile) != 0)
    {
        fprintf (stderr, "Error: cannot write output file [%s].\n", argv [2]);
        outfile = NULL;
        goto cleanup;
    }
    outfile = NULL;
    status = 0;

cleanup:
    if (outfile != NULL)
    {
        fclose (outfile);
    }
    if (infile != NULL)
    {
        fclose (infile);
    }
    free (input);
    free (output);
    return status;
}