1 | /* |
---|
2 | * $Id: matmul.c 5712 2016-05-27 00:59:29Z skylar $ |
---|
3 | */ |
---|
4 | |
---|
5 | #include "matmul.h" |
---|
6 | |
---|
7 | void usage() { |
---|
8 | fprintf(stderr,"serial -x <m1-cols> -y <m1-rows> -a <m2-cols> -b <m2-rows> [ -p ] \n"); |
---|
9 | fprintf(stderr,"Supply -p if you want matrices printed out\n"); |
---|
10 | } |
---|
11 | |
---|
12 | // Take in an integer c and a message to print on failure |
---|
13 | // Allocate an array of c ints and return it |
---|
14 | int *safe_malloc_int(const unsigned int c,const char *msg) { |
---|
15 | int *m; |
---|
16 | if((m = malloc(c*sizeof(int))) == NULL) { |
---|
17 | fprintf(stderr,"%s failed at %s, line %d: %s\n", |
---|
18 | msg,__FILE__,__LINE__,strerror(errno)); |
---|
19 | exit(EXIT_FAILURE); |
---|
20 | } |
---|
21 | |
---|
22 | return m; |
---|
23 | } |
---|
24 | |
---|
25 | // Take in an unsigned integer c and a message to prunsigned int on failure |
---|
26 | // Allocate an array of c unsigned ints and return it |
---|
27 | unsigned int *safe_malloc_unsigned_int(const unsigned int c,const char *msg) { |
---|
28 | unsigned int *m; |
---|
29 | if((m = malloc(c*sizeof(unsigned int))) == NULL) { |
---|
30 | fprintf(stderr,"%s failed at %s, line %d: %s\n", |
---|
31 | msg,__FILE__,__LINE__,strerror(errno)); |
---|
32 | exit(EXIT_FAILURE); |
---|
33 | } |
---|
34 | |
---|
35 | return m; |
---|
36 | } |
---|
37 | |
---|
38 | // Return an array of random number seeds. Epoch time is used as a base, incremented by thread ID |
---|
39 | unsigned int *init_random_seeds() { |
---|
40 | unsigned int i,*seeds,num_threads; |
---|
41 | time_t t = time(NULL); |
---|
42 | #ifdef _OPENMP |
---|
43 | #pragma omp parallel |
---|
44 | { |
---|
45 | num_threads = omp_get_num_threads(); |
---|
46 | } |
---|
47 | #else |
---|
48 | num_threads = 1; |
---|
49 | #endif |
---|
50 | |
---|
51 | seeds = safe_malloc_unsigned_int(num_threads,"Allocating random seed array"); |
---|
52 | |
---|
53 | #ifdef DEBUG |
---|
54 | fprintf(stderr,"Setting seed for %u threads\n",num_threads); |
---|
55 | #endif |
---|
56 | for(i=0;i<num_threads;i++) { |
---|
57 | // Guarantee different starting seeds for random numbers in each thread |
---|
58 | seeds[i] = t+i; |
---|
59 | #ifdef DEBUG |
---|
60 | fprintf(stderr,"seeds[%d] = %u\n",i,seeds[i]); |
---|
61 | #endif |
---|
62 | } |
---|
63 | |
---|
64 | return seeds; |
---|
65 | } |
---|
66 | |
---|
67 | // Takes in a matrix, and a pointer to an array of random seeds. |
---|
68 | // Sets a random number in each cell |
---|
69 | void init_matrix(struct matrix *m,unsigned int *s) { |
---|
70 | int i,t; |
---|
71 | |
---|
72 | #ifdef _OPENMP |
---|
73 | #pragma omp parallel |
---|
74 | { |
---|
75 | // Initialize t to thread number if using OpenMP |
---|
76 | t = omp_get_thread_num(); |
---|
77 | #pragma omp for schedule(static,1) |
---|
78 | #else |
---|
79 | // When not using OpenMP, only one "thread" will be running |
---|
80 | t = 0; |
---|
81 | #endif |
---|
82 | for(i=0;i<m->rows*m->cols;i++) { |
---|
83 | // Limit matrix values < 10 |
---|
84 | m->matrix[i] = rand_r(&s[t]) % 10; |
---|
85 | } |
---|
86 | #ifdef _OPENMP |
---|
87 | } |
---|
88 | #endif |
---|
89 | } |
---|
90 | |
---|
91 | // Takes in a reference to a matrix and a row/column coordinate |
---|
92 | // Returns the one-dimensional index in the matrix for the coordinate |
---|
93 | unsigned int coord(const struct matrix *m,const unsigned int row,const unsigned int col) { |
---|
94 | return row*m->cols+col; |
---|
95 | } |
---|
96 | |
---|
97 | // Takes in a matrix and prints it to STDERR |
---|
98 | void print_matrix(const struct matrix *m) { |
---|
99 | int row,col; |
---|
100 | |
---|
101 | for(row=0;row<m->rows;row++) { |
---|
102 | for(col=0;col<m->cols;col++) { |
---|
103 | fprintf(stderr,"%d ",m->matrix[coord(m,row,col)]); |
---|
104 | } |
---|
105 | fprintf(stderr,"\n"); |
---|
106 | } |
---|
107 | } |
---|
108 | |
---|
109 | #ifndef _OPENMP |
---|
110 | struct timeval safe_gettimeofday() { |
---|
111 | struct timeval now_t; |
---|
112 | |
---|
113 | if((gettimeofday(&now_t,NULL)) == -1) { |
---|
114 | fprintf(stderr,"gettimeofday failed: %s\n",strerror(errno)); |
---|
115 | exit(EXIT_FAILURE); |
---|
116 | } |
---|
117 | |
---|
118 | return now_t; |
---|
119 | } |
---|
120 | #endif |
---|
121 | |
---|
122 | void matmul( |
---|
123 | struct matrix *m1, |
---|
124 | struct matrix *m2, |
---|
125 | struct matrix *dst_m, |
---|
126 | const unsigned int start_dst_row, // Start generating at this row of the product matrix |
---|
127 | const unsigned int end_dst_row // End generating at this row of the product matrix |
---|
128 | ) { |
---|
129 | unsigned int dst_row,dst_col,i,dst_coord; |
---|
130 | |
---|
131 | // Parallelize outer two loops. Make sure destination coordinate |
---|
132 | // and loop for array calculation have thread-local variables |
---|
133 | #ifdef _OPENMP |
---|
134 | double start_t = omp_get_wtime(); |
---|
135 | #pragma omp parallel for private(dst_coord,i) collapse(2) schedule(static,1) |
---|
136 | #else |
---|
137 | struct timeval start_t = safe_gettimeofday(); |
---|
138 | #endif |
---|
139 | // Process each cell in the destination matrix, and calculate the result |
---|
140 | // based on the two source matrices |
---|
141 | for(dst_row=start_dst_row;dst_row<end_dst_row;dst_row++) { |
---|
142 | for(dst_col=0;dst_col<dst_m->cols;dst_col++) { |
---|
143 | #ifdef DEBUG |
---|
144 | fprintf(stderr,"Calculating (%d,%d)\n",dst_row,dst_col); |
---|
145 | #endif |
---|
146 | // Assign destination coordinates based on the start row offset |
---|
147 | dst_coord = coord(dst_m,(dst_row-start_dst_row),dst_col); |
---|
148 | // Make sure destination matrix is initialized |
---|
149 | dst_m->matrix[dst_coord] = 0; |
---|
150 | /* |
---|
151 | * The number of rows in m1 is guaranteed to be the number |
---|
152 | * of columns in m2, so when we are done processing m1's rows |
---|
153 | * we are also done processing m2's columns |
---|
154 | * This part is not as amenable to parallelization since the |
---|
155 | * multiple threads would have to update the cell in the |
---|
156 | * product matrix at the same time |
---|
157 | */ |
---|
158 | for(i=0;i<m1->rows;i++) { |
---|
159 | /* |
---|
160 | * The destination matrix cell will accumulate pair-wise |
---|
161 | * multiplication results from every cell in the current |
---|
162 | * row in the first matrix, and every cell in the current |
---|
163 | * column in the second matrix |
---|
164 | */ |
---|
165 | dst_m->matrix[dst_coord] += |
---|
166 | m1->matrix[coord(m1,dst_row,i)] |
---|
167 | * m2->matrix[coord(m2,i,dst_col)]; |
---|
168 | } |
---|
169 | #ifdef DEBUG |
---|
170 | fprintf(stderr,"m1(%d,%d) * m2(%d,%d) = dst_m(%d,%d) (%d)\n", |
---|
171 | dst_row,i,i,dst_col,dst_row,dst_col, |
---|
172 | dst_m->matrix[dst_coord] |
---|
173 | ); |
---|
174 | #endif |
---|
175 | } |
---|
176 | } |
---|
177 | #ifdef _OPENMP |
---|
178 | fprintf(stderr,"OpenMP took %f seconds\n",omp_get_wtime()-start_t); |
---|
179 | #else |
---|
180 | struct timeval end_t,run_t; |
---|
181 | end_t = safe_gettimeofday(); |
---|
182 | timersub(&end_t,&start_t,&run_t); |
---|
183 | fprintf(stderr,"Serial took %ld.%ld seconds\n", |
---|
184 | (long int)run_t.tv_sec,(long int)run_t.tv_usec); |
---|
185 | #endif |
---|
186 | } |
---|