1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
#define _USE_MATH_DEFINES // for C++
#define FFTW_ESTIMATE (1U << 6)
#include<math.h>
#include<stdio.h>
# include <cmath>
# include <cstdlib>
# include <iostream>
# include <iomanip>
# include <ctime>
# include <omp.h>
#include "fftw3.h" //instead of this include the file fftw_threads.h
#include <valarray> // interesting stuff
#include <iostream> //for cout..
#include <vector>
# include <omp.h>
#include <cstring>
static const int nx = 16; //256;
static const int ny = 16; // 256;
static const int nyk = ny/2 + 1;
static const int ncomp = 2;
using namespace std;
// g++ U.cpp -lfftw3_threads -lfftw3 -lm -g -fopenmp -o test.out
// g++ U.cpp -lfftw3 -lm -g -fopenmp -o test.out
int main();
void r2cfft(double rArr[], double cArr[][ncomp]);
void print2DPhysArray(double arr[]);
double makeSpatialMesh2D(double dx, double dy, double xarr[], double yarr[]);
int main (void){
int fftw_threads_init(void); // should only be called once and (in your main()function), performs any one-time initialization required to use threads on your system. It returns zeros if succesful and a non-zero if not (error)
// hyperbolic tan paramters
double L = 2*M_PI; //useless
double Lx = 200000., Ly = 80000.; //different
//double Lx = L, Ly = L;
double dx = Lx / nx;
double dy = Ly / ny;
//I.Cs: Parameters for initializing hyperbolic tangent IC
double xg = Lx * (19./24); //xg center of hyperbolic tangent func
double m = 0.5;
double lg = 12000.; //lg is width of hyperbolic tangent func
double o = 1.;
double a = (o-m)/2.; //difference from background?
double c = -xg;
double d = (o + m)/2.; //size of the cloud ??
double a2 = -1*a;
double c2 = - Lx - c;
double d2 = d - m;
double b = abs(((((tanh(log(m/o)/4)) * a) + d) * pow(cosh(log(m/o)/4), 2))/(a * lg));
double bg = 2./lg/lg;
double start_time; // time before parallel region
double run_time; //time
//test
double *ne;
ne = (double*) fftw_malloc(nx*ny*sizeof(double));
fftw_complex *nek;
nek = (fftw_complex*) fftw_malloc(nx*nyk*sizeof(fftw_complex));
memset(nek, 42, nx*nyk* sizeof(fftw_complex));
double *XX;
XX = (double*) fftw_malloc(nx*ny*sizeof(double));
//print2DPhysArray(XX);
double *YY;
YY = (double*) fftw_malloc(nx*ny*sizeof(double));
double kmax = makeSpatialMesh2D(dx, dy, XX, YY); // returns XX, YY, ..
// start a threading loop
for (int nThreads= 1; nThreads<=16; nThreads++){
omp_set_num_threads(nThreads); // asks openmp to create a team of threads
clock_t start_time = clock(); // or use: start_time= omp_get_wtime();
#pragma omp parallel // start parallel region or fork threads
{
#pragma omp single // cause one thread to print number of threads used by the program
printf("num_threads = %d", omp_get_num_threads());
//#pragma omp for reduction(+ : sum) // this is to split up loop iterations among the team threads. It include 2 clauses:1) creates a private variable and 2) cause threads to compute their sums locally and then combine their local sums to a single gloabl value
#pragma omp for // more natural option to parallel execution of for loops: no need for new loop bounds
for (int i = 0; i < nx; i++){
for (int j = 0; j < ny; j++){
ne[j + ny*i] = (a * tanh(b*(XX[j + ny*i] + c)) + d + a2 * tanh(b*(XX[j + ny*i] + c2)) + d2 + .02*cos(4*M_PI * YY[j + ny*i]/Ly) *( exp( - bg * pow(XX[j + ny*i] - xg,2)) + exp(-bg* pow( XX[j + ny*i] - Lx + xg,2) ) ) ) * 1.E11;
//print2DPhysArray(ne);
}
}
} // convert to Fourier space
r2cfft(ne, nek);
clock_t run_time = clock() - start_time; // end time
printf("Compute Time: %f seconds\n",(double) run_time); // %f is for double try 1f
// print ne and nek for test
//print2DPhysArray(ne);
}
fftw_free(ne);
fftw_free(nek);
return 0;
}
void r2cfft(double rArr[], double cArr[][ncomp]){
fftw_plan r2c;
// FFTW Thread parameters
int nbthreads=2;
fftw_init_threads();
fftw_plan_with_nthreads(nbthreads);
//fftwf_init_threads();
//fftwf_plan_with_nthreads(nbthreads);
// void fftw_plan_with_nthreads(int nbthreads); // before calling any plan OR: fftw_plan_with_nthreads(omp_get_max_threads())
#pragma omp critical (FFTW)
r2c = fftw_plan_dft_r2c_2d(nx, ny, &rArr[0], &cArr[0], FFTW_ESTIMATE);
fftw_execute(r2c);
fftw_destroy_plan(r2c);
fftw_cleanup();
// Thread clean
fftw_cleanup_threads();
//fftwf_cleanup_threads();
// you should create/destroy plans only from a single thread, but can safely execute multiple plans in parallel.
}
void print2DPhysArray(double arr[]) {
#pragma omp parallel
#pragma omp for
for (int i = 0; i < nx; i++) {
for (int j = 0 ; j < ny ; j++) {
printf("%+4.2le ",arr[j + ny*i]);
}
printf("\n");
}
}
double makeSpatialMesh2D(double dx, double dy, double xarr[], double yarr[]){
// Make x coordinates. We want them to change as we change the zeroth index and be constant as we change the first index.
#pragma omp parallel
#pragma omp for
for(int i = 0; i< nx; i++){
for(int j = 0; j< ny; j++){
xarr[j + ny*i] = i*dx;
yarr[j + ny*i] = j * dy;
}
}
if (dx < dy){
return 1/(2*dx);
}else {
return 1/(2*dy);
}
}
| |