OK, so I'm trying to improve the performance of the code. I would like to substitute TNT library for another, more efficient (as suggest by Alain Ketterlin). So I tried to use direct pointers to store the data. Unfortunately,the code with TNT runs ~3 times faster than using pointers. How could I change this code to get a better performance over TNT? I've followed the suggestions of Öö Tiib and Alain Ketterlin (put some arrays in the outer loop) and the performance improved 10%~15%, but yet worse than using TNT (

http://math.nist.gov/tnt/overview.html).

Code with 3 tests:

#include "tnt/tnt.h"

#include "TicTac.h"

using namespace std;

using namespace TNT;

inline double getCM(double* array, int m0, int m1, int i, int j, int k) {

return array[i + m0*j + m0*m1*k];

}

inline void setCM(double* array, int m0, int m1, int i, int j, int k, double value) {

array[i + m0*j + m0*m1*k] = value;

}

inline void addCM(double* array, int m0, int m1, int i, int j, int k, double value) {

array[i + m0*j + m0*m1*k] += value;

}

int main() {

int Npml = 10;

int NxTOTAL = 100;

int NyTOTAL = 150;

int NzTOTAL = 200;

int TMAX = 1000;

int jb = NyTOTAL - Npml - 1;

int jyh;

int i, j, k, t;

double curl_h;

/* TNT - TEST 1 */

// Array1D<double> gi3 = Array1D<double>(NxTOTAL, 1.0);

// Array1D<double> cax = Array1D<double>(NxTOTAL, 0.0);

// Array1D<double> cay = Array1D<double>(NyTOTAL, 0.0);

// Array1D<double> caz = Array1D<double>(NzTOTAL, 0.0);

//

// Array3D<double> Hx = Array3D<double>(NxTOTAL,NyTOTAL,NzTOTAL, 0.0);

// Array3D<double> Hy = Array3D<double>(NxTOTAL,NyTOTAL,NzTOTAL, 0.0);

// Array3D<double> Hz = Array3D<double>(NxTOTAL,NyTOTAL,NzTOTAL, 0.0);

//

// Array1D<double> gk2 = Array1D<double>(NzTOTAL, 1.0);

// Array1D<double> gk3 = Array1D<double>(NzTOTAL, 1.0);

//

// Array3D<double> idyh = Array3D<double>(NxTOTAL, Npml, NzTOTAL, 0.0);

//

// Array3D<double> Dy = Array3D<double>(NxTOTAL,NyTOTAL,NzTOTAL, 0.0);

//

// Array1D<double> gi2 = Array1D<double>(NxTOTAL, 1.0);

// Array1D<double> gj1 = Array1D<double>(NyTOTAL, 0.0);

//

// TicTac tt;

// tt.Tic("TNT: ");

// for (t = 0; t < TMAX; t++) {

// for(i = 1; i < NxTOTAL; i++) {

// for(j = jb+1; j < NyTOTAL; j++) {

// jyh = j - jb - 1;

// for(k = 1; k < NzTOTAL; k++)

// {

// curl_h = caz[k]*(Hx[i][j][k] - Hx[i][j][k-1]) -

// cax[i]*(Hz[i][j][k] - Hz[i-1][j][k]);

// idyh[i][jyh][k] = idyh[i][jyh][k] + curl_h;

// Dy[i][j][k] = gi3[i]*gk3[k]*Dy[i][j][k] +

// gi2[i]*gk2[k]*(curl_h + gj1[j]*idyh[i][jyh][k]);

// }

// }

// }

// }

// tt.Tac();

/* C-ARRAY - TEST 2 */

// double* gi3 = new double[NxTOTAL];

// double* cax = new double[NxTOTAL];

// double* cay = new double[NyTOTAL];

// double* caz = new double[NzTOTAL];

//

// double* Hx = new double[NxTOTAL*NyTOTAL*NzTOTAL];

// double* Hy = new double[NxTOTAL*NyTOTAL*NzTOTAL];

// double* Hz = new double[NxTOTAL*NyTOTAL*NzTOTAL];

//

// double* gk2 = new double[NxTOTAL];

// double* gk3 = new double[NxTOTAL];

//

// double* idyh = new double[NxTOTAL*NyTOTAL*NzTOTAL];

// double* Dy = new double[NxTOTAL*NyTOTAL*NzTOTAL];

//

// double* gi2 = new double[NxTOTAL];

// double* gj1 = new double[NxTOTAL];

//

// TicTac tt;

// tt.Tic("Array");

// for (t = 0; t < TMAX; t++) {

// for(i = 1; i < NxTOTAL; i++) {

// for(j = jb+1; j < NyTOTAL; j++) {

// jyh = j - jb - 1;

// for(k = 1; k < NzTOTAL; k++)

// {

// curl_h = caz[k]*(getCM(Hx, NxTOTAL, NyTOTAL, i, j, k) - getCM(Hx, NxTOTAL, NyTOTAL, i, j, k-1)) -

// cax[i]*(getCM(Hz, NxTOTAL, NyTOTAL, i, j, k) - getCM(Hz, NxTOTAL,NyTOTAL, i-1, j, k));

//

// addCM(idyh, NxTOTAL, NyTOTAL, i, jyh, k, curl_h);

//

// double value = gi3[i]*gk3[k]*getCM(Dy, NxTOTAL, NyTOTAL, i, j, k) +

// gi2[i]*gk2[k]*(curl_h + gj1[j]*getCM(idyh, NxTOTAL, NyTOTAL, i, jyh, k));

// setCM(Dy, NxTOTAL, NyTOTAL, i, j, k, value);

//

// }

// }

// }

// }

// tt.Tac();

//

// delete[] gi3;

// delete[] cax;

// delete[] cay;

// delete[] caz;

//

// delete[] Hx;

// delete[] Hy;

// delete[] Hz;

//

// delete[] gk2;

// delete[] gk3;

//

// delete[] idyh;

// delete[] Dy;

//

// delete[] gi2;

// delete[] gj1;

/* C-ARRAY (IMPROVED) - TEST 3*/

double* gi3 = new double[NxTOTAL];

double* cax = new double[NxTOTAL];

double* cay = new double[NyTOTAL];

double* caz = new double[NzTOTAL];

double* Hx = new double[NxTOTAL*NyTOTAL*NzTOTAL];

double* Hy = new double[NxTOTAL*NyTOTAL*NzTOTAL];

double* Hz = new double[NxTOTAL*NyTOTAL*NzTOTAL];

double* gk2 = new double[NxTOTAL];

double* gk3 = new double[NxTOTAL];

double* idyh = new double[NxTOTAL*NyTOTAL*NzTOTAL];

double* Dy = new double[NxTOTAL*NyTOTAL*NzTOTAL];

double* gi2 = new double[NxTOTAL];

double* gj1 = new double[NxTOTAL];

TicTac tt;

tt.Tic("Array (improved): ");

for (t = 0; t < TMAX; t++) {

for(i = 1; i < NxTOTAL; i++) {

for(j = jb+1; j < NyTOTAL; j++) {

double* Dy_ij = Dy + (i + j*NxTOTAL);

double* Hx_ij = Hx + (i + j*NxTOTAL);

double* Hz_ij = Hz + (i + j*NxTOTAL);

double* Hz_i1_j = Hz + (i - 1 + j*NxTOTAL);

double jyh = j - jb - 1;

for(k = 1; k < NzTOTAL; k++)

{

double curl_h = caz[k]*(Hx_ij[NxTOTAL*NyTOTAL*k] - Hx_ij[NxTOTAL*NyTOTAL*k-1]) -

cax[i]*(Hz_ij[NxTOTAL*NyTOTAL*k] - Hz_i1_j[NxTOTAL*NyTOTAL*k]);

addCM(idyh, NxTOTAL, NyTOTAL, i, jyh, k, curl_h);

double value = gi3[i]*gk3[k]*Dy_ij[NxTOTAL*NyTOTAL*k] +

gi2[i]*gk2[k]*(curl_h + gj1[j]*getCM(idyh, NxTOTAL, NyTOTAL, i, jyh, k));

Dy_ij[NxTOTAL*NyTOTAL*k] = value;

}

}

}

}

tt.Tac();

delete[] gi3;

delete[] cax;

delete[] cay;

delete[] caz;

delete[] Hx;

delete[] Hy;

delete[] Hz;

delete[] gk2;

delete[] gk3;

delete[] idyh;

delete[] Dy;

delete[] gi2;

delete[] gj1;

}

----------------

TicTac just measures time and print:

class TicTac {

private:

string message;

double tstart;

double tend;

public:

TicTac();

~TicTac();

void Tic(string msg);

void Tac();

};

void TicTac::Tic(string msg) {

message = msg;

tstart = (double)clock()/CLOCKS_PER_SEC;

}

void TicTac::Tac() {

tend = (double)clock()/CLOCKS_PER_SEC;

cout << message << ". TIME: " << (tend-tstart) << "s" << endl;

}

Tks