Tuesday, October 13, 2009

GPU Accelerated FDTD Using Cg

Apparently there has been some interested in my old GPU accelerated FDTD code implemented in C and Cg. I also had a CUDA version that had a hard time meeting the performance of the Cg version. I will need to check when I get home to see if I can find the other versions. Here is one version that I found on one of my old sites. It looks like it is version 0.1 and about three years old. I know I have a better versions somewhere out there, but this will give you a "taste" until I can find the other versions. Still, there are some interesting aspects of this code. I used an interesting 3D volume packing scheme for 2D textures that you can read about in GPU Gems 2. Also a lot of the guts of this code initially came from Dom's basic math tutorial for Cg.


cg_fdtd: clean
gcc test.c -o cg_fdtd -lglut -lGLEW -lCgGL -lpthread

clean:
rm -f cg_fdtd

test: cg_fdtd
./cg_fdtd 256 10


/*
* test.c
*
* author : Sam Adams
* date : 20061027
* discription : This is my implementation of a basic FDTD using GPU with Cg
*/

#include
#include
#include
#include
#include
#include
#include
#include

#define GLUT_WINDOW_NAME "fdtd window"

typedef enum bool{false, true} bool;
//clock_gettime
// struct for variable parts of GL calls (texture format, float format etc)
struct struct_textureParameters {
char* name;
GLenum texTarget;
GLenum texInternalFormat;
GLenum texFormat;
char* shader_source;
};

// struct actually being used (set from command line)
struct struct_textureParameters textureParameters;

int verbose = 1;
int numIterations = 100; //timesteps for fdtd
int texSize = 256;

GLuint glutWindowHandle;
GLuint fb;

enum components{X, Y, Z};

GLuint *exTexID; // these are z slices for fdtd
GLuint *eyTexID; // the +1 is for the output of the calculations
GLuint *ezTexID;
GLuint *hxTexID;
GLuint *hyTexID;
GLuint *hzTexID;
GLuint psTexID, psEmptyTexID;

// Cg vars
CGcontext cgContext;
CGprofile fragmentProfile;
CGprogram fragmentProgram;
//fdtd
////for e fields
CGprogram ex_fp, ey_fp, ez_fp;
CGparameter ex_ps, ex_e, ex_h1, ex_h2, ex_h3, ex_h4, ex_esctc, ex_eincc, ex_ei, ex_edevcn, ex_dei, ex_ecrl1, ex_ecrl2;
CGparameter ey_e, ey_h1, ey_h2, ey_h3, ey_h4, ey_esctc, ey_eincc, ey_ei, ey_edevcn, ey_dei, ey_ecrl1, ey_ecrl2;
CGparameter ez_e, ez_h1, ez_h2, ez_h3, ez_h4, ez_esctc, ez_eincc, ez_ei, ez_edevcn, ez_dei, ez_ecrl1, ez_ecrl2;
////for h fields
CGprogram hx_fp, hy_fp, hz_fp;
CGparameter hx_h, hx_e1, hx_e2, hx_e3, hx_e4, hx_dt, hx_delta1, hx_delta2;
CGparameter hy_h, hy_e1, hy_e2, hy_e3, hy_e4, hy_dt, hy_delta1, hy_delta2;
CGparameter hz_h, hz_e1, hz_e2, hz_e3, hz_e4, hz_dt, hz_delta1, hz_delta2;

float* tmpData;
//fdtd Cg update sources
char *hx_hUpdate_source = \
"float hUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT textureH, "\
"uniform samplerRECT textureE1, "\
"uniform samplerRECT textureE2, "\
"uniform samplerRECT textureE3, "\
"uniform samplerRECT textureE4, "\
"uniform float dt, "\
"uniform float delta1, "\
"uniform float delta2) : COLOR{ "\
"float h = texRECT(textureH, coords); "\
"float e1 = texRECT(textureE1, coords); "\
"float e2 = texRECT(textureE2, coords); "\
"float e3 = texRECT(textureE3, coords+(0.0,1.0)); "\
"float e4 = texRECT(textureE4, coords); "\
"return h + ((dt / 0.0000012566306) *((e1 - e2) / delta1 - (e3 - e4) / delta2)); }";

char *hy_hUpdate_source = \
"float hUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT textureH, "\
"uniform samplerRECT textureE1, "\
"uniform samplerRECT textureE2, "\
"uniform samplerRECT textureE3, "\
"uniform samplerRECT textureE4, "\
"uniform float dt, "\
"uniform float delta1, "\
"uniform float delta2) : COLOR{ "\
"float h = texRECT(textureH, coords); "\
"float e1 = texRECT(textureE1, coords+(1.0,0.0)); "\
"float e2 = texRECT(textureE2, coords); "\
"float e3 = texRECT(textureE3, coords); "\
"float e4 = texRECT(textureE4, coords); "\
"return h + ((dt / 0.0000012566306) *((e1 - e2) / delta1 - (e3 - e4) / delta2)); }";

char *hz_hUpdate_source = \
"float hUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT textureH, "\
"uniform samplerRECT textureE1, "\
"uniform samplerRECT textureE2, "\
"uniform samplerRECT textureE3, "\
"uniform samplerRECT textureE4, "\
"uniform float dt, "\
"uniform float delta1, "\
"uniform float delta2) : COLOR{ "\
"float h = texRECT(textureH, coords); "\
"float e1 = texRECT(textureE1, coords+(0.0,1.0)); "\
"float e2 = texRECT(textureE2, coords); "\
"float e3 = texRECT(textureE3, coords+(1.0,0.0)); "\
"float e4 = texRECT(textureE4, coords); "\
"return h + ((dt / 0.0000012566306) *((e1 - e2) / delta1 - (e3 - e4) / delta2)); }";

char *ex_eUpdate_source = \
"float eUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT texturePS, "\
"uniform samplerRECT textureE, "\
"uniform samplerRECT textureH1, "\
"uniform samplerRECT textureH2, "\
"uniform samplerRECT textureH3, "\
"uniform samplerRECT textureH4, "\
"uniform float esctc, "\
"uniform float eincc, "\
"uniform float ei, "\
"uniform float edevcn, "\
"uniform float dei, "\
"uniform float ecrl1, "\
"uniform float ecrl2 ) : COLOR{ "\
"float ps = texRECT(texturePS, coords); "\
"float e = texRECT(textureE, coords); "\
"float h1 = texRECT(textureH1, coords); "\
"float h2 = texRECT(textureH2, coords+(0.0,1.0)); "\
"float h3 = texRECT(textureH3, coords); "\
"float h4 = texRECT(textureH4, coords); "\
"float tmp = ps;"\
"if(ps != 0.0) tmp = ps;"\
/*"else tmp = e * esctc - eincc * ei - edevcn * dei + (h1 - h2) * ecrl1 - (h3 - h4) * ecrl2;"\*/
"else tmp = e * esctc - eincc * ei - edevcn * dei + ((h1 - h2) * ecrl1) - ((h3 - h4) * ecrl2);"\
"return tmp;}";

//"return e * esctc - eincc * ei - edevcn * dei + (h1 - h2) * ecrl1 - (h3 - h4) * ecrl2;}";
//"return e * esctc - eincc * ei - edevcn * dei + (h1 -h2) * ecrl1;}";

char *ey_eUpdate_source = \
"float eUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT textureE, "\
"uniform samplerRECT textureH1, "\
"uniform samplerRECT textureH2, "\
"uniform samplerRECT textureH3, "\
"uniform samplerRECT textureH4, "\
"uniform float esctc, "\
"uniform float eincc, "\
"uniform float ei, "\
"uniform float edevcn, "\
"uniform float dei, "\
"uniform float ecrl1, "\
"uniform float ecrl2 ) : COLOR{ "\
"float e = texRECT(textureE, coords); "\
"float h1 = texRECT(textureH1, coords); "\
"float h2 = texRECT(textureH2, coords); "\
"float h3 = texRECT(textureH3, coords); "\
"float h4 = texRECT(textureH4, coords+(1.0,0.0)); "\
"return e * esctc - eincc * ei - edevcn * dei + (h1 - h2) * ecrl1 - (h3 - h4) * ecrl2; }";

char *ez_eUpdate_source = \
"float eUpdate ("\
"in float2 coords : TEXCOORD0, "\
"uniform samplerRECT textureE, "\
"uniform samplerRECT textureH1, "\
"uniform samplerRECT textureH2, "\
"uniform samplerRECT textureH3, "\
"uniform samplerRECT textureH4, "\
"uniform float esctc, "\
"uniform float eincc, "\
"uniform float ei, "\
"uniform float edevcn, "\
"uniform float dei, "\
"uniform float ecrl1, "\
"uniform float ecrl2 ) : COLOR{ "\
"float e = texRECT(textureE, coords); "\
"float h1 = texRECT(textureH1, coords); "\
"float h2 = texRECT(textureH2, coords+(1.0,0.0)); "\
"float h3 = texRECT(textureH3, coords); "\
"float h4 = texRECT(textureH4, coords+(0.0,1.0)); "\
"return e * esctc - eincc * ei - edevcn * dei + (h1 - h2) * ecrl1 - (h3 - h4) * ecrl2; }";

time_t start, end;

/* prototypes */
void initGLUT(int argc, char **argv);
void initGLEW();
void initFBO();
void createTextures();
void initCG();
void performComputation();
void checkGLErrors (const char *label);
double cpuBench();
void printVector(float *v, int len);
void transferFromTexture(float* data, GLenum fb);
void initMemory();
//void nextSlice(int i);
void transferToTexture(float* data, GLuint texID);

int main(int argc, char **argv){
int i;
time_t start, stop;
float simTime;
double mflops;
FILE *f;

f = fopen("results", "a");

texSize = atoi(argv[1]);
numIterations = atoi(argv[2]);

// int start, stop;
// struct timespec res;
// clock_getres(CLOCK_REALTIME, &res);
textureParameters.name = "TEXRECT - float_NV - R - 32";
textureParameters.texTarget = GL_TEXTURE_RECTANGLE_ARB;
textureParameters.texInternalFormat = GL_FLOAT_R32_NV;
textureParameters.texFormat = GL_LUMINANCE;
// start = clock_gettime(CLOCK_REALTIME, &res);
start = clock();
initMemory();
if(verbose) fprintf(stderr,"initalizing GLUT\n");
initGLUT(argc, argv);
if(verbose) fprintf(stderr,"initalizing GLEW\n");
initGLEW();
if(verbose) fprintf(stderr,"initalizing FBOs\n");
initFBO();
if(verbose) fprintf(stderr,"creating textures\n");
createTextures();
if(verbose) fprintf(stderr,"initalizing Cg\n");
initCG();
if(verbose) fprintf(stderr,"performing calculations\n");
performComputation();
stop = clock();
simTime = (float)(stop-start)/(float)CLOCKS_PER_SEC;
mflops = (double)(63*texSize*texSize*texSize*numIterations)/1000000.0;
printf("time was %f\n", simTime);
fprintf(f,"%i\t%i\t%e\t%e\n",texSize,numIterations,simTime,mflops);
return 0;
}

void initMemory(){
tmpData = (float*)calloc(sizeof(float),texSize*texSize); // to initalize textures to 0.0
exTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
eyTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
ezTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
hxTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
hyTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
hzTexID = (GLuint*)malloc(sizeof(GLuint) * texSize);
/*
exLocation = (int*)malloc(sizeof(int)*texSize);
eyLocation = (int*)malloc(sizeof(int)*texSize);
ezLocation = (int*)malloc(sizeof(int)*texSize);
hxLocation = (int*)malloc(sizeof(int)*texSize);
hyLocation = (int*)malloc(sizeof(int)*texSize);
hzLocation = (int*)malloc(sizeof(int)*texSize);

exLocation_out = eyLocation_out = ezLocation_out = hxLocation_out = hyLocation_out = hzLocation_out = texSize;*/
}

void printVector(float *v, int len){
int i;

for(i = 0; i < len; i++) printf("%i)\t%f\n",i,v[i]);
}

/*
* Checks framebuffer status.
* Copied directly out of the spec, modified to deliver a return value.
*/
int checkFramebufferStatus() {
GLenum status;
status = (GLenum) glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
switch(status) {
case GL_FRAMEBUFFER_COMPLETE_EXT:
printf("Framebuffer complete\n");
return 1;
case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT:
printf("Framebuffer incomplete, incomplete attachment\n");
return 0;
case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
printf("Unsupported framebuffer format\n");
return 0;
case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT:
printf("Framebuffer incomplete, missing attachment\n");
return 0;
case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
printf("Framebuffer incomplete, attached images must have same dimensions\n");
return 0;
case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
printf("Framebuffer incomplete, attached images must have same format\n");
return 0;
case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT:
printf("Framebuffer incomplete, missing draw buffer\n");
return 0;
case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT:
printf("Framebuffer incomplete, missing read buffer\n");
return 0;
default:
printf("Unknown framebuffer status %i\n", (int)status);
}
return 0;
}

void checkGLErrors (const char *label) {
GLenum errCode;
const GLubyte *errStr;

if ((errCode = glGetError()) != GL_NO_ERROR) {
errStr = gluErrorString(errCode);
printf("OpenGL ERROR: ");
printf((char*)errStr);
printf("(Label: ");
printf(label);
printf(")\n.");
}
}

/*
* Performs the actual calculation.
*/
void performComputation(){
int i, j;
int tmp;
double total;
double mflops;

start = clock();
glEnable(textureParameters.texTarget);
// attach textures to FBO
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, textureParameters.texTarget, exTexID[texSize], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT1_EXT, textureParameters.texTarget, eyTexID[texSize], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT2_EXT, textureParameters.texTarget, ezTexID[texSize], 0);
// check if that worked
if(!checkFramebufferStatus()) {
printf("glFramebufferTexture2DEXT():\t [FAIL]\n");
exit (-1);
}// else if (mode == 0) {
// printf("glFramebufferTexture2DEXT():\t [PASS]\n");
// }
// enable fragment profile
cgGLEnableProfile(fragmentProfile);
// bind fdtd program
cgSetParameter1f(ex_esctc, 1.0);
cgSetParameter1f(ex_eincc, 1.0);
cgSetParameter1f(ex_ei, 1.0);
cgSetParameter1f(ex_edevcn, 0.0);
cgSetParameter1f(ex_dei, 0.0);
cgSetParameter1f(ex_ecrl1, 217.51);
cgSetParameter1f(ex_ecrl2, 217.51);

cgGLBindProgram(ey_fp);
cgSetParameter1f(ey_esctc, 1.0);
cgSetParameter1f(ey_eincc, 0.0);
cgSetParameter1f(ey_ei, 0.0);
cgSetParameter1f(ey_edevcn, 0.0);
cgSetParameter1f(ey_dei, 0.0);
cgSetParameter1f(ey_ecrl1, 217.51);
cgSetParameter1f(ey_ecrl2, 217.51);

cgGLBindProgram(ez_fp);
cgSetParameter1f(ez_esctc, 1.0);
cgSetParameter1f(ez_eincc, 0.0);
cgSetParameter1f(ez_ei, 0.0);
cgSetParameter1f(ez_edevcn, 0.0);
cgSetParameter1f(ez_dei, 0.0);
cgSetParameter1f(ez_ecrl1, 217.51);
cgSetParameter1f(ez_ecrl2, 217.51);

cgGLBindProgram(hx_fp);
cgSetParameter1f(hx_dt, 1.0);
cgSetParameter1f(hx_delta1, 1.0);
cgSetParameter1f(hx_delta2, 1.0);

cgGLBindProgram(hy_fp);
cgSetParameter1f(hy_dt, 1.0);
cgSetParameter1f(hy_delta1, 1.0);
cgSetParameter1f(hy_delta2, 1.0);

cgGLBindProgram(hz_fp);
cgSetParameter1f(hz_dt, 1.0);
cgSetParameter1f(hz_delta1, 1.0);
cgSetParameter1f(hz_delta2, 1.0);

// sutff that changes should be in the loop...

for(j = 0; j < numIterations; j++){
//printf("iteration %i\n",j);
for(i = 0; i < texSize; i++){
//update x e field
cgGLBindProgram(ex_fp);
// if(i == texSize/2){
// cgGLSetTextureParameter(ex_ps, psTexID);
// cgGLEnableTextureParameter(ex_ps);
// }
// else{
// cgGLSetTextureParameter(ex_ps, psEmptyTexID);
// cgGLEnableTextureParameter(ex_ps);
// }
cgGLSetTextureParameter(ex_e, exTexID[i]);
cgGLEnableTextureParameter(ex_e);
cgGLSetTextureParameter(ex_h1, hzTexID[i]);
cgGLEnableTextureParameter(ex_h1);
cgGLSetTextureParameter(ex_h2, hzTexID[i]);
cgGLEnableTextureParameter(ex_h2);
cgGLSetTextureParameter(ex_h3, hyTexID[i]);
cgGLEnableTextureParameter(ex_h3);
// fprintf(stdout,"we are doing --i- %i --i-1- %i %i\n",i, i-1,((i-1 <= 0) ? 0 : i-1));
//if(i) cgGLSetTextureParameter(ex_h4, hyTexID[i-1]);
if(i > 0) cgGLSetTextureParameter(ex_h4, hyTexID[i]);
else cgGLSetTextureParameter(ex_h4, hyTexID[0]);
cgGLEnableTextureParameter(ex_h4);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
// transferFromTexture(tmpData, hyTexID[i]);
/*if(i == texSize/2){*/// fprintf(stdout,"h%i) hy ps %e\n",i,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
//}
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT0_EXT);
transferToTexture(tmpData, exTexID[i]);
// if(i == texSize/2){ fprintf(stdout,"e%i) ex ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }
//update y e field
cgGLBindProgram(ey_fp);
cgGLSetTextureParameter(ey_e, eyTexID[i]);
cgGLEnableTextureParameter(ey_e);
cgGLSetTextureParameter(ey_h1, hxTexID[i]);
cgGLEnableTextureParameter(ey_h1);
cgGLSetTextureParameter(ey_h2, hxTexID[((i-1 < 0) ? 0 : i-1)]);
cgGLEnableTextureParameter(ey_h2);
cgGLSetTextureParameter(ey_h3, hzTexID[i]);
cgGLEnableTextureParameter(ey_h3);
cgGLSetTextureParameter(ey_h4, hyTexID[i]);
cgGLEnableTextureParameter(ey_h4);
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT1_EXT);
transferToTexture(tmpData, eyTexID[i]);

// if(i == texSize/2){ fprintf(stdout,"e%i) ey ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }
//update z e field
cgGLBindProgram(ez_fp);
cgGLSetTextureParameter(ez_e, ezTexID[i]);
cgGLEnableTextureParameter(ez_e);
cgGLSetTextureParameter(ez_h1, hyTexID[i]);
cgGLEnableTextureParameter(ez_h1);
cgGLSetTextureParameter(ez_h2, hyTexID[i]);
cgGLEnableTextureParameter(ez_h2);
cgGLSetTextureParameter(ez_h3, hxTexID[i]);
cgGLEnableTextureParameter(ez_h3);
cgGLSetTextureParameter(ez_h4, hxTexID[i]);
cgGLEnableTextureParameter(ez_h4);
glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT2_EXT);
transferToTexture(tmpData, ezTexID[i]);

// if(i == texSize/2){ fprintf(stdout,"e%i) ez ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }
//update x h field
cgGLBindProgram(hx_fp);
cgGLSetTextureParameter(hx_h, hxTexID[i]);
cgGLEnableTextureParameter(hx_h);
cgGLSetTextureParameter(hx_e1, eyTexID[i+1]);
cgGLEnableTextureParameter(hx_e1);
cgGLSetTextureParameter(hx_e2, eyTexID[i]);
cgGLEnableTextureParameter(hx_e2);
cgGLSetTextureParameter(hx_e3, ezTexID[i]);
cgGLEnableTextureParameter(hx_e3);
cgGLSetTextureParameter(hx_e4, ezTexID[i]);
cgGLEnableTextureParameter(hx_e4);
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT0_EXT);
transferToTexture(tmpData, hxTexID[i]);
// if(i == texSize/2){ fprintf(stdout,"h%i) hx ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }

//update y h field
cgGLBindProgram(hy_fp);
cgGLSetTextureParameter(hy_h, hyTexID[i]);
cgGLEnableTextureParameter(hy_h);
cgGLSetTextureParameter(hy_e1, ezTexID[i]);
cgGLEnableTextureParameter(hy_e1);
cgGLSetTextureParameter(hy_e2, ezTexID[i]);
cgGLEnableTextureParameter(hy_e2);
cgGLSetTextureParameter(hy_e3, exTexID[i+1]);
cgGLEnableTextureParameter(hy_e3);
cgGLSetTextureParameter(hy_e4, exTexID[i]);
cgGLEnableTextureParameter(hy_e4);
glDrawBuffer(GL_COLOR_ATTACHMENT1_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT1_EXT);
transferToTexture(tmpData, hyTexID[i]);

// if(i == texSize/2){ fprintf(stdout,"h%i) hy ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }
//update z h field
cgGLBindProgram(hz_fp);
cgGLSetTextureParameter(hz_h, hzTexID[i]);
cgGLEnableTextureParameter(hz_h);
cgGLSetTextureParameter(hz_e1, exTexID[i]);
cgGLEnableTextureParameter(hz_e1);
cgGLSetTextureParameter(hz_e2, exTexID[i]);
cgGLEnableTextureParameter(hz_e2);
cgGLSetTextureParameter(hz_e3, eyTexID[i]);
cgGLEnableTextureParameter(hz_e3);
cgGLSetTextureParameter(hz_e4, eyTexID[i]);
cgGLEnableTextureParameter(hz_e4);
glDrawBuffer(GL_COLOR_ATTACHMENT2_EXT);
glPolygonMode(GL_FRONT,GL_FILL);
//render/compute
glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f(0.0, 0.0);
glTexCoord2f(texSize, 0.0);
glVertex2f(texSize, 0.0);
glTexCoord2f(texSize, texSize);
glVertex2f(texSize, texSize);
glTexCoord2f(0.0, texSize);
glVertex2f(0.0, texSize);
glEnd();
//get result
transferFromTexture(tmpData, GL_COLOR_ATTACHMENT2_EXT);
transferToTexture(tmpData, hzTexID[i]);
// if(i == texSize/2){ fprintf(stdout,"h%i) hz ps %e\n",j,tmpData[(texSize*texSize)/2 + 1]);
// printVector(tmpData, texSize*texSize);
// }
}
}
// done, stop timer, calc MFLOP/s if neccessary
// glFinish();
// end = clock();
// total = (double)(end-start)/(double)CLOCKS_PER_SEC;
// mflops = (double)((3*12 + 3*9)*texSize*texSize*texSize*numIterations) / (total * 1000000.0);
// printf("GPU MFLOP/s for N=%d:\t\t%f\n",texSize, mflops);
// done, just do some checks if everything went smoothly.
checkFramebufferStatus();
checkGLErrors("render()");
glDisable(textureParameters.texTarget);
// transferFromTexture(tmpData);
// printVector(tmpData, texSize*texSize);
//printVector(dataX, texSize*texSize);

// do cpu comarison
// printf("GPU speedup %f\n \n", cpuBench()/total);
}

void transferFromTexture(float* data, GLenum fb){
// version (a): texture is attached
// recommended on both NVIDIA and ATI
glReadBuffer(fb);
glReadPixels(0, 0, texSize, texSize,textureParameters.texFormat,GL_FLOAT,data);
}

void cgErrorCallback(){
CGerror lastError = cgGetError();
if(lastError) {
printf(cgGetErrorString(lastError));
printf(cgGetLastListing(cgContext));
exit(lastError);
}
}

/*
* Sets up the Cg runtime and creates shader.
*/
void initCG(void) {
// set up Cg
cgSetErrorCallback(cgErrorCallback);
cgContext = cgCreateContext();
fragmentProfile = cgGLGetLatestProfile(CG_GL_FRAGMENT);
cgGLSetOptimalOptions(fragmentProfile);
// create fragment program
ex_fp = cgCreateProgram(cgContext, CG_SOURCE, ex_eUpdate_source, fragmentProfile, "eUpdate", NULL);
ey_fp = cgCreateProgram(cgContext, CG_SOURCE, ey_eUpdate_source, fragmentProfile, "eUpdate", NULL);
ez_fp = cgCreateProgram(cgContext, CG_SOURCE, ez_eUpdate_source, fragmentProfile, "eUpdate", NULL);

hx_fp = cgCreateProgram(cgContext, CG_SOURCE, hx_hUpdate_source, fragmentProfile, "hUpdate", NULL);
hy_fp = cgCreateProgram(cgContext, CG_SOURCE, hy_hUpdate_source, fragmentProfile, "hUpdate", NULL);
hz_fp = cgCreateProgram(cgContext, CG_SOURCE, hz_hUpdate_source, fragmentProfile, "hUpdate", NULL);
// // load programs
cgGLLoadProgram(ex_fp);
cgGLLoadProgram(ey_fp);
cgGLLoadProgram(ez_fp);
cgGLLoadProgram(hx_fp);
cgGLLoadProgram(hy_fp);
cgGLLoadProgram(hz_fp);
//and get parameter handles by name
//ex
ex_ps = cgGetNamedParameter(ex_fp, "texturePS");
ex_e = cgGetNamedParameter(ex_fp, "textureE");
ex_h1 = cgGetNamedParameter(ex_fp, "textureH1");
ex_h2 = cgGetNamedParameter(ex_fp, "textureH2");
ex_h3 = cgGetNamedParameter(ex_fp, "textureH3");
ex_h4 = cgGetNamedParameter(ex_fp, "textureH4");
ex_esctc = cgGetNamedParameter(ex_fp, "esctc");
ex_eincc = cgGetNamedParameter(ex_fp, "eincc");
ex_ei = cgGetNamedParameter(ex_fp, "ei");
ex_edevcn = cgGetNamedParameter(ex_fp, "edevcn");
ex_dei = cgGetNamedParameter(ex_fp, "dei");
ex_ecrl1 = cgGetNamedParameter(ex_fp, "ecrl1");
ex_ecrl2 = cgGetNamedParameter(ex_fp, "ecrl2");
//ey
ey_e = cgGetNamedParameter(ey_fp, "textureE");
ey_h1 = cgGetNamedParameter(ey_fp, "textureH1");
ey_h2 = cgGetNamedParameter(ey_fp, "textureH2");
ey_h3 = cgGetNamedParameter(ey_fp, "textureH3");
ey_h4 = cgGetNamedParameter(ey_fp, "textureH4");
ey_esctc = cgGetNamedParameter(ey_fp, "esctc");
ey_eincc = cgGetNamedParameter(ey_fp, "eincc");
ey_ei = cgGetNamedParameter(ey_fp, "ei");
ey_edevcn = cgGetNamedParameter(ey_fp, "edevcn");
ey_dei = cgGetNamedParameter(ey_fp, "dei");
ey_ecrl1 = cgGetNamedParameter(ey_fp, "ecrl1");
ey_ecrl2 = cgGetNamedParameter(ey_fp, "ecrl2");
//ez
ez_e = cgGetNamedParameter(ez_fp, "textureE");
ez_h1 = cgGetNamedParameter(ez_fp, "textureH1");
ez_h2 = cgGetNamedParameter(ez_fp, "textureH2");
ez_h3 = cgGetNamedParameter(ez_fp, "textureH3");
ez_h4 = cgGetNamedParameter(ez_fp, "textureH4");
ez_esctc = cgGetNamedParameter(ez_fp, "esctc");
ez_eincc = cgGetNamedParameter(ez_fp, "eincc");
ez_ei = cgGetNamedParameter(ez_fp, "ei");
ez_edevcn = cgGetNamedParameter(ez_fp, "edevcn");
ez_dei = cgGetNamedParameter(ez_fp, "dei");
ez_ecrl1 = cgGetNamedParameter(ez_fp, "ecrl1");
ez_ecrl2 = cgGetNamedParameter(ez_fp, "ecrl2");
//hx
hx_h = cgGetNamedParameter(hx_fp, "textureH");
hx_e1 = cgGetNamedParameter(hx_fp, "textureE1");
hx_e2 = cgGetNamedParameter(hx_fp, "textureE2");
hx_e3 = cgGetNamedParameter(hx_fp, "textureE3");
hx_e4 = cgGetNamedParameter(hx_fp, "textureE4");
hx_dt = cgGetNamedParameter(hx_fp, "dt");
hx_delta1 = cgGetNamedParameter(hx_fp, "delta1");
hx_delta2 = cgGetNamedParameter(hx_fp, "delta2");
//hy
hy_h = cgGetNamedParameter(hy_fp, "textureH");
hy_e1 = cgGetNamedParameter(hy_fp, "textureE1");
hy_e2 = cgGetNamedParameter(hy_fp, "textureE2");
hy_e3 = cgGetNamedParameter(hy_fp, "textureE3");
hy_e4 = cgGetNamedParameter(hy_fp, "textureE4");
hy_dt = cgGetNamedParameter(hy_fp, "dt");
hy_delta1 = cgGetNamedParameter(hy_fp, "delta1");
hy_delta2 = cgGetNamedParameter(hy_fp, "delta2");
//hz
hz_h = cgGetNamedParameter(hz_fp, "textureH");
hz_e1 = cgGetNamedParameter(hz_fp, "textureE1");
hz_e2 = cgGetNamedParameter(hz_fp, "textureE2");
hz_e3 = cgGetNamedParameter(hz_fp, "textureE3");
hz_e4 = cgGetNamedParameter(hz_fp, "textureE4");
hz_dt = cgGetNamedParameter(hz_fp, "dt");
hz_delta1 = cgGetNamedParameter(hz_fp, "delta1");
hz_delta2 = cgGetNamedParameter(hz_fp, "delta2");
}

/*
* Transfers data to texture.
* Check web page for detailed explanation on the difference between ATI and NVIDIA.
*/
void transferToTexture (float* data, GLuint texID) {
// version (a): HW-accelerated on NVIDIA
glBindTexture(textureParameters.texTarget, texID);
glTexSubImage2D(textureParameters.texTarget,0,0,0,texSize,texSize,textureParameters.texFormat,GL_FLOAT,data);
}

/*
* Sets up a floating point texture with NEAREST filtering.
* (mipmaps etc. are unsupported for floating point textures)
*/
void setupTexture (const GLuint texID) {
// make active and bin
int err;
glBindTexture(textureParameters.texTarget,texID);
// turn off filtering and wrap modes
glTexParameteri(textureParameters.texTarget, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(textureParameters.texTarget, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(textureParameters.texTarget, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(textureParameters.texTarget, GL_TEXTURE_WRAP_T, GL_CLAMP);
// define texture with floating point format
glTexImage2D(textureParameters.texTarget,0,textureParameters.texInternalFormat,texSize,texSize,0,textureParameters.texFormat,GL_FLOAT,0);
// define texture with floating point format
// check if that worked
if(err = glGetError() != GL_NO_ERROR){
printf("glTexImage2D():\t\t\t [FAIL]\n");
exit(err);
}
// else if(mode == 0){
printf("glTexImage2D():\t\t\t [PASS]\n");
// }
printf("Created a %i by %i floating point texture.\n",texSize,texSize);
}

void createTextures(){
int i;
// create textures
glGenTextures(texSize+1, exTexID);
glGenTextures(texSize+1, eyTexID);
glGenTextures(texSize+1, ezTexID);
glGenTextures(texSize+1, hxTexID);
glGenTextures(texSize+1, hyTexID);
glGenTextures(texSize+1, hzTexID);
glGenTextures(1, &psTexID);
glGenTextures(1, &psEmptyTexID);

tmpData[(texSize*texSize)/2] = 5.0;
setupTexture(psTexID);
transferToTexture(tmpData, psTexID);
tmpData[(texSize*texSize)/2] = 0.0;
setupTexture(psEmptyTexID);
transferToTexture(tmpData, psEmptyTexID);


// set up textures
for(i = 0; i <= texSize; i++){
setupTexture(exTexID[i]);
transferToTexture(tmpData, exTexID[i]);
setupTexture(eyTexID[i]);
transferToTexture(tmpData, eyTexID[i]);
setupTexture(ezTexID[i]);
transferToTexture(tmpData, ezTexID[i]);
setupTexture(hxTexID[i]);
transferToTexture(tmpData, hxTexID[i]);
setupTexture(hyTexID[i]);
transferToTexture(tmpData, hyTexID[i]);
setupTexture(hzTexID[i]);
transferToTexture(tmpData, hzTexID[i]);
}
// set texenv mode from modulate (the default) to replace)
glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
// check if something went completely wrong
checkGLErrors ("createFBOandTextures()");
}

/*
* Creates framebuffer object, binds it to reroute rendering operations
* from the traditional framebuffer to the offscreen buffer
*/
void initFBO(){
// create FBO (off-screen framebuffer)
glGenFramebuffersEXT(1, &fb);
// bind offscreen framebuffer (that is, skip the window-specific render target)
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
// viewport for 1:1 pixel=texture mapping
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0.0, texSize, 0.0, texSize);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glViewport(0, 0, texSize, texSize);
}

void initGLEW (void) {
int err = glewInit();
if (GLEW_OK != err) {
fprintf(stderr,"error: %s\n",(char*)glewGetErrorString(err));
exit(-1);
}
}

void initGLUT(int argc, char **argv){
glutInit(&argc, argv);
glutWindowHandle = glutCreateWindow(GLUT_WINDOW_NAME);
}

No comments:

Post a Comment