GPGPUで整数計算(12)

前に効率的なべき乗の計算というのをやったが、これは多項式の計算がしたかったからである。


例えば、

 f(x) = g(x) = 1 + x + x^2 + \dots
 h(x) = f(x)g(x)

という計算をやらせる。
代数的に簡単に計算できるが、それはおいといて、次のようになる。今回はテクスチャを2枚用意するので、久しぶりにフルソースを下に載せた。


しかし、このような計算は簡単にオーバーフローする。
結局、多倍長整数の計算をGPUですべきなのではないだろうか。



#extension GL_ARB_texture_rectangle : enable

uniform sampler2DRect texUnit0; // a
uniform sampler2DRect texUnit1; // b

uniform int bit;

void main(void) {
int n = int(gl_FragCoord.x) + int(gl_FragCoord.y) * (1 << bit);
int c = 0;

int mask = (1 << bit) - 1;
for(int i = 0; i <= n; i++) {
int k = n - i;
vec2 coord_a = vec2(i & mask, i >> bit);
vec2 coord_b = vec2(k & mask, k >> bit);
int a = int(texRECT(texUnit0, coord_a).x);
int b = int(texRECT(texUnit1, coord_b).x);

c += a * b;
}

gl_FragColor = vec4(c, 0, 0, 0);
}


#include
#include
#include
#include
#include

#include

using namespace std;

#pragma comment (lib, "glew32.lib")
#pragma comment (lib, "winmm.lib")

// GLSLシェーダの読み込み(CreateShaderから呼び出す)
int LoadGlslShader(GLhandleARB *handle, const char *filename)
{
int begin, end;
char *str;
FILE *F;
F = fopen(filename, "r");
if(F == NULL)
{
return -1;
}
begin = ftell(F);
fseek(F, 0, SEEK_END);
end = ftell(F);
fseek(F, 0, SEEK_SET);
str = (char*)calloc( (end-begin), sizeof(char));
fread( (void*)str, end-begin, 1, F);
fclose(F);
glShaderSourceARB(*handle, 1, (const char**)(&str), NULL);
free(str);
return 0;
}

// GLSLシェーダの生成
int CreateShader(GLhandleARB& g_programObject, const char *filename) {
int ret;
GLenum errCode;

g_programObject = glCreateProgramObjectARB();

GLhandleARB g_fragmentShader;
g_fragmentShader = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB);
ret = LoadGlslShader(&g_fragmentShader, filename);
if(ret)
return -1;

glCompileShaderARB(g_fragmentShader);
glGetObjectParameterivARB(g_fragmentShader, GL_OBJECT_COMPILE_STATUS_ARB, &ret);
if( (errCode=glGetError()) != GL_NO_ERROR || ret == GL_FALSE) {
char str[1024];
int length = 0;
glGetInfoLogARB(g_fragmentShader, 0xff, &length, str);
fprintf(stderr, "Compile Failed: %s\n", filename);
fprintf(stderr, "%s\n", str);
return -1;
}
glAttachObjectARB(g_programObject, g_fragmentShader);
if( (errCode=glGetError()) != GL_NO_ERROR) {
return -1;
}

glLinkProgramARB(g_programObject);
glGetObjectParameterivARB(g_programObject, GL_OBJECT_LINK_STATUS_ARB, &ret);
if( (errCode=glGetError()) != GL_NO_ERROR || ret == GL_FALSE) {
return -1;
}

return 0;
}

// テクスチャ生成
int CreateTexture(int texid, int width, int height, unsigned short opt1, unsigned short opt2)
{
glBindTexture(opt1, texid);
glTexParameteri(opt1, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(opt1, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(opt1, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(opt1, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexImage2D(opt1, 0, opt2, width, height, 0, GL_RGBA, GL_FLOAT, 0);
if (glGetError() != GL_NO_ERROR) {
return -1;
}
return 0;
}

int main(int argc, char **argv) {
const int bit = 7;
const int nWidth = 1 << bit;
const int nHeight = 128;
const int nData = nWidth * nHeight * 4;
const int nCoeffs = nData / 4;
float *a = new float[nData];
float *b = new float[nData];
float *c = new float[nData];

glutInit( &argc, argv );
glutInitWindowSize(nWidth, nHeight);
unsigned int g_glutWindowHandle;
g_glutWindowHandle = glutCreateWindow("GpgpuHelloWorld");
glewInit();

unsigned int g_fb = -1;
glGenFramebuffersEXT(1, &g_fb);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, g_fb);

GLhandleARB g_programObject;
unsigned int g_nTexID[2];
CreateShader(g_programObject, "shader_nv13.frag");
glGenTextures(2, g_nTexID);
GLint texUnit0 = glGetUniformLocationARB(g_programObject, "texUnit0");
GLint texUnit1 = glGetUniformLocationARB(g_programObject, "texUnit1");

unsigned short TEX_OPT1 = GL_TEXTURE_RECTANGLE_NV;
unsigned short TEX_OPT2 = GL_FLOAT_RGBA32_NV;

CreateTexture(g_nTexID[0], nWidth, nHeight, TEX_OPT1, TEX_OPT2);
CreateTexture(g_nTexID[1], nWidth, nHeight, TEX_OPT1, TEX_OPT2);

glMatrixMode(GL_PROJECTION);
glLoadIdentity();
gluOrtho2D(0.0, nWidth, 0.0, nHeight);
glMatrixMode(GL_MODELVIEW);
glLoadIdentity();
glViewport(0, 0, nWidth, nHeight);

glFlush();

for(int i = 0; i < nCoeffs; i++) {
a[i*4] = 1;
b[i*4] = 1;
}

glActiveTexture(GL_TEXTURE0);
glBindTexture(TEX_OPT1, g_nTexID[0]);
glUniform1iARB(texUnit0, 0);
glBindTexture(TEX_OPT1, g_nTexID[1]);
glUniform1iARB(texUnit1, 1);

glBindTexture(TEX_OPT1, g_nTexID[0]);
glTexSubImage2D(TEX_OPT1, 0, 0, 0,
nWidth, nHeight, GL_RGBA, GL_FLOAT, a);
glBindTexture(TEX_OPT1, g_nTexID[1]);
glTexSubImage2D(TEX_OPT1, 0, 0, 0,
nWidth, nHeight, GL_RGBA, GL_FLOAT, b);

glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT0_EXT, TEX_OPT1, g_nTexID[0], 0);
glUseProgramObjectARB(g_programObject);

int h_nRep = glGetUniformLocationARB(g_programObject, "bit");
glUniform1iARB(h_nRep, bit);

glDrawBuffer (GL_COLOR_ATTACHMENT0_EXT);

glBegin(GL_QUADS);
glTexCoord2f(0.0, 0.0);
glVertex2f (0.0, 0.0);
glTexCoord2f(nWidth, 0.0);
glVertex2f (nWidth, 0.0);
glTexCoord2f(nWidth, nHeight);
glVertex2f (nWidth, nHeight);
glTexCoord2f(0.0, nHeight);
glVertex2f (0.0, nHeight);
glEnd();

glFlush();
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);

glReadPixels(0,0, nWidth, nHeight, GL_RGBA, GL_FLOAT, c);
glDeleteTextures(2, g_nTexID);

for(int i = nCoeffs - 3; i < nCoeffs; i++) {
printf("%3d %3d\n", i, (int)c[i*4]);
}

delete [] a;
delete [] b;
delete [] c;
glutDestroyWindow(g_glutWindowHandle);

return 0;
}