2015年6月23日火曜日

GLSL ES 3.1 メモ

抜けはあるけれど, メモなので.
チェックはglslangValidatorで.



Vertex Shader

#version 310 es
//In some cases this extension is needed in order to use layout(location)
#extension GL_EXT_shader_io_blocks : enable

//Structure with one member of every scalar, vector and matrix type used in this memo.
struct Structure
{
    //scalars
    bool  sb_;
    int   si_;
    uint  sui_;
    float sf_;

    //floating-point vectors
    vec2 vf2_;
    vec3 vf3_;
    vec4 vf4_;

    //boolean vectors
    bvec2 vb2_;
    bvec3 vb3_;
    bvec4 vb4_;

    //signed integer vectors
    ivec2 vi2_;
    ivec3 vi3_;
    ivec4 vi4_;

    //unsigned integer vectors
    uvec2 vu2_;
    uvec3 vu3_;
    uvec4 vu4_;

    //NxN square matrices
    mat2 mf2_;
    mat3 mf3_;
    mat4 mf4_;

    //explicitly sized matNxM spellings
    mat2x2 mf22_;
    mat2x3 mf23_;
    mat2x4 mf24_;
    mat3x2 mf32_;
    mat3x3 mf33_;
    mat3x4 mf34_;
    mat4x2 mf42_;
    mat4x3 mf43_;
    mat4x4 mf44_;
};

//packed: the implementation is free to choose the layout.
//        Member locations have to be queried with glGetActiveUniformBlock / glGetActiveUniform.
//shared: the implementation is free to choose the layout.
//        Members are never removed. Identical declarations get the same layout across different programs.
//        Watch out for the default layout qualifiers.
//std140: OpenGL 4.5, Section 7.6.2.2, page 137. Roughly 16 bytes (vec4) per element.
//std430: std140 with a few optimizations allowed.
//        In OpenGL it can be used only with shader storage blocks.
//        NOTE(review): the original memo states that OpenGL ES also accepts it on uniform blocks - verify against the GLSL ES 3.10 specification.

layout(column_major) uniform; //default

//Uniform block declared with std140 layout at binding 0.
layout(std140, binding=0) uniform UniformBlock
{
    layout(row_major) mat4 mview_; //per-member override of the column_major setting above
    mat4 mproj_;
    uint sui0_;
    uint sui1_;
    uint sui2_;
    uint sui3_;
};

//Buffer that the shader can access like ordinary variables.
//Each member below carries a different memory qualifier.
layout(std140, binding=0) buffer ShaderStorageBlock
{
    float f0_;
    coherent float f1_;
    volatile float f2_;
    restrict float f3_;
    readonly float f4_;
    writeonly vec4 v_[]; //OK (presumably: an unsized array is accepted as the last member)
};

//Vertex attributes
layout(location=0) in vec4 inPosition;
layout(location=1) in vec2 inTexcoord;

//smooth: linear interpolation, the default for floating-point types
//flat: no interpolation; the original memo calls it the default for integers
//      NOTE(review): check whether integer varyings must spell out flat explicitly
//centroid: with MSAA, keeps the interpolated sample position inside the area covered by the polygon
layout(location=0) smooth out vec2 outTexcoord;
layout(location=1) out mat4 outMat; //occupy location=1,2,3,4
layout(location=5) flat centroid out vec4 outColor;

//Built-in variables
//in highp int gl_VertexID;
//in highp int gl_InstanceID;
//out highp vec4 gl_Position;
//out highp float gl_PointSize;

const float farray[2] = float[2] //equivalent spellings: farray[2]=float[], farray[]=float[2], farray[]=float[]
(
   0.0, 1.0
   //, ERROR: a trailing comma after the last element is rejected
);

//Vertex shader entry point: passes position and texture coordinate through
//and touches a few language features for reference.
//NOTE: outMat is declared as an output but is not written here.
void main()
{
    //bitwise operators on unsigned integers
    uint bitOps = (sui0_ & 0xFFU) ^ (sui0_ | 0xFF00U);

    //array of arrays
    vec4 grid[3][2];
    grid.length(); //this is 3
    //grid[0].length(); // this is 2. glslangValidator does not support it yet

    //built-in inputs, referenced only to show that they exist
    gl_VertexID;
    gl_InstanceID;

    //user-defined outputs
    outTexcoord = inTexcoord;
    outColor = vec4(1, 1, 1, 1);

    //built-in outputs
    gl_Position = inPosition;
    gl_PointSize = 1.0;
}



Fragment Shader

#version 310 es
//In some cases this extension is needed in order to use layout(location)
#extension GL_EXT_shader_io_blocks : enable

precision mediump float;

//These types have no default precision, so a precision statement has to be written for them
precision highp sampler3D;
precision highp samplerCubeShadow;
precision highp sampler2DShadow;
precision highp sampler2DArray;
precision highp sampler2DArrayShadow;
precision highp sampler2DMS;
precision highp isampler2D;
precision highp isampler3D;
precision highp isamplerCube;
precision highp isampler2DArray;
precision highp isampler2DMS;
precision highp usampler2D;
precision highp usampler3D;
precision highp usamplerCube;
precision highp usampler2DArray;
precision highp usampler2DMS;
precision highp image2D;
precision highp image3D;
precision highp imageCube;
precision highp image2DArray;
precision highp iimage2D;
precision highp iimage3D;
precision highp iimageCube;
precision highp iimage2DArray;
precision highp uimage2D;
precision highp uimage3D;
precision highp uimageCube;
precision highp uimage2DArray;

//Sampler uniforms, one per sampler type, at bindings 0 to 7
layout(binding=0) uniform sampler2D sam2D;
layout(binding=1) uniform sampler3D sam3D;
layout(binding=2) uniform samplerCube samCube;
layout(binding=3) uniform sampler2DArray sam2DArray;
layout(binding=4) uniform sampler2DShadow sam2DS;
layout(binding=5) uniform samplerCubeShadow sameCubeS;
layout(binding=6) uniform sampler2DArrayShadow sam2DArrayS;
layout(binding=7) uniform sampler2DMS sam2DMS;


//image
//----------------------------------
//Formats that can be specified for an image
//rgba32f, rgba16f, r32f, rgba8, rgba8_snorm
//rgba32i, rgba16i, rgba8i, r32i
//rgba32ui, rgba16ui, rgba8ui, r32ui

//Memory qualifiers
//coherent: normally the compiler does not assume that a value changed through this variable
//          is necessarily visible to other shaders.
//          coherent guarantees that visibility between dependent shader invocations.
//          It has to be specified on the variables of every shader that has the same object bound.
//          To synchronize with shaders of a different rendering command, use glMemoryBarrier instead of coherent.
//volatile: normally the compiler assumes the program accesses the variable after a memory barrier or synchronization;
//          with volatile it assumes the contents may change at any time.
//restrict: an image/buffer object may be bound to several variables of the same shader; restrict tells the compiler
//          that the object bound to this variable is not bound to any other variable.
//          Specifying it is always recommended.
//readonly: (no note in the original memo)
//writeonly: (no note in the original memo)
layout(binding=0, rgba8) uniform restrict readonly image2D img2D;
layout(binding=1, rgba8_snorm) uniform readonly image3D img3D;
layout(binding=2, rgba32f) uniform readonly imageCube imgCube;
layout(binding=3, rgba16f) uniform restrict readonly image2DArray img2DArray;
layout(binding=4, rgba8) uniform restrict writeonly image2D img2DOut;
layout(binding=5, r32ui) uniform restrict uimage2D uimg2D;

//Atomic counter at binding 0, offset 0
layout(binding=0, offset=0) uniform atomic_uint atomic_counter;

//Inputs; locations and qualifiers mirror the vertex shader outputs listed earlier in this memo
layout(location=0) smooth in vec2 inTexcoord;
layout(location=1) in mat4 inMat; //occupy location=1,2,3,4
layout(location=5) flat centroid in vec4 inColor;

//Color outputs; the array is declared at location 1 with two elements
layout(location=0) out vec4 outColor0;
layout(location=1) out vec4 outColor12[2];

//Built-in variables
//in highp vec4 gl_FragCoord;
//in bool gl_FrontFacing;
//in mediump vec2 gl_PointCoord;
//in bool gl_HelperInvocation
//out highp float gl_FragDepth;

//Fragment shader entry point: exercises the texture and image built-ins,
//then writes three color outputs.
void main()
{
    //size queries at level 0
    ivec2 texSize2D = textureSize(sam2D, 0);
    ivec3 texSize3D = textureSize(sam3D, 0);

    //lookup with normalized coordinates
    vec4 sampled = texture(sam2D, inTexcoord);

    //fetch with integer texel coordinates at level 0
    ivec2 texel = ivec2(inTexcoord * vec2(texSize2D));
    vec4 fetched = texelFetch(sam2D, texel, 0);

    //shadow sampler lookup and gather
    float shadow = texture(sam2DS, vec3(inTexcoord, 0.01));
    vec4 gathered = textureGather(sam2D, inTexcoord, 0);

    //image size query, load and store
    texSize2D = imageSize(img2D);
    vec4 loaded = imageLoad(img2D, texel);
    imageStore(img2DOut, texel, loaded);

    outColor0 = sampled;
    outColor12[0] = fetched;
    outColor12[1] = inColor;
}



Compute Shader

#version 310 es

//Built-in variables
// work group dimensions
//in uvec3 gl_NumWorkGroups;
//const uvec3 gl_WorkGroupSize;

// work group and invocation IDs
//in uvec3 gl_WorkGroupID;
//in uvec3 gl_LocalInvocationID;

// derived variables
//in uvec3 gl_GlobalInvocationID;
//in uint gl_LocalInvocationIndex;


//gl_GlobalInvocationID = gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID;
//gl_LocalInvocationIndex =
//    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y
//    + gl_LocalInvocationID.y * gl_WorkGroupSize.x
//    + gl_LocalInvocationID.x;

//Blur radius: number of taps on each side of the centre texel.
#define R (4)
//Number of output texels written by one work group.
#define GRIDSIZE (128)
//Invocations per work group: GRIDSIZE output texels plus R extra texels on each side.
//Must stay equal to GRIDSIZE+R*2. Kept as a plain literal, presumably because it is used
//inside layout(local_size_x=...) below - confirm against the GLSL ES 3.10 grammar.
//NOTE(review): 136 is larger than 128, the specification minimum listed for
//gl_MaxComputeWorkGroupSize.x in the constants section of this memo.
#define NUMTHREADS 136
//#define NUMTHREADS (GRIDSIZE+R*2)

//Filter weights indexed by the distance from the centre tap (0..R).
const float gaussian[R+1] = float[](0.398943, 0.241971, 0.0539911, 0.00443186, 0.000133831);

layout(binding=0) uniform sampler2D sam2D;
layout(binding=0, rgba8) uniform restrict writeonly image2D img2D;

layout(binding=0, offset=0) uniform atomic_uint atomic_counter;

//One colour sample per invocation, shared by the whole work group.
shared vec4 sharedSamples[NUMTHREADS];

layout(local_size_x=NUMTHREADS, local_size_y=1, local_size_z=1) in;

//Compute shader entry point: horizontal blur of sam2D into img2D.
//Every invocation loads one texel into shared memory; the invocations that map to
//an output texel then sum 2*R+1 neighbouring samples weighted by gaussian[].
void main()
{
    //atomic counter built-ins: read the current value, then increment it
    uint acounter = atomicCounter(atomic_counter);
    atomicCounterIncrement(atomic_counter);
    //atomicCounterDecrement(atomic_counter);

    ivec2 size = textureSize(sam2D, 0);
    int gridStartX = int(gl_WorkGroupID.x) * GRIDSIZE;
    //position relative to the first output texel of this group, in [-R, GRIDSIZE+R)
    int gridX = int(gl_LocalInvocationID.x) - R;

    int sampleX = gridStartX + gridX;
    //one row per work group along y (the original used gl_WorkGroupID.x here,
    //which tied the row to the column index)
    int sampleY = int(gl_WorkGroupID.y);
    ivec2 samplePosition = ivec2(sampleX, sampleY);

    //sample at the texel centre; the clamp keeps the out-of-range border taps inside the texture
    vec2 texcoord = clamp((vec2(samplePosition)+vec2(0.5, 0.5))/vec2(size), 0.0, 1.0);
    vec4 color = textureLod(sam2D, texcoord, 0.0);
    sharedSamples[gl_LocalInvocationID.x] = color;

    //memory barrier built-ins, listed for reference
    //memoryBarrier();
    //memoryBarrierAtomicCounter();
    //memoryBarrierBuffer();
    //memoryBarrierImage();
    //memoryBarrierShared();
    //groupMemoryBarrier();

    //All invocations must have finished writing sharedSamples before any of them
    //reads a neighbour's entry. A memory barrier alone does not synchronize
    //execution, so it is paired with barrier().
    memoryBarrierShared();
    barrier();

    if(0<=gridX && gridX<GRIDSIZE && sampleX<size.x){
        int groupThreadIDX = int(gl_LocalInvocationID.x); //[R, R+GRIDSIZE)
        vec4 sum = vec4(0.0);
        for(int r=-R; r<=R; ++r){
            int tap = groupThreadIDX + r; //stays inside [0, NUMTHREADS)
            vec4 samp = sharedSamples[tap];
            float weight = gaussian[abs(r)];
            sum += samp * weight;
        }
        imageStore(img2D, samplePosition, sum);
    }
}



Built-in Constants and Uniform State

//Built-in constants
//Implementation-dependent. The values below are the minimums required by the specification.
const mediump int gl_MaxVertexAttribs = 16;
const mediump int gl_MaxVertexUniformVectors = 256;
const mediump int gl_MaxVertexOutputVectors = 16;
const mediump int gl_MaxFragmentInputVectors = 15;
const mediump int gl_MaxFragmentUniformVectors = 224;
const mediump int gl_MaxDrawBuffers = 4;
const mediump int gl_MaxVertexTextureImageUnits = 16;
const mediump int gl_MaxCombinedTextureImageUnits = 48;
const mediump int gl_MaxTextureImageUnits = 16;
const mediump int gl_MinProgramTexelOffset = -8;
const mediump int gl_MaxProgramTexelOffset = 7;
const mediump int gl_MaxImageUnits = 4;
const mediump int gl_MaxVertexImageUniforms = 0;
const mediump int gl_MaxFragmentImageUniforms = 0;
const mediump int gl_MaxComputeImageUniforms = 4;
const mediump int gl_MaxCombinedImageUniforms = 4;
const mediump int gl_MaxCombinedShaderOutputResources = 4;
const highp ivec3 gl_MaxComputeWorkGroupCount = ivec3(65535, 65535, 65535);
const highp ivec3 gl_MaxComputeWorkGroupSize = ivec3(128, 128, 64);
const mediump int gl_MaxComputeUniformComponents = 512;
const mediump int gl_MaxComputeTextureImageUnits = 16;
const mediump int gl_MaxComputeAtomicCounters = 8;
const mediump int gl_MaxComputeAtomicCounterBuffers = 1;
const mediump int gl_MaxVertexAtomicCounters = 0;
const mediump int gl_MaxFragmentAtomicCounters = 0;
const mediump int gl_MaxCombinedAtomicCounters = 8;
const mediump int gl_MaxAtomicCounterBindings = 1;
const mediump int gl_MaxFragmentAtomicCounterBuffers = 0;
const mediump int gl_MaxVertexAtomicCounterBuffers = 0;
const mediump int gl_MaxCombinedAtomicCounterBuffers = 1;
const mediump int gl_MaxAtomicCounterBufferSize = 32;


//Built-in uniform state
//Depth range parameters exposed to shaders through gl_DepthRange.
struct gl_DepthRangeParameters
{
    highp float near; // n
    highp float far; // f
    highp float diff; // f - n
};
uniform gl_DepthRangeParameters gl_DepthRange;

0 件のコメント:

コメントを投稿