"""
Shared-resource 2D shader rendering for WolfArray.
Instead of creating a separate WolfArrayPlotShader per grid cell
(each with its own shader program, z-texture upload, palette upload),
we compile the shader program ONCE, upload the z-texture and palette
ONCE, concatenate all visible quad-centers into a SINGLE VBO, and
issue a SINGLE glDrawArrays call.
Author: HECE - University of Liege, Pierre Archambeau
Date: 2024
Copyright (c) 2024 University of Liege. All rights reserved.
"""
import logging
import numpy as np
from pathlib import Path
from OpenGL.GL import (
glCreateShader, glShaderSource, glCompileShader, glGetShaderiv,
glGetShaderInfoLog, glDeleteShader,
glCreateProgram, glAttachShader, glLinkProgram, glGetProgramiv,
glGetProgramInfoLog, glDeleteProgram,
glUseProgram, glGetUniformLocation,
glGenTextures, glBindTexture, glTexImage2D, glTexImage1D,
glTexParameteri, glDeleteTextures,
glGenVertexArrays, glBindVertexArray, glDeleteVertexArrays,
glGenBuffers, glBindBuffer, glBufferData, glDeleteBuffers,
glEnableVertexAttribArray, glVertexAttribPointer,
glDrawArrays, glActiveTexture, glPolygonMode,
glEnable, glDisable, glBlendFunc,
glUniform1f, glUniform1i, glUniform1fv,
glUniform3f, glUniformMatrix4fv,
GL_VERTEX_SHADER, GL_FRAGMENT_SHADER, GL_GEOMETRY_SHADER,
GL_COMPILE_STATUS, GL_LINK_STATUS, GL_FALSE,
GL_TEXTURE_RECTANGLE, GL_TEXTURE_1D,
GL_R32F, GL_RED, GL_FLOAT, GL_RGB,
GL_R8I, GL_R8UI, GL_R16I, GL_R16UI, GL_R32I, GL_R32UI,
GL_RED_INTEGER,
GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT,
GL_INT, GL_UNSIGNED_INT,
GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE,
GL_TEXTURE_MIN_FILTER, GL_TEXTURE_MAG_FILTER, GL_LINEAR,
GL_NEAREST,
GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW,
GL_POINTS, GL_FRONT_AND_BACK, GL_FILL,
GL_BLEND, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA,
GL_TEXTURE0, GL_TEXTURE1,
GL_UNPACK_ALIGNMENT,
glPixelStorei,
glGetIntegerv,
GL_MAX_RECTANGLE_TEXTURE_SIZE,
)
import math
[docs]
# Directory holding the GLSL source files opened by
# ``WolfArrayShader2D._init_resources``: the ``shaders`` folder located in
# the parent of the directory that contains this module.
SHADER_DIR = Path(__file__).parent.parent / "shaders"
# Mapping from numpy dtype to (gl_internal_format, gl_format, gl_type, shader_define).
# Types not listed here fall back to float32 conversion.
[docs]
# Keys are numpy dtypes. Each value is the 4-tuple
# (internal format, pixel format, pixel type, shader define) handed to
# ``glTexImage2D`` when the z-texture is uploaded.
# float32 carries no define (None); every integer dtype uses the
# GL_RED_INTEGER pixel format and selects either 'SIGNED_INTEGER_DATA' or
# 'UNSIGNED_INTEGER_DATA', which is injected as a ``#define`` into the
# geometry shader source.
_DTYPE_GL_MAP = {
np.dtype('float32'): (GL_R32F, GL_RED, GL_FLOAT, None),
np.dtype('int8'): (GL_R8I, GL_RED_INTEGER, GL_BYTE, 'SIGNED_INTEGER_DATA'),
np.dtype('uint8'): (GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, 'UNSIGNED_INTEGER_DATA'),
np.dtype('int16'): (GL_R16I, GL_RED_INTEGER, GL_SHORT, 'SIGNED_INTEGER_DATA'),
np.dtype('uint16'): (GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, 'UNSIGNED_INTEGER_DATA'),
np.dtype('int32'): (GL_R32I, GL_RED_INTEGER, GL_INT, 'SIGNED_INTEGER_DATA'),
np.dtype('uint32'): (GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 'UNSIGNED_INTEGER_DATA'),
}
[docs]
class WolfArrayShader2D:
"""Manages shared OpenGL resources for 2D shader rendering of a WolfArray.
Holds a back-reference to the owning WolfArray to access grid data,
palette, mapviewer, and LOD helpers.
"""
def __init__(self, owner):
"""
:param owner: The WolfArray instance that owns this shader renderer.
"""
[docs]
self._ztexture_id = None
[docs]
self._palette_id = None
[docs]
self._visible_key = None
[docs]
self._quad_cache = {} # (lod, i, j) -> flat np.float32 array
[docs]
self._color_values_buf = np.zeros(256, dtype=np.float32)
[docs]
self._pal_values = None
[docs]
self._ztext_ready = False
[docs]
self._pal_ready = False
[docs]
self._dtype_define = None # current shader variant (None, 'SIGNED_INTEGER_DATA', 'UNSIGNED_INTEGER_DATA')
# Texture tiling state (for arrays exceeding GL_MAX_RECTANGLE_TEXTURE_SIZE)
[docs]
self._tile_textures = {} # (tx, ty) -> GL texture id
[docs]
self._tile_size = None # max tile size in texels (queried from GL)
[docs]
self._n_tiles = (1, 1) # (n_tiles_x, n_tiles_y)
[docs]
self._tiled = False # True when array exceeds single texture capacity
[docs]
self._tile_ranges = {} # (tx, ty) -> (vbo_offset, vertex_count)
[docs]
self._tile_visible_key = None # cache key for tiled VBO data
# -----------------------------------------------------------------
# Shader compilation (once)
# -----------------------------------------------------------------
[docs]
def _init_resources(self, dtype_define=None):
"""Compile the shared shader program and locate uniforms.
:param dtype_define: Optional preprocessor define to inject into the
geometry shader ('SIGNED_INTEGER_DATA' or 'UNSIGNED_INTEGER_DATA').
When *None* the shader uses ``sampler2DRect`` (float textures).
"""
if self._program is not None and self._dtype_define == dtype_define:
return
# If the dtype category changed, release the old program first.
if self._program is not None:
try:
glDeleteProgram(self._program)
except Exception:
pass
self._program = None
self._locs = None
# --- Compile vertex shader ---
vs = glCreateShader(GL_VERTEX_SHADER)
with open(SHADER_DIR / "simple_vertex_shader_wo_mvp.glsl") as f:
glShaderSource(vs, f.read())
glCompileShader(vs)
if glGetShaderiv(vs, GL_COMPILE_STATUS, None) == GL_FALSE:
info = glGetShaderInfoLog(vs)
glDeleteShader(vs)
raise RuntimeError(f"Vertex shader compilation failed: {info}")
# --- Compile fragment shader ---
fs = glCreateShader(GL_FRAGMENT_SHADER)
with open(SHADER_DIR / "quad_frag_shader.glsl") as f:
glShaderSource(fs, f.read())
glCompileShader(fs)
if glGetShaderiv(fs, GL_COMPILE_STATUS, None) == GL_FALSE:
info = glGetShaderInfoLog(fs)
glDeleteShader(vs); glDeleteShader(fs)
raise RuntimeError(f"Fragment shader compilation failed: {info}")
# --- Compile geometry shader ---
gs = glCreateShader(GL_GEOMETRY_SHADER)
with open(SHADER_DIR / "quad_geom_shader.glsl") as f:
gs_source = f.read()
# Inject a #define after the #version line for integer texture support
if dtype_define is not None:
lines = gs_source.split('\n')
for i, line in enumerate(lines):
if line.strip().startswith('#version'):
lines.insert(i + 1, f'#define {dtype_define}')
break
gs_source = '\n'.join(lines)
glShaderSource(gs, gs_source)
glCompileShader(gs)
if glGetShaderiv(gs, GL_COMPILE_STATUS, None) == GL_FALSE:
info = glGetShaderInfoLog(gs)
glDeleteShader(vs); glDeleteShader(fs); glDeleteShader(gs)
raise RuntimeError(f"Geometry shader compilation failed: {info}")
# --- Link program ---
program = glCreateProgram()
glAttachShader(program, vs)
glAttachShader(program, fs)
glAttachShader(program, gs)
glLinkProgram(program)
if glGetProgramiv(program, GL_LINK_STATUS) == GL_FALSE:
info = glGetProgramInfoLog(program)
glDeleteProgram(program)
raise RuntimeError(f"Shader program link failed: {info}")
glDeleteShader(vs)
glDeleteShader(fs)
glDeleteShader(gs)
self._program = program
self._dtype_define = dtype_define
# --- Locate uniforms (once) ---
glUseProgram(program)
locs = {}
for name in ('mvp', 'dx', 'dy', 'width', 'height', 'origx', 'origy',
'zScale', 'zText', 'colorPalette', 'colorValues', 'alpha',
'uniform_in_part', 'sunPosition', 'sunIntensity',
'paletteSize', 'idx', 'lod', 'compute_walls',
'tile_offset_i', 'tile_offset_j'):
locs[name] = glGetUniformLocation(program, name)
glUseProgram(0)
self._locs = locs
# -----------------------------------------------------------------
# Texture uploads (once, or on invalidation)
# -----------------------------------------------------------------
[docs]
def _ensure_ztexture(self):
    """Send the owner's array values to the GPU as rectangle texture(s).

    A CPU-side copy is built (and stored under ``'ztext'`` in the owner's
    ``_cache_grid``) when none is cached: masked cells are overwritten
    with the array minimum, and dtypes absent from ``_DTYPE_GL_MAP`` are
    converted to float32.

    When both dimensions fit within ``GL_MAX_RECTANGLE_TEXTURE_SIZE`` a
    single texture is used; otherwise the data is cut into a grid of tile
    textures, each within the hardware limit.
    """
    wa = self._owner
    grid_cache = wa._cache_grid

    if 'ztext' not in grid_cache or grid_cache['ztext'] is None:
        # Natively supported dtypes keep their type; others become float32.
        native = wa.array.dtype in _DTYPE_GL_MAP
        staged = np.require(wa.array.data.copy(),
                            dtype=None if native else np.float32,
                            requirements=['C'])
        staged[wa.array.mask] = wa.array.min()
        grid_cache['ztext'] = staged
    ztext = grid_cache['ztext']

    # Float texture parameters are the fallback for unlisted dtypes.
    gl_internal, gl_fmt, gl_type, dtype_define = _DTYPE_GL_MAP.get(
        ztext.dtype, (GL_R32F, GL_RED, GL_FLOAT, None))
    self._init_resources(dtype_define)

    # Query the maximum rectangle texture size (once per lifetime)
    if self._tile_size is None:
        self._tile_size = int(glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE))
        logging.debug('GL_MAX_RECTANGLE_TEXTURE_SIZE = %d', self._tile_size)

    def upload(tex_id, texels):
        """Bind *tex_id*, fill it with the 2-D array *texels*, unbind."""
        glBindTexture(GL_TEXTURE_RECTANGLE, tex_id)
        # Rows of 1- or 2-byte texels are not padded to 4 bytes.
        narrow = texels.dtype.itemsize < 4
        if narrow:
            glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
        glTexImage2D(GL_TEXTURE_RECTANGLE, 0, gl_internal,
                     texels.shape[1], texels.shape[0],
                     0, gl_fmt, gl_type, texels.data)
        if narrow:
            glPixelStorei(GL_UNPACK_ALIGNMENT, 4)
        if dtype_define is not None:
            # Integer variants get nearest-neighbour filtering.
            glTexParameteri(GL_TEXTURE_RECTANGLE,
                            GL_TEXTURE_MIN_FILTER, GL_NEAREST)
            glTexParameteri(GL_TEXTURE_RECTANGLE,
                            GL_TEXTURE_MAG_FILTER, GL_NEAREST)
        glBindTexture(GL_TEXTURE_RECTANGLE, 0)

    nbx, nby = ztext.shape  # rows=nbx, cols=nby
    limit = self._tile_size

    if nbx <= limit and nby <= limit:
        # Single texture (original fast path)
        self._tiled = False
        self._n_tiles = (1, 1)
        if self._ztexture_id is None:
            self._ztexture_id = glGenTextures(1)
        upload(self._ztexture_id, ztext)
    else:
        # Tiled textures: split the array into chunks that fit
        self._tiled = True
        n_tiles_x = math.ceil(nbx / limit)
        n_tiles_y = math.ceil(nby / limit)
        self._n_tiles = (n_tiles_x, n_tiles_y)
        logging.info('Array %dx%d exceeds max texture size %d, '
                     'using %dx%d tile grid',
                     nbx, nby, limit, n_tiles_x, n_tiles_y)
        for tx in range(n_tiles_x):
            rows = slice(tx * limit, min(tx * limit + limit, nbx))
            for ty in range(n_tiles_y):
                cols = slice(ty * limit, min(ty * limit + limit, nby))
                tile_data = np.ascontiguousarray(ztext[rows, cols])
                key = (tx, ty)
                if key not in self._tile_textures:
                    self._tile_textures[key] = glGenTextures(1)
                upload(self._tile_textures[key], tile_data)

    self._ztext_ready = True
[docs]
def _ensure_palette(self):
    """Create (if needed) and fill the 1-D palette texture.

    The colours come from ``mypal.get_colors_f32()`` of the owning array
    and are uploaded as ``GL_RGB`` / ``GL_FLOAT`` texels with linear
    filtering and clamp-to-edge wrapping. The palette values (cast to
    float32) and the number of colours are kept on the instance.
    """
    wa = self._owner
    rgb = wa.mypal.get_colors_f32().flatten()
    breakpoints = wa.mypal.values.astype(np.float32)
    n_colors = len(rgb) // 3  # three components per colour

    if self._palette_id is None:
        self._palette_id = glGenTextures(1)

    glBindTexture(GL_TEXTURE_1D, self._palette_id)
    glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB,
                 n_colors, 0, GL_RGB, GL_FLOAT, rgb.data)
    for pname, setting in ((GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE),
                           (GL_TEXTURE_MIN_FILTER, GL_LINEAR),
                           (GL_TEXTURE_MAG_FILTER, GL_LINEAR)):
        glTexParameteri(GL_TEXTURE_1D, pname, setting)
    glBindTexture(GL_TEXTURE_1D, 0)

    self._pal_values = breakpoints
    self._pal_size = n_colors
    self._pal_ready = True
# -----------------------------------------------------------------
# Cleanup
# -----------------------------------------------------------------
[docs]
def cleanup(self):
"""Release all shared GL resources."""
for rid, deleter in [(self._vao, glDeleteVertexArrays),
(self._vbo, glDeleteBuffers),
(self._ztexture_id, glDeleteTextures),
(self._palette_id, glDeleteTextures)]:
if rid is not None:
try:
deleter(1, [rid])
except Exception:
pass
# Delete tile textures
for tex_id in self._tile_textures.values():
try:
glDeleteTextures(1, [tex_id])
except Exception:
pass
if self._program is not None:
try:
glDeleteProgram(self._program)
except Exception:
pass
# Reset all state
self._program = None
self._locs = None
self._ztexture_id = None
self._palette_id = None
self._vao = None
self._vbo = None
self._merged_count = 0
self._visible_key = None
self._quad_cache = {}
self._ztext_ready = False
self._pal_ready = False
self._dtype_define = None
self._tile_textures = {}
self._tile_size = None
self._n_tiles = (1, 1)
self._tiled = False
self._tile_ranges = {}
self._tile_visible_key = None
# -----------------------------------------------------------------
# VAO/VBO helpers and main draw entry point
# -----------------------------------------------------------------
[docs]
def _ensure_vao_vbo(self):
"""Create the shared VAO/VBO pair (once)."""
if self._vao is None:
self._vao = glGenVertexArrays(1)
self._vbo = glGenBuffers(1)
glBindVertexArray(self._vao)
glEnableVertexAttribArray(0)
glBindBuffer(GL_ARRAY_BUFFER, self._vbo)
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, None)
glBindVertexArray(0)
glBindBuffer(GL_ARRAY_BUFFER, 0)
[docs]
def _upload_vbo(self, data):
    """Upload float32 vertex data to the shared VBO.

    The VAO declares attribute 0 as pairs of ``GL_FLOAT``, so *data* is
    first coerced to a C-contiguous float32 array; an input that already
    has that layout is passed through without copying.

    :param data: flat array of interleaved x, y coordinates.
    """
    payload = np.ascontiguousarray(data, dtype=np.float32)
    glBindBuffer(GL_ARRAY_BUFFER, self._vbo)
    try:
        glBufferData(GL_ARRAY_BUFFER, payload.nbytes, payload, GL_DYNAMIC_DRAW)
    finally:
        # Never leave the buffer bound, even if the upload fails.
        glBindBuffer(GL_ARRAY_BUFFER, 0)
[docs]
def _build_tile_vbo_data(self, wa, lod, istart, iend, jstart, jend):
"""Group visible quad centers by texture tile and merge into one VBO.
:return: ``(merged_vbo, tile_ranges)`` where *merged_vbo* is a
concatenated ``np.float32`` array and *tile_ranges* maps
``(tx, ty)`` to ``(vbo_offset, vertex_count)`` for
``glDrawArrays``.
"""
tile_quads = {} # (tx, ty) -> list of flat float32 arrays
ts = self._tile_size
for j in range(jstart, jend + 1):
for i in range(istart, iend + 1):
ck = (lod, i, j)
if ck not in self._quad_cache:
self._quad_cache[ck] = wa._get_xy_centers_LOD(lod, i, j)
q = self._quad_cache[ck]
if len(q) == 0:
continue
quads_2d = q.reshape(-1, 2)
# Convert world coords back to array indices for tile assignment
arr_i = np.floor((quads_2d[:, 0] - wa.origx) / wa.dx).astype(int)
arr_j = np.floor((quads_2d[:, 1] - wa.origy) / wa.dy).astype(int)
np.clip(arr_i, 0, wa.nbx - 1, out=arr_i)
np.clip(arr_j, 0, wa.nby - 1, out=arr_j)
tx_arr = arr_i // ts
ty_arr = arr_j // ts
# Fast path: entire LOD tile maps to one texture tile
if tx_arr[0] == tx_arr[-1] and ty_arr[0] == ty_arr[-1]:
tk = (int(tx_arr[0]), int(ty_arr[0]))
tile_quads.setdefault(tk, []).append(q)
else:
# LOD tile straddles texture tile boundaries — split
tile_keys = tx_arr * self._n_tiles[1] + ty_arr
for uk in np.unique(tile_keys):
mask = (tile_keys == uk)
tx = int(uk // self._n_tiles[1])
ty = int(uk % self._n_tiles[1])
tk = (tx, ty)
tile_quads.setdefault(tk, []).append(
quads_2d[mask].flatten().astype(np.float32))
# Concatenate per-tile data and build offset table
all_parts = []
tile_ranges = {}
offset = 0
for tk in sorted(tile_quads.keys()):
merged_tile = np.concatenate(tile_quads[tk])
count = len(merged_tile) // 2
tile_ranges[tk] = (offset, count)
all_parts.append(merged_tile)
offset += count
merged = np.concatenate(all_parts).astype(np.float32) if all_parts else np.array([], dtype=np.float32)
return merged, tile_ranges
[docs]
def draw(self, sx: float = None, sy: float = None,
xmin: float = None, ymin: float = None,
xmax: float = None, ymax: float = None):
"""Draw using a single shared shader program.
When the array fits in a single texture, issues a single
``glDrawArrays`` call. When tiled (array exceeds max texture
size), iterates over visible tiles, binding each tile's texture
and setting per-tile offset uniforms.
"""
wa = self._owner
lod = wa._get_LOD(sx, sy, xmin, ymin, xmax, ymax)
if lod == -1:
return
(istart, iend), (jstart, jend) = wa._get_part_to_plot_LOD(lod, xmin, ymin, xmax, ymax)
if istart > iend or jstart > jend:
return
# --- Shared GL resources (textures & shader) — created once ---
if not self._ztext_ready:
self._ensure_ztexture()
if not self._pal_ready:
self._ensure_palette()
self._ensure_vao_vbo()
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL)
if wa.alpha < 1.0:
glEnable(GL_BLEND)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
glUseProgram(self._program)
self._set_common_uniforms(wa, lod)
# Bind palette texture (shared across all tiles)
glActiveTexture(GL_TEXTURE1)
glBindTexture(GL_TEXTURE_1D, self._palette_id)
if not self._tiled:
# ===========================================================
# Single-texture path (original fast path)
# ===========================================================
glUniform1i(self._locs['tile_offset_i'], 0)
glUniform1i(self._locs['tile_offset_j'], 0)
visible_key = (lod, istart, iend, jstart, jend)
if self._visible_key != visible_key:
parts = []
for j in range(jstart, jend + 1):
for i in range(istart, iend + 1):
ck = (lod, i, j)
if ck not in self._quad_cache:
self._quad_cache[ck] = wa._get_xy_centers_LOD(lod, i, j)
q = self._quad_cache[ck]
if len(q) > 0:
parts.append(q)
merged = np.concatenate(parts) if parts else np.array([], dtype=np.float32)
self._upload_vbo(merged)
self._merged_count = len(merged) // 2
self._visible_key = visible_key
if self._merged_count > 0:
glActiveTexture(GL_TEXTURE0)
glBindTexture(GL_TEXTURE_RECTANGLE, self._ztexture_id)
glBindVertexArray(self._vao)
glDrawArrays(GL_POINTS, 0, self._merged_count)
glBindVertexArray(0)
glActiveTexture(GL_TEXTURE0)
glBindTexture(GL_TEXTURE_RECTANGLE, 0)
else:
# ===========================================================
# Tiled path – one draw call per visible texture tile
# ===========================================================
visible_key = ('tiled', lod, istart, iend, jstart, jend)
if self._tile_visible_key != visible_key:
merged, self._tile_ranges = self._build_tile_vbo_data(
wa, lod, istart, iend, jstart, jend)
self._upload_vbo(merged)
self._tile_visible_key = visible_key
locs = self._locs
ts = self._tile_size
for (tx, ty), (off, cnt) in self._tile_ranges.items():
if cnt == 0:
continue
if (tx, ty) not in self._tile_textures:
continue
glUniform1i(locs['tile_offset_i'], tx * ts)
glUniform1i(locs['tile_offset_j'], ty * ts)
glActiveTexture(GL_TEXTURE0)
glBindTexture(GL_TEXTURE_RECTANGLE,
self._tile_textures[(tx, ty)])
glBindVertexArray(self._vao)
glDrawArrays(GL_POINTS, off, cnt)
glBindVertexArray(0)
glActiveTexture(GL_TEXTURE0)
glBindTexture(GL_TEXTURE_RECTANGLE, 0)
# Teardown
glActiveTexture(GL_TEXTURE1)
glBindTexture(GL_TEXTURE_1D, 0)
glActiveTexture(GL_TEXTURE0)
glDisable(GL_TEXTURE_RECTANGLE)
glUseProgram(0)
if wa.alpha < 1.0:
glDisable(GL_BLEND)