Source code for wolfhece.opengl.wolf_array_shader2d

"""
Shared-resource 2D shader rendering for WolfArray.

Instead of creating a separate WolfArrayPlotShader per grid cell
(each with its own shader program, z-texture upload, palette upload),
we compile the shader program ONCE, upload the z-texture and palette
ONCE, concatenate all visible quad-centers into a SINGLE VBO, and
issue a SINGLE glDrawArrays call.

Author: HECE - University of Liege, Pierre Archambeau
Date: 2024

Copyright (c) 2024 University of Liege. All rights reserved.
"""

import logging
import numpy as np
from pathlib import Path

from OpenGL.GL import (
    glCreateShader, glShaderSource, glCompileShader, glGetShaderiv,
    glGetShaderInfoLog, glDeleteShader,
    glCreateProgram, glAttachShader, glLinkProgram, glGetProgramiv,
    glGetProgramInfoLog, glDeleteProgram,
    glUseProgram, glGetUniformLocation,
    glGenTextures, glBindTexture, glTexImage2D, glTexImage1D,
    glTexParameteri, glDeleteTextures,
    glGenVertexArrays, glBindVertexArray, glDeleteVertexArrays,
    glGenBuffers, glBindBuffer, glBufferData, glDeleteBuffers,
    glEnableVertexAttribArray, glVertexAttribPointer,
    glDrawArrays, glActiveTexture, glPolygonMode,
    glEnable, glDisable, glBlendFunc,
    glUniform1f, glUniform1i, glUniform1fv,
    glUniform3f, glUniformMatrix4fv,
    GL_VERTEX_SHADER, GL_FRAGMENT_SHADER, GL_GEOMETRY_SHADER,
    GL_COMPILE_STATUS, GL_LINK_STATUS, GL_FALSE,
    GL_TEXTURE_RECTANGLE, GL_TEXTURE_1D,
    GL_R32F, GL_RED, GL_FLOAT, GL_RGB,
    GL_R8I, GL_R8UI, GL_R16I, GL_R16UI, GL_R32I, GL_R32UI,
    GL_RED_INTEGER,
    GL_BYTE, GL_UNSIGNED_BYTE, GL_SHORT, GL_UNSIGNED_SHORT,
    GL_INT, GL_UNSIGNED_INT,
    GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE,
    GL_TEXTURE_MIN_FILTER, GL_TEXTURE_MAG_FILTER, GL_LINEAR,
    GL_NEAREST,
    GL_ARRAY_BUFFER, GL_DYNAMIC_DRAW,
    GL_POINTS, GL_FRONT_AND_BACK, GL_FILL,
    GL_BLEND, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA,
    GL_TEXTURE0, GL_TEXTURE1,
    GL_UNPACK_ALIGNMENT,
    glPixelStorei,
    glGetIntegerv,
    GL_MAX_RECTANGLE_TEXTURE_SIZE,
)

import math

[docs] SHADER_DIR = Path(__file__).parent.parent / "shaders"
# Mapping from numpy dtype to (gl_internal_format, gl_format, gl_type, shader_define). # Types not listed here fall back to float32 conversion.
[docs] _DTYPE_GL_MAP = { np.dtype('float32'): (GL_R32F, GL_RED, GL_FLOAT, None), np.dtype('int8'): (GL_R8I, GL_RED_INTEGER, GL_BYTE, 'SIGNED_INTEGER_DATA'), np.dtype('uint8'): (GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, 'UNSIGNED_INTEGER_DATA'), np.dtype('int16'): (GL_R16I, GL_RED_INTEGER, GL_SHORT, 'SIGNED_INTEGER_DATA'), np.dtype('uint16'): (GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, 'UNSIGNED_INTEGER_DATA'), np.dtype('int32'): (GL_R32I, GL_RED_INTEGER, GL_INT, 'SIGNED_INTEGER_DATA'), np.dtype('uint32'): (GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, 'UNSIGNED_INTEGER_DATA'), }
[docs] class WolfArrayShader2D: """Manages shared OpenGL resources for 2D shader rendering of a WolfArray. Holds a back-reference to the owning WolfArray to access grid data, palette, mapviewer, and LOD helpers. """ def __init__(self, owner): """ :param owner: The WolfArray instance that owns this shader renderer. """
[docs] self._owner = owner
[docs] self._program = None
[docs] self._locs = None
[docs] self._ztexture_id = None
[docs] self._palette_id = None
[docs] self._vao = None
[docs] self._vbo = None
[docs] self._merged_count = 0
[docs] self._visible_key = None
[docs] self._quad_cache = {} # (lod, i, j) -> flat np.float32 array
[docs] self._color_values_buf = np.zeros(256, dtype=np.float32)
[docs] self._pal_values = None
[docs] self._pal_size = 0
[docs] self._ztext_ready = False
[docs] self._pal_ready = False
[docs] self._dtype_define = None # current shader variant (None, 'SIGNED_INTEGER_DATA', 'UNSIGNED_INTEGER_DATA')
# Texture tiling state (for arrays exceeding GL_MAX_RECTANGLE_TEXTURE_SIZE)
[docs] self._tile_textures = {} # (tx, ty) -> GL texture id
[docs] self._tile_size = None # max tile size in texels (queried from GL)
[docs] self._n_tiles = (1, 1) # (n_tiles_x, n_tiles_y)
[docs] self._tiled = False # True when array exceeds single texture capacity
[docs] self._tile_ranges = {} # (tx, ty) -> (vbo_offset, vertex_count)
[docs] self._tile_visible_key = None # cache key for tiled VBO data
# ----------------------------------------------------------------- # Shader compilation (once) # -----------------------------------------------------------------
[docs] def _init_resources(self, dtype_define=None): """Compile the shared shader program and locate uniforms. :param dtype_define: Optional preprocessor define to inject into the geometry shader ('SIGNED_INTEGER_DATA' or 'UNSIGNED_INTEGER_DATA'). When *None* the shader uses ``sampler2DRect`` (float textures). """ if self._program is not None and self._dtype_define == dtype_define: return # If the dtype category changed, release the old program first. if self._program is not None: try: glDeleteProgram(self._program) except Exception: pass self._program = None self._locs = None # --- Compile vertex shader --- vs = glCreateShader(GL_VERTEX_SHADER) with open(SHADER_DIR / "simple_vertex_shader_wo_mvp.glsl") as f: glShaderSource(vs, f.read()) glCompileShader(vs) if glGetShaderiv(vs, GL_COMPILE_STATUS, None) == GL_FALSE: info = glGetShaderInfoLog(vs) glDeleteShader(vs) raise RuntimeError(f"Vertex shader compilation failed: {info}") # --- Compile fragment shader --- fs = glCreateShader(GL_FRAGMENT_SHADER) with open(SHADER_DIR / "quad_frag_shader.glsl") as f: glShaderSource(fs, f.read()) glCompileShader(fs) if glGetShaderiv(fs, GL_COMPILE_STATUS, None) == GL_FALSE: info = glGetShaderInfoLog(fs) glDeleteShader(vs); glDeleteShader(fs) raise RuntimeError(f"Fragment shader compilation failed: {info}") # --- Compile geometry shader --- gs = glCreateShader(GL_GEOMETRY_SHADER) with open(SHADER_DIR / "quad_geom_shader.glsl") as f: gs_source = f.read() # Inject a #define after the #version line for integer texture support if dtype_define is not None: lines = gs_source.split('\n') for i, line in enumerate(lines): if line.strip().startswith('#version'): lines.insert(i + 1, f'#define {dtype_define}') break gs_source = '\n'.join(lines) glShaderSource(gs, gs_source) glCompileShader(gs) if glGetShaderiv(gs, GL_COMPILE_STATUS, None) == GL_FALSE: info = glGetShaderInfoLog(gs) glDeleteShader(vs); glDeleteShader(fs); glDeleteShader(gs) raise RuntimeError(f"Geometry shader compilation failed: {info}") # --- Link program --- program = glCreateProgram() glAttachShader(program, vs) glAttachShader(program, fs) glAttachShader(program, gs) glLinkProgram(program) if glGetProgramiv(program, GL_LINK_STATUS) == GL_FALSE: info = glGetProgramInfoLog(program) glDeleteProgram(program) raise RuntimeError(f"Shader program link failed: {info}") glDeleteShader(vs) glDeleteShader(fs) glDeleteShader(gs) self._program = program self._dtype_define = dtype_define # --- Locate uniforms (once) --- glUseProgram(program) locs = {} for name in ('mvp', 'dx', 'dy', 'width', 'height', 'origx', 'origy', 'zScale', 'zText', 'colorPalette', 'colorValues', 'alpha', 'uniform_in_part', 'sunPosition', 'sunIntensity', 'paletteSize', 'idx', 'lod', 'compute_walls', 'tile_offset_i', 'tile_offset_j'): locs[name] = glGetUniformLocation(program, name) glUseProgram(0) self._locs = locs
# ----------------------------------------------------------------- # Texture uploads (once, or on invalidation) # -----------------------------------------------------------------
[docs] def _ensure_ztexture(self): """Upload the z-texture to the GPU. If the array fits within ``GL_MAX_RECTANGLE_TEXTURE_SIZE`` in both dimensions, a single rectangle texture is created (fast path). Otherwise the data is split into a grid of tile textures, each within the hardware limit. """ wa = self._owner if 'ztext' not in wa._cache_grid or wa._cache_grid['ztext'] is None: src_dtype = wa.array.dtype if src_dtype in _DTYPE_GL_MAP: ztext = np.require(wa.array.data.copy(), requirements=['C']) ztext[wa.array.mask] = wa.array.min() else: ztext = np.require( wa.array.data.copy(), dtype=np.float32, requirements=['C']) ztext[wa.array.mask] = wa.array.min() wa._cache_grid['ztext'] = ztext ztext = wa._cache_grid['ztext'] gl_info = _DTYPE_GL_MAP.get(ztext.dtype) if gl_info is None: gl_internal, gl_fmt, gl_type, dtype_define = GL_R32F, GL_RED, GL_FLOAT, None else: gl_internal, gl_fmt, gl_type, dtype_define = gl_info self._init_resources(dtype_define) # Query the maximum rectangle texture size (once per lifetime) if self._tile_size is None: self._tile_size = int(glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE)) logging.debug('GL_MAX_RECTANGLE_TEXTURE_SIZE = %d', self._tile_size) nbx, nby = ztext.shape # rows=nbx, cols=nby if nbx <= self._tile_size and nby <= self._tile_size: # ---------------------------------------------------------- # Single texture (original fast path) # ---------------------------------------------------------- self._tiled = False self._n_tiles = (1, 1) if self._ztexture_id is None: self._ztexture_id = glGenTextures(1) glBindTexture(GL_TEXTURE_RECTANGLE, self._ztexture_id) if ztext.dtype.itemsize < 4: glPixelStorei(GL_UNPACK_ALIGNMENT, 1) glTexImage2D(GL_TEXTURE_RECTANGLE, 0, gl_internal, nby, nbx, 0, gl_fmt, gl_type, ztext.data) if ztext.dtype.itemsize < 4: glPixelStorei(GL_UNPACK_ALIGNMENT, 4) if dtype_define is not None: glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST) glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST) glBindTexture(GL_TEXTURE_RECTANGLE, 0) else: # ---------------------------------------------------------- # Tiled textures – split the array into chunks that fit # ---------------------------------------------------------- self._tiled = True ts = self._tile_size n_tiles_x = math.ceil(nbx / ts) n_tiles_y = math.ceil(nby / ts) self._n_tiles = (n_tiles_x, n_tiles_y) logging.info('Array %dx%d exceeds max texture size %d, ' 'using %dx%d tile grid', nbx, nby, ts, n_tiles_x, n_tiles_y) for tx in range(n_tiles_x): for ty in range(n_tiles_y): row_start = tx * ts row_end = min(row_start + ts, nbx) col_start = ty * ts col_end = min(col_start + ts, nby) tile_data = np.ascontiguousarray( ztext[row_start:row_end, col_start:col_end]) if (tx, ty) not in self._tile_textures: self._tile_textures[(tx, ty)] = glGenTextures(1) tex_id = self._tile_textures[(tx, ty)] glBindTexture(GL_TEXTURE_RECTANGLE, tex_id) if tile_data.dtype.itemsize < 4: glPixelStorei(GL_UNPACK_ALIGNMENT, 1) glTexImage2D(GL_TEXTURE_RECTANGLE, 0, gl_internal, tile_data.shape[1], tile_data.shape[0], 0, gl_fmt, gl_type, tile_data.data) if tile_data.dtype.itemsize < 4: glPixelStorei(GL_UNPACK_ALIGNMENT, 4) if dtype_define is not None: glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST) glTexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST) glBindTexture(GL_TEXTURE_RECTANGLE, 0) self._ztext_ready = True
[docs] def _ensure_palette(self): """Upload the palette 1-D texture to the GPU.""" wa = self._owner palette = wa.mypal.get_colors_f32().flatten() values = wa.mypal.values.astype(np.float32) if self._palette_id is None: self._palette_id = glGenTextures(1) glBindTexture(GL_TEXTURE_1D, self._palette_id) glTexImage1D(GL_TEXTURE_1D, 0, GL_RGB, len(palette) // 3, 0, GL_RGB, GL_FLOAT, palette.data) glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE) glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR) glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR) glBindTexture(GL_TEXTURE_1D, 0) self._pal_values = values self._pal_size = len(palette) // 3 self._pal_ready = True
# ----------------------------------------------------------------- # Cleanup # -----------------------------------------------------------------
[docs] def cleanup(self): """Release all shared GL resources.""" for rid, deleter in [(self._vao, glDeleteVertexArrays), (self._vbo, glDeleteBuffers), (self._ztexture_id, glDeleteTextures), (self._palette_id, glDeleteTextures)]: if rid is not None: try: deleter(1, [rid]) except Exception: pass # Delete tile textures for tex_id in self._tile_textures.values(): try: glDeleteTextures(1, [tex_id]) except Exception: pass if self._program is not None: try: glDeleteProgram(self._program) except Exception: pass # Reset all state self._program = None self._locs = None self._ztexture_id = None self._palette_id = None self._vao = None self._vbo = None self._merged_count = 0 self._visible_key = None self._quad_cache = {} self._ztext_ready = False self._pal_ready = False self._dtype_define = None self._tile_textures = {} self._tile_size = None self._n_tiles = (1, 1) self._tiled = False self._tile_ranges = {} self._tile_visible_key = None
# Main draw entry point # -----------------------------------------------------------------
[docs] def _ensure_vao_vbo(self): """Create the shared VAO/VBO pair (once).""" if self._vao is None: self._vao = glGenVertexArrays(1) self._vbo = glGenBuffers(1) glBindVertexArray(self._vao) glEnableVertexAttribArray(0) glBindBuffer(GL_ARRAY_BUFFER, self._vbo) glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, None) glBindVertexArray(0) glBindBuffer(GL_ARRAY_BUFFER, 0)
[docs] def _upload_vbo(self, data): """Upload float32 vertex data to the shared VBO.""" glBindBuffer(GL_ARRAY_BUFFER, self._vbo) glBufferData(GL_ARRAY_BUFFER, data.nbytes, data, GL_DYNAMIC_DRAW) glBindBuffer(GL_ARRAY_BUFFER, 0)
[docs] def _set_common_uniforms(self, wa, lod): """Set uniforms shared between tiled and non-tiled draw paths.""" locs = self._locs glUniformMatrix4fv(locs['mvp'], 1, GL_FALSE, wa.mapviewer.mvp) glUniform1f(locs['width'], wa.mapviewer.width) glUniform1f(locs['height'], wa.mapviewer.height) glUniform1f(locs['dx'], wa.dx) glUniform1f(locs['dy'], wa.dy) glUniform1f(locs['origx'], wa.origx + wa.translx) glUniform1f(locs['origy'], wa.origy + wa.transly) glUniform1f(locs['zScale'], 1.0) glUniform1i(locs['lod'], lod) glUniform1i(locs['idx'], 0) glUniform1i(locs['compute_walls'], 0) glUniform1i(locs['uniform_in_part'], 1 if wa.mypal.interval_cst else 0) glUniform1i(locs['paletteSize'], self._pal_size) cv = self._color_values_buf pv = self._pal_values cv[:len(pv)] = pv glUniform1fv(locs['colorValues'], 256, cv) glUniform1i(locs['colorPalette'], 1) glUniform1f(locs['alpha'], wa.alpha) sun_pos = getattr(wa.mapviewer, 'sunposition', None) if sun_pos is not None: glUniform3f(locs['sunPosition'], sun_pos.x, sun_pos.y, sun_pos.z) else: glUniform3f(locs['sunPosition'], 10000.0, 10000.0, 10000.0) sun_int = getattr(wa.mapviewer, 'sunintensity', 1.0) glUniform1f(locs['sunIntensity'], sun_int if sun_int is not None else 1.0)
[docs] def _build_tile_vbo_data(self, wa, lod, istart, iend, jstart, jend): """Group visible quad centers by texture tile and merge into one VBO. :return: ``(merged_vbo, tile_ranges)`` where *merged_vbo* is a concatenated ``np.float32`` array and *tile_ranges* maps ``(tx, ty)`` to ``(vbo_offset, vertex_count)`` for ``glDrawArrays``. """ tile_quads = {} # (tx, ty) -> list of flat float32 arrays ts = self._tile_size for j in range(jstart, jend + 1): for i in range(istart, iend + 1): ck = (lod, i, j) if ck not in self._quad_cache: self._quad_cache[ck] = wa._get_xy_centers_LOD(lod, i, j) q = self._quad_cache[ck] if len(q) == 0: continue quads_2d = q.reshape(-1, 2) # Convert world coords back to array indices for tile assignment arr_i = np.floor((quads_2d[:, 0] - wa.origx) / wa.dx).astype(int) arr_j = np.floor((quads_2d[:, 1] - wa.origy) / wa.dy).astype(int) np.clip(arr_i, 0, wa.nbx - 1, out=arr_i) np.clip(arr_j, 0, wa.nby - 1, out=arr_j) tx_arr = arr_i // ts ty_arr = arr_j // ts # Fast path: entire LOD tile maps to one texture tile if tx_arr[0] == tx_arr[-1] and ty_arr[0] == ty_arr[-1]: tk = (int(tx_arr[0]), int(ty_arr[0])) tile_quads.setdefault(tk, []).append(q) else: # LOD tile straddles texture tile boundaries — split tile_keys = tx_arr * self._n_tiles[1] + ty_arr for uk in np.unique(tile_keys): mask = (tile_keys == uk) tx = int(uk // self._n_tiles[1]) ty = int(uk % self._n_tiles[1]) tk = (tx, ty) tile_quads.setdefault(tk, []).append( quads_2d[mask].flatten().astype(np.float32)) # Concatenate per-tile data and build offset table all_parts = [] tile_ranges = {} offset = 0 for tk in sorted(tile_quads.keys()): merged_tile = np.concatenate(tile_quads[tk]) count = len(merged_tile) // 2 tile_ranges[tk] = (offset, count) all_parts.append(merged_tile) offset += count merged = np.concatenate(all_parts).astype(np.float32) if all_parts else np.array([], dtype=np.float32) return merged, tile_ranges
[docs] def draw(self, sx: float = None, sy: float = None, xmin: float = None, ymin: float = None, xmax: float = None, ymax: float = None): """Draw using a single shared shader program. When the array fits in a single texture, issues a single ``glDrawArrays`` call. When tiled (array exceeds max texture size), iterates over visible tiles, binding each tile's texture and setting per-tile offset uniforms. """ wa = self._owner lod = wa._get_LOD(sx, sy, xmin, ymin, xmax, ymax) if lod == -1: return (istart, iend), (jstart, jend) = wa._get_part_to_plot_LOD(lod, xmin, ymin, xmax, ymax) if istart > iend or jstart > jend: return # --- Shared GL resources (textures & shader) — created once --- if not self._ztext_ready: self._ensure_ztexture() if not self._pal_ready: self._ensure_palette() self._ensure_vao_vbo() glPolygonMode(GL_FRONT_AND_BACK, GL_FILL) if wa.alpha < 1.0: glEnable(GL_BLEND) glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) glUseProgram(self._program) self._set_common_uniforms(wa, lod) # Bind palette texture (shared across all tiles) glActiveTexture(GL_TEXTURE1) glBindTexture(GL_TEXTURE_1D, self._palette_id) if not self._tiled: # =========================================================== # Single-texture path (original fast path) # =========================================================== glUniform1i(self._locs['tile_offset_i'], 0) glUniform1i(self._locs['tile_offset_j'], 0) visible_key = (lod, istart, iend, jstart, jend) if self._visible_key != visible_key: parts = [] for j in range(jstart, jend + 1): for i in range(istart, iend + 1): ck = (lod, i, j) if ck not in self._quad_cache: self._quad_cache[ck] = wa._get_xy_centers_LOD(lod, i, j) q = self._quad_cache[ck] if len(q) > 0: parts.append(q) merged = np.concatenate(parts) if parts else np.array([], dtype=np.float32) self._upload_vbo(merged) self._merged_count = len(merged) // 2 self._visible_key = visible_key if self._merged_count > 0: glActiveTexture(GL_TEXTURE0) glBindTexture(GL_TEXTURE_RECTANGLE, self._ztexture_id) glBindVertexArray(self._vao) glDrawArrays(GL_POINTS, 0, self._merged_count) glBindVertexArray(0) glActiveTexture(GL_TEXTURE0) glBindTexture(GL_TEXTURE_RECTANGLE, 0) else: # =========================================================== # Tiled path – one draw call per visible texture tile # =========================================================== visible_key = ('tiled', lod, istart, iend, jstart, jend) if self._tile_visible_key != visible_key: merged, self._tile_ranges = self._build_tile_vbo_data( wa, lod, istart, iend, jstart, jend) self._upload_vbo(merged) self._tile_visible_key = visible_key locs = self._locs ts = self._tile_size for (tx, ty), (off, cnt) in self._tile_ranges.items(): if cnt == 0: continue if (tx, ty) not in self._tile_textures: continue glUniform1i(locs['tile_offset_i'], tx * ts) glUniform1i(locs['tile_offset_j'], ty * ts) glActiveTexture(GL_TEXTURE0) glBindTexture(GL_TEXTURE_RECTANGLE, self._tile_textures[(tx, ty)]) glBindVertexArray(self._vao) glDrawArrays(GL_POINTS, off, cnt) glBindVertexArray(0) glActiveTexture(GL_TEXTURE0) glBindTexture(GL_TEXTURE_RECTANGLE, 0) # Teardown glActiveTexture(GL_TEXTURE1) glBindTexture(GL_TEXTURE_1D, 0) glActiveTexture(GL_TEXTURE0) glDisable(GL_TEXTURE_RECTANGLE) glUseProgram(0) if wa.alpha < 1.0: glDisable(GL_BLEND)