Source code for wolfgpu.glsimulation

import logging
from enum import Enum
from math import floor, ceil, log2, sqrt
from pathlib import Path
import os
import struct
import time
from contextlib import contextmanager
from typing import Iterator


FINISH=False # FIXME Seems unused.
# How often we sample the simulation time in the GPU.
# Warning! Querying the simulation time from the GPU is very
# costly in terms of GPU performance because it forces the GPU
# to synchronise memory. This can almost halve the speed of the GPU!!!
TIME_SAMPLING_RESOLUTION=100 # steps
# The default tile size (in meshes)
DEFAULT_TILE_SIZE=16
# If True, the stripes (used to better select which part of the map to compute)
# will be determined *once* at the beginning of the simulation by looking at the
# NAP map (so the stripes structure will remain constant throughout the
# simulation).
# FIXME DISABLED FOR THE MOMENT (code looks broken, I have fixed the NAP
# functionality and I have yet to port it back to this one)
# If False, the stripes will be computed on the basis of a mesh detection map, that
# is a map that gives which pixels belong to the computation domain or not. That
# map is recomputed at a given frequency, based on the "quantity" (h,qx,qy) map.
# So this is a dynamic tracking of the computation domain.
STRIPES_ON_NAP=True
# The thread width tells how many columns of meshes each thread in a thread
# group will handle. We do that sort of organisation because it helps the
# GPU to go a bit faster.
WET_TILES_DETECT_THREAD_WIDTH = 4
# These must be synced with the GlobalStateBlock SSBO's structure
GLOBALS_STRUCT_PACK_FORMAT = "dfLLLLLL"
GLOBALS_BUFFER_SIZE = struct.calcsize(GLOBALS_STRUCT_PACK_FORMAT)
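
# Illustrative sketch (not used by the simulation itself): how bytes read back
# from the GlobalStateBlock SSBO could be decoded with the pack format above.
# The field order mirrors the GLSL declaration generated in _load_shaders();
# `raw` stands for a hypothetical bytes object obtained with glGetBufferSubData.
def _decode_global_state_sketch(raw: bytes) -> dict:
    (simulation_time, time_step, active_infiltration_line, dry_up_iterations,
     nb_active_tiles, status_code,
     total_active_tiles_hi, total_active_tiles_lo) = struct.unpack(
        GLOBALS_STRUCT_PACK_FORMAT, raw[:GLOBALS_BUFFER_SIZE])
    return {
        "simulation_time": simulation_time,
        "time_step": time_step,
        "active_infiltration_line": active_infiltration_line,
        "dry_up_iterations": dry_up_iterations,
        "nb_active_tiles": nb_active_tiles,
        "status_code": status_code,
        # The cumulative wet-tile counter is split over two 32-bit words.
        "total_active_tiles": (total_active_tiles_hi << 32) | total_active_tiles_lo,
    }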
MAXIMUM_TIME_STEP = 1.0 # P.A.'s default (when no infiltration present)
# The value encoding the fact that in the infiltration zones, a mesh does *not* have # an infiltration (infiltration zones are numbered from zero, -1 represents "no zone")
MESH_WITHOUT_INFILTRATION = -1
ALPHA_VALUE_NDX = 0
ALPHA_FLAGS_NDX = 1
PARTS_QUADS=1
PARTS_STRIPES=2
# Memory tracking is only ever enabled on the original development machine,
# and it is forced off just below anyway. (.get() avoids a KeyError on
# platforms where these environment variables are absent.)
MEM_TRACKING = os.environ.get('USERNAME') == 'StephaneC' and os.environ.get('COMPUTERNAME') == 'FEDER1'
MEM_TRACKING = False
if MEM_TRACKING:
    from pympler import tracker

import tqdm
import numpy as np
import matplotlib.pyplot as plt
plt.set_loglevel(level='warning')

from OpenGL.GL import *
from OpenGL.GL import (
    glMemoryBarrier,
    GL_TEXTURE_FETCH_BARRIER_BIT,
    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT,
    GL_WRITE_ONLY,
    GL_READ_ONLY,
    GL_TEXTURE_UPDATE_BARRIER_BIT,
    GL_SHADER_STORAGE_BARRIER_BIT,
)
from OpenGL.GL import glBindBuffer, GL_SHADER_STORAGE_BUFFER, glGetBufferSubData, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS
from OpenGL.GL import GL_RGB32F

from .gl_utils import *
from .results_store import ResultsStore
from .utils import nice_timestamp
from .simple_simulation import SimpleSimulation, InfiltrationInterpolation, SimulationDuration, \
    SimulationDurationType, TimeStepStrategy

# Allow forward declaration
from .tile_packer import TilePacker, TilePackingMode

# Path fiddling to be able to load wolfhece's classes
import sys
_root_dir = Path(__file__).parent.parent.parent
if str(_root_dir) not in sys.path:
    sys.path.insert(0, str(_root_dir))

# from wolfhece.PyGui import Wolf2DModel  # deprecated because we don't use it anymore except for GUI
from wolfhece.mesh2d.wolf2dprev import prev_sim2D

# Doesn't seem to work because something else catches the SIGINT
# before Python does (likely binary libs we use)
# import signal
# control_c_was_hit = False
# def handler(signum, frame):
#     global control_c_was_hit
#     print("Ctrl-C hit" * 200)
#     control_c_was_hit = True
# signal.signal(signal.SIGINT, handler)

# glReadPixels = None
# read_texture = None
_perf_counters = dict()
@contextmanager
def time_it(msg: str = None) -> Iterator[None]:
    yield
    # tic: float = time.perf_counter()
    # # print(f"...starting {msg}")
    # try:
    #     yield
    # finally:
    #     if MEM_TRACKING:
    #         toc: float = time.perf_counter()
    #         delta = (toc - tic)*1000  # to millisec.
    #         if msg not in _perf_counters:
    #             _perf_counters[msg] = [delta]
    #         else:
    #             _perf_counters[msg].append(delta)
    #         # print(f"chrono: '{msg or ''}' = {1000*delta:.3f}ms")
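
# Usage sketch: time_it() is currently a no-op context manager (its measuring
# body is kept commented out above); when re-enabled it accumulates the wall
# clock duration of the wrapped block into _perf_counters under the given label.
#
#   with time_it("euler step"):
#       ...  # the timed block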
_gl_perf_counters = dict()
_timers = dict()
@contextmanager
def time_it_gl(msg: str = None) -> Iterator[None]:
    yield
    # query_id = glGenQueries(1)
    # _timers[f"(gl) {msg}"] = query_id[0]
    # glBeginQuery(GL_TIME_ELAPSED, query_id[0])
    # try:
    #     yield
    # finally:
    #     glEndQuery(GL_TIME_ELAPSED)
_query_id = None
def test_query_timer():
    global _query_id
    _query_id = glGenQueries(1)[0]
    glQueryCounter(_query_id, GL_TIMESTAMP)
def test_query_timer_end():
    while not glGetQueryObjectiv(_query_id, GL_QUERY_RESULT_AVAILABLE):
        pass
    t = glGetQueryObjectuiv(_query_id, GL_QUERY_RESULT)
    if t != 4294967295:
        print("yo")
    pass
_gl_queries_full = dict()
@contextmanager
def time_it_full(msg: str = None) -> Iterator[None]:
    test_query_timer()
    query_id = glGenQueries(2)
    assert msg not in _gl_queries_full
    tic: float = time.perf_counter()
    glQueryCounter(query_id[1], GL_TIMESTAMP)
    glBeginQuery(GL_TIME_ELAPSED, query_id[0])
    # Negative values are because it's a 32 bits counter that rolls
    # over every 2^32 nanosecs (so pretty often)
    tic_gpu = glGetIntegerv(GL_TIMESTAMP)  # v,64v: always 1.9...e-196!, Integer gives negative numbers, i_v is indexed; no good
    try:
        yield
    finally:
        toc: float = time.perf_counter()
        delta = (toc - tic)*1000  # to millisec.
        glEndQuery(GL_TIME_ELAPSED)
        _gl_queries_full[msg] = (query_id[0], query_id[1], tic_gpu, delta)
def finish_timings():
    for msg, query_id in _timers.items():
        while not glGetQueryObjectiv(query_id, GL_QUERY_RESULT_AVAILABLE):
            pass
        # From ns to ms.
        t = glGetQueryObjectuiv(query_id, GL_QUERY_RESULT) / 1_000_000.0
        if MEM_TRACKING:
            if msg not in _gl_perf_counters:
                _gl_perf_counters[msg] = [t]
            else:
                _gl_perf_counters[msg].append(t)
    _timers.clear()
_perf_counters_full = dict()
def finish_timings_full():
    if MEM_TRACKING:
        for msg, data in _gl_queries_full.items():
            times = []
            while not glGetQueryObjectiv(data[0], GL_QUERY_RESULT_AVAILABLE):
                pass
            times.append(glGetQueryObjectuiv(data[0], GL_QUERY_RESULT) / 1_000_000.0)
            # while not glGetQueryObjectuiv(data[1], GL_QUERY_RESULT_AVAILABLE):
            #     pass
            times.append(glGetQueryObjectiv(data[1], GL_QUERY_RESULT))
            times.append(data[2])
            # So each time tuple is:
            # (timestamp, elapsedtime_gpu, elapsedtime_cpu)
            times.append(data[3])  # wall clock time
            if msg not in _perf_counters_full:
                _perf_counters_full[msg] = [times]
            else:
                _perf_counters_full[msg].append(times)
        _gl_queries_full.clear()
def gl_timings_results(figname):
    plt.figure(figsize=(20, 20), dpi=200)
    all_times = _gl_perf_counters | _perf_counters
    keys = list(filter(lambda k: "--" in k, sorted(all_times.keys())))
    gl_total = 0
    gndx = 1
    for k in keys:
        if "--" in k:
            t = np.array(all_times[k])
            if len(t) > 5:
                # Skip first iterations
                tc = t[5:]
            else:
                tc = t
            std = np.std(tc)
            mean = np.mean(tc)
            k = k.replace("--", "")
            print(f"{k}: mean:{mean:.3f}ms std:{std:.3f}ms")
            if "(gl)" in k:
                gl_total += mean
            ax = plt.subplot((len(keys)+2)//3, 3, gndx)
            ax.plot(t)
            ax.set_title(f"{k}")  # : ZBuf={ZBUFFER} mean:{mean:.3f}ms std:{std:.3f}ms")
            ax.axhline(np.mean(tc), c="red", label=f"{mean:.2f}±{std:.2f}ms")
            ax.set_ylim(max(0, mean-2*std), mean+2*std)
            ax.legend()
            gndx += 1
    plt.suptitle(f"GL total {gl_total:.2f} (GLFinish:{FINISH}, stripes wired on NAP only:{STRIPES_ON_NAP})")
    plt.savefig(figname, dpi=300)
    # plt.show()
def gl_timings_results_full():
    all_times = _perf_counters_full
    keys = list(filter(lambda k: "--" in k, sorted(all_times.keys())))
    gl_total = 0
    gndx = 1
    ys = []
    xfrom = []
    xto = []
    for k in keys:
        if "--" in k:
            t = np.array(all_times[k])
            ys.extend([k]*t.shape[0])
            xfrom.extend(t[:, 0])
            xto.extend(t[:, 1])

    import altair as alt
    import pandas as pd
    data = pd.DataFrame()
    data["activity"] = ys
    data["from"] = xfrom
    data["to"] = xto
    alt.Chart(data).mark_bar().encode(
        x='from',
        x2='to',
        y='activity',
        color=alt.Color('activity', scale=alt.Scale(scheme='dark2')))
import psutil
def log_mem_used(msg=None):
    process = psutil.Process(os.getpid())
    # print(f"Used memory: {process.memory_info().rss // (1024*1024)}MB. {msg or ''}")
def debug_framebuffer(framebuffer_id, color_attachment, width, height, format_, type_, title="", component=0):
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_id)
    # glReadBuffer — select a color buffer source for pixels
    glReadBuffer(GL_COLOR_ATTACHMENTS[color_attachment])
    # Read the bottom left texel (only one !)
    # Note that pyopengl returns a numpy array
    texture_content = glReadPixels(0, 0, width, height, format_, type_)
    if format_ == GL_RGB:
        texture_content = texture_content.reshape(height, width, 3)[:, :, component]
    elif format_ == GL_RED_INTEGER:
        texture_content = texture_content.reshape(height, width)
    else:
        raise Exception(f"format unrecognized : {format_}")
    print(np.sum(texture_content))
    print(np.max(texture_content))
    print(np.min(texture_content))
    plt.imshow(texture_content)
    plt.colorbar()
    plt.title(title)
    plt.show()
class TileOptimisation(Enum):
    COMPUTE_SHADER = 1
    REGULAR_SHADER = 2
SHADER_PATH = Path(Path(__file__).parent, "shaders")
# def draw_iso_quads(program, buffer_width, buffer_height):
#     # WARNING! This function expects a program to be "glUse" 'd
#     # WARNING! It also expects the DrawBuffer and any other relevant
#     # GL state to be set. We do the bare minimum here.
#     np_quad_vertices, np_quad_texcoords_rectangle = make_unit_quad(1, 1)
#     # We compute the Model (model crd -> world crd) * View (world -> camera) * Projection (camera -> clip) matrix
#     # Remember that all coordinates of our special "quad" are in [-1,+1]
#     # and we ultimately want everything to be in "clip" coordinates, that is [-1,+1]
#     # (screen coordinates are handled elsewhere).
#     # Therefore, a simple unit matrix will do. Actually, we want to flatten the z
#     # coordinates, just to insist on the fact that we don't have any perspective, that
#     # the z coordinate is useless here. That's why there's a zero in the diagonal.
#     # See https://numpy.org/doc/stable/reference/arrays.ndarray.html#internal-memory-layout-of-an-ndarray
#     pm = np.asfortranarray(np.diag([1, 1, 0, 1]).astype(np.float32))  # Fortran == column major, like OpenGL
#     set_uniform(program, "uModelViewProjectionMatrix", pm)
#     attr_loc = glGetAttribLocation(program, "aVertex")  # for the simple vertex shader
#     assert attr_loc >= 0
#     glEnableVertexAttribArray(attr_loc)
#     glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, np_quad_vertices)
#     assert glGetError() == GL_NO_ERROR
#     """
#     In the fragment shaders, the texture is indexed via gl_FragCoord.xy.
#     Therefore the texture coordinates will be computed on basis of the
#     screen coordinates. So we don't need to associate texture coordinates
#     to vertices.
#     This makes it easy to prepare the triangles that need to
#     be displayed: we just need the triangles to contain the
#     screen (exactly), their coordinates don't matter (since we infer the texture
#     indices from the *screen* coordinates of the drawn pixels.)
#     """
#     glViewport(0, 0, buffer_width, buffer_height)
#     assert glGetError() == GL_NO_ERROR
#     glDrawArrays(GL_TRIANGLES, 0, len(np_quad_vertices))
#     assert glGetError() == GL_NO_ERROR
def make_quad_fast(xmin, xmax, ymin, ymax):
    z = [[xmax, ymin, 0],
         [xmax, ymax, 0],
         [xmin, ymax, 0],
         [xmax, ymin, 0],
         [xmin, ymax, 0],
         [xmin, ymin, 0]]
    return z
def round_to_tile(x, round_func, tsize: int):
    assert round_func in (ceil, floor)
    assert tsize >= 1
    if tsize > 1:
        return round_func(x / tsize)*tsize
    else:
        return x
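
# Quick illustration of round_to_tile(): snap a coordinate to a tile boundary,
# flooring or ceiling depending on which end of an interval is being aligned
# (here with the default 16-mesh tiles).
#
#   round_to_tile(37, floor, DEFAULT_TILE_SIZE)  # -> 32
#   round_to_tile(37, ceil, DEFAULT_TILE_SIZE)   # -> 48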
""" class Stripes: def __init__(self, grid_width, grid_height): self._grid_width = grid_width self._grid_height = grid_height self._frozen = False self._stripes = [] self._zoom = 1 self._mean = 0 def same_as(self, b): a = np.array(self._stripes) b = np.array(b._stripes) return np.array_equal(a,b) def largest_dim(self): return max(self._grid_width, self._grid_height) def add_stripe(self, xmin, xmax, ymin, ymax): logging.trace(f"stripe: {xmin}, {xmax}, {ymin}, {ymax}") assert not self._frozen self._stripes.append([xmin, xmax, ymin, ymax]) def __len__(self): return len(self._stripes) def has_stripes(self): return len(self._stripes) >= 1 def group_by(self, n): # Join stripes together to reduce their number # Stripes are grouped by groups of size n. # Stripes are expected to be horizontal bands, # organized as a vertical stack. So we group them # "by rows". assert n >= 2 grouped_stripes = [] for j in range(0,len(self._stripes),n): # We take n stripes and create a new (minimal) strip that covers # them all. a = np.array(self._stripes[j:min(len(self._stripes), j+n)]) grouped_stripes.append([ np.min(a[:,0]), np.max(a[:,1]), np.min(a[:,2]), np.max(a[:,3])]) self._stripes = grouped_stripes # def reshape_and_freeze(self,w,h): # assert not self._frozen # assert w > self._grid_width and h > self._grid_height # self._frozen = True # segments = [] # a = np.array(self._stripes) # a[:,[0,2]] = np.floor(w*a[:,[0,2]]/self._grid_width) # a[:,[1,3]] = np.ceil( h*a[:,[1,3]]/self._grid_height) # for row in range(a.shape[0]): # segments.extend(make_quad_fast(*a[row])) # self._grid_width, self._grid_height = w, h # # To NDC # r = np.array(segments, dtype=np.float32) # r[:,0] = r[:,0]*2 / w - 1.0 # r[:,1] = r[:,1]*2 / h - 1.0 # self._stripes_ndc = r def freeze(self): # Transform the list of stripes into quads. # It is said "frozen" because it is expected that the stripes don't change # anymore (to remain in sync with the quads) assert not self._frozen self._frozen = True assert len(self._stripes), "Nothing to freeze ? I don't handle that yet !" # Stripes to quads segments = [] for xmin, xmax, ymin, ymax in self._stripes: segments.extend(make_quad_fast(xmin, xmax, ymin, ymax)) # Quads coordinates to NDC ( from -1 to +1) r = np.array(segments, dtype=np.float32) r[:,0] = 2*r[:,0] / self._grid_width - 1.0 r[:,1] = 2*r[:,1] / self._grid_height - 1.0 self._stripes_ndc = r def get_vao_len(self): # Computed for glDrawArrays return len(self._stripes)*6 def get_vao_id(self): # Vertex array object id return self._stripes_vao_id def to_opengl(self): assert self._frozen self._stripes_vao_id = glGenVertexArrays(1) glBindVertexArray( self._stripes_vao_id ) vertex_buffer_object_id = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_object_id) # OpenGL doc: creates and initializes a buffer object's data store glBufferData(GL_ARRAY_BUFFER, self._stripes_ndc.nbytes, self._stripes_ndc, GL_STATIC_DRAW) # FIXME This is dangereous: I hard code the attrib location # so the expectation is that it will be the same in ALL programs. # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 # NOTE You don't need to glUseProgram() before; this is just an # attribute number. attr_loc = 0 # Array access is enabled by binding the VAO in question and calling glEnableVertexAttribArray(attr_loc) # See https://www.khronos.org/opengl/wiki/Vertex_Specification#Vertex_Array_Object # "Think of it like this. 
glBindBuffer sets a global variable, then # glVertexAttribPointer reads that global variable and stores it in the # VAO." glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # order important else you clear the buffer bind in # the vertex array object :-) glBindVertexArray(GL_NONE) glBindBuffer(GL_ARRAY_BUFFER, GL_NONE) # def stripes_to_grid(self, stripes, new_grid_width, new_grid_height): # segments = [] # if new_grid_width != self._grid_width or new_grid_height != self._grid_height: # for xmin,xmax,ymin,ymax in self._stripes: # xmin = floor((xmin/self._grid_width)*new_grid_width) # xmax = ceil((xmax/self._grid_width)*new_grid_width) # ymin = floor((ymin/self._grid_height)*new_grid_height) # ymax = ceil((ymax/self._grid_height)*new_grid_height) # segments.extend(make_quad_fast(xmin,xmax,ymin,ymax)) # else: # for xmin,xmax,ymin,ymax in self._stripes: # segments.extend(make_quad_fast(xmin,xmax,ymin,ymax)) # r = np.array(segments, dtype=np.float32) # r[:,0] = r[:,0]*2 / width - 1.0 # r[:,1] = r[:,1]*2 / height - 1.0 # return r """
def shrink_array_by_two(a):
    """ Cut array in 2. A 5x5 array will be cut to a 3x3 (notice the rounding). """
    u = a[::2, :]   # Even rows
    v = a[1::2, :]  # Odd rows
    if v.shape[0] != u.shape[0]:
        # Odd number of rows => padding
        v = np.vstack([v, np.zeros((1, u.shape[1]))])
    uv = u + v

    u = uv[:, ::2]   # Even columns
    v = uv[:, 1::2]  # Odd columns
    if v.shape[1] != u.shape[1]:
        # Odd number of columns => padding
        v = np.hstack([v, np.zeros((u.shape[0], 1))])
    uv = u + v
    return uv
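
# Example of shrink_array_by_two(): a 5x5 array of ones becomes 3x3, each output
# cell holding the sum of the (up to) 2x2 block it covers.
#
#   >>> shrink_array_by_two(np.ones((5, 5)))
#   array([[4., 4., 2.],
#          [4., 4., 2.],
#          [2., 2., 1.]])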
def divide_shape(shape):
    return ((shape[0]+1)//2, (shape[1]+1)//2)
def integer_div(a, b):
    # Integer division, rounded up (ceiling division).
    if a % b == 0:
        return a // b
    else:
        return (a // b) + 1
def find_active_tiles(a, tile_size=DEFAULT_TILE_SIZE):
    shrinked = np.zeros((integer_div(a.shape[0], tile_size),
                         integer_div(a.shape[1], tile_size)), dtype=a.dtype)
    for r in range(0, a.shape[0], tile_size):
        r_min, r_max = r, min(a.shape[0], r+tile_size)
        for c in range(0, a.shape[1], tile_size):
            c_min, c_max = c, min(a.shape[1], c+tile_size)
            s = np.sum(a[r_min:r_max, c_min:c_max])
            shrinked[r//tile_size, c//tile_size] = s
    return shrinked
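
# Example of find_active_tiles(): each cell of the shrunk array counts how many
# non-zero meshes fall inside the corresponding tile (partial edge tiles included).
#
#   >>> a = np.zeros((20, 20)); a[0, 0] = 1; a[17, 18] = 1
#   >>> find_active_tiles(a, tile_size=16)
#   array([[1., 0.],
#          [0., 1.]])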
def isPowerOfTwo(x):
    # From https://www.geeksforgeeks.org/python-program-to-find-whether-a-no-is-power-of-two/
    # First x in the below expression
    # is for the case when x is 0
    return (x and (not(x & (x - 1))))
# def build_stripes_for_nap_subsizes(nap, tile_size=DEFAULT_TILE_SIZE) -> dict[int, Stripes]:
#     # tile_size: size of tiles, a power of two.
#     # Based on the NAP map (which is supposed to remain constant during the
#     # simulation), we isolate tiles that are deemed "active" (because they
#     # contain at least one active mesh). Then we go on to shrink that map by a
#     # factor of 2 and compute stripes that cover the resulting map. We iterate
#     # until we reach a 1x1 shrinked map.
#     # Mainly used to compute the max step size which needs to look at the entire NAP
#     # (because of infiltrations)
#     assert isPowerOfTwo(tile_size), "We only support tile sizes which are powers of two"
#     stripes: dict[int, Stripes] = dict()
#     """ Read this this way:
#     We start at resolution 4000 with a stripe size of 16. In parallel reduction we always
#     draw on the next resolution (4000/2=2000) so we must use
#     the correct tile size.
#     At resolution 2000, we'll consider stripes made of tiles of size 16 (!)
#     At resolution 1000, we'll consider stripes made of tiles of size 4
#     At resolution 500, we'll consider stripes made of tiles of size 2
#     At resolution 250, we'll consider stripes made of tiles of size 1
#     At resolution 125, we'll consider stripes made of tiles of size 1 (!)
#     etc.
#     FIXME Nothing says that the way we reduce the tile size is optimal...
#     """
#     tile_size_at_level = tile_size
#     stop = False
#     while True:
#         dims = nap.shape
#         # We look at our nap through a lens of tiles
#         shrinked = find_active_tiles(nap, tile_size_at_level)
#         # Once we have the tiles, we build the stripes.
#         # The stripes will be used to draw on the current
#         # nap resolution (and that will be expanded at the upper
#         # resolution by a factor of two).
#         # The stripes coordinates are expressed in the
#         # context of the current resolution.
#         level_stripes = find_stripes(shrinked, dims[1], dims[0],
#             tile_size_at_level, tile_size_at_level, with_dilatation=False)
#         logging.debug(f"At resolution : {nap.shape}, shrinked:{shrinked.shape}, tile size {tile_size_at_level}, {len(level_stripes)} stripes")
#         stripes[max(nap.shape)] = level_stripes
#         # Cut the resolution in two
#         nap = shrink_array_by_two(nap)
#         # Cut the tile size too
#         tile_size_at_level = max(1, tile_size_at_level // 2)
#         if stop:
#             break
#         if max(nap.shape) == 1:
#             # 1x1 shrinked nap => end.
#             stop = True
#     for v in stripes.values():
#         v.freeze()
#         v.to_opengl()
#     return stripes
def find_stripes(a, rw, rh, tile_w, tile_h, with_dilatation=True):
    """ Find stripes on a. Elements of a different than zero are the ones taken
    into account. There's one stripe per line of the array!

    Please note that the stripes coordinates will be in NDC (normalized device
    coordinates, see OpenGL) over the array (that is, they're in [-1,+1]
    regardless of the size of the array).
    """
    from scipy.ndimage import binary_dilation, generate_binary_structure

    # Why use a structure in the dilatation? Because from one position, the
    # computations can cover up to two cells away. So if we go one cell up and
    # one cell right (for example), we may need to have a tile there... (a
    # regular dilatation has a default "+" shape which leaves 4 corners open)
    # The binary structure we ask here is a 3x3 all-true array.
    if with_dilatation:
        a = binary_dilation(a, structure=generate_binary_structure(2, 2))
    else:
        a = a.astype(bool)

    zeros_column = np.zeros((a.shape[0], 1), dtype=bool)
    width, height = a.shape[1], a.shape[0]
    a = np.hstack([zeros_column, a, zeros_column])

    # true on the left side of the edge across which values change.
    # Ex. a[0]=True a[1]=False a[2]=False
    # diff[0] = a[0] ^ a[1] = True
    # diff[1] = a[1] ^ a[2] = False
    # BUT BUT BUT we have padded !!!
    # So it's true on the right side !
    diff = a[:, 1:] ^ a[:, :-1]

    stripes = Stripes(grid_width=rw, grid_height=rh)
    for y in range(height):
        changes = list(np.argwhere(diff[y, :] != 0))
        if len(changes) % 2 != 0:
            # FIXME Since we pad with zeros above, this should never happen.
            raise Exception("I don't handle stripes that begin/end at borders of array")

        # changes = changes + [[width]]
        for i in range(0, len(changes), 2):
            # +1 to account for the "xor" computation offset above.
            # FIXME compute the diff xor above in another way to avoid the (ugly) +1.
            # space += (changes[i+1][0] - changes[i][0])*1
            stripes.add_stripe(changes[i][0]*tile_w, changes[i+1][0]*tile_w,
                               y*tile_h, (y+1)*tile_h)

    return stripes
def find_non_empty_tiles_on_nap(nap: np.ndarray, n: int):
    """ Tiles on NAP are used to guide the first level of computation, that is
    at the maximum detail level. After that level, one uses stripes which will
    be dynamically adjusted to water coverage.

    Tiles on NAP are not meant to change over the course of the simulation.
    """
    assert nap.dtype == bool
    h, w = nap.shape
    assert n < min(h, w)

    # npa = water_height
    all_quads = []
    total_active_meshes_on_tile = 0
    total_meshes_on_tile = 0
    marks = []
    for y in range(n):
        first_x = None
        last_x = None
        for x in range(n):
            xmin, xmax = int(w/n*x), int(w/n*(x+1))  # int is floor.
            ymin, ymax = int(h/n*y), int(h/n*(y+1))
            active_meshes_on_tile = int(np.sum(nap[ymin:ymax, xmin:xmax]))
            if active_meshes_on_tile > 0:  # (x+y)%2 == 0:
                total_active_meshes_on_tile += active_meshes_on_tile
                total_meshes_on_tile += (xmax - xmin)*(ymax - ymin)

                # In OpenGL clip space, the origin (0,0) is at
                # the center of the screen. The corners are (-1,-1) and
                # (1,1) so they span a range of 2x2.
                # Here we make coordinates that will be fed into
                # a vertex shader that is expected to NOT transform
                # them. So our "world coord" in (-1,+1) will be
                # mapped directly by the VtxShader to the clip space crd
                # which is also (-1,+1).
                if True:
                    # Compressed stripes
                    if first_x is None:
                        first_x = x
                        last_x = x
                    elif x-1 == last_x:
                        last_x = x
                    else:
                        xmin, xmax = int(w/n*first_x), int(w/n*(last_x+1))  # int is floor.
                        all_quads.append(make_quad(
                            float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                            float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))
                        first_x = x
                        last_x = x
                else:
                    # Regular, uncompressed stripes
                    all_quads.append(make_quad(
                        float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                        float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))
                    marks.append((x, y))

        if first_x is not None:
            xmin, xmax = int(w/n*first_x), int(w/n*(last_x+1))  # int is floor.
            all_quads.append(make_quad(
                float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))

    # import matplotlib.pyplot as plt
    # plt.imshow(npa)
    # for x, y in marks:
    #     plt.scatter(x*w/N, y*h/N)
    # plt.show()

    logging.debug(f"{w}x{h} area, {len(all_quads)} tiles. {round(100*len(all_quads)/(n**2))}% tile coverage. Active meshes on active tiles: {100*total_active_meshes_on_tile/total_meshes_on_tile:.1f}%")
    logging.debug(f"{total_active_meshes_on_tile} active on tiles / {np.sum(nap)} active / {nap.size} total meshes.")

    r = np.vstack([np_quad_vertices for np_quad_vertices, _ in all_quads]).astype(np.float32)
    return r
# draw_iso_quad_regular = draw_iso_quads

# cache = dict()
# quad_buffer = None
# nb_quads = None
# all_triangles = None
class GLSimulationGlobalState:
    def __init__(self, simulation_time, time_step, active_infiltration_line,
                 nb_dryup_iterations, nb_active_tiles, status_code,
                 total_active_tiles, infiltration_zones):
        (self.simulation_time, self.time_step, self.active_infiltration_line,
         self.nb_dryup_iterations, self._nb_active_tiles, self.status_code,
         self.total_active_tiles, self.infiltration_zones) = (
            simulation_time, time_step, active_infiltration_line,
            nb_dryup_iterations, nb_active_tiles, status_code,
            total_active_tiles, infiltration_zones)

    @property
    def nb_active_tiles(self):
        return self._nb_active_tiles

class GLSimulation:
    def set_params(self, width, height, ponderation_runge_kutta: float=0.3,
                   dx: float=1.0, dy: float=1.0, courant: float=0.1, froude_max: float=10.0,
                   time_step_strategy=TimeStepStrategy.OPTIMIZED_TIME_STEP,
                   time_step: float = None,
                   simulation_duration=SimulationDuration.from_seconds(10),
                   report_frequency=SimulationDuration.from_steps(10),
                   basefilename: str="simul",
                   tile_size=DEFAULT_TILE_SIZE,
                   infiltration_interpolation: InfiltrationInterpolation=InfiltrationInterpolation.NONE,
                   tile_packing_mode: TilePackingMode = TilePackingMode.REGULAR,
                   optimized_indirection=False,
                   track_flows_at_borders=False):

        # print(f"set params !!! {width} {height}")
        assert type(width) == type(height) == int
        assert width >= 1, "The domain has no width"
        assert height >= 1, "The domain has no height"
        assert isinstance(report_frequency, SimulationDuration)
        assert (time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP and (time_step is not None and time_step > 0)) \
            or (time_step_strategy == TimeStepStrategy.OPTIMIZED_TIME_STEP and time_step == None), \
            f"Time step strategy is '{time_step_strategy}' but time_step is '{time_step}', set it to None."

        self.set_infiltration_interpolation(infiltration_interpolation)
        self.basefilename = basefilename
        self.time_step_strategy = time_step_strategy
        self.time_step = time_step or 0
        self.report_frequency = report_frequency
        self.g = 9.81
        self.dx = dx
        self.dy = dy
        self.courant = courant
        self.froude_max = froude_max
        self.set_tile_size(tile_size)

        # FIXME This is quite arbitrary. One should check it's the
        # same in WolfCLI. float32 machine epsilon is
        # 1.19e-07 (see wikipedia).
        self.epsilon = min(min(self.dx, self.dy)**4, 0.000_001)

        assert ponderation_runge_kutta <= 1.0, f"Bad RungeKutta ponderation:{ponderation_runge_kutta}, should be <= 1"
        self.ponderation_runge_kutta = ponderation_runge_kutta

        assert isinstance(simulation_duration, SimulationDuration), f"Expected SimulationDuration, you gave {simulation_duration}"
        self.simulation_duration = simulation_duration

        self.nb_active_meshes = 0
        self._tile_packing_mode = tile_packing_mode
        self._optimize_tile_indirection = optimized_indirection
        self._track_flows_at_border = False
        self.reset_size(width, height)
    def set_track_flows_at_borders(self, v):
        self._track_flows_at_border = v
        if v:
            packed_width, packed_height = self._tile_packer.packed_size()
            # First texture is for the Euler step. Second one is for the RK step.
            self._flows_at_border_tex = [
                upload_blank_texture_to_gpu(packed_width, packed_height,
                                            GL_TEXTURE_RECTANGLE, format=GL_RGBA32F)
                for _ in range(2)
            ]
            # This one will be reused in Euler and RK
            self._rhs_tex = upload_blank_texture_to_gpu(packed_width, packed_height,
                                                        GL_TEXTURE_RECTANGLE, format=GL_RGBA32F)
    def force_max_step_size(self, s: float):
        """ Prevents the max step size computation. Pass None to re-enable
        the computation. Used in test/debug.
        """
        if s:
            self.max_time_step_inactive_tile = s
            self._forced_max_step_size = True
        else:
            self._forced_max_step_size = False
    def set_ensure_positive_height(self, b: bool):
        """ Enable or disable positive height enforcement. Used in debugging/testing. """
        self._ensure_positive_height = b
    def set_g(self, g):
        assert g >= 0
        self.g = g
    def set_froude_limit(self, l):
        """ l = Froude max. If l is None, then we turn off the Froude limitation. """
        self.froude_max = l
    def set_fixed_time_step(self, d):
        assert d > 0
        self.time_step = d
        self.time_step_strategy = TimeStepStrategy.FIXED_TIME_STEP

        if self.max_step_size_tex is not None:
            ntx, nty = self._tile_packer.packed_size_in_tiles()
            # Here we store the time step so that in case of FixedTimeStep strategy,
            # one can use this one.
            self.max_step_size_tex = upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE,
                                                                 GL_R32F, self.time_step)
    def set_report_frequency(self, d: SimulationDuration):
        assert isinstance(d, SimulationDuration)
        self.report_frequency = d
    def set_simulation_duration(self, d: SimulationDuration):
        assert isinstance(d, SimulationDuration)
        self.simulation_duration = d
    def set_courant(self, c):
        self.courant = c
    def set_euler_ponderation(self):
        """ Disable Runge-Kutta and let Euler be the scheme to use. """
        self.ponderation_runge_kutta = None
    def set_rk_ponderation(self, p):
        # assert 0 < p < 1, "A correct weight for Runge-Kutta must be > 0 and < 1"
        self.ponderation_runge_kutta = p
    def set_infiltration_interpolation(self, lerp: InfiltrationInterpolation):
        assert type(lerp) == type(InfiltrationInterpolation.NONE)
        assert lerp in (InfiltrationInterpolation.NONE, InfiltrationInterpolation.LINEAR), \
            f"Expected a proper infiltration interpolation: you gave '{lerp}'"
        self._infiltration_interpolation = lerp
    @property
    def infiltration_interpolation(self) -> InfiltrationInterpolation:
        return self._infiltration_interpolation
    def set_debug(self, dbg: bool):
        # Record more data in results
        assert type(dbg) == bool
        self._debugging = dbg
        self._create_debugging_textures()
    def set_tiles_packing_mode(self, mode: TilePackingMode):
        self._tile_packing_mode = mode
    def set_optim_geom_shaders(self, enable):
        if enable == "CS" or enable == TileOptimisation.COMPUTE_SHADER:
            self._optim_geom_shaders = TileOptimisation.COMPUTE_SHADER
        elif enable == "RS" or enable == TileOptimisation.REGULAR_SHADER:
            logging.warning("Deprecated tiles optimisation")
            self._optim_geom_shaders = TileOptimisation.REGULAR_SHADER
        elif enable is None:
            self._optim_geom_shaders = None
        else:
            raise Exception("Unknown tile optimisation strategy")

        if self._optim_geom_shaders:
            logging.debug(f"GeoShader optimisation enabled {self._optim_geom_shaders.name}")
        else:
            logging.debug("GeoShader optimisation disabled")
    def __init__(self, width, height, ponderation_runge_kutta: float=0.3,
                 dx: float=1.0, dy: float=1.0, courant: float=0.1, froude_max: float=10.0,
                 time_step_strategy=TimeStepStrategy.OPTIMIZED_TIME_STEP, time_step: float = 1.0,
                 max_steps=SimulationDuration.from_seconds(10),
                 report_frequency=SimulationDuration.from_steps(10),
                 basefilename: str="simul", tile_size=DEFAULT_TILE_SIZE,
                 shader_log_path: Path = None,
                 tile_packing_mode: TilePackingMode = TilePackingMode.REGULAR,
                 optimized_indirection: bool = False):

        try:
            is_gl_good_enough = (glGetIntegerv(GL_MAJOR_VERSION), glGetIntegerv(GL_MINOR_VERSION)) >= (4, 6)
        except Exception as ex:
            raise Exception("Seems like no OpenGL context has been set up. Suggestion: start pygame, glfw or a wxWidget canvas before creating a simulation.")

        if not is_gl_good_enough:
            raise Exception("I need OpenGL 4.6 at least")

        self._shader_log_path = shader_log_path
        self.set_params(width, height, ponderation_runge_kutta, dx, dy, courant, froude_max,
                        time_step_strategy, time_step, max_steps, report_frequency, basefilename,
                        tile_size=tile_size, tile_packing_mode=tile_packing_mode,
                        optimized_indirection=optimized_indirection)

        self.infiltration_timings = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.nb_infiltration_zones = 0
        self.nb_infiltration_lines = 0
        self.max_time_step_inactive_tile = MAXIMUM_TIME_STEP  # P.A.'s default (when no infiltration present)
        self._debugging = False
        self._froude_limit_on_uv = True
        self._debugging_skip_euler = False
        self._dbg_max_nb_of_dry_up_inner_loops = tile_size*tile_size
        self._total_number_of_dry_up_outer_loops = None  # None means "until done", 0 means "disabled". N means at most N iterations.
        self._ensure_positive_height = True
        self._forced_max_step_size = False
        self.last_euler_dry_up_fixes_count = -1
        self.last_rk_dry_up_fixes_count = -1
        self._tile_packer = None
        self._optim_geom_shaders = TileOptimisation.REGULAR_SHADER
        self.state_buffer_ssbo_id = None
        self._currentMinStepSizeTexture = 0
        self._shaders_loaded = False
        self.iteration_vs_simtime = []
        self._next_stop = TIME_SAMPLING_RESOLUTION
        self.integration_fb = None
        self._debug_fb = None
        self._infiltration_fb = None
        self.strong_boundary_cond_fb = None
        self.param_copy_tex = None
    def _load_shaders(self):
        assert not self._shaders_loaded

        # Coordinates are NDC
        # Aligned on the active_tile_texture dimensions
        # To be sure we're in the pixels, we lay the grid on the pixels' centers.
        nb_tiles_horiz, nb_tiles_verti = self._tile_packer.size_in_tiles()

        # Width, Height of a tile in NDC (Normalized device coordinates)
        ndc_w = 2  # width of the NDC unit square (from -1 to +1 => 2)
        ndc_tile_w = self._tile_size * (ndc_w / self.width)
        ndc_tile_h = self._tile_size * (ndc_w / self.height)

        # Use const : https://www.khronos.org/opengl/wiki/Type_Qualifier_(GLSL)#Constant_qualifier
        with open(SHADER_PATH / "generated_constants.frs", "w", encoding='utf-8') as fout:
            fout.write(f"// This file was generated at runtime ({nice_timestamp()}). DO NOT EDIT, regenerate it!\n\n")

            # Make sure we can define an array with proper dimension
            if self.nb_infiltration_zones >= 1:
                aiz_size = "NB_INFILTRATION_ZONES"
            else:
                aiz_size = "1"

            def glbool(v):
                return str(v).lower()

            fout.write(
                f"""// Since constants don't change during the simulation, I don't
// pass them as uniform but as, well, constants. I think it helps the compiler
// to simplify code.

const bool RUNGE_KUTTA = true;
const float FROUDE_MAX = {self.froude_max or 0:f}; // 10.0 is the original one
const bool FROUDE_LIMIT = {glbool(self.froude_max is not None)};
const float g = {self.g:f};

// Tell if the simulator must try to fix dry up's (true) or not (false).
const bool DRY_UP_FIXING = {glbool(not self.dry_up_disabled())};

// Additional testing to enforce positive heights.
// Make sure you use Froude limitations and dry ups management so that
// a hard limit is not imposed (which will add water to the simulation).
const bool ENSURE_POSITIVE_HEIGHT = {glbool(self._ensure_positive_height)};

// This is the padded size of the domain.
const uint DOMAIN_WIDTH={nb_tiles_horiz*self._tile_size};
const uint DOMAIN_HEIGHT={nb_tiles_verti*self._tile_size};

// The tile size, expressed in meshes. Tiles are squares.
const uint TILE_SIZE={self._tile_size};

// Tile size in normal device coordinates
const float NDC_TILE_WIDTH = {ndc_tile_w};
const float NDC_TILE_HEIGHT = {ndc_tile_h};

// Size of the computation domain expressed in tiles
const int NB_TILES_HORIZ = {nb_tiles_horiz};
const int NB_TILES_VERTI = {nb_tiles_verti};

// When true, the tile_packing indirection functions will become
// effectless (they don't do indirection).
const bool TRANSPARENT_PACK = {glbool(self._tile_packing_mode == TilePackingMode.TRANSPARENT)};

// In the infiltration table, the index of the column containing
// the start time of an infiltration.
const int INFILTRATION_TABLE_TIME_INDEX = 0;

const uint WET_TILES_DETECT_THREAD_WIDTH = {WET_TILES_DETECT_THREAD_WIDTH};
const uint NB_INFILTRATION_ZONES = {self.nb_infiltration_zones};
const uint NB_INFILTRATION_LINES = {self.nb_infiltration_lines};

// Should we linearly interpolate the infiltration quantity across
// infiltration steps ?
const bool INFILTRATION_LERP = {glbool(self._infiltration_interpolation == InfiltrationInterpolation.LINEAR)};

// When a tile has no active mesh (for example when there's no water),
// one cannot compute a corresponding Δt. In that case, we take a
// default value for that Δt.
// That default value is chosen big enough so that regular time steps
// (which happen on tiles with active meshes) will be below it.
// Moreover, in case there are infiltrations, this default time step
// is chosen in a way that we will not skip a full infiltration interval
// when going from t to Δt.
const float MAX_TIME_STEP_INACTIVE_TILE = {self.max_time_step_inactive_tile:g};

// If we run with the "fixed time step", then this is its value.
// If not, this will be 0 (indicating a dynamic time step).
const float MIN_TIME_STEP = {self.time_step if self.time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP else 0.0};

// This constant is used to determine if an alpha correction
// is the same as the one applied before.
const float NUMERICAL_ERROR_TOLERANCE_FOR_ALPHA = 5e-7;

// Record the flows at the border of each mesh.
const bool TRACK_FLOWS_AT_BORDER={glbool(self._track_flows_at_border)};

// Status codes are output by the "global" shader (i.e. at each simulation step)
const uint STATUS_CODE_OK = 0;
// Something went wrong in the timestep computation. Either it's zero
// or it's a NaN (this is to help debugging NaN appearances during execution,
// which often occur at random).
const uint STATUS_CODE_TIMESTEP_WRONG = 1;

// If true, we record debug information and enable other debug behaviours
const bool DEBUGGING = {glbool(self._debugging)};

const float RUNGE_KUTTA_PONDERATION = {self.ponderation_runge_kutta if self.ponderation_runge_kutta is not None else 0.0};

// Allows to disable the Froude limit application at the u,v computation level.
// This is used while testing.
const bool FROUDE_LIMIT_ON_UV = {glbool(self._froude_limit_on_uv)};

// This is an SSBO (because it's qualified as 'buffer'). It will be shared
// amongst shaders. It acts as a global variable which lives through the
// iterations.
// Be careful as this data structure is mapped in python too, so one
// has to keep all the fields the same and in the same order in both
// implementations.
layout(std430) buffer GlobalStateBlock {{
    double simulation_time;
    float time_step;
    // Line number in the array containing the infiltration chronology.
    // This value is 1-based (1 means first line of array). 0 means
    // there's no active line.
    uint active_infiltration_line;
    uint dry_up_iterations;
    // Number of active (wet) tiles detected during one simulation step.
    uint nb_active_tiles;
    uint status_code;
    // We store the cumulative number of active (wet) tiles
    // over two 32-bit values because that number can
    // grow pretty fast.
    uint total_active_tiles_hi;
    uint total_active_tiles_lo;
    float active_infiltration_zones[{aiz_size}];
}}; // no instance name => fields are globals.

//layout(binding = 16) uniform atomic_uint two;

const bool OPTIMIZED_TILE_INDIRECTION={glbool(self._optimize_tile_indirection)};

float baby_sqrt(in float n) {{
    // Babylonian approximation of sqrt. Here to avoid using "sqrt()",
    // to test whether the latter is stable across GPU generations.
    if (n > 0) {{
        float x_n = n;
        for(int i = 0; i < 12; i++) {{
            x_n = 0.5 * (x_n + n / x_n);
        }}
        return x_n;
    }} else {{
        return 0;
    }}
}}
"""
            )

        self.vertex_shader = load_shader_from_file(Path(SHADER_PATH, "simplevertexshader.vs"))

        # self.blind_shader = load_shader_from_file(Path(SHADER_PATH,"blind.frs"), log_path=self._shader_log_path)
        # self.blinder_program = load_program(self.vertex_shader, self.blind_shader)
        # self.cell_detector_shader = load_shader_from_file(Path(SHADER_PATH,"detect_cells.frs"), log_path=self._shader_log_path)
        # self.cell_detector_program = load_program(self.vertex_shader, self.cell_detector_shader)
        # self.cell_detector_cruncher_shader = load_shader_from_file(Path(SHADER_PATH,"detect_cells_cruncher.frs"), log_path=self._shader_log_path)
        # self.cell_detector_cruncher_program = load_program(self.vertex_shader, self.cell_detector_cruncher_shader)
        # self.min_step_size_shader = load_shader_from_file(Path(SHADER_PATH,"Water2MinStepSizeShader.frs"), log_path=self._shader_log_path)
        # self.min_step_size_program = load_program(self.vertex_shader, self.min_step_size_shader)
        # self.max_step_size_shader = load_shader_from_file(Path(SHADER_PATH,"Water2MaxStepSizeShader.frs"), log_path=self._shader_log_path)
        # self.max_step_size_program = load_program(self.vertex_shader, self.max_step_size_shader)

        # FIXME : adapt OpenGL version in shader to avoid error relative to gl_FragColor ?
        # self.strong_boundary_cond_shader = load_shader_from_file(Path(SHADER_PATH,"Water2StrongBC.frs"), log_path=self._shader_log_path)
        # self.strong_boundary_cond_program = load_program(self.vertex_shader, self.strong_boundary_cond_shader)

        # self.euler_shader = load_shader_from_file(Path(SHADER_PATH,"Water2EulerStepShader.frs"), log_path=self._shader_log_path)
        # self.euler_program = load_program(self.vertex_shader, self.euler_shader)
        # self.runge_kutta_shader = load_shader_from_file(Path(SHADER_PATH,"Water2RungeKuttaStepShader.frs"), log_path=self._shader_log_path)
        # self.runge_kutta_program = load_program(self.vertex_shader, self.runge_kutta_shader)

        # self.slope_flux_derivative_shader = load_shader_from_file(Path(SHADER_PATH,"Wolf2SlopeAndFluxAndDerivativeShader.frs"), log_path=self._shader_log_path)
        # self.slope_flux_derivative_program = load_program(self.vertex_shader, self.slope_flux_derivative_shader)
        # self.copy_alpha_counters_shader = load_shader_from_file(Path(SHADER_PATH,"copy_alpha_to_counters.frs"), log_path=self._shader_log_path)
        # self.copy_alpha_counters_program = load_program(self.vertex_shader, self.copy_alpha_counters_shader)
        # self.detect_alpha_cruncher_shader = load_shader_from_file(Path(SHADER_PATH,"detect_alpha_cruncher.frs"), log_path=self._shader_log_path)
        # self.detect_alpha_cruncher_program = load_program(self.vertex_shader, self.detect_alpha_cruncher_shader)

        # Compute shader active tile detection
        self.active_tiles_detector_cs2 = load_shader_from_file(Path(SHADER_PATH, "parallel_tile_detect.comp"), log_path=self._shader_log_path)
        self.active_tiles_detector_program2 = load_program(compute_shader=self.active_tiles_detector_cs2)

        # Fragment shader active tile detection (should be deprecated)
        # self.active_tiles_detector_shader = load_shader_from_file(Path(SHADER_PATH,"active_tile_detector.frs"), log_path=self._shader_log_path)
        # self.active_tiles_detector_program = load_program(self.vertex_shader, self.active_tiles_detector_shader)

        # Geometry shader to plot active tiles (only)
        self.active_tiles_quad_shader = load_shader_from_file(Path(SHADER_PATH, "active_tiles_quad.gs"), log_path=self._shader_log_path)

        # self.slope_flux_derivative_shader2 = load_shader_from_file(Path(SHADER_PATH,"Wolf2SlopeAndFluxAndDerivativeShader.frs"), log_path=self._shader_log_path)
        # self.slope_flux_derivative_program2 = load_program(self.vertex_shader, self.slope_flux_derivative_shader2, self.active_tiles_quad_shader)
        # self.euler_program2 = load_program(self.vertex_shader, self.euler_shader, self.active_tiles_quad_shader)
        # self.runge_kutta_program2 = load_program(self.vertex_shader, self.runge_kutta_shader, self.active_tiles_quad_shader)

        self.derivative_and_dry_up_cs = load_shader_from_file(Path(SHADER_PATH, "deriv_euler_and_dry.comp"), log_path=self._shader_log_path)
        self.derivative_and_dry_up_cs_program = load_program(compute_shader=self.derivative_and_dry_up_cs)

        self.derivative_and_dry_up_and_rk_cs = load_shader_from_file(Path(SHADER_PATH, "rk_and_dry.comp"), log_path=self._shader_log_path)
        self.derivative_and_dry_up_and_rk_cs_program = load_program(compute_shader=self.derivative_and_dry_up_and_rk_cs)

        self.min_step_size_cs_shader = load_shader_from_file(Path(SHADER_PATH, "min_step_size_over_one_tile.comp"), log_path=self._shader_log_path)
        self.min_step_size_cs_program = load_program(compute_shader=self.min_step_size_cs_shader)

        self.min_step_size_crunch_cs_shader = load_shader_from_file(Path(SHADER_PATH, "min_step_size_parallel.comp"), log_path=self._shader_log_path)
        self.min_step_size_crunch_cs_program = load_program(compute_shader=self.min_step_size_crunch_cs_shader)

        self.update_globals_cs_shader = load_shader_from_file(Path(SHADER_PATH, "update_globals.comp"), log_path=self._shader_log_path)
        self.update_globals_cs_program = load_program(compute_shader=self.update_globals_cs_shader)

        self.update_globals_cs_shader2 = load_shader_from_file(Path(SHADER_PATH, "update_globals2.comp"), log_path=self._shader_log_path)
        self.update_globals_cs_program2 = load_program(compute_shader=self.update_globals_cs_shader2)

        self._texture_viewer_shader2 = load_shader_from_file(Path(SHADER_PATH, "textureViewerShader.frs"))
        self._texture_viewer_program2 = load_program(self.vertex_shader, self._texture_viewer_shader2, self.active_tiles_quad_shader)

        self._shaders_loaded = True
    def compute_time_step_for_tiles_phase1(self, current_quantity):
        # Phase 1: compute the time steps for all tiles (and already a first minimum)
        # Attention! Remember that the tile packing has neutral tiles. Make
        # sure the neutral tiles' value doesn't affect the computations.
        packed_width, packed_height = self._tile_packer.packed_size()
        ntx, nty = self._tile_packer.packed_size_in_tiles()

        program = self.min_step_size_cs_program  # Using min_step_size.comp
        glUseProgram(program)
        wire_program(program,
                     {"max_width": packed_width,
                      "max_height": packed_height,
                      "dx": self.dx,
                      "dy": self.dy,
                      "courant": self.courant},
                     {"quantitySampler": self.quantity_tex[current_quantity],
                      "activeTilesSampler": self._active_tiles_map_tex[0],
                      "tiles_mins": (self.max_step_size_tex, GL_WRITE_ONLY)},
                     # ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id}
                     )
        glDispatchCompute(ntx, nty, 1)
        # make sure writing to image has finished before read
        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)

        if False:
            mins = read_texture2(self.max_step_size_tex, GL_R32F)
            np.save(f"{self._prefix}_min_steps_tiles_{self._current_step}", mins)
    def compute_time_step_for_tiles_phase2(self, current_quantity):
        # Phase 2: reduce the matrix of minimums
        program = self.min_step_size_crunch_cs_program
        glUseProgram(program)

        # The previous step computed one value per tile. So now
        # we have to work on the remaining tiles
        w, h = self._tile_packer.packed_size_in_tiles()
        # print(f"compute_time_step_for_tiles_phase2: {w} {h}")

        # For the reduction, we can choose whatever tile size we want.
        # 16 was chosen because it gave good performances.
        REDUCTION_TILE_SIZE = 16  # FIXME Duplicated in the shader's code.
        while True:
            max_width, max_height = w, h
            w, h = ceil(w/REDUCTION_TILE_SIZE), ceil(h/REDUCTION_TILE_SIZE)
            assert w >= 1 and h >= 1, "I must have something to do"

            wire_program(program,
                         {"max_width": int(max_width),
                          "max_height": int(max_height)},
                         {"tiles_mins": (self.max_step_size_tex, GL_READ_WRITE)},
                         ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id})

            logging.debug(f"compute_time_step_for_tiles_phase2: glDispatchCompute: {w} {h} (tiles); max {max_width}x{max_height} (meshes)")
            glDispatchCompute(w, h, 1)
            glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)

            if w <= 1 and h <= 1:
                break

        if False:
            np.save(f"{self._prefix}_para_step_{self._current_step}_{w}_{h}",
                    read_texture2(self.max_step_size_tex, GL_R32F))
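
    # A CPU-side reference sketch of what the two phases above compute. It is
    # illustrative only (it is not called by the GPU path) and assumes a
    # hypothetical `per_mesh_dt` array of per-mesh candidate time steps.
    @staticmethod
    def _min_time_step_reference_sketch(per_mesh_dt: np.ndarray,
                                        tile_size: int = DEFAULT_TILE_SIZE) -> float:
        # Phase 1: one minimum per tile of the domain.
        tiles_mins = np.full((integer_div(per_mesh_dt.shape[0], tile_size),
                              integer_div(per_mesh_dt.shape[1], tile_size)),
                             np.inf, dtype=per_mesh_dt.dtype)
        for r in range(0, per_mesh_dt.shape[0], tile_size):
            for c in range(0, per_mesh_dt.shape[1], tile_size):
                tiles_mins[r // tile_size, c // tile_size] = \
                    per_mesh_dt[r:r + tile_size, c:c + tile_size].min()

        # Phase 2: reduce the per-tile grid, 16x16 blocks at a time, until a
        # single global minimum remains (this mirrors the glDispatchCompute loop).
        REDUCTION_TILE_SIZE = 16
        while tiles_mins.size > 1:
            h, w = tiles_mins.shape
            nh, nw = ceil(h / REDUCTION_TILE_SIZE), ceil(w / REDUCTION_TILE_SIZE)
            reduced = np.full((nh, nw), np.inf, dtype=tiles_mins.dtype)
            for rr in range(nh):
                for cc in range(nw):
                    reduced[rr, cc] = tiles_mins[rr * REDUCTION_TILE_SIZE:(rr + 1) * REDUCTION_TILE_SIZE,
                                                 cc * REDUCTION_TILE_SIZE:(cc + 1) * REDUCTION_TILE_SIZE].min()
            tiles_mins = reduced
        return float(tiles_mins[0, 0])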
    @property
    def nbx(self):
        return self.width
    @property
    def nby(self):
        return self.height
    # def __del__(self):
    #     self.drop_gl_resources()
    def drop_gl_resources(self):
        # FIXME BUG For the moment, deleting GPU resources doesn't work
        # self._shifter = upload_blank_texture_to_gpu(256, 256, GL_TEXTURE_RECTANGLE, GL_R8UI, value=0)

        # We try to reuse the textures if they are already created and
        # have the same size as the requested size.
        all_textures = [self.max_step_size_tex, self.quantity_tex, self.bathymetry_tex,
                        self.manning_tex, self.strong_boundary_cond_tex,
                        self.strong_boundary_cond_values_tex, self.weak_boundary_cond_tex,
                        self.infiltration_zones_texture, self.infiltration_zones_values_array,
                        self.boundary_cond_cells_tex, self.alpha_tex,
                        self._active_tiles_map_tex, self.param_copy_tex]

        if self.optimize_tile_indirection:
            all_textures.append(self._reversed_tile_indirection_tex)
        if self._track_flows_at_border:
            all_textures.append(self._flows_at_border_tex)
            all_textures.append(self._rhs_tex)
        if self._debugging:
            all_textures.append(self.debug_tex)

        for ndx, t in enumerate(all_textures):
            if t is not None:
                logging.debug(f"dropping texture {ndx} with id(s) {t}")
                try:
                    drop_textures(t)
                except Exception as ex:
                    logging.error(f"Can't drop a texture, ndx={ndx}, tid={t} because {ex}")
                    from traceback import print_stack
                    print_stack()
                    # raise Exception(f"Can't drop a texture, ndx={ndx}, tid={t} because {ex}")

        for ndx, t in enumerate([self.state_buffer_ssbo_id, self.integration_fb, self.strong_boundary_cond_fb]):
            if t is not None:
                logging.debug(f"dropping Buffer {ndx} with id(s) {t}")
                glDeleteBuffers(1, [t])

        self.state_buffer_ssbo_id = None
        self.max_step_size_tex = None
        self.quantity_tex = None
        self.bathymetry_tex = None
        self.manning_tex = None
        self.strong_boundary_cond_tex = None
        self.strong_boundary_cond_values_tex = None
        self.weak_boundary_cond_tex = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.boundary_cond_cells_tex = None
        self.alpha_tex = None
        self._active_tiles_map_tex = None
        self.param_copy_tex = None
        self._reversed_tile_indirection_tex = None
        self._flows_at_border_tex = None
        self._rhs_tex = None
        self.debug_tex = None

        # glDeleteFramebuffers(4, [self._active_tiles_map_fb, self.integration_fb, self._infiltration_fb, self.strong_boundary_cond_fb])

        if self._shaders_loaded:
            glDeleteShader(self.vertex_shader)
            glDeleteShader(self.active_tiles_detector_cs2)
            glDeleteShader(self.active_tiles_quad_shader)
            glDeleteShader(self.derivative_and_dry_up_cs)
            glDeleteShader(self.derivative_and_dry_up_and_rk_cs)
            glDeleteShader(self.min_step_size_cs_shader)
            glDeleteShader(self.min_step_size_crunch_cs_shader)
            glDeleteShader(self.update_globals_cs_shader)
            glDeleteShader(self.update_globals_cs_shader2)
            glDeleteShader(self._texture_viewer_shader2)

            self.vertex_shader = None
            self.active_tiles_detector_cs2 = None
            self.active_tiles_quad_shader = None
            self.derivative_and_dry_up_cs = None
            self.derivative_and_dry_up_and_rk_cs = None
            self.min_step_size_cs_shader = None
            self.min_step_size_crunch_cs_shader = None
            self.update_globals_cs_shader = None
            self.update_globals_cs_shader2 = None
            self._texture_viewer_shader2 = None

            glDeleteProgram(self.min_step_size_cs_program)
            glDeleteProgram(self.active_tiles_detector_program2)
            glDeleteProgram(self.update_globals_cs_program)
            glDeleteProgram(self.update_globals_cs_program2)
            glDeleteProgram(self.min_step_size_crunch_cs_program)
            glDeleteProgram(self.derivative_and_dry_up_cs_program)
            glDeleteProgram(self.derivative_and_dry_up_and_rk_cs_program)

            self.min_step_size_cs_program = None
            self.active_tiles_detector_program2 = None
            self.update_globals_cs_program = None
            self.update_globals_cs_program2 = None
            self.min_step_size_crunch_cs_program = None
            self.derivative_and_dry_up_cs_program = None
            self.derivative_and_dry_up_and_rk_cs_program = None

            self._shaders_loaded = False

        glFinish()
    def reset_size(self, width: int, height: int):
        assert width > 0 and height > 0

        if hasattr(self, "width"):  # and (self.width != width or self.height != height):
            self.drop_gl_resources()

        self.max_step_size_tex = None
        self.quantity_tex = None
        self.integration_fb = None
        self.bathymetry_tex = None
        self.manning_tex = None
        self.strong_boundary_cond_tex = None
        self.strong_boundary_cond_values_tex = None
        self.weak_boundary_cond_tex = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.infiltration_timings = None
        self.nb_infiltration_lines = 0
        self.nb_infiltration_zones = 0
        self.boundary_cond_cells_tex = None
        self.alpha_tex = None
        self.debug_tex = None
        self._active_tiles_map_tex = None
        self.state_buffer_ssbo_id = None
        self._atomic_counter_buffer = None

        self.width, self.height = width, height
        self._old_stripes = None
        self._current_step = 0  # 0 based !!!
        self.last_gpu_time = None
        self.current_simulation_time_step = None
        self._tile_packer = None  # Invalidating the tile packer
[docs] def complete_textures(self): """ After having set several data textures, call this to upload blank texture replacement for those you didn't set. and to recompute the framebuffers and create additional textures necessary to the computation. """ logging.debug("Completing textures") packed_width, packed_height = self._tile_packer.packed_size() # glUseProgram(self.cell_detector_program) self._gen_single_quad() #self._active_tile_grid_vao_id = None self._cache = dict() # if self._atomic_counter_buffer is None: # self._atomic_counter_buffer_id = glGenBuffers(1) # glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, self._atomic_counter_buffer_id) # glBufferData(GL_ATOMIC_COUNTER_BUFFER, len(data_buffer), data_buffer, GL_DYNAMIC_COPY) # glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, 16, self._atomic_counter_buffer_id, 0, 16) # glBindBuffer(GL_NONE) if self._active_tiles_map_tex is None: # FIXME You don't need an array of textures for one texture. ntx, nty = self._tile_packer.packed_size_in_tiles() self._active_tiles_map_tex = [ # This texture is accessed without indirection (which is not # that bad since its size is self._tile_size**2 less than the # orignal data). upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE, GL_R8UI, value=0)] # self._active_tiles_map_fb = create_frame_buffer_for_computation( # self._active_tiles_map_tex, # texture_target=GL_TEXTURE_2D, out_to_fragdata=False) if self._optimize_tile_indirection: self._reversed_tile_indirection_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, format=GL_RG16UI, img_data=self._tile_packer.tile_reversed_indirection_map(), ) if self.max_step_size_tex is None: ntx, nty = self._tile_packer.packed_size_in_tiles() # Here we store the time step so that in case of FixedTimeStep strategy, # one can use this one. self.max_step_size_tex = upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE, GL_R32F, self.time_step) # For debugging: # test_pattern = (np.arange(ntx*nty, dtype=np.float32)+0.5).reshape((nty, ntx)) # self.max_step_size_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, test_pattern) # Double-buffered three-components color texture object holding the # cell-centered conserved quantity grid (w, hu, hv) if self.quantity_tex is None: logging.debug("Blank quantities textures") self.quantity_tex = [upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0) for i in range(3)] # # For computing Euler and Runge Kutta integration self.integration_fb = create_frame_buffer_for_computation( self.quantity_tex) # FIXME I think we only need one texture for bathymetry 'cos it doesn't change over time. 
if self.bathymetry_tex is None: logging.debug("Blank bathymetry textures") self.bathymetry_tex = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=99.0) # Source term for continuity equation (h only) # self.source_cont_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=-99_999.0) if self.manning_tex is None: logging.debug("Blank Manning textures") self.manning_tex = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=0.04) self.strong_boundary_cond_fb = create_frame_buffer_for_computation(self.quantity_tex) if self.strong_boundary_cond_tex is None: logging.debug("Blank StrongBC textures") self.strong_boundary_cond_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=-99_999.0) if self.strong_boundary_cond_values_tex is None: logging.debug("Blank StrongBC values textures") self.strong_boundary_cond_values_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_RGB32F, value=0.0) # Things are coded in a GL_R32UI. I have tested GL_R8UI # but somehow I had the impression the memory layout # was broken. That's why I use 32bits (my reasoning was that # if 8 bits don't work then it's because the sampler can # only work with 32 bits at a time). FIXME That's a pretty # weak explanation... # value=1 == mark all cells as active (without that nothing will get # computed) if self.weak_boundary_cond_tex is None: logging.debug("Blank WeakBC textures") self.weak_boundary_cond_tex = upload_blank_texture_to_gpu( self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32UI, value=1) # Makes all cells active by default # FIXME This is not part of a resize operation if self.boundary_cond_cells_tex is None: self.boundary_cond_cells_tex = upload_blank_texture_to_gpu( 1, 1, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=0) if self.infiltration_zones_texture is None: logging.debug("Blank infiltration zones texture") self.infiltration_zones_texture = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32I, value=MESH_WITHOUT_INFILTRATION) if self.infiltration_zones_texture is not None: self._infiltration_fb = create_frame_buffer_for_computation(self.infiltration_zones_texture) logging.debug("Dry up corrections textures") if self.alpha_tex is None: self.alpha_tex = [ # In these two we have vec(alpha_value, alpha_mask) # FIXME I think I only use RG coordinates => move to GL_RG32F upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=[1.0,0.0,0.0,0.0]) for i in range(2) ] # FIXME This exists for the sole purpose of dowlonading the manning # texture from the GPU. 
# FIXME Used for debugging => group them if self._debugging: logging.debug("Debug textures") self._create_debugging_textures() # 2/ alpha_tex 1 (alpha and mask): GL_COLOR_ATTACHMENT0 # 3/ alpha_tex 2 (alpha and mask): GL_COLOR_ATTACHMENT1 # 6/ debug_tex 1 : GL_COLOR_ATTACHMENT2 # 7/ debug_tex 2 : GL_COLOR_ATTACHMENT3 # assert len(self.alpha_tex) == 2 # if self.debug_tex: # self.access_fb = create_frame_buffer_for_computation( # self.alpha_tex + self.debug_tex, # out_to_fragdata=True) # else: # self.access_fb = create_frame_buffer_for_computation( # self.alpha_tex, # out_to_fragdata=True) if self.time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP: ts = self.time_step else: # This is actually the first time step value ts = 0.0 struct_len = struct.calcsize( GLOBALS_STRUCT_PACK_FORMAT + "f" * self.nb_infiltration_zones) if self.state_buffer_ssbo_id is None: # The SSBO set up is done like in https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object self.state_buffer_ssbo_id = glGenBuffers(1) glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) # IIUC, dynamic copy means that GL will modify and read the buffer (not the application). # That is, the buffer will be internal to GL... glBufferData( GL_SHADER_STORAGE_BUFFER, struct_len, bytes( struct_len), GL_DYNAMIC_COPY, ) glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0) # Complete the configuration self._clear_global_variables_buffer(sim_time=0, time_step=ts) # A triple buffered copy of the global parameters of the simulation (such as current step, time, etc.) self.param_copy_tex = [ upload_blank_texture_to_gpu(1, 1, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0) for i in range(1) ] self._current_step = 0
[docs] def _create_debugging_textures(self): if self._debugging: logging.debug("Debug textures") packed_width, packed_height = self._tile_packer.packed_size() dbg_tex = [] if self.manning_tex is not None: dbg_tex.append(self.manning_tex) if self.weak_boundary_cond_tex is not None: dbg_tex.append(self.weak_boundary_cond_tex) self._debug_fb = create_frame_buffer_for_computation(dbg_tex) if self.debug_tex is None: self.debug_tex = [ upload_blank_texture_to_gpu( packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0, ), upload_blank_texture_to_gpu( packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0, ), ]
# def _setup_tiles(self, active_meshes): # assert active_meshes.dtype == bool, "NAP must be xformed to bool array !" # with time_it("find_non_empty_tiles"): # self._tiles = find_non_empty_tiles_on_nap(active_meshes, self._nb_tiles_per_dimension)
[docs] def _gen_single_quad(self): self._single_quad_vao_id = glGenVertexArrays(1) glBindVertexArray( self._single_quad_vao_id ) vertex_buffer_object_id = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_object_id) quad = np.array(make_quad_fast(-1,+1,-1,+1), dtype=np.float32) # OpenGL doc: creates and initializes a buffer object's data store glBufferData(GL_ARRAY_BUFFER, quad.nbytes, quad, GL_STATIC_DRAW) # FIXME This is dangerous: I hard code the attrib location, # so the expectation is that it will be the same in ALL programs. # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 attr_loc = 0 # OpenGL: glEnableVertexAttribArray uses the currently bound vertex array # object for the operation. glEnableVertexAttribArray(attr_loc) # See https://www.khronos.org/opengl/wiki/Vertex_Specification#Vertex_Array_Object # "Think of it like this. glBindBuffer sets a global variable, then # glVertexAttribPointer reads that global variable and stores it in the # VAO." glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # The order is important, otherwise you clear the buffer binding stored in # the vertex array object :-) glBindVertexArray(GL_NONE) glBindBuffer(GL_ARRAY_BUFFER, GL_NONE)
# def _precompute_step_size_vbos(self, program, nap): # """ We build grids of halving number of tiles. # (so each of the tiles at a given number of tiles # are different in number as well as in size of the # level above) # """ # self._step_size_tiles_vbo_id = dict() # # Very important: we wire ourselves to the size of the computaiton # # domain. # factor = max(self.width, self.height) # while factor >= 1: # # if the number of tiles on a dimension is greater # # than the dimension, then there are too many tiles. # # In that case we clamp the number of tiles. # if self._nb_tiles_per_dimension > factor: # n_tiles = factor # else: # n_tiles = self._nb_tiles_per_dimension # vertex_array = find_non_empty_tiles_on_nap(nap, n_tiles) # vertex_array_object_id = glGenVertexArrays(1) # #glBindVertexArray — bind a vertex array object # glBindVertexArray( vertex_array_object_id ) # tri_buffer_id = glGenBuffers(1) # # binds the buffer to the vertex array # glBindBuffer(GL_ARRAY_BUFFER, tri_buffer_id) # # OpenGL doc: creates and initializes a buffer object's data store # glBufferData(GL_ARRAY_BUFFER, vertex_array.nbytes, vertex_array, GL_STATIC_DRAW) # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 # glEnableVertexAttribArray(attr_loc) # # size= number of components per generic vertex # # normalized = ... # # stride= Specifies the byte offset between consecutive generic vertex attributes. # # pointer= Specifies a offset of the first component of the first generic # # vertex attribute in the array in the data store of the buffer currently bound to the GL_ARRAY_BUFFER target. # glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # # order important else you clear the buffer bind in # # the vertex array object :-) # glBindVertexArray(GL_NONE) # glBindBuffer(GL_ARRAY_BUFFER, GL_NONE) # self._step_size_tiles_vbo_id[factor] = (vertex_array_object_id, # tri_buffer_id, vertex_array.shape[0]) # factor = factor // 2 # logging.info(f"Precomputed {len(self._step_size_tiles_vbo_id)} tilesets for max step size. Sizes {list(self._step_size_tiles_vbo_id.keys())}")
[docs] def _draw_active_parts_deprecated(self, program, x=None, y=None, w=None,h=None,zoom=None): # x,y,w,h in *screen* coordinates (2560*1440 for example) # The idea is to attach a lot of configuration (and vertex array) # to a VBO (vtx buffer object). This way, when we need to draw, we just # recall that configuration to the GPU (instead or uploading it again and again) # This brings in a lot of performance because communicating with the GPU is # expensive. assert glGetAttribLocation(program, "aVertex") == 0 if False and program not in self._cache: vertex_array_object = glGenVertexArrays(1) glBindVertexArray( vertex_array_object ) quad_buffer = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, quad_buffer) glBufferData(GL_ARRAY_BUFFER, self._tiles.nbytes, self._tiles, GL_STATIC_DRAW) attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader assert attr_loc >= 0 glEnableVertexAttribArray(attr_loc) # size= number of components per generic vertex # normalized = ... # stride= Specifies the byte offset between consecutive generic vertex attributes. # pointer= Specifies a offset of the first component of the first generic # vertex attribute in the array in the data store of the buffer currently bound to the GL_ARRAY_BUFFER target. glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) self._cache[program] = vertex_array_object # order important else you clear the buffer bind in the vertex array object :-) glBindVertexArray(0) glBindBuffer(GL_ARRAY_BUFFER, 0) # Matrix is: # | 1 | |x| |x| # | 1 | * |y| = |y| # | 0 | |z| |0| # | 1 | |w| |w| pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # Fortran==column major, like OpenGL set_uniform(program, "uModelViewProjectionMatrix", pm) # OpengGL doc: glViewport specifies the affine transformation of x and y from # normalized device coordinates (-1,+1) to window coordinates (e.g. 1024*768). if w is None: glViewport(0, 0, self.width, self.height) else: glViewport(x, y, w, h) # print(f"{self.width}, {self.height} {self._tiles.shape[0]}") s = self._stripes_on_nap[ max(self.nby, self.nbx) ] glBindVertexArray( s.get_vao_id() ) glDrawArrays(GL_TRIANGLES, 0, s.get_vao_len()) glBindVertexArray( GL_NONE)
# def _draw_single_quad(self,program, x=None, y=None, w=None,h=None): # pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # Fortran==column major, like OpenGL # set_uniform(program, "uModelViewProjectionMatrix", pm) # assert glGetAttribLocation(program, "aVertex") == 0 # # OpengGL doc: glViewport specifies the affine transformation of x and y from # # normalized device coordinates (-1,+1) to window coordinates (e.g. 1024*768). # if w is None: # assert h is None, "If w is None then H must be too !" # glViewport(0, 0, self.width, self.height) # else: # glViewport(x, y, w, h) # # print(f"{self.width}, {self.height} {self._tiles.shape[0]}") # glBindVertexArray( self._single_quad_vao_id ) # glDrawArrays(GL_TRIANGLES, 0, 6) # Draw 2 triangles of 3 vertices each # glBindVertexArray( GL_NONE)
[docs] def _has_good_shape(self, a: np.ndarray): expected_np_array_shape = (self.height, self.width) return a.shape[0:2] == expected_np_array_shape
[docs] def set_tile_size(self, tile_size: int): if tile_size > 0: assert log2(tile_size).is_integer(), "Tile size must be a power of two." assert tile_size >= 4, "Tile sizes smaller than 4 have not been tested yet." assert tile_size <= 32, "Tile sizes bigger than 32 have not been tested yet." #assert n <= min(self.width, self.height) self._tile_size = tile_size self._tile_packer = None # Invalidating the tile packer
@property
[docs] def tile_size(self): return self._tile_size
[docs] def set_buffers(self, max_step_size=None, strong_bc_array=None, strong_bc_values_array=None, quantity_arrays=None, bathymetry_array=None, manning_array=None, active_meshes_array=None, weak_bc_array=None, bc_cells_array=None, infiltration_zones_array=None, infiltration_timings=None): if active_meshes_array is not None: logging.debug("Active mesh array and texture indirection") assert self._has_good_shape(active_meshes_array), "active_meshes_array has bad size" self.nb_active_meshes = np.count_nonzero(active_meshes_array) #self._init_indirection(active_meshes_array) self._tile_packer = TilePacker(active_meshes_array, self._tile_size, self._tile_packing_mode) #padded_tile_indirection_map = np.ascontiguousarray( np.pad( self._tile_packer.tile_indirection_map(), ((0,0),(0,0),(0,1))).astype(np.uint32)) # The active mesh is not stored, only the texture indirection. # RG16 = (x:uint16, y:uint16) self._tile_indirection_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, format=GL_RG16UI, img_data=self._tile_packer.tile_indirection_map()) if False: ntx, nty = self.tile_packer().size_in_tiles() # ATTENTION! You clearly see a mismatch between the storage type # of the indirection texture and the format we read it into. # That's because python opengl bindings don't like RG16UI. tile_indirection_tex = read_texture2(self._tile_indirection_tex, GL_RGB16UI, ntx, nty) fig, axs = plt.subplots(1, 2) z = np.ascontiguousarray( np.pad( self._tile_packer.tile_indirection_map().astype(float), ((0,0),(0,0),(0,1))).astype(np.uint32)) z = z / np.max(z) axs[0].imshow(z) axs[0].set_title("padded_tile_indirection_map") #tile_indirection_tex[:,:,2] = 0 axs[1].imshow(tile_indirection_tex[:,:,0]) axs[1].set_title("downloaded version") plt.show() if max_step_size is not None: logging.debug("Max step size array") # This should be used only for testing purposes assert max_step_size.shape[0:2] == self._tile_packer.packed_size_in_tiles() assert max_step_size.dtype in (float, np.float32) self.max_step_size_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, max_step_size), # Tell that the first texture is the one where the max step size is # to be read self._currentMinStepSizeTexture = 0 if strong_bc_array is not None: logging.debug("StrongBC array") assert self._has_good_shape(strong_bc_array) assert len(strong_bc_array.shape) == 2, "Only one strong BC, for h by default." 
self.strong_boundary_cond_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, strong_bc_array) if strong_bc_values_array is not None: logging.debug("StrongBC values array") assert self._has_good_shape(strong_bc_values_array) assert len(strong_bc_values_array.shape) == 3 and strong_bc_values_array.shape[2] == 3, "strong cell Conditiosn must be h,uh,vh" self.strong_boundary_cond_values_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_RGB32F, strong_bc_values_array) if bc_cells_array is not None: logging.debug("BCCells array") assert bc_cells_array.shape[0] >= 1, "You must give at least one boundary condition" self.boundary_cond_cells_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, bc_cells_array) if weak_bc_array is not None: logging.debug("WeakBC array") assert self._has_good_shape(weak_bc_array), \ f"Weak boundary condition has bad shape: {weak_bc_array.shape}, expected: {(self.height, self.width)}" self.weak_boundary_cond_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32UI, self._tile_packer.shuffle_and_pack_array( weak_bc_array)) if quantity_arrays is not None: logging.debug("Quantities array") # These are the qty0, qty1, qty2 arrays... # FIXME Why set up the three of them ? assert isinstance(quantity_arrays, list) assert len(quantity_arrays) == 3 assert len(quantity_arrays[0].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert len(quantity_arrays[1].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert len(quantity_arrays[2].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert self._has_good_shape(quantity_arrays[0]), f"Bad shape for array 0: {quantity_arrays[0].shape}" assert self._has_good_shape(quantity_arrays[1]), f"Bad shape for array 0: {quantity_arrays[1].shape}" assert self._has_good_shape(quantity_arrays[2]), f"Bad shape for array 0: {quantity_arrays[2].shape}" assert quantity_arrays[0].dtype == np.float32 assert quantity_arrays[1].dtype == np.float32 assert quantity_arrays[2].dtype == np.float32 self.quantity_tex = [ # Padding to move from vec3 to vec4. upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_RGBA32F, self._tile_packer.shuffle_and_pack_array( np.pad( qa, ((0,0),(0,0),(0,1)) )), texture_id=None) for qa in quantity_arrays ] self.integration_fb = create_frame_buffer_for_computation(self.quantity_tex) self.strong_boundary_cond_fb = create_frame_buffer_for_computation(self.quantity_tex) if bathymetry_array is not None: logging.debug("Bathymetry array") if self._has_good_shape(bathymetry_array): self.bathymetry_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, self._tile_packer.shuffle_and_pack_array( np.ascontiguousarray(bathymetry_array), np.max(bathymetry_array))) else: raise Exception(f"The bathymetry array doesn't have the right size. I expect: {self.height} rows, {self.width} columns. 
You gave {bathymetry_array.shape[0]} rows by {bathymetry_array.shape[1]} columns.") if manning_array is not None: logging.debug("Manning array") assert self._has_good_shape(manning_array) self.manning_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, self._tile_packer.shuffle_and_pack_array(np.ascontiguousarray(manning_array))) # The map of infilotration zones if infiltration_zones_array is not None: logging.debug("Infiltration zones array") assert self._has_good_shape(infiltration_zones_array), \ f"Infiltration array has bad shape: {infiltration_zones_array.shape}, expected: {(self.height, self.width)}" # for row in infiltration_zones_array: # print(row) # print(infiltration_zones_array.dtype) # exit() packed = self._tile_packer.shuffle_and_pack_array( np.ascontiguousarray(infiltration_zones_array), neutral_values=MESH_WITHOUT_INFILTRATION, debug=True ) self.infiltration_zones_texture = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32I, packed) # The list of the infiltration zones with their time periods. if infiltration_timings is not None and len(infiltration_timings) >= 1: logging.debug("Infiltration timings array") # Each infiltration period has a corresponding column : # [start time, zone1 value, zone2 value, ...] if isinstance(infiltration_timings, list): a = np.array(infiltration_timings, dtype=float) elif isinstance(infiltration_timings, np.ndarray): a = infiltration_timings else: raise Exception(f"Bad type {type(infiltration_timings)}") assert a.shape[0] >= 1, "I don't handle empty lists for infiltrations" assert a.shape[1] >= 2, "I need at least one infiltration zone" test_order = a[:,0].flatten() assert np.all(test_order[:-1] <= test_order[1:]), f"The infiltration timings are not ordered ! : {test_order}" self.infiltration_timings = a[:,0] # Only the timings self.nb_infiltration_lines = a.shape[0] self.nb_infiltration_zones = a.shape[1] - 1 #print(f"set buffer : self.nb_infiltration_zones={self.nb_infiltration_zones}") if self.nb_infiltration_lines >= 2: deltas = self.infiltration_timings[1:] - self.infiltration_timings[0:-1] #print(deltas) if self.infiltration_timings[0] > 0: self.max_time_step_inactive_tile = min( self.max_time_step_inactive_tile, self.infiltration_timings[0], np.min(deltas[deltas > 0])) else: self.max_time_step_inactive_tile = min( self.max_time_step_inactive_tile, np.min(deltas[deltas > 0])) else: self.max_time_step_inactive_tile = MAXIMUM_TIME_STEP self.infiltration_zones_values_array = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, a)
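# --- Illustration (not part of the simulator) ---
# A minimal, self-contained sketch of the tile-packing idea used by set_buffers above:
# only tiles that contain at least one active mesh are kept, and an indirection map
# records, for every tile of the original domain, where (if anywhere) its data lives in
# the packed layout. The real work is done by TilePacker; `toy_pack` below is a
# hypothetical stand-in written only to make the concept concrete.
def toy_pack(active, tile):
    """Return (list_of_kept_tiles, indirection_map) for a boolean `active` mask."""
    nty, ntx = active.shape[0] // tile, active.shape[1] // tile
    indirection = -np.ones((nty, ntx), dtype=np.int32)  # -1 == tile absent from the packed data
    kept = []
    for ty in range(nty):
        for tx in range(ntx):
            block = active[ty * tile:(ty + 1) * tile, tx * tile:(tx + 1) * tile]
            if block.any():
                indirection[ty, tx] = len(kept)          # slot in the (toy, 1D) packed layout
                kept.append((ty, tx))
    return kept, indirection
#
# Example: an 8x8 domain, 4-mesh tiles, only the top-right tile holds active meshes:
#   mask = np.zeros((8, 8), dtype=bool); mask[0:4, 4:8] = True
#   toy_pack(mask, 4) -> ([(0, 1)], [[-1, 0], [-1, -1]])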
[docs] def simulation_step(self, source_quantity): """ Perform a full simulation step (with all substeps: euler, RK,...) `source_quantity` is 0 or 1 and tells in which of the ping/pong textures we'll *read* information. Consequently, we'll *write* into `1 - source_quantity`. """ assert source_quantity in (0,1) if not self._shaders_loaded: self._load_shaders() # for tex in self.alpha_tex: # clear_texture(tex) #glClear(GL_COLOR_BUFFER_BIT) # Updating and setting some global values program = self.update_globals_cs_program glUseProgram(program) if self.nb_infiltration_lines >= 1: # Avoid to set an inexistent uniform (will be optimized out by # GL/SL compiler) samplers= {"infiltrationValuesSampler": self.infiltration_zones_values_array} else: samplers = {} samplers["param_copy"] = ( self.param_copy_tex[self._current_step % len(self.param_copy_tex)], GL_WRITE_ONLY) uniforms = {} wire_program(program, uniforms, samplers, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) glDispatchCompute(1,1,1) glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT) if self._optim_geom_shaders is not None: self.detect_wet_tiles2(source_quantity) if self.time_step_strategy == TimeStepStrategy.OPTIMIZED_TIME_STEP: logging.debug("Max step size compute") with time_it_gl("-- compute_max_step_size"): # self.compute_all_time_step_step(source_quantity) # self.last_step_duration = self.compute_max_step_size() self.compute_time_step_for_tiles_phase1(source_quantity) self.compute_time_step_for_tiles_phase2(source_quantity) #print("all time steps") else: assert self.time_step > 0 # self._skip_euler Used for debugging if not self._debugging_skip_euler: old_nb_dryup_iterations = None # while True: if self.ponderation_runge_kutta is None: # Euler without RungeKutta self.last_euler_dry_up_fixes_count = self.compute_derivative_with_dry_up_and_euler(source_quantity, source_quantity, 1 - source_quantity) #self.compute_euler_step(source_quantity, dest_quantity=1 - source_quantity) else: # Euler will be followed by Runge Kutta self.last_euler_dry_up_fixes_count = self.compute_derivative_with_dry_up_and_euler(source_quantity, source_quantity, 2) #self.compute_euler_step(source_quantity, dest_quantity=2) # glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT) # gs = self.read_global_state() # old_nb_dryup_iterations = gs.nb_dryup_iterations # print(gs.nb_dryup_iterations) with time_it_gl("-- strong BC1"): if False: self.apply_strong_boundary_conditions(2) #print("strong bc") if self.ponderation_runge_kutta is not None: glMemoryBarrier( GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT ) with time_it_gl("-- second derivative"): # self.compute_derivative(current_bathymetry, 2, first_pass=False, simulation_time=sim_time) # self.last_rk_dry_up_fixes_count = self.compute_derivative_with_dry_up(current_bathymetry, 2, source_quantity, sim_time) self.last_rk_dry_up_fixes_count = -1 self.compute_derivative_with_dry_up_and_rk( source_quantity, # at t0 2, # at euler 1 - source_quantity) # self.compute_derivative_with_dry_up2(current_bathymetry, 2, source_quantity, sim_time, 1 - source_quantity) # with time_it_gl("-- RK"): # self.compute_runge_kutta(source_quantity) # #print("RK") with time_it_gl("-- strong bc2"): if False: self.apply_strong_boundary_conditions(1 - source_quantity) #print("strong bc RK") program = self.update_globals_cs_program2 glUseProgram(program) wire_program(program, {}, {}, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) glDispatchCompute(1,1,1) glMemoryBarrier( 
GL_SHADER_STORAGE_BARRIER_BIT) #if self.report_frequency_type == ReportFrequencyType.SIMULATION_TIME: if self._current_step > self._next_stop: gs = self.read_global_state() if gs.time_step == 0: raise Exception("Simulation can't progress anymore, delta_t == 0 !!!") old_sim_time = self.last_gpu_time self.last_gpu_time, self.current_simulation_time_step = gs.simulation_time, gs.time_step # We now control if the sampling frequency is enough to do satisfy # the request reportnig period. if old_sim_time is None: old_sim_time = self.last_gpu_time sampling_period = None else: sampling_period = self.last_gpu_time - old_sim_time old_sim_time = self.last_gpu_time if self.report_frequency.type == SimulationDurationType.SECONDS: if sampling_period is not None: if sampling_period > self.report_frequency.duration: logging.warning(f"The recording period ({self.report_frequency}) is smaller than the time between two sampled steps ({sampling_period:.3f} s.). Remember we sample every {TIME_SAMPLING_RESOLUTION} steps.") if False and self.report_frequency.type == SimulationDurationType.SECONDS: # Trying to predict the best moment to check for the time reporting. # In practice: doesn't work very well... I leave this code here in case # I'd like to improve it. self.iteration_vs_simtime.append( [self._current_step, self.last_gpu_time]) if len(self.iteration_vs_simtime) > 2: self.iteration_vs_simtime = self.iteration_vs_simtime[-2:] iter_time = self.iteration_vs_simtime delta_iter = iter_time[1][0] - iter_time[0][0] delta_time = iter_time[1][1] - iter_time[0][1] iter_per_second = delta_iter / delta_time self._next_stop = self._next_stop + iter_per_second * self.report_frequency # print(self.iteration_vs_simtime) # print(self._next_stop) else: self._next_stop += TIME_SAMPLING_RESOLUTION else: self._next_stop += TIME_SAMPLING_RESOLUTION # finish_timings() # finish_timings_full() self._current_step += 1
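# --- Usage sketch (hypothetical, not part of the class) ---
# How a caller could drive simulation_step's ping/pong textures: each call reads from
# `source_quantity` and writes into `1 - source_quantity`, so the driver simply flips
# the index between steps. `sim` is assumed to be a fully initialised GLSimulation.
def run_n_steps(sim, n_steps: int, source_quantity: int = 0) -> int:
    for _ in range(n_steps):
        sim.simulation_step(source_quantity)
        source_quantity = 1 - source_quantity  # next step reads what was just written
    return source_quantity                     # index of the texture holding the latest result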
[docs] def detect_wet_tiles2(self, current_quantity): """ Detect wet tiles FIXME we also call them active but that a misnomer because active also means NAP-active... """ # for i in range(8): # glActiveTexture(TEXTURE_UNITS[i]) # glBindTexture(GL_TEXTURE_RECTANGLE, GL_NONE) # #glBindImageTexture( TEXTURE_UNITS[i], GL_NONE, 0, GL_FALSE, 0, GL_NONE, GL_NONE) if False: from matplotlib import pyplot as plt ntx, nty = self._tile_packer.size_in_tiles() tile_indirection_tex = read_texture2(self._tile_indirection_tex, GL_RGB16UI, ntx, nty)[:,:,0:2] np.save( f"{self._prefix}_tile_indirection_{self._current_step}", tile_indirection_tex) program = self.active_tiles_detector_program2 # parallel_tile_detect.comp glUseProgram(program) textures = { "q": self.quantity_tex[current_quantity], "infiltrationZonesImage": (self.infiltration_zones_texture, GL_READ_ONLY), "tileIndirection": (self._tile_indirection_tex, GL_READ_ONLY), "activeTilesImage": (self._active_tiles_map_tex[0], GL_WRITE_ONLY), } if self._optimize_tile_indirection: textures["reversedTileIndirection"] = self._reversed_tile_indirection_tex wire_program( program, uniforms={}, textures=textures, ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id}, ) if self._optimize_tile_indirection: # We dispatch only over the tiles which are part of the active domain. ntx, nty = self._tile_packer.packed_size_in_tiles() else: # We dispatch over the whole domain, one workgroup per tile. # When tile packing is active, we dispatch over the whole domain. The shader # will take care of indirecting absolute coordinates to virtual ones. ntx, nty = self._tile_packer.size_in_tiles() glDispatchCompute(ntx, nty, 1) check_gl_error() # make sure writing to image has finished before read glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) if False: ntx, nty = self._tile_packer.packed_size_in_tiles() active_tiles_map = read_texture2(self._active_tiles_map_tex[0], GL_R8UI, ntx, nty) np.save( f"{self._prefix}_active_tiles_map_{self._current_step}", active_tiles_map) h = self.read_quantity_result(current_quantity)[:,:,0] np.save( f"{self._prefix}_h_{self._current_step}", h) active_tiles_map = active_tiles_map.copy() active_tiles_map[active_tiles_map.shape[0]//2,active_tiles_map.shape[1]//2] = 0 # make more constrasted view fig, axs = plt.subplots(1,3) axs[0].imshow(active_tiles_map) axs[0].set_title(f"active_tiles_map {active_tiles_map.shape} {active_tiles_map.dtype} ") axs[1].imshow(tile_indirection_tex[:,:,0]) axs[1].set_title(f"Inidrection- dispatch {self._tile_packer.size_in_tiles()}") axs[2].imshow(h) axs[2].set_title(f"h") plt.show()
[docs] def _tiles_ndc_dims(self): # Size of the tiles in NDC coordinates. nb_tiles_horiz, nb_tiles_verti = ceil(self.width / self._tile_size), ceil(self.height / self._tile_size) ndc_w = 2 return (ndc_w * (self._tile_size/self.width), ndc_w * (self._tile_size/self.height))
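# Worked example for _tiles_ndc_dims (values chosen for illustration): NDC spans [-1, +1],
# i.e. a width of 2, so with 16-mesh tiles on a 200x100 domain a tile covers
# 2 * 16 / 200 = 0.16 NDC units horizontally and 2 * 16 / 100 = 0.32 NDC units vertically.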
[docs] def _draw_active_tiles(self, program, uniforms: dict, textures: dict, viewport=None): nb_tiles_horiz, nb_tiles_verti = ceil(self.width / self._tile_size), ceil(self.height / self._tile_size) # FIXME Dirty state management if not hasattr(self, "_active_tile_grid_vao_id") or self._active_tile_grid_vao_id is None: # We build the lower left corner of the quads # that will be *generated* by the geometry shader. # from 0 to (l-1)/l then *2-1 to be in [-1,+1] # grid_line(4) will give (-1, -0.5, 0, 0.5) def grid_line(nb_tiles,tile_size_ndc): return (np.arange(0,nb_tiles)*tile_size_ndc-1).astype(np.float32) #tw, th = self._tiles_ndc_dims() grid = np.meshgrid( grid_line(nb_tiles_horiz, 2/self.width*self._tile_size), grid_line(nb_tiles_verti, 2/self.height*self._tile_size)) # print(nb_tiles_horiz, nb_tiles_verti) # print(grid[0].shape) # print(grid[1].shape) # print(np.min(grid[0]), np.max(grid[0])) # grid[0] = a 2d array of all the points of the gird. Each cell of the array contains the X coordinate. # grid[1] = a 2d array of all the points of the gird. Each cell of the array contains the Y coordinate. # z = np.ravel(grid[0]) lower_left_corners = np.stack( [np.ravel(grid[0]), np.ravel(grid[1]), np.zeros((grid[0].size,), dtype=np.float32)]).T # print(lower_left_corners.shape) # for i in range(0,nb_tiles_horiz*nb_tiles_verti,7): # lower_left_corners[i,:] = [0,0,0] #lower_left_corners= lower_left_corners[0:250,:] # lower_left_corners = np.array([[0,0,0], # [0.125,0.125,0],], dtype=np.float32) #print(lower_left_corners.ravel()) self._active_tile_grid_vao_id = upload_geometry_to_vao(lower_left_corners) # For the vertex shader uniforms["uModelViewProjectionMatrix"] = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) #uniforms["uModelViewProjectionMatrixGS"] = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) textures["activeTilesSampler"] = self._active_tiles_map_tex[0] wire_program(program, uniforms, textures, #ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id} ) if viewport is None: glViewport(0, 0, self.width, self.height) #print(self.width, self.height) else: glViewport(viewport[0],viewport[1],viewport[2],viewport[3]) #print(viewport) glBindVertexArray( self._active_tile_grid_vao_id ) # Pass the vertices to the geometry shader. The geometry shader will # output the quads corresponding to each tile. glDrawArrays(GL_POINTS, 0, nb_tiles_horiz*nb_tiles_verti)
[docs] def read_euler_step_result(self, tex=2): """ Returns a 3D array (q, qu, qv) """ return read_texture(self.integration_fb, tex, self.width, self.height, GL_RGB32F)
[docs] def apply_strong_boundary_conditions(self, tex_out): program = self.strong_boundary_cond_program glUseProgram(program) # Remember the framebuffer is wired to the 3 "quantity" # textures (h,hu,hv) (each texture being one float wide) glBindFramebuffer(GL_FRAMEBUFFER, self.strong_boundary_cond_fb) # Choose destination texture (indexed on frame buffer's textures) glDrawBuffer(GL_COLOR_ATTACHMENTS[tex_out]) # We copy from strong boundary condition texture to quantity texture. # We copy only the RED coordinate, which represents the "h" value. # Notice we read and write the same (quantity) texture. wire_program( program, textures={ "strongbcSampler": self.strong_boundary_cond_tex, "strongbcvaluesSampler": self.strong_boundary_cond_values_tex, "quantitySampler": self.quantity_tex[tex_out] }, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id} ) self._draw_active_parts_deprecated(program)
[docs] def active_infiltration_line(self, t: float) -> int: # Compute the active infiltration event number (or line in the # array) at time t (number is one based); !!! One based !!!. # Returns 0 if none is active. assert t >= 0, f"You're in the past (t={t}) !" if self.infiltration_timings is None: # There are no configured infiltrations. return 0 assert np.all(self.infiltration_timings[:-1] <= self.infiltration_timings[1:]), \ "timings are not in ascending order" if t < self.infiltration_timings[0]: return 0 # Nothing to do, the infiltration has not yet started for i in range(self.infiltration_timings.shape[0]-1): if self.infiltration_timings[i] <= t < self.infiltration_timings[i+1]: return i+1 assert t > self.infiltration_timings[-1], "We should be past the last time" return self.infiltration_timings.shape[0] # one based !
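# Illustration of the one-based numbering above (hypothetical timings, not from a real model),
# assuming self.infiltration_timings == [10.0, 20.0, 30.0]:
#   active_infiltration_line(5.0)  -> 0  (infiltration has not started yet)
#   active_infiltration_line(15.0) -> 1  (first line is active: 10 <= t < 20)
#   active_infiltration_line(25.0) -> 2  (second line is active: 20 <= t < 30)
#   active_infiltration_line(35.0) -> 3  (past the last timing, the last line remains active)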
[docs] def compute_derivative_with_dry_up_and_euler( self, current_quantity_tex_ndx:int, euler_base_quantity_tex_ndx:int, # Base for dry up computation (we'll take h as a starting point) quantity_out_tex_ndx:int, current_alpha_tex_ndx=0, ignore_alpha=False): program = self.derivative_and_dry_up_cs_program glUseProgram(program) uniforms = { "epsilon": float(self.epsilon), "dx": self.dx, "dy": self.dy, "max_nb_of_dry_up_loops": np.uintc(self._dbg_max_nb_of_dry_up_inner_loops), } assert self._currentMinStepSizeTexture == 0, f"debugging assert... {self._currentMinStepSizeTexture}" samplers = { "bathy": self.bathymetry_tex, # FIXME when you see the caller's params values, you see quantitySampler == quantityEulerSampler "quantitySampler": self.quantity_tex[current_quantity_tex_ndx], "quantityEulerBaseSampler": self.quantity_tex[euler_base_quantity_tex_ndx], "weakBCSampler": self.weak_boundary_cond_tex, "BCCellsSampler": self.boundary_cond_cells_tex, "manning": self.manning_tex, "q_out": (self.quantity_tex[quantity_out_tex_ndx], GL_WRITE_ONLY), # These two are interanl to the compute shader # "rhs_out": (self.derivative_tex, GL_WRITE_ONLY), # "source_impl_out": (self.source_impl_tex, GL_WRITE_ONLY), "alpha_image_1": (self.alpha_tex[0], GL_READ_WRITE), "alpha_image_2": (self.alpha_tex[1], GL_READ_WRITE), "activeTilesSampler": self._active_tiles_map_tex[0], "tileIndirection": self._tile_indirection_tex, } if self._debugging: samplers.update( { "debug_out": (self.debug_tex[0], GL_READ_WRITE), "debug_out2": (self.debug_tex[1], GL_READ_WRITE), } ) if self.infiltration_timings is not None: # Note that this code implies that an infiltration source term can be # there at the first compute_derivative and not at the second. So # Runge-Kutta should be correct. #uniforms["active_infiltration_line"] = self.active_infiltration_line(simulation_time) samplers["infiltrationZonesSampler"] = self.infiltration_zones_texture #samplers["infiltrationValuesSampler"] = self.infiltration_zones_values_array if self._track_flows_at_border: # Texture 0 is for Euler (1 is RK) samplers["flows_at_border"] = self._flows_at_border_tex[0] samplers["rhs_texture"] = self._rhs_tex wire_program(program, uniforms, samplers, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) self._handle_dry_up_iterations(program)
@property
[docs] def optimize_tile_indirection(self): return self._optimize_tile_indirection
[docs] def set_total_number_of_dry_up_outer_loops(self, n): """Configure the number of desired iterations to solve all or some of the dry ups. :param n: Number of iterations: * n == 0 means no dry up (only Euler/RK) * n > 0 means a *fixed* number of dry up iterations * None means as many dry up iterations as needed to solve all dry ups :type n: None or int. """ assert n is None or n >= 0 self._total_number_of_dry_up_outer_loops = n
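# Usage sketch of the three modes described above (`sim` being an initialised GLSimulation):
#   sim.set_total_number_of_dry_up_outer_loops(0)     # dry ups disabled, plain Euler/RK only
#   sim.set_total_number_of_dry_up_outer_loops(3)     # exactly three outer dry-up iterations
#   sim.set_total_number_of_dry_up_outer_loops(None)  # iterate until all dry ups are solved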
#print(f"set {self._max_number_of_dry_up_loops}" + "*"*100)
[docs] def set_no_dry(self, dummy): """ DEPRECATED !!! """ self.set_total_number_of_dry_up_outer_loops(0)
[docs] def emergency_save(self): logging.error(f"Emergency save at step {self._current_step} !!!") q0 = self.read_quantity_result(0) q1 = self.read_quantity_result(1) alpha0 = self.read_alpha(0) alpha1 = self.read_alpha(1) bathy = self.read_bathymetry() if self._debugging: debug0 = self.read_debug_texture(0) debug1 = self.read_debug_texture(1) np.save("q0",q0) np.save("q1",q1) np.save("alpha0",alpha0) np.save("alpha1",alpha1) np.save("bathy",bathy) if self._debugging: np.save("debug0",debug0) np.save("debug1",debug1)
@property
[docs] def total_number_of_dry_up_outer_loops(self): return self._total_number_of_dry_up_outer_loops
[docs] def dry_up_disabled(self): return self._total_number_of_dry_up_outer_loops == 0
[docs] def _handle_dry_up_iterations(self, program): set_uniform(program, "first_dry_up_iteration", True) # We dispatch over the virtual computation domain. ntx, nty = self._tile_packer.size_in_tiles() all_last_number_dryup_iterations = [] last_number_dryup_iterations = -1 nb_outer_loop_iterations = 0 while True: glDispatchCompute(ntx, nty, 1) glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) nb_outer_loop_iterations += 1 if self._total_number_of_dry_up_outer_loops is None: # We iterate until done. So we have to ask the # question: "are we done?" gs = self.read_global_state() # VERY costly !!! if self._debugging: if nb_outer_loop_iterations > 10: print(nb_outer_loop_iterations, gs.nb_dryup_iterations) if nb_outer_loop_iterations > 100: self.emergency_save() from traceback import print_stack print_stack() exit() if nb_outer_loop_iterations > 200: if nb_outer_loop_iterations % 100 == 0: logging.warning(f"The total number of outer loop iterations ({nb_outer_loop_iterations}) in dry up management seems high") if nb_outer_loop_iterations > 400: logging.error(f"The total number of outer loop iterations ({nb_outer_loop_iterations}) in dry up management seems high. Aborting.") break # Remember! gs.nb_dryup_iterations is the total number of inner # loop iterations that occurred *across all tiles*. # So if 2 tiles have dry ups, this count will be at # least 2. So DO NOT over-interpret this number # as it will vary wildly depending on the number of # inner loops necessary to complete the dry ups. if gs.nb_dryup_iterations == 0: # 0 is a bit of a special case. It occurs only when # no dry ups are detected as early as the first iteration. break elif gs.nb_dryup_iterations == last_number_dryup_iterations: # The number of dry-up iterations inside the tiles # didn't change from the last iteration to the present # one. Therefore we can conclude all the dry ups have # been solved. # Note that gs.nb_dryup_iterations is a global variable # across all tiles (threads). Moreover it is never # reset across outer loops. So the only way to conclude # that nothing happened is to track the fact that # gs.nb_dryup_iterations remains constant over two # successive outer loops. break else: last_number_dryup_iterations = gs.nb_dryup_iterations elif self._total_number_of_dry_up_outer_loops == 0: # Dry ups are disabled (the shader won't handle dry ups inside # the tiles either). So we basically do simple Euler or RK. break elif nb_outer_loop_iterations >= self._total_number_of_dry_up_outer_loops: # We want an exact number of iterations, therefore we # don't need to query the global state to know if # there are more iterations to do. break # Prepare for the next iteration set_uniform(program, "first_dry_up_iteration", False)
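# --- Illustration (hypothetical helpers, not used by the simulator) ---
# The termination test of _handle_dry_up_iterations, isolated from the GL calls: the GPU
# counter is cumulative over all tiles and is never reset between outer loops, so the loop
# stops as soon as the counter stops growing (or is still zero after the first pass).
# `dispatch` and `read_counter` stand in for glDispatchCompute and
# read_global_state().nb_dryup_iterations respectively.
def iterate_until_counter_stalls(dispatch, read_counter, max_iterations=400):
    last = -1
    for _ in range(max_iterations):
        dispatch()                    # one outer dry-up pass on the GPU
        current = read_counter()      # cumulative count across all tiles
        if current == 0 or current == last:
            return current            # nothing happened (anymore): all dry ups are solved
        last = current
    raise RuntimeError("Dry-up handling did not converge")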
[docs] def compute_derivative_with_dry_up_and_rk( self, current_quantity_tex_ndx:int, # at t0 euler_base_quantity_tex_ndx:int, # index of the result of the Euler step quantity_out_tex_ndx:int, current_alpha_tex_ndx=0, ignore_alpha=False): program = self.derivative_and_dry_up_and_rk_cs_program glUseProgram(program) uniforms = { "epsilon": self.epsilon, "dx": self.dx, "dy": self.dy, #"ponderation": float(self.ponderation_runge_kutta), # Allow an integer ponderation (eg. 0 or 1) "max_nb_of_dry_up_loops": np.uintc(self._dbg_max_nb_of_dry_up_inner_loops), } samplers = { "bathy": self.bathymetry_tex, # This one will be the base to compute the final RK value. "quantityt0Sampler": self.quantity_tex[current_quantity_tex_ndx], # These two will be used to compute the derivative over Euler's step. # Derivative code is hardcoded to read from quantitySampler and for # RK this is the result of the previous Euler step "quantitySampler": self.quantity_tex[2], # This is used as the "base" when simulating potential conditions for dry up. "quantityEulerBaseSampler": self.quantity_tex[current_quantity_tex_ndx], "weakBCSampler": self.weak_boundary_cond_tex, "BCCellsSampler": self.boundary_cond_cells_tex, "manning": self.manning_tex, "q_out": (self.quantity_tex[quantity_out_tex_ndx], GL_WRITE_ONLY), # "rhs_out": (self.derivative_tex, GL_WRITE_ONLY), # "source_impl_out": (self.source_impl_tex, GL_WRITE_ONLY), "alpha_image_1": (self.alpha_tex[0], GL_READ_WRITE), "alpha_image_2": (self.alpha_tex[1], GL_READ_WRITE), "activeTilesSampler": self._active_tiles_map_tex[0], "tileIndirection": self._tile_indirection_tex, } if self._debugging: samplers.update( { "debug_out": (self.debug_tex[0], GL_READ_WRITE), "debug_out2": (self.debug_tex[1], GL_READ_WRITE), } ) if self.infiltration_timings is not None: # Note that this code implies that an infiltration source term can be # there at the first compute_derivative and not at the second. So # Runge-Kutta should be correct. samplers["infiltrationZonesSampler"] = self.infiltration_zones_texture #samplers["infiltrationValuesSampler"] = self.infiltration_zones_values_array if self._track_flows_at_border: # Texture 1 is for RK (0 is Euler) samplers["flows_at_border"] = self._flows_at_border_tex[1] # FIXME This will erase Eulere's step results. # samplers["rhs_texture"] = self._rhs_tex wire_program(program, uniforms, samplers,ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) self._handle_dry_up_iterations(program)
[docs] def tile_packer(self) -> TilePacker: return self._tile_packer
[docs] def read_debug_texture(self, tex=0): assert self._debugging, "You forgot to enable debugging" assert tex in (0,1), "I only use two debugging textures" #return read_texture2(self.debug_tex[tex], GL_RGBA32F) psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.debug_tex[tex], GL_RGBA32F))
[docs] def read_infiltration_map(self): psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.infiltration_zones_texture, GL_R32I, width=self.width, height=self.height))
[docs] def read_flows_on_borders(self, which:int): """ Read flows on borders. Each cell of the returned arrays contains: (q_left, q_right, q_top, q_bottom). Used only while debugging :param which: 0 for Euler, 1 for Runge Kutta. """ assert which in [0,1], "You must choose either Euler substep (zero) or Runge Kutta sub step (1)" assert self._track_flows_at_border, "You must enable this functionality first" return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self._flows_at_border_tex[which], GL_RGBA32F))
[docs] def read_rhs(self): """ Read RHS at Euler step or RK step (make sure you know which one is valid when). Each cell of the returned arrays contains: (q_left, q_right, q_top, q_bottom). Used only while debugging """ assert self._track_flows_at_border, "You must enable this functionality first" return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self._rhs_tex, GL_RGBA32F))
[docs] def read_quantity_result(self, current_quantity) -> np.array: """ Returns the matrix of quantities (h,qx,qy), unpacked. Used for debugging. Prefer the ResultsStore. :param current_quantity: is 0,1 for the flip flop textures and 2 for the intermediate Euler result (if one uses Runge-Kutta). """ assert current_quantity in (0,1,2), f"Bad quantity index : {current_quantity}" psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture(self.integration_fb, current_quantity, psw, psh, GL_RGB32F))
# def read_weak_bc_map(self) -> np.array: # psw, psh = self._tile_packer.packed_size() # return self._tile_packer.unpack_and_deshuffle_array( # read_texture2(self.weak_boundary_cond_tex, GL_R32UI) # )
[docs] def read_tile_packed_quantity_result(self, current_quantity): """ Returns the matrix of quantities (h,qx,qy), "tile-packed". """ assert current_quantity in (0,1) psw, psh = self._tile_packer.packed_size() return read_texture(self.integration_fb, current_quantity, psw, psh, GL_RGB32F)
[docs] def read_bathymetry(self): return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.bathymetry_tex, GL_R32F))
[docs] def read_alpha(self, current_quantity): """Returns the matrix of alpha-correction (dry ups), "tile-_un_packed".""" assert current_quantity in (0, 1) return self._tile_packer.unpack_and_deshuffle_array( self.read_tile_packed_alpha(current_quantity))
[docs] def read_tile_packed_alpha(self, current_quantity): """ Returns the matrix of alpha-correction (dry ups), "tile-packed". """ assert current_quantity in (0,1) psw, psh = self._tile_packer.packed_size() tex = read_texture2(self.alpha_tex[current_quantity], GL_RGBA32F) # Alpha texture is stored as red/green (two components) # but somehow pyopengl can't read into that format, therefore # I ask RGBA and then I slice out what I need. return tex[:,:,0:2]
[docs] def read_active_cells_map(self): """ Read the active cells map. NOTE! Beware that this map is "packed" (as in tile packing). """ ntx, nty = self._tile_packer.packed_size_in_tiles() active_tiles = read_texture2(self._active_tiles_map_tex[0], GL_R8UI, width=ntx, height=nty) return active_tiles
[docs] def read_global_state(self) -> GLSimulationGlobalState: """Read the global state variables of the simulation, as reported by the shaders in the GPU. Be aware that this implies reading information from the GPU which usually leads to a huge drop in performance (up to 50% !!!). :return: simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code """ assert self.state_buffer_ssbo_id is not None, "The SSBO is not configured" glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) # GL_INVALID_VALUE is generated if offset or size is negative, or if # offset+size is greater than the value of GL_BUFFER_SIZE for the buffer # object. res = glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, GLOBALS_BUFFER_SIZE +self.nb_infiltration_zones*4).tobytes() glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE) parsed_ssbo = struct.unpack(GLOBALS_STRUCT_PACK_FORMAT+"f"*self.nb_infiltration_zones, res) ( simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code, total_active_tiles_hi, total_active_tiles_lo, ) = parsed_ssbo[0 : len(GLOBALS_STRUCT_PACK_FORMAT)] infiltration_zones = parsed_ssbo[len(GLOBALS_STRUCT_PACK_FORMAT):] total_active_tiles = total_active_tiles_hi * 0x100000000 + total_active_tiles_lo # print(simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles) #print(f"Infiltration zones: {infiltration_zones}") #r= [simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, infiltration_zones, nb_active_tiles, status_code] assert nb_active_tiles >= 0 r = GLSimulationGlobalState( simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code, total_active_tiles, infiltration_zones ) #print(self._current_step, r.simulation_time, r.time_step, r.status_code) return r
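# --- Illustration (hypothetical helper, not used by the simulator) ---
# The CPU-side decoding performed by read_global_state, without any GPU access: the global
# state block is a packed struct (GLOBALS_STRUCT_PACK_FORMAT, i.e. "dfLLLLLL", followed by
# one float per infiltration zone) and the 64-bit active-tile counter arrives split into
# two 32-bit halves. `struct` and GLOBALS_STRUCT_PACK_FORMAT come from the top of this module.
def _example_decode_global_state():
    fmt = GLOBALS_STRUCT_PACK_FORMAT + "f" * 2       # pretend there are 2 infiltration zones
    raw = struct.pack(fmt, 12.5, 0.001, 1, 0, 42, 0, 1, 2, 0.3, 0.7)
    fields = struct.unpack(fmt, raw)
    sim_time, time_step, infil_line, dryups, nb_active_tiles, status, hi, lo = fields[:8]
    total_active_tiles = hi * 0x100000000 + lo       # recombine the 64-bit counter
    assert total_active_tiles == 0x100000002
    return sim_time, time_step, total_active_tiles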
[docs] def _clear_global_variables_buffer(self, sim_time, time_step): data = [0] * len(GLOBALS_STRUCT_PACK_FORMAT) + [0] * self.nb_infiltration_zones data[0] = sim_time data[1] = time_step data_buffer = struct.pack( GLOBALS_STRUCT_PACK_FORMAT + "f" * self.nb_infiltration_zones, *data ) glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, len(data_buffer), data_buffer) glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)
[docs] def reset_globals(self, sim_time, time_step, step_num, h, qx, qy): """ :param time_step: the duration of the time step (in case you have a fixed time step) :param step_num: The step number of the next computed simulation step. """ self._current_step = step_num self._clear_global_variables_buffer(sim_time, time_step) self.set_buffers(quantity_arrays=[np.dstack([h, qx, qy]) for i in range(3)])
[docs] def compute_max_step_size2(self): self.compute_time_step_for_tiles_phase1(0) self.compute_time_step_for_tiles_phase2(0)
# def apply_parallel_reduction(self, program, frame_buffer, textures_ids, # buffer_attachments = [GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1], # debug=False, read_result=True): # """ Apply a parallel reduction algorithm using the `program`. # The computation will be applied only on active stripes. # IT IS NOT SAFE to use this if the data that is present *outside* # the region defined by the stripes is not "neutral" as far as # the computation done in the `program` are concerned. This is # du to rounding errors that may leed the program to consider # pixels outside the stripe area. # FIXME That's dirty, one should make sure this function is safe # to use. But I'm afraid it won't be possible since sooner or later # there will be odd-sized stripes. # FIXME The current way of using the parallel execution might not # be safe to a change in the structure of stripes. Double check that! # """ # assert len(textures_ids) == 2 # assert len(buffer_attachments) == 2 # _, internal_format = textures_formats[textures_ids[0]] # assert textures_formats[textures_ids[0]][1] == internal_format, \ # "I expect both the texture to have the same internal format" # glUseProgram(program) # source_texture_ndx = 0 # half_width, half_height = self.width, self.height # while max(half_width, half_height) > 1: # with time_it(f"tiles={half_width*2}x{half_height*2}"): # if debug: # # Debugging code # #print(f"Debug: width={half_width}-height={half_height}, buffer:{currentMinStepSizeTexture}") # p = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], half_width, half_height, internal_format) # s = np.argwhere( p > 0) # p2 = read_texture(frame_buffer, buffer_attachments[1-source_texture_ndx], half_width, half_height, internal_format) # s2 = np.argwhere( p2 > 0) # # The ceil operation is very important to handle # # odd dimensions. Note that if a dimension is a prime number, then # # we never get even-sized half textures ! For example # # 17 will give : 9, 5, 3, 1 ! # # IMPORTANT This computation allows us to do the parallel reduction # # without cleaning the 2 buffers we use for it. Without that, the # # values picked at borders of buffers (quarter buffers) may be off # # by one texel and pick trashed data coming from previous iteration. # # This has nothing to do with the use of stripes but much to do # # with the fact that we have quarter buffers of width/height which # # are sometimes odd, sometimes even. # # set_uniform(program, "factor_x", half_width/ceil(half_width/2)) # # set_uniform(program, "factor_y", half_height/ceil(half_height/2)) # # if half_width % 2 == 0: # # factor_x = 2.0 # # else: # # factor_x = (half_width-2)/(ceil(half_width/2)-1) # # #factor_x = (half_width-1)/half_width/2.0 # # if half_height % 2 == 0: # # factor_y = 2.0 # # else: # # factor_y = (half_height-2)/(ceil(half_height/2)-1) # # #factor_y = (half_height-1)/half_height/2.0 # # factor_x = factor_y = 2.0 # # This one is tricky. In principle, factor shoud be two # # as we're doing the reduction by a factor two. # # Issues occur when width/height are odd. # # In that case, on the second iteration of the reduction # # you may end up collecting data out of the source # # area because the destination area dimension has been # # rounded up (and you'll multiply coordinates by two). # # FIXME A cleaner way to handle this would be to # # bound the texture look up inside the source rectangle. # # Another way would be to sitck the "quarter" destination # # area in the bottom right corner instead of top left. 
# # This way, we'd use OpenGL's texture coordinates clipping # # to our advantage. # factor_x = half_width/(ceil(half_width/2)) # factor_y = half_height/(ceil(half_height/2)) # set_uniform(program, "factor_x", factor_x) # set_uniform(program, "factor_y", factor_y) # old_half_width = half_width # old_half_height = half_height # # half_width = max(1,floor(half_width/2)) # # half_height = max(1,floor(half_height/2)) # half_width = ceil(half_width/2) # half_height = ceil(half_height/2) # # Source texture # glActiveTexture(GL_TEXTURE0) # activate texture unit # glBindTexture(GL_TEXTURE_RECTANGLE, textures_ids[source_texture_ndx]) # set_uniform(program, "minStepSizeSampler", 0) # wire texture unit 0 to texture sampler # # Destination texture # glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer) # # glDrawBuffer - specify which color buffers of the FB are to be drawn into # # (this one sets only ONE buffer so you'll have access to only one "gl_FragColor" # # or "out" varaible; for more use glDrawBuffer_s_) # #glDrawBuffer(GL_COLOR_ATTACHMENTS[1 - source_texture_ndx]) # glDrawBuffer(buffer_attachments[1 - source_texture_ndx]) # if True: # #draw_iso_quads(program, half_width, half_height) # self._draw_single_quad(program, x=0, y=0, w=half_width,h=half_height) # else: # # FIXME CRITICAL Dirty and needless but to remove it I have to get the rounding issues # # right. I have already spent countless hours on these issues and I have yet to # # find a 100% working solution. This is a super TOUGH problem. # # I clear the destiantion texture so that when I'll use it as a asource # # at the next iteration, I'll be able to query "off by one" texels without # # issues. A first idea would be to clear only the texels that can be # # a problem; not the whole texture. # # Second idea: just do parallel reduction without stripes. # clear_texture(textures_ids[1-source_texture_ndx]) # # Fortran==column major, like OpenGL. # # FIXME But since this is a diagonal matrix, do we care ??? # # FIXME Do we have to set it again and again ? # pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # set_uniform(program, "uModelViewProjectionMatrix", pm) # # Bottom/left quarter of the texture. But that will be expanded # # to the full texture in the shader because it multiplies every # # texture coord by 2. # glViewport(0, 0, half_width, half_height) # stripes = self._stripes_on_nap[max(half_width, half_height)] # glBindVertexArray(stripes.get_vao_id()) # glDrawArrays(GL_TRIANGLES, 0, stripes.get_vao_len()) # glBindVertexArray( GL_NONE) # # Swap buffers # source_texture_ndx = 1 - source_texture_ndx # if debug: # # debugging code # p2 = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], half_width, half_height, internal_format) # s2 = np.argwhere( p2 > 0) # # Read the result # assert half_width == 1 and half_height == 1 # if read_result: # # FIXME This is costly !!!! Find another way # with time_it("-- Get min step size 1pixel"): # # Read the bottom left texel (only one !) # result = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], 1, 1, internal_format) # # Note that pyopengl returns a numpy array # # item() translates numpy float to python float # result = result[0,0].item() # else: # result = None # # # Now lets read the result # # glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer) # # # glReadBuffer — select a color buffer source for pixels # # glReadBuffer(GL_COLOR_ATTACHMENTS[source_texture_ndx]) # # assert half_width == 1 and half_height == 1 # # # Read the bottom left texel (only one !) 
# # # Note that pyopengl returns a numpy array # # with time_it("-- Get min step size 1pixel"): # # result = glReadPixels(0, 0, 1, 1, GL_RED, GL_FLOAT)[0,0].item() # item() translates numpy float to python float # return result, source_texture_ndx
[docs] def plot_progress(self, current_quantity, zoom=1.0, mean=0.5, flip=True, gl_window_viewport=(0,0,100,100)): self._gl_window_viewport = glGetIntegerv(GL_VIEWPORT) # Used int tests program = self._texture_viewer_program2 uniforms = { "zoom": float(zoom), "mean": float(mean), # FragCoord (screen) to texture coord "textureCoordinateTransform": ( float(self.nbx/self._gl_window_viewport[2]), float(self.nby/self._gl_window_viewport[3])) } textures= { "quantityTexture": self.quantity_tex[current_quantity], "tileIndirection" : self._tile_indirection_tex, } glUseProgram(program) glBindFramebuffer(GL_FRAMEBUFFER, GL_NONE) # draw on screen glClear(GL_COLOR_BUFFER_BIT) self._draw_active_tiles(program, uniforms, textures, gl_window_viewport) if flip: pygame.display.flip()
#gl_timings_results_full() #gl_timings_results(label)
[docs] def set_border(array, value): array[0,:] = value array[ array.shape[0]-1,:] = value array[:, 0] = value array[:, array.shape[1]-1] = value return array
[docs] def swimming_pool_with_hole_in_water(): width, height = 50, 50 array_shape = (height, width) bathymetry = np.zeros(array_shape) set_border(bathymetry, 10) active_meshes = np.ones(array_shape, dtype=bool) water_height = np.ones(array_shape) set_border(water_height, 0) water_height[20:50-20, 20:50-20] = 0 quantity = np.dstack([water_height, np.zeros(array_shape), np.zeros(array_shape) ]) manning = np.zeros(array_shape) glsim = GLSimulation( width, height, dx=0.01, dy=0.01) glsim.set_buffers( quantity_arrays=[quantity]*3, bathymetry_array=bathymetry, source_cont_array=np.zeros(array_shape), source_impl_array=np.zeros((height, width ,3)), manning_array=manning, active_meshes_array=active_meshes) return glsim
[docs] def convert_wolf_rk_ponderation(wolf2d: prev_sim2D): """ Convert the ponderation value from prev_sim2D to the one used in the GLSimulation. """ ponderation = wolf2d.parameters._scheme_rk if ponderation >= 1.0 or ponderation <= 0.0: logging.error(f"Unsupported ponderation value {ponderation}. I expect 'ponderation' between zero and one. I'll replace by R-K with 0.3/0.7.") ponderation = 0.3 return ponderation
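# Examples of the clamping above: a Wolf ponderation of 0.3 is kept as-is, while values
# such as 0.0, 1.0 or 1.5 fall outside the open interval (0, 1) and are replaced by the
# default Runge-Kutta ponderation of 0.3 (i.e. a 0.3/0.7 weighting of the two sub-steps).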
[docs] def get_epsilon(wolf2d: prev_sim2D): """ Get the epsilon value used in the GLSimulation. Based on the spatial resolution of the simulation, we take the minimum of dx and dy to the power of 4. The result is then compared to 0.000_001 and the minimum of the two values is returned. """ return min( min(wolf2d.dx, wolf2d.dy)**4, 0.000_001)
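# Worked example for get_epsilon: with dx = dy = 0.01 the spatial term is 0.01**4 = 1e-8,
# which is below the 1e-6 cap, so epsilon = 1e-8; with dx = dy = 0.5 the spatial term is
# 0.0625, the cap wins, and epsilon = 1e-6.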
[docs] def get_report_frequency(m: prev_sim2D) -> SimulationDuration: if m.parameters._writing_mode == 1: return SimulationDuration.from_seconds(m.parameters._writing_frequency) elif m.parameters._writing_mode == 0: return SimulationDuration.from_steps(int(m.parameters._writing_frequency)) else: raise Exception(f"Unrecognized value for 'ntypefreq' in prev_sim2D : {m.parameters._writing_mode}")
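# Illustration of the mapping above: _writing_mode == 1 means "report every
# _writing_frequency seconds of simulated time" (SimulationDuration.from_seconds), while
# _writing_mode == 0 means "report every _writing_frequency computed steps"
# (SimulationDuration.from_steps).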
[docs] def plot_evolution_of_simulation_base(sim: SimpleSimulation, rs: ResultsStore): #dx, dy = wolf2d.myparam.dxfin, wolf2d.myparam.dyfin dx, dy = sim.param_dx, sim.param_dy r = [] times = [] clock_times = [] for i in tqdm.tqdm(range(1,rs.nb_results)): t,h,qx,qy,dryup_niter,clock_t = rs.get_named_result(["t","h","qx","qy","dryup_niter","clock_t"],i) msk = h > 0 x_discharge = dx * qx[msk] y_discharge = dy * qy[msk] q = np.sqrt(x_discharge**2 + y_discharge**2) r.append( [np.sum(h[msk])*dx*dy, np.sum(q), dryup_niter]) times.append(t) clock_times.append(clock_t) rs.close() r = np.array(r) fig, axs = plt.subplots(2,2) axs[0,0].plot(times, r[:,0]) axs[0,0].set_xlabel("Sim. Time (s)") axs[0,0].set_ylabel("Water (m³)") axs[0,1].plot(times, r[:,1]) axs[0,1].set_xlabel("Sim. Time (s)") axs[0,1].set_ylabel("Q (m³/s)") #print((r[1:,2]-r[:-1,2]).tolist()) clock_times = np.array(clock_times) axs[1,0].bar(times, np.hstack([clock_times[1:][0], clock_times[1:] - clock_times[:-1]]), width=times[-1] /(1.5*rs.nb_results)) axs[1,0].set_xlabel("Clock Time (s)") axs[1,0].set_ylabel("Sim. Time (sec./recd)") # dry ups tiles * loops * RK or euler axs[1,1].bar(times, [r[0,2]] + (r[1:,2]-r[:-1,2]).tolist(), width=times[-1] /(1.5*rs.nb_results)) axs[1,1].set_xlabel("Sim. Time (s)") axs[1,1].set_ylabel("Dried ups tiles × loops") fig.tight_layout() plt.show()
[docs] def plot_evolution_of_simulation(path_to_model: Path, path_to_results_store: Path): # FIXME Put this in another package: the simulator should not know about the ResultStore import matplotlib.pyplot as plt # wolf2d: prev_sim2D =prev_sim2D(dir=str(path_to_model), splash=False, in_gui=False) rs = ResultsStore(path_to_results_store, "r") #dx, dy = wolf2d.dx_fine, wolf2d.dy_fine dx, dy = 1, 1 r = [] times = [] clock_times = [] for i in tqdm.tqdm(range(1,rs.nb_results)): t,h,qx,qy,dryup_niter,clock_t = rs.get_named_result(["t","h","qx","qy","dryup_niter","clock_t"],i) msk = h > 0 x_discharge = dx * qx[msk] y_discharge = dy * qy[msk] q = np.sqrt(x_discharge**2 + y_discharge**2) r.append( [np.sum(h[msk])*dx*dy, np.sum(q), dryup_niter]) times.append(t) clock_times.append(clock_t) rs.close() r = np.array(r) fig, axs = plt.subplots(2,2) axs[0,0].plot(times, r[:,0]) axs[0,0].set_xlabel("Sim. Time (s)") axs[0,0].set_ylabel("Water (m³)") axs[0,1].plot(times, r[:,1]) axs[0,1].set_xlabel("Sim. Time (s)") axs[0,1].set_ylabel("Q (m³/s)") #print((r[1:,2]-r[:-1,2]).tolist()) clock_times = np.array(clock_times) axs[1,0].bar(times, np.hstack([clock_times[1:][0], clock_times[1:] - clock_times[:-1]]), width=times[-1] /(1.5*rs.nb_results)) axs[1,0].set_xlabel("Clock Time (s)") axs[1,0].set_ylabel("Sim. Time (sec./recd)") # dry ups tiles * loops * RK or euler axs[1,1].bar(times, [r[0,2]] + (r[1:,2]-r[:-1,2]).tolist(), width=times[-1] /(1.5*rs.nb_results)) axs[1,1].set_xlabel("Sim. Time (s)") axs[1,1].set_ylabel("Dried ups tiles × loops") fig.tight_layout() plt.show()