Source code for wolfgpu.glsimulation

import logging
from enum import Enum
from math import floor, ceil, log2, sqrt
from pathlib import Path
import os
import struct
import time
from contextlib import contextmanager
from typing import Iterator


FINISH=False # FIXME Seems unused.
# How often we sample the simulation time in the GPU.
# Warning! Querying the simulation time from the GPU is very
# costly in terms of GPU performance because it forces the GPU
# to synchronise memory. This can almost halve the speed of the GPU!!!
TIME_SAMPLING_RESOLUTION=100 # steps
# The default tile size (in meshes)
DEFAULT_TILE_SIZE=16
# If True, the stripes (used to better select which part of the map to compute)
# will be determined *once* at the beginning of the simulation by looking at the
# NAP map (so the stripes structure will remain constant throughout the
# simulation).
# FIXME DISABLED FOR THE MOMENT (code looks broken, I have fixed the NAP
# functionality and I have yet to port it back to this one)
# If False, the stripes will be computed on the basis of a mesh detection map, that
# is a map that gives which pixels belong to the computation domain or not. That
# map is recomputed at a given frequency, based on the "quantity" (h,qx,qy) map.
# So this is a dynamic tracking of the computation domain.
STRIPES_ON_NAP=True
# The thread width tells how many columns of meshes each thread in a thread
# group will handle. We do that sort of organisation because it helps the
# GPU to go a bit faster.
WET_TILES_DETECT_THREAD_WIDTH = 4
# These must be synced with the GlobalStateBlock SSBO's structure
GLOBALS_STRUCT_PACK_FORMAT = "dfLLLLLL"
GLOBALS_BUFFER_SIZE = struct.calcsize(GLOBALS_STRUCT_PACK_FORMAT)
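
# Illustrative sketch (not used by the simulation itself): how bytes read back
# from the GlobalStateBlock SSBO could be decoded with the pack format above.
# The field order mirrors the GLSL declaration generated in _load_shaders();
# `raw` stands for a hypothetical bytes object obtained with glGetBufferSubData.
def _decode_global_state_sketch(raw: bytes) -> dict:
    (simulation_time, time_step, active_infiltration_line, dry_up_iterations,
     nb_active_tiles, status_code,
     total_active_tiles_hi, total_active_tiles_lo) = struct.unpack(
        GLOBALS_STRUCT_PACK_FORMAT, raw[:GLOBALS_BUFFER_SIZE])
    return {
        "simulation_time": simulation_time,
        "time_step": time_step,
        "active_infiltration_line": active_infiltration_line,
        "dry_up_iterations": dry_up_iterations,
        "nb_active_tiles": nb_active_tiles,
        "status_code": status_code,
        # The cumulative wet-tile counter is split over two 32-bit words.
        "total_active_tiles": (total_active_tiles_hi << 32) | total_active_tiles_lo,
    }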
MAXIMUM_TIME_STEP = 1.0 # P.A.'s default (when no infiltration present)
# The value encoding the fact that in the infiltration zones, a mesh does *not* have # an infiltration (infiltration zones are numbered from zero, -1 represents "no zone")
MESH_WITHOUT_INFILTRATION = -1
ALPHA_VALUE_NDX = 0
ALPHA_FLAGS_NDX = 1
PARTS_QUADS=1
PARTS_STRIPES=2
# Memory tracking is only ever enabled on the original development machine,
# and it is forced off just below anyway. (.get() avoids a KeyError on
# platforms where these environment variables are absent.)
MEM_TRACKING = os.environ.get('USERNAME') == 'StephaneC' and os.environ.get('COMPUTERNAME') == 'FEDER1'
MEM_TRACKING = False
if MEM_TRACKING:
    from pympler import tracker

import tqdm
import numpy as np
import matplotlib.pyplot as plt
plt.set_loglevel(level='warning')

from OpenGL.GL import *
from OpenGL.GL import (
    glMemoryBarrier,
    GL_TEXTURE_FETCH_BARRIER_BIT,
    GL_SHADER_IMAGE_ACCESS_BARRIER_BIT,
    GL_WRITE_ONLY,
    GL_READ_ONLY,
    GL_TEXTURE_UPDATE_BARRIER_BIT,
    GL_SHADER_STORAGE_BARRIER_BIT,
)
from OpenGL.GL import glBindBuffer, GL_SHADER_STORAGE_BUFFER, glGetBufferSubData, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS
from OpenGL.GL import GL_RGB32F

from .gl_utils import *
from .results_store import ResultsStore
from .utils import nice_timestamp
from .simple_simulation import SimpleSimulation, InfiltrationInterpolation, SimulationDuration, \
    SimulationDurationType, TimeStepStrategy

# Allow forward declaration
from .tile_packer import TilePacker, TilePackingMode

# Path fiddling to be able to load wolfhece's classes
import sys
_root_dir = Path(__file__).parent.parent.parent
if str(_root_dir) not in sys.path:
    sys.path.insert(0, str(_root_dir))

# from wolfhece.PyGui import Wolf2DModel  # deprecated because we don't use it anymore except for GUI
from wolfhece.mesh2d.wolf2dprev import prev_sim2D

# Doesn't seem to work because something else catches the SIGINT
# before Python does (likely binary libs we use)
# import signal
# control_c_was_hit = False
# def handler(signum, frame):
#     global control_c_was_hit
#     print("Ctrl-C hit" * 200)
#     control_c_was_hit = True
# signal.signal(signal.SIGINT, handler)

# glReadPixels = None
# read_texture = None
_perf_counters = dict()
@contextmanager
def time_it(msg: str = None) -> Iterator[None]:
    yield
    # tic: float = time.perf_counter()
    # # print(f"...starting {msg}")
    # try:
    #     yield
    # finally:
    #     if MEM_TRACKING:
    #         toc: float = time.perf_counter()
    #         delta = (toc - tic)*1000  # to millisec.
    #         if msg not in _perf_counters:
    #             _perf_counters[msg] = [delta]
    #         else:
    #             _perf_counters[msg].append(delta)
    #         # print(f"chrono: '{msg or ''}' = {1000*delta:.3f}ms")
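
# Usage sketch: time_it() is currently a no-op context manager (its measuring
# body is kept commented out above); when re-enabled it accumulates the wall
# clock duration of the wrapped block into _perf_counters under the given label.
#
#   with time_it("euler step"):
#       ...  # the timed block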
_gl_perf_counters = dict()
_timers = dict()
@contextmanager
def time_it_gl(msg: str = None) -> Iterator[None]:
    yield
    # query_id = glGenQueries(1)
    # _timers[f"(gl) {msg}"] = query_id[0]
    # glBeginQuery(GL_TIME_ELAPSED, query_id[0])
    # try:
    #     yield
    # finally:
    #     glEndQuery(GL_TIME_ELAPSED)
_query_id = None
def test_query_timer():
    global _query_id
    _query_id = glGenQueries(1)[0]
    glQueryCounter(_query_id, GL_TIMESTAMP)
def test_query_timer_end():
    while not glGetQueryObjectiv(_query_id, GL_QUERY_RESULT_AVAILABLE):
        pass
    t = glGetQueryObjectuiv(_query_id, GL_QUERY_RESULT)
    if t != 4294967295:
        print("yo")
    pass
_gl_queries_full = dict()
@contextmanager
def time_it_full(msg: str = None) -> Iterator[None]:
    test_query_timer()
    query_id = glGenQueries(2)
    assert msg not in _gl_queries_full
    tic: float = time.perf_counter()
    glQueryCounter(query_id[1], GL_TIMESTAMP)
    glBeginQuery(GL_TIME_ELAPSED, query_id[0])
    # Negative values are because it's a 32 bits counter that rolls
    # over every 2^32 nanosecs (so pretty often)
    tic_gpu = glGetIntegerv(GL_TIMESTAMP)  # v,64v: always 1.9...e-196!, Integer gives negative numbers, i_v is indexed; no good
    try:
        yield
    finally:
        toc: float = time.perf_counter()
        delta = (toc - tic)*1000  # to millisec.
        glEndQuery(GL_TIME_ELAPSED)
        _gl_queries_full[msg] = (query_id[0], query_id[1], tic_gpu, delta)
def finish_timings():
    for msg, query_id in _timers.items():
        while not glGetQueryObjectiv(query_id, GL_QUERY_RESULT_AVAILABLE):
            pass
        # From ns to ms.
        t = glGetQueryObjectuiv(query_id, GL_QUERY_RESULT) / 1_000_000.0
        if MEM_TRACKING:
            if msg not in _gl_perf_counters:
                _gl_perf_counters[msg] = [t]
            else:
                _gl_perf_counters[msg].append(t)
    _timers.clear()
_perf_counters_full = dict()
def finish_timings_full():
    if MEM_TRACKING:
        for msg, data in _gl_queries_full.items():
            times = []
            while not glGetQueryObjectiv(data[0], GL_QUERY_RESULT_AVAILABLE):
                pass
            times.append(glGetQueryObjectuiv(data[0], GL_QUERY_RESULT) / 1_000_000.0)
            # while not glGetQueryObjectuiv(data[1], GL_QUERY_RESULT_AVAILABLE):
            #     pass
            times.append(glGetQueryObjectiv(data[1], GL_QUERY_RESULT))
            times.append(data[2])
            # So each time tuple is:
            # (timestamp, elapsedtime_gpu, elapsedtime_cpu)
            times.append(data[3])  # wall clock time
            if msg not in _perf_counters_full:
                _perf_counters_full[msg] = [times]
            else:
                _perf_counters_full[msg].append(times)
        _gl_queries_full.clear()
def gl_timings_results(figname):
    plt.figure(figsize=(20, 20), dpi=200)
    all_times = _gl_perf_counters | _perf_counters
    keys = list(filter(lambda k: "--" in k, sorted(all_times.keys())))
    gl_total = 0
    gndx = 1
    for k in keys:
        if "--" in k:
            t = np.array(all_times[k])
            if len(t) > 5:
                # Skip first iterations
                tc = t[5:]
            else:
                tc = t
            std = np.std(tc)
            mean = np.mean(tc)
            k = k.replace("--", "")
            print(f"{k}: mean:{mean:.3f}ms std:{std:.3f}ms")
            if "(gl)" in k:
                gl_total += mean
            ax = plt.subplot((len(keys)+2)//3, 3, gndx)
            ax.plot(t)
            ax.set_title(f"{k}")  # : ZBuf={ZBUFFER} mean:{mean:.3f}ms std:{std:.3f}ms")
            ax.axhline(np.mean(tc), c="red", label=f"{mean:.2f}±{std:.2f}ms")
            ax.set_ylim(max(0, mean-2*std), mean+2*std)
            ax.legend()
            gndx += 1
    plt.suptitle(f"GL total {gl_total:.2f} (GLFinish:{FINISH}, stripes wired on NAP only:{STRIPES_ON_NAP})")
    plt.savefig(figname, dpi=300)
    # plt.show()
def gl_timings_results_full():
    all_times = _perf_counters_full
    keys = list(filter(lambda k: "--" in k, sorted(all_times.keys())))
    gl_total = 0
    gndx = 1
    ys = []
    xfrom = []
    xto = []
    for k in keys:
        if "--" in k:
            t = np.array(all_times[k])
            ys.extend([k]*t.shape[0])
            xfrom.extend(t[:, 0])
            xto.extend(t[:, 1])

    import altair as alt
    import pandas as pd
    data = pd.DataFrame()
    data["activity"] = ys
    data["from"] = xfrom
    data["to"] = xto
    alt.Chart(data).mark_bar().encode(
        x='from',
        x2='to',
        y='activity',
        color=alt.Color('activity', scale=alt.Scale(scheme='dark2')))
import psutil
def log_mem_used(msg=None):
    process = psutil.Process(os.getpid())
    # print(f"Used memory: {process.memory_info().rss // (1024*1024)}MB. {msg or ''}")
def debug_framebuffer(framebuffer_id, color_attachment, width, height, format_, type_, title="", component=0):
    glBindFramebuffer(GL_FRAMEBUFFER, framebuffer_id)
    # glReadBuffer — select a color buffer source for pixels
    glReadBuffer(GL_COLOR_ATTACHMENTS[color_attachment])
    # Read the bottom left texel (only one !)
    # Note that pyopengl returns a numpy array
    texture_content = glReadPixels(0, 0, width, height, format_, type_)
    if format_ == GL_RGB:
        texture_content = texture_content.reshape(height, width, 3)[:, :, component]
    elif format_ == GL_RED_INTEGER:
        texture_content = texture_content.reshape(height, width)
    else:
        raise Exception(f"format unrecognized : {format_}")
    print(np.sum(texture_content))
    print(np.max(texture_content))
    print(np.min(texture_content))
    plt.imshow(texture_content)
    plt.colorbar()
    plt.title(title)
    plt.show()
class TileOptimisation(Enum):
    COMPUTE_SHADER = 1
    REGULAR_SHADER = 2
SHADER_PATH = Path(Path(__file__).parent, "shaders")
# def draw_iso_quads(program, buffer_width, buffer_height):
#     # WARNING! This function expects a program to be "glUse" 'd
#     # WARNING! It also expects the DrawBuffer and any other relevant
#     # GL state to be set. We do the bare minimum here.
#     np_quad_vertices, np_quad_texcoords_rectangle = make_unit_quad(1, 1)
#     # We compute the Model (model crd -> world crd) * View (world -> camera) * Projection (camera -> clip) matrix
#     # Remember that all coordinates of our special "quad" are in [-1,+1]
#     # and we ultimately want everything to be in "clip" coordinates, that is [-1,+1]
#     # (screen coordinates are handled elsewhere).
#     # Therefore, a simple unit matrix will do. Actually, we want to flatten the z
#     # coordinates, just to insist on the fact that we don't have any perspective, that
#     # the z coordinate is useless here. That's why there's a zero in the diagonal.
#     # See https://numpy.org/doc/stable/reference/arrays.ndarray.html#internal-memory-layout-of-an-ndarray
#     pm = np.asfortranarray(np.diag([1, 1, 0, 1]).astype(np.float32))  # Fortran == column major, like OpenGL
#     set_uniform(program, "uModelViewProjectionMatrix", pm)
#     attr_loc = glGetAttribLocation(program, "aVertex")  # for the simple vertex shader
#     assert attr_loc >= 0
#     glEnableVertexAttribArray(attr_loc)
#     glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, np_quad_vertices)
#     assert glGetError() == GL_NO_ERROR
#     """
#     In the fragment shaders, the texture is indexed via gl_FragCoord.xy.
#     Therefore the texture coordinates will be computed on basis of the
#     screen coordinates. So we don't need to associate texture coordinates
#     to vertices.
#     This makes it easy to prepare the triangles that need to
#     be displayed: we just need the triangles to contain the
#     screen (exactly), their coordinates don't matter (since we infer the texture
#     indices from the *screen* coordinates of the drawn pixels.)
#     """
#     glViewport(0, 0, buffer_width, buffer_height)
#     assert glGetError() == GL_NO_ERROR
#     glDrawArrays(GL_TRIANGLES, 0, len(np_quad_vertices))
#     assert glGetError() == GL_NO_ERROR
def make_quad_fast(xmin, xmax, ymin, ymax):
    z = [[xmax, ymin, 0],
         [xmax, ymax, 0],
         [xmin, ymax, 0],
         [xmax, ymin, 0],
         [xmin, ymax, 0],
         [xmin, ymin, 0]]
    return z
def round_to_tile(x, round_func, tsize: int):
    assert round_func in (ceil, floor)
    assert tsize >= 1
    if tsize > 1:
        return round_func(x / tsize)*tsize
    else:
        return x
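
# Quick illustration of round_to_tile(): snap a coordinate to a tile boundary,
# flooring or ceiling depending on which end of an interval is being aligned
# (here with the default 16-mesh tiles).
#
#   round_to_tile(37, floor, DEFAULT_TILE_SIZE)  # -> 32
#   round_to_tile(37, ceil, DEFAULT_TILE_SIZE)   # -> 48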
""" class Stripes: def __init__(self, grid_width, grid_height): self._grid_width = grid_width self._grid_height = grid_height self._frozen = False self._stripes = [] self._zoom = 1 self._mean = 0 def same_as(self, b): a = np.array(self._stripes) b = np.array(b._stripes) return np.array_equal(a,b) def largest_dim(self): return max(self._grid_width, self._grid_height) def add_stripe(self, xmin, xmax, ymin, ymax): logging.trace(f"stripe: {xmin}, {xmax}, {ymin}, {ymax}") assert not self._frozen self._stripes.append([xmin, xmax, ymin, ymax]) def __len__(self): return len(self._stripes) def has_stripes(self): return len(self._stripes) >= 1 def group_by(self, n): # Join stripes together to reduce their number # Stripes are grouped by groups of size n. # Stripes are expected to be horizontal bands, # organized as a vertical stack. So we group them # "by rows". assert n >= 2 grouped_stripes = [] for j in range(0,len(self._stripes),n): # We take n stripes and create a new (minimal) strip that covers # them all. a = np.array(self._stripes[j:min(len(self._stripes), j+n)]) grouped_stripes.append([ np.min(a[:,0]), np.max(a[:,1]), np.min(a[:,2]), np.max(a[:,3])]) self._stripes = grouped_stripes # def reshape_and_freeze(self,w,h): # assert not self._frozen # assert w > self._grid_width and h > self._grid_height # self._frozen = True # segments = [] # a = np.array(self._stripes) # a[:,[0,2]] = np.floor(w*a[:,[0,2]]/self._grid_width) # a[:,[1,3]] = np.ceil( h*a[:,[1,3]]/self._grid_height) # for row in range(a.shape[0]): # segments.extend(make_quad_fast(*a[row])) # self._grid_width, self._grid_height = w, h # # To NDC # r = np.array(segments, dtype=np.float32) # r[:,0] = r[:,0]*2 / w - 1.0 # r[:,1] = r[:,1]*2 / h - 1.0 # self._stripes_ndc = r def freeze(self): # Transform the list of stripes into quads. # It is said "frozen" because it is expected that the stripes don't change # anymore (to remain in sync with the quads) assert not self._frozen self._frozen = True assert len(self._stripes), "Nothing to freeze ? I don't handle that yet !" # Stripes to quads segments = [] for xmin, xmax, ymin, ymax in self._stripes: segments.extend(make_quad_fast(xmin, xmax, ymin, ymax)) # Quads coordinates to NDC ( from -1 to +1) r = np.array(segments, dtype=np.float32) r[:,0] = 2*r[:,0] / self._grid_width - 1.0 r[:,1] = 2*r[:,1] / self._grid_height - 1.0 self._stripes_ndc = r def get_vao_len(self): # Computed for glDrawArrays return len(self._stripes)*6 def get_vao_id(self): # Vertex array object id return self._stripes_vao_id def to_opengl(self): assert self._frozen self._stripes_vao_id = glGenVertexArrays(1) glBindVertexArray( self._stripes_vao_id ) vertex_buffer_object_id = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_object_id) # OpenGL doc: creates and initializes a buffer object's data store glBufferData(GL_ARRAY_BUFFER, self._stripes_ndc.nbytes, self._stripes_ndc, GL_STATIC_DRAW) # FIXME This is dangereous: I hard code the attrib location # so the expectation is that it will be the same in ALL programs. # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 # NOTE You don't need to glUseProgram() before; this is just an # attribute number. attr_loc = 0 # Array access is enabled by binding the VAO in question and calling glEnableVertexAttribArray(attr_loc) # See https://www.khronos.org/opengl/wiki/Vertex_Specification#Vertex_Array_Object # "Think of it like this. 
glBindBuffer sets a global variable, then # glVertexAttribPointer reads that global variable and stores it in the # VAO." glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # order important else you clear the buffer bind in # the vertex array object :-) glBindVertexArray(GL_NONE) glBindBuffer(GL_ARRAY_BUFFER, GL_NONE) # def stripes_to_grid(self, stripes, new_grid_width, new_grid_height): # segments = [] # if new_grid_width != self._grid_width or new_grid_height != self._grid_height: # for xmin,xmax,ymin,ymax in self._stripes: # xmin = floor((xmin/self._grid_width)*new_grid_width) # xmax = ceil((xmax/self._grid_width)*new_grid_width) # ymin = floor((ymin/self._grid_height)*new_grid_height) # ymax = ceil((ymax/self._grid_height)*new_grid_height) # segments.extend(make_quad_fast(xmin,xmax,ymin,ymax)) # else: # for xmin,xmax,ymin,ymax in self._stripes: # segments.extend(make_quad_fast(xmin,xmax,ymin,ymax)) # r = np.array(segments, dtype=np.float32) # r[:,0] = r[:,0]*2 / width - 1.0 # r[:,1] = r[:,1]*2 / height - 1.0 # return r """
def shrink_array_by_two(a):
    """ Cut array in 2. A 5x5 array will be cut to a 3x3 (notice the rounding). """
    u = a[::2, :]   # Even rows
    v = a[1::2, :]  # Odd rows
    if v.shape[0] != u.shape[0]:
        # Odd number of rows => padding
        v = np.vstack([v, np.zeros((1, u.shape[1]))])
    uv = u + v

    u = uv[:, ::2]   # Even columns
    v = uv[:, 1::2]  # Odd columns
    if v.shape[1] != u.shape[1]:
        # Odd number of columns => padding
        v = np.hstack([v, np.zeros((u.shape[0], 1))])
    uv = u + v
    return uv
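
# Example of shrink_array_by_two(): a 5x5 array of ones becomes 3x3, each output
# cell holding the sum of the (up to) 2x2 block it covers.
#
#   >>> shrink_array_by_two(np.ones((5, 5)))
#   array([[4., 4., 2.],
#          [4., 4., 2.],
#          [2., 2., 1.]])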
def divide_shape(shape):
    return ((shape[0]+1)//2, (shape[1]+1)//2)
def integer_div(a, b):
    # Integer division, rounded up (ceiling division).
    if a % b == 0:
        return a // b
    else:
        return (a // b) + 1
def find_active_tiles(a, tile_size=DEFAULT_TILE_SIZE):
    shrinked = np.zeros((integer_div(a.shape[0], tile_size),
                         integer_div(a.shape[1], tile_size)), dtype=a.dtype)
    for r in range(0, a.shape[0], tile_size):
        r_min, r_max = r, min(a.shape[0], r+tile_size)
        for c in range(0, a.shape[1], tile_size):
            c_min, c_max = c, min(a.shape[1], c+tile_size)
            s = np.sum(a[r_min:r_max, c_min:c_max])
            shrinked[r//tile_size, c//tile_size] = s
    return shrinked
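
# Example of find_active_tiles(): each cell of the shrunk array counts how many
# non-zero meshes fall inside the corresponding tile (partial edge tiles included).
#
#   >>> a = np.zeros((20, 20)); a[0, 0] = 1; a[17, 18] = 1
#   >>> find_active_tiles(a, tile_size=16)
#   array([[1., 0.],
#          [0., 1.]])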
def isPowerOfTwo(x):
    # From https://www.geeksforgeeks.org/python-program-to-find-whether-a-no-is-power-of-two/
    # First x in the below expression
    # is for the case when x is 0
    return (x and (not(x & (x - 1))))
# def build_stripes_for_nap_subsizes(nap, tile_size=DEFAULT_TILE_SIZE) -> dict[int, Stripes]:
#     # tile_size: size of tiles, a power of two.
#     # Based on the NAP map (which is supposed to remain constant during the
#     # simulation), we isolate tiles that are deemed "active" (because they
#     # contain at least one active mesh). Then we go on to shrink that map by a
#     # factor of 2 and compute stripes that cover the resulting map. We iterate
#     # until we reach a 1x1 shrinked map.
#     # Mainly used to compute the max step size which needs to look at the entire NAP
#     # (because of infiltrations)
#     assert isPowerOfTwo(tile_size), "We only support tile sizes which are powers of two"
#     stripes: dict[int, Stripes] = dict()
#     """ Read this this way:
#     We start at resolution 4000 with a stripe size of 16. In parallel reduction we always
#     draw on the next resolution (4000/2=2000) so we must use
#     the correct tile size.
#     At resolution 2000, we'll consider stripes made of tiles of size 16 (!)
#     At resolution 1000, we'll consider stripes made of tiles of size 4
#     At resolution 500, we'll consider stripes made of tiles of size 2
#     At resolution 250, we'll consider stripes made of tiles of size 1
#     At resolution 125, we'll consider stripes made of tiles of size 1 (!)
#     etc.
#     FIXME Nothing says that the way we reduce the tile size is optimal...
#     """
#     tile_size_at_level = tile_size
#     stop = False
#     while True:
#         dims = nap.shape
#         # We look at our nap through a lens of tiles
#         shrinked = find_active_tiles(nap, tile_size_at_level)
#         # Once we have the tiles, we build the stripes.
#         # The stripes will be used to draw on the current
#         # nap resolution (and that will be expanded at the upper
#         # resolution by a factor of two).
#         # The stripes coordinates are expressed in the
#         # context of the current resolution.
#         level_stripes = find_stripes(shrinked, dims[1], dims[0],
#             tile_size_at_level, tile_size_at_level, with_dilatation=False)
#         logging.debug(f"At resolution : {nap.shape}, shrinked:{shrinked.shape}, tile size {tile_size_at_level}, {len(level_stripes)} stripes")
#         stripes[max(nap.shape)] = level_stripes
#         # Cut the resolution in two
#         nap = shrink_array_by_two(nap)
#         # Cut the tile size too
#         tile_size_at_level = max(1, tile_size_at_level // 2)
#         if stop:
#             break
#         if max(nap.shape) == 1:
#             # 1x1 shrinked nap => end.
#             stop = True
#     for v in stripes.values():
#         v.freeze()
#         v.to_opengl()
#     return stripes
def find_stripes(a, rw, rh, tile_w, tile_h, with_dilatation=True):
    """ Find stripes on a. Elements of a different than zero are the ones taken
    into account. There's one stripe per line of the array!

    Please note that the stripes coordinates will be in NDC (normalized device
    coordinates, see OpenGL) over the array (that is, they're in [-1,+1]
    regardless of the size of the array).
    """
    from scipy.ndimage import binary_dilation, generate_binary_structure

    # Why use a structure in the dilatation? Because from one position, the
    # computations can cover up to two cells away. So if we go one cell up and
    # one cell right (for example), we may need to have a tile there... (a
    # regular dilatation has a default "+" shape which leaves 4 corners open)
    # The binary structure we ask here is a 3x3 all-true array.
    if with_dilatation:
        a = binary_dilation(a, structure=generate_binary_structure(2, 2))
    else:
        a = a.astype(bool)

    zeros_column = np.zeros((a.shape[0], 1), dtype=bool)
    width, height = a.shape[1], a.shape[0]
    a = np.hstack([zeros_column, a, zeros_column])

    # true on the left side of the edge across which values change.
    # Ex. a[0]=True a[1]=False a[2]=False
    # diff[0] = a[0] ^ a[1] = True
    # diff[1] = a[1] ^ a[2] = False
    # BUT BUT BUT we have padded !!!
    # So it's true on the right side !
    diff = a[:, 1:] ^ a[:, :-1]

    stripes = Stripes(grid_width=rw, grid_height=rh)
    for y in range(height):
        changes = list(np.argwhere(diff[y, :] != 0))
        if len(changes) % 2 != 0:
            # FIXME Since we pad with zeros above, this should never happen.
            raise Exception("I don't handle stripes that begin/end at borders of array")

        # changes = changes + [[width]]
        for i in range(0, len(changes), 2):
            # +1 to account for the "xor" computation offset above.
            # FIXME compute the diff xor above in another way to avoid the (ugly) +1.
            # space += (changes[i+1][0] - changes[i][0])*1
            stripes.add_stripe(changes[i][0]*tile_w, changes[i+1][0]*tile_w,
                               y*tile_h, (y+1)*tile_h)

    return stripes
def find_non_empty_tiles_on_nap(nap: np.ndarray, n: int):
    """ Tiles on NAP are used to guide the first level of computation, that is
    at the maximum detail level. After that level, one uses stripes which will
    be dynamically adjusted to water coverage.

    Tiles on NAP are not meant to change over the course of the simulation.
    """
    assert nap.dtype == bool
    h, w = nap.shape
    assert n < min(h, w)

    # npa = water_height
    all_quads = []
    total_active_meshes_on_tile = 0
    total_meshes_on_tile = 0
    marks = []
    for y in range(n):
        first_x = None
        last_x = None
        for x in range(n):
            xmin, xmax = int(w/n*x), int(w/n*(x+1))  # int is floor.
            ymin, ymax = int(h/n*y), int(h/n*(y+1))
            active_meshes_on_tile = int(np.sum(nap[ymin:ymax, xmin:xmax]))
            if active_meshes_on_tile > 0:  # (x+y)%2 == 0:
                total_active_meshes_on_tile += active_meshes_on_tile
                total_meshes_on_tile += (xmax - xmin)*(ymax - ymin)

                # In OpenGL clip space, the origin (0,0) is at
                # the center of the screen. The corners are (-1,-1) and
                # (1,1) so they span a range of 2x2.
                # Here we make coordinates that will be fed into
                # a vertex shader that is expected to NOT transform
                # them. So our "world coord" in (-1,+1) will be
                # mapped directly by the VtxShader to the clip space crd
                # which is also (-1,+1).
                if True:
                    # Compressed stripes
                    if first_x is None:
                        first_x = x
                        last_x = x
                    elif x-1 == last_x:
                        last_x = x
                    else:
                        xmin, xmax = int(w/n*first_x), int(w/n*(last_x+1))  # int is floor.
                        all_quads.append(make_quad(
                            float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                            float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))
                        first_x = x
                        last_x = x
                else:
                    # Regular, uncompressed stripes
                    all_quads.append(make_quad(
                        float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                        float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))
                    marks.append((x, y))

        if first_x is not None:
            xmin, xmax = int(w/n*first_x), int(w/n*(last_x+1))  # int is floor.
            all_quads.append(make_quad(
                float(xmin/w)*2.0-1, float(xmax/w)*2.0-1,
                float(ymin/h)*2.0-1, float(ymax/h)*2.0-1))

    # import matplotlib.pyplot as plt
    # plt.imshow(npa)
    # for x, y in marks:
    #     plt.scatter(x*w/N, y*h/N)
    # plt.show()

    logging.debug(f"{w}x{h} area, {len(all_quads)} tiles. {round(100*len(all_quads)/(n**2))}% tile coverage. Active meshes on active tiles: {100*total_active_meshes_on_tile/total_meshes_on_tile:.1f}%")
    logging.debug(f"{total_active_meshes_on_tile} active on tiles / {np.sum(nap)} active / {nap.size} total meshes.")

    r = np.vstack([np_quad_vertices for np_quad_vertices, _ in all_quads]).astype(np.float32)
    return r
# draw_iso_quad_regular = draw_iso_quads

# cache = dict()
# quad_buffer = None
# nb_quads = None
# all_triangles = None
class GLSimulationGlobalState:
    def __init__(self, simulation_time, time_step, active_infiltration_line,
                 nb_dryup_iterations, nb_active_tiles, status_code,
                 total_active_tiles, infiltration_zones):
        (self.simulation_time, self.time_step, self.active_infiltration_line,
         self.nb_dryup_iterations, self._nb_active_tiles, self.status_code,
         self.total_active_tiles, self.infiltration_zones) = (
            simulation_time, time_step, active_infiltration_line,
            nb_dryup_iterations, nb_active_tiles, status_code,
            total_active_tiles, infiltration_zones)

    @property
    def nb_active_tiles(self):
        return self._nb_active_tiles

class GLSimulation:
    def set_params(self, width, height, ponderation_runge_kutta: float=0.3,
                   dx: float=1.0, dy: float=1.0, courant: float=0.1, froude_max: float=10.0,
                   time_step_strategy=TimeStepStrategy.OPTIMIZED_TIME_STEP,
                   time_step: float = None,
                   simulation_duration=SimulationDuration.from_seconds(10),
                   report_frequency=SimulationDuration.from_steps(10),
                   basefilename: str="simul",
                   tile_size=DEFAULT_TILE_SIZE,
                   infiltration_interpolation: InfiltrationInterpolation=InfiltrationInterpolation.NONE,
                   tile_packing_mode: TilePackingMode = TilePackingMode.REGULAR,
                   optimized_indirection=False,
                   track_flows_at_borders=False):

        # print(f"set params !!! {width} {height}")
        assert type(width) == type(height) == int
        assert width >= 1, "The domain has no width"
        assert height >= 1, "The domain has no height"
        assert isinstance(report_frequency, SimulationDuration)
        assert (time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP and (time_step is not None and time_step > 0)) \
            or (time_step_strategy == TimeStepStrategy.OPTIMIZED_TIME_STEP and time_step == None), \
            f"Time step strategy is '{time_step_strategy}' but time_step is '{time_step}', set it to None."

        self.set_infiltration_interpolation(infiltration_interpolation)
        self.basefilename = basefilename
        self.time_step_strategy = time_step_strategy
        self.time_step = time_step or 0
        self.report_frequency = report_frequency
        self.g = 9.81
        self.dx = dx
        self.dy = dy
        self.courant = courant
        self.froude_max = froude_max
        self.set_tile_size(tile_size)

        # FIXME This is quite arbitrary. One should check it's the
        # same in WolfCLI. float32 machine epsilon is
        # 1.19e-07 (see wikipedia).
        self.epsilon = min(min(self.dx, self.dy)**4, 0.000_001)

        assert ponderation_runge_kutta <= 1.0, f"Bad RungeKutta ponderation:{ponderation_runge_kutta}, should be <= 1"
        self.ponderation_runge_kutta = ponderation_runge_kutta

        assert isinstance(simulation_duration, SimulationDuration), f"Expected SimulationDuration, you gave {simulation_duration}"
        self.simulation_duration = simulation_duration

        self.nb_active_meshes = 0
        self._tile_packing_mode = tile_packing_mode
        self._optimize_tile_indirection = optimized_indirection
        self._track_flows_at_border = False
        self.reset_size(width, height)
    def set_track_flows_at_borders(self, v):
        self._track_flows_at_border = v
        if v:
            packed_width, packed_height = self._tile_packer.packed_size()
            # First texture is for the Euler step. Second one is for the RK step.
            self._flows_at_border_tex = [
                upload_blank_texture_to_gpu(packed_width, packed_height,
                                            GL_TEXTURE_RECTANGLE, format=GL_RGBA32F)
                for _ in range(2)
            ]
            # This one will be reused in Euler and RK
            self._rhs_tex = upload_blank_texture_to_gpu(packed_width, packed_height,
                                                        GL_TEXTURE_RECTANGLE, format=GL_RGBA32F)
    def force_max_step_size(self, s: float):
        """ Prevents the max step size computation. Pass None to re-enable
        the computation. Used in test/debug.
        """
        if s:
            self.max_time_step_inactive_tile = s
            self._forced_max_step_size = True
        else:
            self._forced_max_step_size = False
    def set_ensure_positive_height(self, b: bool):
        """ Enable or disable positive height enforcement. Used in debugging/testing. """
        self._ensure_positive_height = b
    def set_g(self, g):
        assert g >= 0
        self.g = g
    def set_froude_limit(self, l):
        """ l = Froude max. If l is None, then we turn off the Froude limitation. """
        self.froude_max = l
    def set_fixed_time_step(self, d):
        assert d > 0
        self.time_step = d
        self.time_step_strategy = TimeStepStrategy.FIXED_TIME_STEP

        if self.max_step_size_tex is not None:
            ntx, nty = self._tile_packer.packed_size_in_tiles()
            # Here we store the time step so that in case of FixedTimeStep strategy,
            # one can use this one.
            self.max_step_size_tex = upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE,
                                                                 GL_R32F, self.time_step)
    def set_report_frequency(self, d: SimulationDuration):
        assert isinstance(d, SimulationDuration)
        self.report_frequency = d
    def set_simulation_duration(self, d: SimulationDuration):
        assert isinstance(d, SimulationDuration)
        self.simulation_duration = d
    def set_courant(self, c):
        self.courant = c
    def set_euler_ponderation(self):
        """ Disable Runge-Kutta and let Euler be the scheme to use. """
        self.ponderation_runge_kutta = None
    def set_rk_ponderation(self, p):
        # assert 0 < p < 1, "A correct weight for Runge-Kutta must be > 0 and < 1"
        self.ponderation_runge_kutta = p
    def set_infiltration_interpolation(self, lerp: InfiltrationInterpolation):
        assert type(lerp) == type(InfiltrationInterpolation.NONE)
        assert lerp in (InfiltrationInterpolation.NONE, InfiltrationInterpolation.LINEAR), \
            f"Expected a proper infiltration interpolation: you gave '{lerp}'"
        self._infiltration_interpolation = lerp
    @property
    def infiltration_interpolation(self) -> InfiltrationInterpolation:
        return self._infiltration_interpolation
    def set_debug(self, dbg: bool):
        # Record more data in results
        assert type(dbg) == bool
        self._debugging = dbg
        self._create_debugging_textures()
    def set_tiles_packing_mode(self, mode: TilePackingMode):
        self._tile_packing_mode = mode
    def set_optim_geom_shaders(self, enable):
        if enable == "CS" or enable == TileOptimisation.COMPUTE_SHADER:
            self._optim_geom_shaders = TileOptimisation.COMPUTE_SHADER
        elif enable == "RS" or enable == TileOptimisation.REGULAR_SHADER:
            logging.warning("Deprecated tiles optimisation")
            self._optim_geom_shaders = TileOptimisation.REGULAR_SHADER
        elif enable is None:
            self._optim_geom_shaders = None
        else:
            raise Exception("Unknown tile optimisation strategy")

        if self._optim_geom_shaders:
            logging.debug(f"GeoShader optimisation enabled {self._optim_geom_shaders.name}")
        else:
            logging.debug("GeoShader optimisation disabled")
    def __init__(self, width, height, ponderation_runge_kutta: float=0.3,
                 dx: float=1.0, dy: float=1.0, courant: float=0.1, froude_max: float=10.0,
                 time_step_strategy=TimeStepStrategy.OPTIMIZED_TIME_STEP, time_step: float = 1.0,
                 max_steps=SimulationDuration.from_seconds(10),
                 report_frequency=SimulationDuration.from_steps(10),
                 basefilename: str="simul", tile_size=DEFAULT_TILE_SIZE,
                 shader_log_path: Path = None,
                 tile_packing_mode: TilePackingMode = TilePackingMode.REGULAR,
                 optimized_indirection: bool = False):

        try:
            is_gl_good_enough = (glGetIntegerv(GL_MAJOR_VERSION), glGetIntegerv(GL_MINOR_VERSION)) >= (4, 6)
        except Exception as ex:
            raise Exception("Seems like no OpenGL context has been set up. Suggestion: start pygame, glfw or a wxWidget canvas before creating a simulation.")

        if not is_gl_good_enough:
            raise Exception("I need OpenGL 4.6 at least")

        self._shader_log_path = shader_log_path
        self.set_params(width, height, ponderation_runge_kutta, dx, dy, courant, froude_max,
                        time_step_strategy, time_step, max_steps, report_frequency, basefilename,
                        tile_size=tile_size, tile_packing_mode=tile_packing_mode,
                        optimized_indirection=optimized_indirection)

        self.infiltration_timings = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.nb_infiltration_zones = 0
        self.nb_infiltration_lines = 0
        self.max_time_step_inactive_tile = MAXIMUM_TIME_STEP  # P.A.'s default (when no infiltration present)
        self._debugging = False
        self._froude_limit_on_uv = True
        self._debugging_skip_euler = False
        self._dbg_max_nb_of_dry_up_inner_loops = tile_size*tile_size
        self._total_number_of_dry_up_outer_loops = None  # None means "until done", 0 means "disabled". N means at most N iterations.
        self._ensure_positive_height = True
        self._forced_max_step_size = False
        self.last_euler_dry_up_fixes_count = -1
        self.last_rk_dry_up_fixes_count = -1
        self._tile_packer = None
        self._optim_geom_shaders = TileOptimisation.REGULAR_SHADER
        self.state_buffer_ssbo_id = None
        self._currentMinStepSizeTexture = 0
        self._shaders_loaded = False
        self.iteration_vs_simtime = []
        self._next_stop = TIME_SAMPLING_RESOLUTION
        self.integration_fb = None
        self._debug_fb = None
        self._infiltration_fb = None
        self.strong_boundary_cond_fb = None
        self.param_copy_tex = None
    def _load_shaders(self):
        assert not self._shaders_loaded

        # Coordinates are NDC
        # Aligned on the active_tile_texture dimensions
        # To be sure we're in the pixels, we lay the grid on the pixels' centers.
        nb_tiles_horiz, nb_tiles_verti = self._tile_packer.size_in_tiles()

        # Width, Height of a tile in NDC (Normalized device coordinates)
        ndc_w = 2  # width of the NDC unit square (from -1 to +1 => 2)
        ndc_tile_w = self._tile_size * (ndc_w / self.width)
        ndc_tile_h = self._tile_size * (ndc_w / self.height)

        # Use const : https://www.khronos.org/opengl/wiki/Type_Qualifier_(GLSL)#Constant_qualifier
        with open(SHADER_PATH / "generated_constants.frs", "w", encoding='utf-8') as fout:
            fout.write(f"// This file was generated at runtime ({nice_timestamp()}). DO NOT EDIT, regenerate it!\n\n")

            # Make sure we can define an array with proper dimension
            if self.nb_infiltration_zones >= 1:
                aiz_size = "NB_INFILTRATION_ZONES"
            else:
                aiz_size = "1"

            def glbool(v):
                return str(v).lower()

            fout.write(
                f"""// Since constants don't change during the simulation, I don't
// pass them as uniform but as, well, constants. I think it helps the compiler
// to simplify code.

const bool RUNGE_KUTTA = true;
const float FROUDE_MAX = {self.froude_max or 0:f}; // 10.0 is the original one
const bool FROUDE_LIMIT = {glbool(self.froude_max is not None)};
const float g = {self.g:f};

// Tell if the simulator must try to fix dry up's (true) or not (false).
const bool DRY_UP_FIXING = {glbool(not self.dry_up_disabled())};

// Additional testing to enforce positive heights.
// Make sure you use Froude limitations and dry ups management so that
// a hard limit is not imposed (which will add water to the simulation).
const bool ENSURE_POSITIVE_HEIGHT = {glbool(self._ensure_positive_height)};

// This is the padded size of the domain.
const uint DOMAIN_WIDTH={nb_tiles_horiz*self._tile_size};
const uint DOMAIN_HEIGHT={nb_tiles_verti*self._tile_size};

// The tile size, expressed in meshes. Tiles are squares.
const uint TILE_SIZE={self._tile_size};

// Tile size in normal device coordinates
const float NDC_TILE_WIDTH = {ndc_tile_w};
const float NDC_TILE_HEIGHT = {ndc_tile_h};

// Size of the computation domain expressed in tiles
const int NB_TILES_HORIZ = {nb_tiles_horiz};
const int NB_TILES_VERTI = {nb_tiles_verti};

// When true, the tile_packing indirection functions will become
// effectless (they don't do indirection).
const bool TRANSPARENT_PACK = {glbool(self._tile_packing_mode == TilePackingMode.TRANSPARENT)};

// In the infiltration table, the index of the column containing
// the start time of an infiltration.
const int INFILTRATION_TABLE_TIME_INDEX = 0;

const uint WET_TILES_DETECT_THREAD_WIDTH = {WET_TILES_DETECT_THREAD_WIDTH};
const uint NB_INFILTRATION_ZONES = {self.nb_infiltration_zones};
const uint NB_INFILTRATION_LINES = {self.nb_infiltration_lines};

// Should we linearly interpolate the infiltration quantity across
// infiltration steps ?
const bool INFILTRATION_LERP = {glbool(self._infiltration_interpolation == InfiltrationInterpolation.LINEAR)};

// When a tile has no active mesh (for example when there's no water),
// one cannot compute a corresponding Δt. In that case, we take a
// default value for that Δt.
// That default value is chosen big enough so that regular time steps
// (which happen on tiles with active meshes) will be below it.
// Moreover, in case there are infiltrations, this default time step
// is chosen in a way that we will not skip a full infiltration interval
// when going from t to Δt.
const float MAX_TIME_STEP_INACTIVE_TILE = {self.max_time_step_inactive_tile:g};

// If we run with the "fixed time step", then this is its value.
// If not, this will be 0 (indicating a dynamic time step).
const float MIN_TIME_STEP = {self.time_step if self.time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP else 0.0};

// This constant is used to determine if an alpha correction
// is the same as the one applied before.
const float NUMERICAL_ERROR_TOLERANCE_FOR_ALPHA = 5e-7;

// Record the flows at the border of each mesh.
const bool TRACK_FLOWS_AT_BORDER={glbool(self._track_flows_at_border)};

// Status codes are output by the "global" shader (i.e. at each simulation step)
const uint STATUS_CODE_OK = 0;
// Something went wrong in the timestep computation. Either it's zero
// or it's a NaN (this is to help debugging NaN appearances during execution,
// which often occur at random).
const uint STATUS_CODE_TIMESTEP_WRONG = 1;

// If true, we record debug information and enable other debug behaviours
const bool DEBUGGING = {glbool(self._debugging)};

const float RUNGE_KUTTA_PONDERATION = {self.ponderation_runge_kutta if self.ponderation_runge_kutta is not None else 0.0};

// Allows to disable the Froude limit application at the u,v computation level.
// This is used while testing.
const bool FROUDE_LIMIT_ON_UV = {glbool(self._froude_limit_on_uv)};

// This is an SSBO (because it's qualified as 'buffer'). It will be shared
// amongst shaders. It acts as a global variable which lives through the
// iterations.
// Be careful as this data structure is mapped in python too, so one
// has to keep all the fields the same and in the same order in both
// implementations.
layout(std430) buffer GlobalStateBlock {{
    double simulation_time;
    float time_step;
    // Line number in the array containing the infiltration chronology.
    // This value is 1-based (1 means first line of array). 0 means
    // there's no active line.
    uint active_infiltration_line;
    uint dry_up_iterations;
    // Number of active (wet) tiles detected during one simulation step.
    uint nb_active_tiles;
    uint status_code;
    // We store the cumulative number of active (wet) tiles
    // over two 32-bit values because that number can
    // grow pretty fast.
    uint total_active_tiles_hi;
    uint total_active_tiles_lo;
    float active_infiltration_zones[{aiz_size}];
}}; // no instance name => fields are globals.

//layout(binding = 16) uniform atomic_uint two;

const bool OPTIMIZED_TILE_INDIRECTION={glbool(self._optimize_tile_indirection)};

float baby_sqrt(in float n) {{
    // Babylonian approximation of sqrt. Here to avoid using "sqrt()",
    // to test whether the latter is stable across GPU generations.
    if (n > 0) {{
        float x_n = n;
        for(int i = 0; i < 12; i++) {{
            x_n = 0.5 * (x_n + n / x_n);
        }}
        return x_n;
    }} else {{
        return 0;
    }}
}}
"""
            )

        self.vertex_shader = load_shader_from_file(Path(SHADER_PATH, "simplevertexshader.vs"))

        # self.blind_shader = load_shader_from_file(Path(SHADER_PATH,"blind.frs"), log_path=self._shader_log_path)
        # self.blinder_program = load_program(self.vertex_shader, self.blind_shader)
        # self.cell_detector_shader = load_shader_from_file(Path(SHADER_PATH,"detect_cells.frs"), log_path=self._shader_log_path)
        # self.cell_detector_program = load_program(self.vertex_shader, self.cell_detector_shader)
        # self.cell_detector_cruncher_shader = load_shader_from_file(Path(SHADER_PATH,"detect_cells_cruncher.frs"), log_path=self._shader_log_path)
        # self.cell_detector_cruncher_program = load_program(self.vertex_shader, self.cell_detector_cruncher_shader)
        # self.min_step_size_shader = load_shader_from_file(Path(SHADER_PATH,"Water2MinStepSizeShader.frs"), log_path=self._shader_log_path)
        # self.min_step_size_program = load_program(self.vertex_shader, self.min_step_size_shader)
        # self.max_step_size_shader = load_shader_from_file(Path(SHADER_PATH,"Water2MaxStepSizeShader.frs"), log_path=self._shader_log_path)
        # self.max_step_size_program = load_program(self.vertex_shader, self.max_step_size_shader)

        # FIXME : adapt OpenGL version in shader to avoid error relative to gl_FragColor ?
        # self.strong_boundary_cond_shader = load_shader_from_file(Path(SHADER_PATH,"Water2StrongBC.frs"), log_path=self._shader_log_path)
        # self.strong_boundary_cond_program = load_program(self.vertex_shader, self.strong_boundary_cond_shader)

        # self.euler_shader = load_shader_from_file(Path(SHADER_PATH,"Water2EulerStepShader.frs"), log_path=self._shader_log_path)
        # self.euler_program = load_program(self.vertex_shader, self.euler_shader)
        # self.runge_kutta_shader = load_shader_from_file(Path(SHADER_PATH,"Water2RungeKuttaStepShader.frs"), log_path=self._shader_log_path)
        # self.runge_kutta_program = load_program(self.vertex_shader, self.runge_kutta_shader)

        # self.slope_flux_derivative_shader = load_shader_from_file(Path(SHADER_PATH,"Wolf2SlopeAndFluxAndDerivativeShader.frs"), log_path=self._shader_log_path)
        # self.slope_flux_derivative_program = load_program(self.vertex_shader, self.slope_flux_derivative_shader)
        # self.copy_alpha_counters_shader = load_shader_from_file(Path(SHADER_PATH,"copy_alpha_to_counters.frs"), log_path=self._shader_log_path)
        # self.copy_alpha_counters_program = load_program(self.vertex_shader, self.copy_alpha_counters_shader)
        # self.detect_alpha_cruncher_shader = load_shader_from_file(Path(SHADER_PATH,"detect_alpha_cruncher.frs"), log_path=self._shader_log_path)
        # self.detect_alpha_cruncher_program = load_program(self.vertex_shader, self.detect_alpha_cruncher_shader)

        # Compute shader active tile detection
        self.active_tiles_detector_cs2 = load_shader_from_file(Path(SHADER_PATH, "parallel_tile_detect.comp"), log_path=self._shader_log_path)
        self.active_tiles_detector_program2 = load_program(compute_shader=self.active_tiles_detector_cs2)

        # Fragment shader active tile detection (should be deprecated)
        # self.active_tiles_detector_shader = load_shader_from_file(Path(SHADER_PATH,"active_tile_detector.frs"), log_path=self._shader_log_path)
        # self.active_tiles_detector_program = load_program(self.vertex_shader, self.active_tiles_detector_shader)

        # Geometry shader to plot active tiles (only)
        self.active_tiles_quad_shader = load_shader_from_file(Path(SHADER_PATH, "active_tiles_quad.gs"), log_path=self._shader_log_path)

        # self.slope_flux_derivative_shader2 = load_shader_from_file(Path(SHADER_PATH,"Wolf2SlopeAndFluxAndDerivativeShader.frs"), log_path=self._shader_log_path)
        # self.slope_flux_derivative_program2 = load_program(self.vertex_shader, self.slope_flux_derivative_shader2, self.active_tiles_quad_shader)
        # self.euler_program2 = load_program(self.vertex_shader, self.euler_shader, self.active_tiles_quad_shader)
        # self.runge_kutta_program2 = load_program(self.vertex_shader, self.runge_kutta_shader, self.active_tiles_quad_shader)

        self.derivative_and_dry_up_cs = load_shader_from_file(Path(SHADER_PATH, "deriv_euler_and_dry.comp"), log_path=self._shader_log_path)
        self.derivative_and_dry_up_cs_program = load_program(compute_shader=self.derivative_and_dry_up_cs)

        self.derivative_and_dry_up_and_rk_cs = load_shader_from_file(Path(SHADER_PATH, "rk_and_dry.comp"), log_path=self._shader_log_path)
        self.derivative_and_dry_up_and_rk_cs_program = load_program(compute_shader=self.derivative_and_dry_up_and_rk_cs)

        self.min_step_size_cs_shader = load_shader_from_file(Path(SHADER_PATH, "min_step_size_over_one_tile.comp"), log_path=self._shader_log_path)
        self.min_step_size_cs_program = load_program(compute_shader=self.min_step_size_cs_shader)

        self.min_step_size_crunch_cs_shader = load_shader_from_file(Path(SHADER_PATH, "min_step_size_parallel.comp"), log_path=self._shader_log_path)
        self.min_step_size_crunch_cs_program = load_program(compute_shader=self.min_step_size_crunch_cs_shader)

        self.update_globals_cs_shader = load_shader_from_file(Path(SHADER_PATH, "update_globals.comp"), log_path=self._shader_log_path)
        self.update_globals_cs_program = load_program(compute_shader=self.update_globals_cs_shader)

        self.update_globals_cs_shader2 = load_shader_from_file(Path(SHADER_PATH, "update_globals2.comp"), log_path=self._shader_log_path)
        self.update_globals_cs_program2 = load_program(compute_shader=self.update_globals_cs_shader2)

        self._texture_viewer_shader2 = load_shader_from_file(Path(SHADER_PATH, "textureViewerShader.frs"))
        self._texture_viewer_program2 = load_program(self.vertex_shader, self._texture_viewer_shader2, self.active_tiles_quad_shader)

        self._shaders_loaded = True
    def compute_time_step_for_tiles_phase1(self, current_quantity):
        # Phase 1: compute the time steps for all tiles (and already a first minimum)
        # Attention! Remember that the tile packing has neutral tiles. Make
        # sure the neutral tiles' value doesn't affect the computations.
        packed_width, packed_height = self._tile_packer.packed_size()
        ntx, nty = self._tile_packer.packed_size_in_tiles()

        program = self.min_step_size_cs_program  # Using min_step_size.comp
        glUseProgram(program)
        wire_program(program,
                     {"max_width": packed_width,
                      "max_height": packed_height,
                      "dx": self.dx,
                      "dy": self.dy,
                      "courant": self.courant},
                     {"quantitySampler": self.quantity_tex[current_quantity],
                      "activeTilesSampler": self._active_tiles_map_tex[0],
                      "tiles_mins": (self.max_step_size_tex, GL_WRITE_ONLY)},
                     # ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id}
                     )
        glDispatchCompute(ntx, nty, 1)
        # make sure writing to image has finished before read
        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)

        if False:
            mins = read_texture2(self.max_step_size_tex, GL_R32F)
            np.save(f"{self._prefix}_min_steps_tiles_{self._current_step}", mins)
    def compute_time_step_for_tiles_phase2(self, current_quantity):
        # Phase 2: reduce the matrix of minimums
        program = self.min_step_size_crunch_cs_program
        glUseProgram(program)

        # The previous step computed one value per tile. So now
        # we have to work on the remaining tiles
        w, h = self._tile_packer.packed_size_in_tiles()
        # print(f"compute_time_step_for_tiles_phase2: {w} {h}")

        # For the reduction, we can choose whatever tile size we want.
        # 16 was chosen because it gave good performances.
        REDUCTION_TILE_SIZE = 16  # FIXME Duplicated in the shader's code.
        while True:
            max_width, max_height = w, h
            w, h = ceil(w/REDUCTION_TILE_SIZE), ceil(h/REDUCTION_TILE_SIZE)
            assert w >= 1 and h >= 1, "I must have something to do"

            wire_program(program,
                         {"max_width": int(max_width),
                          "max_height": int(max_height)},
                         {"tiles_mins": (self.max_step_size_tex, GL_READ_WRITE)},
                         ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id})

            logging.debug(f"compute_time_step_for_tiles_phase2: glDispatchCompute: {w} {h} (tiles); max {max_width}x{max_height} (meshes)")
            glDispatchCompute(w, h, 1)
            glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT)

            if w <= 1 and h <= 1:
                break

        if False:
            np.save(f"{self._prefix}_para_step_{self._current_step}_{w}_{h}",
                    read_texture2(self.max_step_size_tex, GL_R32F))
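
    # A CPU-side reference sketch of what the two phases above compute. It is
    # illustrative only (it is not called by the GPU path) and assumes a
    # hypothetical `per_mesh_dt` array of per-mesh candidate time steps.
    @staticmethod
    def _min_time_step_reference_sketch(per_mesh_dt: np.ndarray,
                                        tile_size: int = DEFAULT_TILE_SIZE) -> float:
        # Phase 1: one minimum per tile of the domain.
        tiles_mins = np.full((integer_div(per_mesh_dt.shape[0], tile_size),
                              integer_div(per_mesh_dt.shape[1], tile_size)),
                             np.inf, dtype=per_mesh_dt.dtype)
        for r in range(0, per_mesh_dt.shape[0], tile_size):
            for c in range(0, per_mesh_dt.shape[1], tile_size):
                tiles_mins[r // tile_size, c // tile_size] = \
                    per_mesh_dt[r:r + tile_size, c:c + tile_size].min()

        # Phase 2: reduce the per-tile grid, 16x16 blocks at a time, until a
        # single global minimum remains (this mirrors the glDispatchCompute loop).
        REDUCTION_TILE_SIZE = 16
        while tiles_mins.size > 1:
            h, w = tiles_mins.shape
            nh, nw = ceil(h / REDUCTION_TILE_SIZE), ceil(w / REDUCTION_TILE_SIZE)
            reduced = np.full((nh, nw), np.inf, dtype=tiles_mins.dtype)
            for rr in range(nh):
                for cc in range(nw):
                    reduced[rr, cc] = tiles_mins[rr * REDUCTION_TILE_SIZE:(rr + 1) * REDUCTION_TILE_SIZE,
                                                 cc * REDUCTION_TILE_SIZE:(cc + 1) * REDUCTION_TILE_SIZE].min()
            tiles_mins = reduced
        return float(tiles_mins[0, 0])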
    @property
    def nbx(self):
        return self.width
    @property
    def nby(self):
        return self.height
    # def __del__(self):
    #     self.drop_gl_resources()
    def drop_gl_resources(self):
        # FIXME BUG For the moment, deleting GPU resources doesn't work
        # self._shifter = upload_blank_texture_to_gpu(256, 256, GL_TEXTURE_RECTANGLE, GL_R8UI, value=0)

        # We try to reuse the textures if they are already created and
        # have the same size as the requested size.
        all_textures = [self.max_step_size_tex, self.quantity_tex, self.bathymetry_tex,
                        self.manning_tex, self.strong_boundary_cond_tex,
                        self.strong_boundary_cond_values_tex, self.weak_boundary_cond_tex,
                        self.infiltration_zones_texture, self.infiltration_zones_values_array,
                        self.boundary_cond_cells_tex, self.alpha_tex,
                        self._active_tiles_map_tex, self.param_copy_tex]

        if self.optimize_tile_indirection:
            all_textures.append(self._reversed_tile_indirection_tex)
        if self._track_flows_at_border:
            all_textures.append(self._flows_at_border_tex)
            all_textures.append(self._rhs_tex)
        if self._debugging:
            all_textures.append(self.debug_tex)

        for ndx, t in enumerate(all_textures):
            if t is not None:
                logging.debug(f"dropping texture {ndx} with id(s) {t}")
                try:
                    drop_textures(t)
                except Exception as ex:
                    logging.error(f"Can't drop a texture, ndx={ndx}, tid={t} because {ex}")
                    from traceback import print_stack
                    print_stack()
                    # raise Exception(f"Can't drop a texture, ndx={ndx}, tid={t} because {ex}")

        for ndx, t in enumerate([self.state_buffer_ssbo_id, self.integration_fb, self.strong_boundary_cond_fb]):
            if t is not None:
                logging.debug(f"dropping Buffer {ndx} with id(s) {t}")
                glDeleteBuffers(1, [t])

        self.state_buffer_ssbo_id = None
        self.max_step_size_tex = None
        self.quantity_tex = None
        self.bathymetry_tex = None
        self.manning_tex = None
        self.strong_boundary_cond_tex = None
        self.strong_boundary_cond_values_tex = None
        self.weak_boundary_cond_tex = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.boundary_cond_cells_tex = None
        self.alpha_tex = None
        self._active_tiles_map_tex = None
        self.param_copy_tex = None
        self._reversed_tile_indirection_tex = None
        self._flows_at_border_tex = None
        self._rhs_tex = None
        self.debug_tex = None

        # glDeleteFramebuffers(4, [self._active_tiles_map_fb, self.integration_fb, self._infiltration_fb, self.strong_boundary_cond_fb])

        if self._shaders_loaded:
            glDeleteShader(self.vertex_shader)
            glDeleteShader(self.active_tiles_detector_cs2)
            glDeleteShader(self.active_tiles_quad_shader)
            glDeleteShader(self.derivative_and_dry_up_cs)
            glDeleteShader(self.derivative_and_dry_up_and_rk_cs)
            glDeleteShader(self.min_step_size_cs_shader)
            glDeleteShader(self.min_step_size_crunch_cs_shader)
            glDeleteShader(self.update_globals_cs_shader)
            glDeleteShader(self.update_globals_cs_shader2)
            glDeleteShader(self._texture_viewer_shader2)

            self.vertex_shader = None
            self.active_tiles_detector_cs2 = None
            self.active_tiles_quad_shader = None
            self.derivative_and_dry_up_cs = None
            self.derivative_and_dry_up_and_rk_cs = None
            self.min_step_size_cs_shader = None
            self.min_step_size_crunch_cs_shader = None
            self.update_globals_cs_shader = None
            self.update_globals_cs_shader2 = None
            self._texture_viewer_shader2 = None

            glDeleteProgram(self.min_step_size_cs_program)
            glDeleteProgram(self.active_tiles_detector_program2)
            glDeleteProgram(self.update_globals_cs_program)
            glDeleteProgram(self.update_globals_cs_program2)
            glDeleteProgram(self.min_step_size_crunch_cs_program)
            glDeleteProgram(self.derivative_and_dry_up_cs_program)
            glDeleteProgram(self.derivative_and_dry_up_and_rk_cs_program)

            self.min_step_size_cs_program = None
            self.active_tiles_detector_program2 = None
            self.update_globals_cs_program = None
            self.update_globals_cs_program2 = None
            self.min_step_size_crunch_cs_program = None
            self.derivative_and_dry_up_cs_program = None
            self.derivative_and_dry_up_and_rk_cs_program = None

            self._shaders_loaded = False

        glFinish()
    def reset_size(self, width: int, height: int):
        assert width > 0 and height > 0

        if hasattr(self, "width"):  # and (self.width != width or self.height != height):
            self.drop_gl_resources()

        self.max_step_size_tex = None
        self.quantity_tex = None
        self.integration_fb = None
        self.bathymetry_tex = None
        self.manning_tex = None
        self.strong_boundary_cond_tex = None
        self.strong_boundary_cond_values_tex = None
        self.weak_boundary_cond_tex = None
        self.infiltration_zones_texture = None
        self.infiltration_zones_values_array = None
        self.infiltration_timings = None
        self.nb_infiltration_lines = 0
        self.nb_infiltration_zones = 0
        self.boundary_cond_cells_tex = None
        self.alpha_tex = None
        self.debug_tex = None
        self._active_tiles_map_tex = None
        self.state_buffer_ssbo_id = None
        self._atomic_counter_buffer = None

        self.width, self.height = width, height
        self._old_stripes = None
        self._current_step = 0  # 0 based !!!
        self.last_gpu_time = None
        self.current_simulation_time_step = None
        self._tile_packer = None  # Invalidating the tile packer
[docs] def complete_textures(self): """ After having set several data textures, call this to upload blank texture replacement for those you didn't set. and to recompute the framebuffers and create additional textures necessary to the computation. """ logging.debug("Completing textures") packed_width, packed_height = self._tile_packer.packed_size() # glUseProgram(self.cell_detector_program) self._gen_single_quad() #self._active_tile_grid_vao_id = None self._cache = dict() # if self._atomic_counter_buffer is None: # self._atomic_counter_buffer_id = glGenBuffers(1) # glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, self._atomic_counter_buffer_id) # glBufferData(GL_ATOMIC_COUNTER_BUFFER, len(data_buffer), data_buffer, GL_DYNAMIC_COPY) # glBindBufferRange(GL_ATOMIC_COUNTER_BUFFER, 16, self._atomic_counter_buffer_id, 0, 16) # glBindBuffer(GL_NONE) if self._active_tiles_map_tex is None: # FIXME You don't need an array of textures for one texture. ntx, nty = self._tile_packer.packed_size_in_tiles() self._active_tiles_map_tex = [ # This texture is accessed without indirection (which is not # that bad since its size is self._tile_size**2 less than the # orignal data). upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE, GL_R8UI, value=0)] # self._active_tiles_map_fb = create_frame_buffer_for_computation( # self._active_tiles_map_tex, # texture_target=GL_TEXTURE_2D, out_to_fragdata=False) if self._optimize_tile_indirection: self._reversed_tile_indirection_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, format=GL_RG16UI, img_data=self._tile_packer.tile_reversed_indirection_map(), ) if self.max_step_size_tex is None: ntx, nty = self._tile_packer.packed_size_in_tiles() # Here we store the time step so that in case of FixedTimeStep strategy, # one can use this one. self.max_step_size_tex = upload_blank_texture_to_gpu(ntx, nty, GL_TEXTURE_RECTANGLE, GL_R32F, self.time_step) # For debugging: # test_pattern = (np.arange(ntx*nty, dtype=np.float32)+0.5).reshape((nty, ntx)) # self.max_step_size_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, test_pattern) # Double-buffered three-components color texture object holding the # cell-centered conserved quantity grid (w, hu, hv) if self.quantity_tex is None: logging.debug("Blank quantities textures") self.quantity_tex = [upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0) for i in range(3)] # # For computing Euler and Runge Kutta integration self.integration_fb = create_frame_buffer_for_computation( self.quantity_tex) # FIXME I think we only need one texture for bathymetry 'cos it doesn't change over time. 
if self.bathymetry_tex is None: logging.debug("Blank bathymetry textures") self.bathymetry_tex = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=99.0) # Source term for continuity equation (h only) # self.source_cont_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=-99_999.0) if self.manning_tex is None: logging.debug("Blank Manning textures") self.manning_tex = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=0.04) self.strong_boundary_cond_fb = create_frame_buffer_for_computation(self.quantity_tex) if self.strong_boundary_cond_tex is None: logging.debug("Blank StrongBC textures") self.strong_boundary_cond_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=-99_999.0) if self.strong_boundary_cond_values_tex is None: logging.debug("Blank StrongBC values textures") self.strong_boundary_cond_values_tex = upload_blank_texture_to_gpu(self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_RGB32F, value=0.0) # Things are coded in a GL_R32UI. I have tested GL_R8UI # but somehow I had the impression the memory layout # was broken. That's why I use 32bits (my reasoning was that # if 8 bits don't work then it's because the sampler can # only work with 32 bits at a time). FIXME That's a pretty # weak explanation... # value=1 == mark all cells as active (without that nothing will get # computed) if self.weak_boundary_cond_tex is None: logging.debug("Blank WeakBC textures") self.weak_boundary_cond_tex = upload_blank_texture_to_gpu( self.width, self.height, GL_TEXTURE_RECTANGLE, format=GL_R32UI, value=1) # Makes all cells active by default # FIXME This is not part of a resize operation if self.boundary_cond_cells_tex is None: self.boundary_cond_cells_tex = upload_blank_texture_to_gpu( 1, 1, GL_TEXTURE_RECTANGLE, format=GL_R32F, value=0) if self.infiltration_zones_texture is None: logging.debug("Blank infiltration zones texture") self.infiltration_zones_texture = upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_R32I, value=MESH_WITHOUT_INFILTRATION) if self.infiltration_zones_texture is not None: self._infiltration_fb = create_frame_buffer_for_computation(self.infiltration_zones_texture) logging.debug("Dry up corrections textures") if self.alpha_tex is None: self.alpha_tex = [ # In these two we have vec(alpha_value, alpha_mask) # FIXME I think I only use RG coordinates => move to GL_RG32F upload_blank_texture_to_gpu(packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=[1.0,0.0,0.0,0.0]) for i in range(2) ] # FIXME This exists for the sole purpose of dowlonading the manning # texture from the GPU. 
# FIXME Used for debugging => group them if self._debugging: logging.debug("Debug textures") self._create_debugging_textures() # 2/ alpha_tex 1 (alpha and mask): GL_COLOR_ATTACHMENT0 # 3/ alpha_tex 2 (alpha and mask): GL_COLOR_ATTACHMENT1 # 6/ debug_tex 1 : GL_COLOR_ATTACHMENT2 # 7/ debug_tex 2 : GL_COLOR_ATTACHMENT3 # assert len(self.alpha_tex) == 2 # if self.debug_tex: # self.access_fb = create_frame_buffer_for_computation( # self.alpha_tex + self.debug_tex, # out_to_fragdata=True) # else: # self.access_fb = create_frame_buffer_for_computation( # self.alpha_tex, # out_to_fragdata=True) if self.time_step_strategy == TimeStepStrategy.FIXED_TIME_STEP: ts = self.time_step else: # This is actually the first time step value ts = 0.0 struct_len = struct.calcsize( GLOBALS_STRUCT_PACK_FORMAT + "f" * self.nb_infiltration_zones) if self.state_buffer_ssbo_id is None: # The SSBO set up is done like in https://www.khronos.org/opengl/wiki/Shader_Storage_Buffer_Object self.state_buffer_ssbo_id = glGenBuffers(1) glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) # IIUC, dynamic copy means that GL will modify and read the buffer (not the application). # That is, the buffer will be internal to GL... glBufferData( GL_SHADER_STORAGE_BUFFER, struct_len, bytes( struct_len), GL_DYNAMIC_COPY, ) glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0) # Complete the configuration self._clear_global_variables_buffer(sim_time=0, time_step=ts) # A triple buffered copy of the global parameters of the simulation (such as current step, time, etc.) self.param_copy_tex = [ upload_blank_texture_to_gpu(1, 1, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0) for i in range(1) ] self._current_step = 0
[docs] def _create_debugging_textures(self): if self._debugging: logging.debug("Debug textures") packed_width, packed_height = self._tile_packer.packed_size() dbg_tex = [] if self.manning_tex is not None: dbg_tex.append(self.manning_tex) if self.weak_boundary_cond_tex is not None: dbg_tex.append(self.weak_boundary_cond_tex) self._debug_fb = create_frame_buffer_for_computation(dbg_tex) if self.debug_tex is None: self.debug_tex = [ upload_blank_texture_to_gpu( packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0, ), upload_blank_texture_to_gpu( packed_width, packed_height, GL_TEXTURE_RECTANGLE, format=GL_RGBA32F, value=0, ), ]
# def _setup_tiles(self, active_meshes): # assert active_meshes.dtype == bool, "NAP must be xformed to bool array !" # with time_it("find_non_empty_tiles"): # self._tiles = find_non_empty_tiles_on_nap(active_meshes, self._nb_tiles_per_dimension)
[docs] def _gen_single_quad(self): self._single_quad_vao_id = glGenVertexArrays(1) glBindVertexArray( self._single_quad_vao_id ) vertex_buffer_object_id = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_object_id) quad = np.array(make_quad_fast(-1,+1,-1,+1), dtype=np.float32) # OpenGL doc: creates and initializes a buffer object's data store glBufferData(GL_ARRAY_BUFFER, quad.nbytes, quad, GL_STATIC_DRAW) # FIXME This is dangerous: I hard code the attrib location, # so the expectation is that it will be the same in ALL programs. # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 attr_loc = 0 # OpenGL: glEnableVertexAttribArray uses the currently bound vertex array # object for the operation. glEnableVertexAttribArray(attr_loc) # See https://www.khronos.org/opengl/wiki/Vertex_Specification#Vertex_Array_Object # "Think of it like this. glBindBuffer sets a global variable, then # glVertexAttribPointer reads that global variable and stores it in the # VAO." glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # The order is important, otherwise you clear the buffer binding stored in # the vertex array object :-) glBindVertexArray(GL_NONE) glBindBuffer(GL_ARRAY_BUFFER, GL_NONE)
# def _precompute_step_size_vbos(self, program, nap): # """ We build grids of halving number of tiles. # (so each of the tiles at a given number of tiles # are different in number as well as in size of the # level above) # """ # self._step_size_tiles_vbo_id = dict() # # Very important: we wire ourselves to the size of the computaiton # # domain. # factor = max(self.width, self.height) # while factor >= 1: # # if the number of tiles on a dimension is greater # # than the dimension, then there are too many tiles. # # In that case we clamp the number of tiles. # if self._nb_tiles_per_dimension > factor: # n_tiles = factor # else: # n_tiles = self._nb_tiles_per_dimension # vertex_array = find_non_empty_tiles_on_nap(nap, n_tiles) # vertex_array_object_id = glGenVertexArrays(1) # #glBindVertexArray — bind a vertex array object # glBindVertexArray( vertex_array_object_id ) # tri_buffer_id = glGenBuffers(1) # # binds the buffer to the vertex array # glBindBuffer(GL_ARRAY_BUFFER, tri_buffer_id) # # OpenGL doc: creates and initializes a buffer object's data store # glBufferData(GL_ARRAY_BUFFER, vertex_array.nbytes, vertex_array, GL_STATIC_DRAW) # attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader # assert attr_loc >= 0 # glEnableVertexAttribArray(attr_loc) # # size= number of components per generic vertex # # normalized = ... # # stride= Specifies the byte offset between consecutive generic vertex attributes. # # pointer= Specifies a offset of the first component of the first generic # # vertex attribute in the array in the data store of the buffer currently bound to the GL_ARRAY_BUFFER target. # glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) # # order important else you clear the buffer bind in # # the vertex array object :-) # glBindVertexArray(GL_NONE) # glBindBuffer(GL_ARRAY_BUFFER, GL_NONE) # self._step_size_tiles_vbo_id[factor] = (vertex_array_object_id, # tri_buffer_id, vertex_array.shape[0]) # factor = factor // 2 # logging.info(f"Precomputed {len(self._step_size_tiles_vbo_id)} tilesets for max step size. Sizes {list(self._step_size_tiles_vbo_id.keys())}")
[docs] def _draw_active_parts_deprecated(self, program, x=None, y=None, w=None,h=None,zoom=None): # x,y,w,h in *screen* coordinates (2560*1440 for example) # The idea is to attach a lot of configuration (and vertex array) # to a VBO (vtx buffer object). This way, when we need to draw, we just # recall that configuration to the GPU (instead or uploading it again and again) # This brings in a lot of performance because communicating with the GPU is # expensive. assert glGetAttribLocation(program, "aVertex") == 0 if False and program not in self._cache: vertex_array_object = glGenVertexArrays(1) glBindVertexArray( vertex_array_object ) quad_buffer = glGenBuffers(1) glBindBuffer(GL_ARRAY_BUFFER, quad_buffer) glBufferData(GL_ARRAY_BUFFER, self._tiles.nbytes, self._tiles, GL_STATIC_DRAW) attr_loc = glGetAttribLocation(program, "aVertex") # for the simple vertex shader assert attr_loc >= 0 glEnableVertexAttribArray(attr_loc) # size= number of components per generic vertex # normalized = ... # stride= Specifies the byte offset between consecutive generic vertex attributes. # pointer= Specifies a offset of the first component of the first generic # vertex attribute in the array in the data store of the buffer currently bound to the GL_ARRAY_BUFFER target. glVertexAttribPointer(attr_loc, 3, GL_FLOAT, GL_FALSE, 0, None) self._cache[program] = vertex_array_object # order important else you clear the buffer bind in the vertex array object :-) glBindVertexArray(0) glBindBuffer(GL_ARRAY_BUFFER, 0) # Matrix is: # | 1 | |x| |x| # | 1 | * |y| = |y| # | 0 | |z| |0| # | 1 | |w| |w| pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # Fortran==column major, like OpenGL set_uniform(program, "uModelViewProjectionMatrix", pm) # OpengGL doc: glViewport specifies the affine transformation of x and y from # normalized device coordinates (-1,+1) to window coordinates (e.g. 1024*768). if w is None: glViewport(0, 0, self.width, self.height) else: glViewport(x, y, w, h) # print(f"{self.width}, {self.height} {self._tiles.shape[0]}") s = self._stripes_on_nap[ max(self.nby, self.nbx) ] glBindVertexArray( s.get_vao_id() ) glDrawArrays(GL_TRIANGLES, 0, s.get_vao_len()) glBindVertexArray( GL_NONE)
# def _draw_single_quad(self,program, x=None, y=None, w=None,h=None): # pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # Fortran==column major, like OpenGL # set_uniform(program, "uModelViewProjectionMatrix", pm) # assert glGetAttribLocation(program, "aVertex") == 0 # # OpengGL doc: glViewport specifies the affine transformation of x and y from # # normalized device coordinates (-1,+1) to window coordinates (e.g. 1024*768). # if w is None: # assert h is None, "If w is None then H must be too !" # glViewport(0, 0, self.width, self.height) # else: # glViewport(x, y, w, h) # # print(f"{self.width}, {self.height} {self._tiles.shape[0]}") # glBindVertexArray( self._single_quad_vao_id ) # glDrawArrays(GL_TRIANGLES, 0, 6) # Draw 2 triangles of 3 vertices each # glBindVertexArray( GL_NONE)
[docs] def _has_good_shape(self, a: np.ndarray): expected_np_array_shape = (self.height, self.width) return a.shape[0:2] == expected_np_array_shape
[docs] def set_tile_size(self, tile_size: int): if tile_size > 0: assert log2(tile_size).is_integer(), "Tile size must be a power of two." assert tile_size >= 4, "Tile sizes smaller than 4 have not been tested yet." assert tile_size <= 32, "Tile sizes bigger than 32 have not been tested yet." #assert n <= min(self.width, self.height) self._tile_size = tile_size self._tile_packer = None # Invalidating the tile packer
@property
[docs] def tile_size(self): return self._tile_size
[docs] def set_buffers(self, max_step_size=None, strong_bc_array=None, strong_bc_values_array=None, quantity_arrays=None, bathymetry_array=None, manning_array=None, active_meshes_array=None, weak_bc_array=None, bc_cells_array=None, infiltration_zones_array=None, infiltration_timings=None): if active_meshes_array is not None: logging.debug("Active mesh array and texture indirection") assert self._has_good_shape(active_meshes_array), "active_meshes_array has bad size" self.nb_active_meshes = np.count_nonzero(active_meshes_array) #self._init_indirection(active_meshes_array) self._tile_packer = TilePacker(active_meshes_array, self._tile_size, self._tile_packing_mode) #padded_tile_indirection_map = np.ascontiguousarray( np.pad( self._tile_packer.tile_indirection_map(), ((0,0),(0,0),(0,1))).astype(np.uint32)) # The active mesh is not stored, only the texture indirection. # RG16 = (x:uint16, y:uint16) self._tile_indirection_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, format=GL_RG16UI, img_data=self._tile_packer.tile_indirection_map()) if False: ntx, nty = self.tile_packer().size_in_tiles() # ATTENTION! You clearly see a mismatch between the storage type # of the indirection texture and the format we read it into. # That's because python opengl bindings don't like RG16UI. tile_indirection_tex = read_texture2(self._tile_indirection_tex, GL_RGB16UI, ntx, nty) fig, axs = plt.subplots(1, 2) z = np.ascontiguousarray( np.pad( self._tile_packer.tile_indirection_map().astype(float), ((0,0),(0,0),(0,1))).astype(np.uint32)) z = z / np.max(z) axs[0].imshow(z) axs[0].set_title("padded_tile_indirection_map") #tile_indirection_tex[:,:,2] = 0 axs[1].imshow(tile_indirection_tex[:,:,0]) axs[1].set_title("downloaded version") plt.show() if max_step_size is not None: logging.debug("Max step size array") # This should be used only for testing purposes assert max_step_size.shape[0:2] == self._tile_packer.packed_size_in_tiles() assert max_step_size.dtype in (float, np.float32) self.max_step_size_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, max_step_size), # Tell that the first texture is the one where the max step size is # to be read self._currentMinStepSizeTexture = 0 if strong_bc_array is not None: logging.debug("StrongBC array") assert self._has_good_shape(strong_bc_array) assert len(strong_bc_array.shape) == 2, "Only one strong BC, for h by default." 
self.strong_boundary_cond_tex = upload_np_array_to_gpu(GL_TEXTURE_RECTANGLE, GL_R32F, strong_bc_array) if strong_bc_values_array is not None: logging.debug("StrongBC values array") assert self._has_good_shape(strong_bc_values_array) assert len(strong_bc_values_array.shape) == 3 and strong_bc_values_array.shape[2] == 3, "strong cell Conditiosn must be h,uh,vh" self.strong_boundary_cond_values_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_RGB32F, strong_bc_values_array) if bc_cells_array is not None: logging.debug("BCCells array") assert bc_cells_array.shape[0] >= 1, "You must give at least one boundary condition" self.boundary_cond_cells_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, bc_cells_array) if weak_bc_array is not None: logging.debug("WeakBC array") assert self._has_good_shape(weak_bc_array), \ f"Weak boundary condition has bad shape: {weak_bc_array.shape}, expected: {(self.height, self.width)}" self.weak_boundary_cond_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32UI, self._tile_packer.shuffle_and_pack_array( weak_bc_array)) if quantity_arrays is not None: logging.debug("Quantities array") # These are the qty0, qty1, qty2 arrays... # FIXME Why set up the three of them ? assert isinstance(quantity_arrays, list) assert len(quantity_arrays) == 3 assert len(quantity_arrays[0].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert len(quantity_arrays[1].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert len(quantity_arrays[2].shape) == 3, "Need (h,qx,qy) arrays for each of the qty buffer" assert self._has_good_shape(quantity_arrays[0]), f"Bad shape for array 0: {quantity_arrays[0].shape}" assert self._has_good_shape(quantity_arrays[1]), f"Bad shape for array 0: {quantity_arrays[1].shape}" assert self._has_good_shape(quantity_arrays[2]), f"Bad shape for array 0: {quantity_arrays[2].shape}" assert quantity_arrays[0].dtype == np.float32 assert quantity_arrays[1].dtype == np.float32 assert quantity_arrays[2].dtype == np.float32 self.quantity_tex = [ # Padding to move from vec3 to vec4. upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_RGBA32F, self._tile_packer.shuffle_and_pack_array( np.pad( qa, ((0,0),(0,0),(0,1)) )), texture_id=None) for qa in quantity_arrays ] self.integration_fb = create_frame_buffer_for_computation(self.quantity_tex) self.strong_boundary_cond_fb = create_frame_buffer_for_computation(self.quantity_tex) if bathymetry_array is not None: logging.debug("Bathymetry array") if self._has_good_shape(bathymetry_array): self.bathymetry_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, self._tile_packer.shuffle_and_pack_array( np.ascontiguousarray(bathymetry_array), np.max(bathymetry_array))) else: raise Exception(f"The bathymetry array doesn't have the right size. I expect: {self.height} rows, {self.width} columns. 
You gave {bathymetry_array.shape[0]} rows by {bathymetry_array.shape[1]} columns.") if manning_array is not None: logging.debug("Manning array") assert self._has_good_shape(manning_array) self.manning_tex = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, self._tile_packer.shuffle_and_pack_array(np.ascontiguousarray(manning_array))) # The map of infilotration zones if infiltration_zones_array is not None: logging.debug("Infiltration zones array") assert self._has_good_shape(infiltration_zones_array), \ f"Infiltration array has bad shape: {infiltration_zones_array.shape}, expected: {(self.height, self.width)}" # for row in infiltration_zones_array: # print(row) # print(infiltration_zones_array.dtype) # exit() packed = self._tile_packer.shuffle_and_pack_array( np.ascontiguousarray(infiltration_zones_array), neutral_values=MESH_WITHOUT_INFILTRATION, debug=True ) self.infiltration_zones_texture = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32I, packed) # The list of the infiltration zones with their time periods. if infiltration_timings is not None and len(infiltration_timings) >= 1: logging.debug("Infiltration timings array") # Each infiltration period has a corresponding column : # [start time, zone1 value, zone2 value, ...] if isinstance(infiltration_timings, list): a = np.array(infiltration_timings, dtype=float) elif isinstance(infiltration_timings, np.ndarray): a = infiltration_timings else: raise Exception(f"Bad type {type(infiltration_timings)}") assert a.shape[0] >= 1, "I don't handle empty lists for infiltrations" assert a.shape[1] >= 2, "I need at least one infiltration zone" test_order = a[:,0].flatten() assert np.all(test_order[:-1] <= test_order[1:]), f"The infiltration timings are not ordered ! : {test_order}" self.infiltration_timings = a[:,0] # Only the timings self.nb_infiltration_lines = a.shape[0] self.nb_infiltration_zones = a.shape[1] - 1 #print(f"set buffer : self.nb_infiltration_zones={self.nb_infiltration_zones}") if self.nb_infiltration_lines >= 2: deltas = self.infiltration_timings[1:] - self.infiltration_timings[0:-1] #print(deltas) if self.infiltration_timings[0] > 0: self.max_time_step_inactive_tile = min( self.max_time_step_inactive_tile, self.infiltration_timings[0], np.min(deltas[deltas > 0])) else: self.max_time_step_inactive_tile = min( self.max_time_step_inactive_tile, np.min(deltas[deltas > 0])) else: self.max_time_step_inactive_tile = MAXIMUM_TIME_STEP self.infiltration_zones_values_array = upload_np_array_to_gpu( GL_TEXTURE_RECTANGLE, GL_R32F, a)
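# --- Illustration (not part of the simulator) ---
# A minimal, self-contained sketch of the tile-packing idea used by set_buffers above:
# only tiles that contain at least one active mesh are kept, and an indirection map
# records, for every tile of the original domain, where (if anywhere) its data lives in
# the packed layout. The real work is done by TilePacker; `toy_pack` below is a
# hypothetical stand-in written only to make the concept concrete.
def toy_pack(active, tile):
    """Return (list_of_kept_tiles, indirection_map) for a boolean `active` mask."""
    nty, ntx = active.shape[0] // tile, active.shape[1] // tile
    indirection = -np.ones((nty, ntx), dtype=np.int32)  # -1 == tile absent from the packed data
    kept = []
    for ty in range(nty):
        for tx in range(ntx):
            block = active[ty * tile:(ty + 1) * tile, tx * tile:(tx + 1) * tile]
            if block.any():
                indirection[ty, tx] = len(kept)          # slot in the (toy, 1D) packed layout
                kept.append((ty, tx))
    return kept, indirection
#
# Example: an 8x8 domain, 4-mesh tiles, only the top-right tile holds active meshes:
#   mask = np.zeros((8, 8), dtype=bool); mask[0:4, 4:8] = True
#   toy_pack(mask, 4) -> ([(0, 1)], [[-1, 0], [-1, -1]])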
[docs] def simulation_step(self, source_quantity): """ Perform a full simulation step (with all substeps: euler, RK,...) `source_quantity` is 0 or 1 and tells in which of the ping/pong textures we'll *read* information. Consequently, we'll *write* into `1 - source_quantity`. """ assert source_quantity in (0,1) if not self._shaders_loaded: self._load_shaders() # for tex in self.alpha_tex: # clear_texture(tex) #glClear(GL_COLOR_BUFFER_BIT) # Updating and setting some global values program = self.update_globals_cs_program glUseProgram(program) if self.nb_infiltration_lines >= 1: # Avoid to set an inexistent uniform (will be optimized out by # GL/SL compiler) samplers= {"infiltrationValuesSampler": self.infiltration_zones_values_array} else: samplers = {} samplers["param_copy"] = ( self.param_copy_tex[self._current_step % len(self.param_copy_tex)], GL_WRITE_ONLY) uniforms = {} wire_program(program, uniforms, samplers, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) glDispatchCompute(1,1,1) glMemoryBarrier( GL_SHADER_STORAGE_BARRIER_BIT) if self._optim_geom_shaders is not None: self.detect_wet_tiles2(source_quantity) if self.time_step_strategy == TimeStepStrategy.OPTIMIZED_TIME_STEP: logging.debug("Max step size compute") with time_it_gl("-- compute_max_step_size"): # self.compute_all_time_step_step(source_quantity) # self.last_step_duration = self.compute_max_step_size() self.compute_time_step_for_tiles_phase1(source_quantity) self.compute_time_step_for_tiles_phase2(source_quantity) #print("all time steps") else: assert self.time_step > 0 # self._skip_euler Used for debugging if not self._debugging_skip_euler: old_nb_dryup_iterations = None # while True: if self.ponderation_runge_kutta is None: # Euler without RungeKutta self.last_euler_dry_up_fixes_count = self.compute_derivative_with_dry_up_and_euler(source_quantity, source_quantity, 1 - source_quantity) #self.compute_euler_step(source_quantity, dest_quantity=1 - source_quantity) else: # Euler will be followed by Runge Kutta self.last_euler_dry_up_fixes_count = self.compute_derivative_with_dry_up_and_euler(source_quantity, source_quantity, 2) #self.compute_euler_step(source_quantity, dest_quantity=2) # glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT) # gs = self.read_global_state() # old_nb_dryup_iterations = gs.nb_dryup_iterations # print(gs.nb_dryup_iterations) with time_it_gl("-- strong BC1"): if False: self.apply_strong_boundary_conditions(2) #print("strong bc") if self.ponderation_runge_kutta is not None: glMemoryBarrier( GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT ) with time_it_gl("-- second derivative"): # self.compute_derivative(current_bathymetry, 2, first_pass=False, simulation_time=sim_time) # self.last_rk_dry_up_fixes_count = self.compute_derivative_with_dry_up(current_bathymetry, 2, source_quantity, sim_time) self.last_rk_dry_up_fixes_count = -1 self.compute_derivative_with_dry_up_and_rk( source_quantity, # at t0 2, # at euler 1 - source_quantity) # self.compute_derivative_with_dry_up2(current_bathymetry, 2, source_quantity, sim_time, 1 - source_quantity) # with time_it_gl("-- RK"): # self.compute_runge_kutta(source_quantity) # #print("RK") with time_it_gl("-- strong bc2"): if False: self.apply_strong_boundary_conditions(1 - source_quantity) #print("strong bc RK") program = self.update_globals_cs_program2 glUseProgram(program) wire_program(program, {}, {}, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) glDispatchCompute(1,1,1) glMemoryBarrier( 
GL_SHADER_STORAGE_BARRIER_BIT) #if self.report_frequency_type == ReportFrequencyType.SIMULATION_TIME: if self._current_step > self._next_stop: gs = self.read_global_state() if gs.time_step == 0: raise Exception("Simulation can't progress anymore, delta_t == 0 !!!") old_sim_time = self.last_gpu_time self.last_gpu_time, self.current_simulation_time_step = gs.simulation_time, gs.time_step # We now control if the sampling frequency is enough to do satisfy # the request reportnig period. if old_sim_time is None: old_sim_time = self.last_gpu_time sampling_period = None else: sampling_period = self.last_gpu_time - old_sim_time old_sim_time = self.last_gpu_time if self.report_frequency.type == SimulationDurationType.SECONDS: if sampling_period is not None: if sampling_period > self.report_frequency.duration: logging.warning(f"The recording period ({self.report_frequency}) is smaller than the time between two sampled steps ({sampling_period:.3f} s.). Remember we sample every {TIME_SAMPLING_RESOLUTION} steps.") if False and self.report_frequency.type == SimulationDurationType.SECONDS: # Trying to predict the best moment to check for the time reporting. # In practice: doesn't work very well... I leave this code here in case # I'd like to improve it. self.iteration_vs_simtime.append( [self._current_step, self.last_gpu_time]) if len(self.iteration_vs_simtime) > 2: self.iteration_vs_simtime = self.iteration_vs_simtime[-2:] iter_time = self.iteration_vs_simtime delta_iter = iter_time[1][0] - iter_time[0][0] delta_time = iter_time[1][1] - iter_time[0][1] iter_per_second = delta_iter / delta_time self._next_stop = self._next_stop + iter_per_second * self.report_frequency # print(self.iteration_vs_simtime) # print(self._next_stop) else: self._next_stop += TIME_SAMPLING_RESOLUTION else: self._next_stop += TIME_SAMPLING_RESOLUTION # finish_timings() # finish_timings_full() self._current_step += 1
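# --- Usage sketch (hypothetical, not part of the class) ---
# How a caller could drive simulation_step's ping/pong textures: each call reads from
# `source_quantity` and writes into `1 - source_quantity`, so the driver simply flips
# the index between steps. `sim` is assumed to be a fully initialised GLSimulation.
def run_n_steps(sim, n_steps: int, source_quantity: int = 0) -> int:
    for _ in range(n_steps):
        sim.simulation_step(source_quantity)
        source_quantity = 1 - source_quantity  # next step reads what was just written
    return source_quantity                     # index of the texture holding the latest result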
[docs] def detect_wet_tiles2(self, current_quantity): """ Detect wet tiles FIXME we also call them active but that a misnomer because active also means NAP-active... """ # for i in range(8): # glActiveTexture(TEXTURE_UNITS[i]) # glBindTexture(GL_TEXTURE_RECTANGLE, GL_NONE) # #glBindImageTexture( TEXTURE_UNITS[i], GL_NONE, 0, GL_FALSE, 0, GL_NONE, GL_NONE) if False: from matplotlib import pyplot as plt ntx, nty = self._tile_packer.size_in_tiles() tile_indirection_tex = read_texture2(self._tile_indirection_tex, GL_RGB16UI, ntx, nty)[:,:,0:2] np.save( f"{self._prefix}_tile_indirection_{self._current_step}", tile_indirection_tex) program = self.active_tiles_detector_program2 # parallel_tile_detect.comp glUseProgram(program) textures = { "q": self.quantity_tex[current_quantity], "infiltrationZonesImage": (self.infiltration_zones_texture, GL_READ_ONLY), "tileIndirection": (self._tile_indirection_tex, GL_READ_ONLY), "activeTilesImage": (self._active_tiles_map_tex[0], GL_WRITE_ONLY), } if self._optimize_tile_indirection: textures["reversedTileIndirection"] = self._reversed_tile_indirection_tex wire_program( program, uniforms={}, textures=textures, ssbos={"GlobalStateBlock": self.state_buffer_ssbo_id}, ) if self._optimize_tile_indirection: # We dispatch only over the tiles which are part of the active domain. ntx, nty = self._tile_packer.packed_size_in_tiles() else: # We dispatch over the whole domain, one workgroup per tile. # When tile packing is active, we dispatch over the whole domain. The shader # will take care of indirecting absolute coordinates to virtual ones. ntx, nty = self._tile_packer.size_in_tiles() glDispatchCompute(ntx, nty, 1) check_gl_error() # make sure writing to image has finished before read glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) if False: ntx, nty = self._tile_packer.packed_size_in_tiles() active_tiles_map = read_texture2(self._active_tiles_map_tex[0], GL_R8UI, ntx, nty) np.save( f"{self._prefix}_active_tiles_map_{self._current_step}", active_tiles_map) h = self.read_quantity_result(current_quantity)[:,:,0] np.save( f"{self._prefix}_h_{self._current_step}", h) active_tiles_map = active_tiles_map.copy() active_tiles_map[active_tiles_map.shape[0]//2,active_tiles_map.shape[1]//2] = 0 # make more constrasted view fig, axs = plt.subplots(1,3) axs[0].imshow(active_tiles_map) axs[0].set_title(f"active_tiles_map {active_tiles_map.shape} {active_tiles_map.dtype} ") axs[1].imshow(tile_indirection_tex[:,:,0]) axs[1].set_title(f"Inidrection- dispatch {self._tile_packer.size_in_tiles()}") axs[2].imshow(h) axs[2].set_title(f"h") plt.show()
[docs] def _tiles_ndc_dims(self): # Size of the tiles in NDC coordinates. nb_tiles_horiz, nb_tiles_verti = ceil(self.width / self._tile_size), ceil(self.height / self._tile_size) ndc_w = 2 return (ndc_w * (self._tile_size/self.width), ndc_w * (self._tile_size/self.height))
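# Worked example for _tiles_ndc_dims (values chosen for illustration): NDC spans [-1, +1],
# i.e. a width of 2, so with 16-mesh tiles on a 200x100 domain a tile covers
# 2 * 16 / 200 = 0.16 NDC units horizontally and 2 * 16 / 100 = 0.32 NDC units vertically.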
[docs] def _draw_active_tiles(self, program, uniforms: dict, textures: dict, viewport=None): nb_tiles_horiz, nb_tiles_verti = ceil(self.width / self._tile_size), ceil(self.height / self._tile_size) # FIXME Dirty state management if not hasattr(self, "_active_tile_grid_vao_id") or self._active_tile_grid_vao_id is None: # We build the lower left corner of the quads # that will be *generated* by the geometry shader. # from 0 to (l-1)/l then *2-1 to be in [-1,+1] # grid_line(4) will give (-1, -0.5, 0, 0.5) def grid_line(nb_tiles,tile_size_ndc): return (np.arange(0,nb_tiles)*tile_size_ndc-1).astype(np.float32) #tw, th = self._tiles_ndc_dims() grid = np.meshgrid( grid_line(nb_tiles_horiz, 2/self.width*self._tile_size), grid_line(nb_tiles_verti, 2/self.height*self._tile_size)) # print(nb_tiles_horiz, nb_tiles_verti) # print(grid[0].shape) # print(grid[1].shape) # print(np.min(grid[0]), np.max(grid[0])) # grid[0] = a 2d array of all the points of the gird. Each cell of the array contains the X coordinate. # grid[1] = a 2d array of all the points of the gird. Each cell of the array contains the Y coordinate. # z = np.ravel(grid[0]) lower_left_corners = np.stack( [np.ravel(grid[0]), np.ravel(grid[1]), np.zeros((grid[0].size,), dtype=np.float32)]).T # print(lower_left_corners.shape) # for i in range(0,nb_tiles_horiz*nb_tiles_verti,7): # lower_left_corners[i,:] = [0,0,0] #lower_left_corners= lower_left_corners[0:250,:] # lower_left_corners = np.array([[0,0,0], # [0.125,0.125,0],], dtype=np.float32) #print(lower_left_corners.ravel()) self._active_tile_grid_vao_id = upload_geometry_to_vao(lower_left_corners) # For the vertex shader uniforms["uModelViewProjectionMatrix"] = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) #uniforms["uModelViewProjectionMatrixGS"] = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) textures["activeTilesSampler"] = self._active_tiles_map_tex[0] wire_program(program, uniforms, textures, #ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id} ) if viewport is None: glViewport(0, 0, self.width, self.height) #print(self.width, self.height) else: glViewport(viewport[0],viewport[1],viewport[2],viewport[3]) #print(viewport) glBindVertexArray( self._active_tile_grid_vao_id ) # Pass the vertices to the geometry shader. The geometry shader will # output the quads corresponding to each tile. glDrawArrays(GL_POINTS, 0, nb_tiles_horiz*nb_tiles_verti)
[docs] def read_euler_step_result(self, tex=2): """ Returns a 3D array (q, qu, qv) """ return read_texture(self.integration_fb, tex, self.width, self.height, GL_RGB32F)
[docs] def apply_strong_boundary_conditions(self, tex_out): program = self.strong_boundary_cond_program glUseProgram(program) # Remember the framebuffer is wired to the 3 "quantity" # textures (h,hu,hv) (each texture being one float wide) glBindFramebuffer(GL_FRAMEBUFFER, self.strong_boundary_cond_fb) # Choose destination texture (indexed on frame buffer's textures) glDrawBuffer(GL_COLOR_ATTACHMENTS[tex_out]) # We copy from strong boundary condition texture to quantity texture. # We copy only the RED coordinate, which represents the "h" value. # Notice we read and write the same (quantity) texture. wire_program( program, textures={ "strongbcSampler": self.strong_boundary_cond_tex, "strongbcvaluesSampler": self.strong_boundary_cond_values_tex, "quantitySampler": self.quantity_tex[tex_out] }, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id} ) self._draw_active_parts_deprecated(program)
[docs] def active_infiltration_line(self, t: float) -> int: # Compute the active infiltration event number (or line in the # array) at time t (number is one based); !!! One based !!!. # Returns 0 if none is active. assert t >= 0, f"You're in the past (t={t}) !" if self.infiltration_timings is None: # There are no configured infiltrations. return 0 assert np.all(self.infiltration_timings[:-1] <= self.infiltration_timings[1:]), \ "timings are not in ascending order" if t < self.infiltration_timings[0]: return 0 # Nothing to do, the infiltration has not yet started for i in range(self.infiltration_timings.shape[0]-1): if self.infiltration_timings[i] <= t < self.infiltration_timings[i+1]: return i+1 assert t > self.infiltration_timings[-1], "We should be past the last time" return self.infiltration_timings.shape[0] # one based !
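# Illustration of the one-based numbering above (hypothetical timings, not from a real model),
# assuming self.infiltration_timings == [10.0, 20.0, 30.0]:
#   active_infiltration_line(5.0)  -> 0  (infiltration has not started yet)
#   active_infiltration_line(15.0) -> 1  (first line is active: 10 <= t < 20)
#   active_infiltration_line(25.0) -> 2  (second line is active: 20 <= t < 30)
#   active_infiltration_line(35.0) -> 3  (past the last timing, the last line remains active)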
[docs] def compute_derivative_with_dry_up_and_euler( self, current_quantity_tex_ndx:int, euler_base_quantity_tex_ndx:int, # Base for dry up computation (we'll take h as a starting point) quantity_out_tex_ndx:int, current_alpha_tex_ndx=0, ignore_alpha=False): program = self.derivative_and_dry_up_cs_program glUseProgram(program) uniforms = { "epsilon": float(self.epsilon), "dx": self.dx, "dy": self.dy, "max_nb_of_dry_up_loops": np.uintc(self._dbg_max_nb_of_dry_up_inner_loops), } assert self._currentMinStepSizeTexture == 0, f"debugging assert... {self._currentMinStepSizeTexture}" samplers = { "bathy": self.bathymetry_tex, # FIXME when you see the caller's params values, you see quantitySampler == quantityEulerSampler "quantitySampler": self.quantity_tex[current_quantity_tex_ndx], "quantityEulerBaseSampler": self.quantity_tex[euler_base_quantity_tex_ndx], "weakBCSampler": self.weak_boundary_cond_tex, "BCCellsSampler": self.boundary_cond_cells_tex, "manning": self.manning_tex, "q_out": (self.quantity_tex[quantity_out_tex_ndx], GL_WRITE_ONLY), # These two are interanl to the compute shader # "rhs_out": (self.derivative_tex, GL_WRITE_ONLY), # "source_impl_out": (self.source_impl_tex, GL_WRITE_ONLY), "alpha_image_1": (self.alpha_tex[0], GL_READ_WRITE), "alpha_image_2": (self.alpha_tex[1], GL_READ_WRITE), "activeTilesSampler": self._active_tiles_map_tex[0], "tileIndirection": self._tile_indirection_tex, } if self._debugging: samplers.update( { "debug_out": (self.debug_tex[0], GL_READ_WRITE), "debug_out2": (self.debug_tex[1], GL_READ_WRITE), } ) if self.infiltration_timings is not None: # Note that this code implies that an infiltration source term can be # there at the first compute_derivative and not at the second. So # Runge-Kutta should be correct. #uniforms["active_infiltration_line"] = self.active_infiltration_line(simulation_time) samplers["infiltrationZonesSampler"] = self.infiltration_zones_texture #samplers["infiltrationValuesSampler"] = self.infiltration_zones_values_array if self._track_flows_at_border: # Texture 0 is for Euler (1 is RK) samplers["flows_at_border"] = self._flows_at_border_tex[0] samplers["rhs_texture"] = self._rhs_tex wire_program(program, uniforms, samplers, ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) self._handle_dry_up_iterations(program)
@property
[docs] def optimize_tile_indirection(self): return self._optimize_tile_indirection
[docs] def set_total_number_of_dry_up_outer_loops(self, n): """Configure the number of desired iterations to solve all or some of the dry ups. :param n: Number of iterations: * n == 0 means no dry up (only Euler/RK) * n > 0 means a *fixed* number of dry up iterations * None means as many dry up iterations as needed to solve all dry ups :type n: None or int. """ assert n is None or n >= 0 self._total_number_of_dry_up_outer_loops = n
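# Usage sketch of the three modes described above (`sim` being an initialised GLSimulation):
#   sim.set_total_number_of_dry_up_outer_loops(0)     # dry ups disabled, plain Euler/RK only
#   sim.set_total_number_of_dry_up_outer_loops(3)     # exactly three outer dry-up iterations
#   sim.set_total_number_of_dry_up_outer_loops(None)  # iterate until all dry ups are solved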
#print(f"set {self._max_number_of_dry_up_loops}" + "*"*100)
[docs] def set_no_dry(self, dummy): """ DEPRECATED !!! """ self.set_total_number_of_dry_up_outer_loops(0)
[docs] def emergency_save(self): logging.error(f"Emergency save at step {self._current_step} !!!") q0 = self.read_quantity_result(0) q1 = self.read_quantity_result(1) alpha0 = self.read_alpha(0) alpha1 = self.read_alpha(1) bathy = self.read_bathymetry() if self._debugging: debug0 = self.read_debug_texture(0) debug1 = self.read_debug_texture(1) np.save("q0",q0) np.save("q1",q1) np.save("alpha0",alpha0) np.save("alpha1",alpha1) np.save("bathy",bathy) if self._debugging: np.save("debug0",debug0) np.save("debug1",debug1)
@property
[docs] def total_number_of_dry_up_outer_loops(self): return self._total_number_of_dry_up_outer_loops
[docs] def dry_up_disabled(self): return self._total_number_of_dry_up_outer_loops == 0
[docs] def _handle_dry_up_iterations(self, program): set_uniform(program, "first_dry_up_iteration", True) # We dispatch over the virtual computation domain. ntx, nty = self._tile_packer.size_in_tiles() all_last_number_dryup_iterations = [] last_number_dryup_iterations = -1 nb_outer_loop_iterations = 0 while True: glDispatchCompute(ntx, nty, 1) glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) nb_outer_loop_iterations += 1 if self._total_number_of_dry_up_outer_loops is None: # We iterate until done. So we have to ask the # question: "are we done?" gs = self.read_global_state() # VERY costly !!! if self._debugging: if nb_outer_loop_iterations > 10: print(nb_outer_loop_iterations, gs.nb_dryup_iterations) if nb_outer_loop_iterations > 100: self.emergency_save() from traceback import print_stack print_stack() exit() if nb_outer_loop_iterations > 200: if nb_outer_loop_iterations % 100 == 0: logging.warning(f"The total number of outer loop iterations ({nb_outer_loop_iterations}) in dry up management seems high") if nb_outer_loop_iterations > 400: logging.error(f"The total number of outer loop iterations ({nb_outer_loop_iterations}) in dry up management seems high. Aborting.") break # Remember! gs.nb_dryup_iterations is the total number of inner # loop iterations that occurred *across all tiles*. # So if 2 tiles have dry ups, this count will be at # least 2. So DO NOT over-interpret this number # as it will vary wildly depending on the number of # inner loops necessary to complete the dry ups. if gs.nb_dryup_iterations == 0: # 0 is a bit of a special case. It occurs only when # no dry ups are detected as early as the first iteration. break elif gs.nb_dryup_iterations == last_number_dryup_iterations: # The number of dry-up iterations inside the tiles # didn't change from the last iteration to the present # one. Therefore we can conclude all the dry ups have # been solved. # Note that gs.nb_dryup_iterations is a global variable # across all tiles (threads). Moreover it is never # reset across outer loops. So the only way to conclude # that nothing happened is to track the fact that # gs.nb_dryup_iterations remains constant over two # successive outer loops. break else: last_number_dryup_iterations = gs.nb_dryup_iterations elif self._total_number_of_dry_up_outer_loops == 0: # Dry ups are disabled (the shader won't handle dry ups inside # the tiles either). So we basically do simple Euler or RK. break elif nb_outer_loop_iterations >= self._total_number_of_dry_up_outer_loops: # We want an exact number of iterations, therefore we # don't need to query the global state to know if # there are more iterations to do. break # Prepare for the next iteration set_uniform(program, "first_dry_up_iteration", False)
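# --- Illustration (hypothetical helpers, not used by the simulator) ---
# The termination test of _handle_dry_up_iterations, isolated from the GL calls: the GPU
# counter is cumulative over all tiles and is never reset between outer loops, so the loop
# stops as soon as the counter stops growing (or is still zero after the first pass).
# `dispatch` and `read_counter` stand in for glDispatchCompute and
# read_global_state().nb_dryup_iterations respectively.
def iterate_until_counter_stalls(dispatch, read_counter, max_iterations=400):
    last = -1
    for _ in range(max_iterations):
        dispatch()                    # one outer dry-up pass on the GPU
        current = read_counter()      # cumulative count across all tiles
        if current == 0 or current == last:
            return current            # nothing happened (anymore): all dry ups are solved
        last = current
    raise RuntimeError("Dry-up handling did not converge")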
[docs] def compute_derivative_with_dry_up_and_rk( self, current_quantity_tex_ndx:int, # at t0 euler_base_quantity_tex_ndx:int, # index of the result of the Euler step quantity_out_tex_ndx:int, current_alpha_tex_ndx=0, ignore_alpha=False): program = self.derivative_and_dry_up_and_rk_cs_program glUseProgram(program) uniforms = { "epsilon": self.epsilon, "dx": self.dx, "dy": self.dy, #"ponderation": float(self.ponderation_runge_kutta), # Allow an integer ponderation (eg. 0 or 1) "max_nb_of_dry_up_loops": np.uintc(self._dbg_max_nb_of_dry_up_inner_loops), } samplers = { "bathy": self.bathymetry_tex, # This one will be the base to compute the final RK value. "quantityt0Sampler": self.quantity_tex[current_quantity_tex_ndx], # These two will be used to compute the derivative over Euler's step. # Derivative code is hardcoded to read from quantitySampler and for # RK this is the result of the previous Euler step "quantitySampler": self.quantity_tex[2], # This is used as the "base" when simulating potential conditions for dry up. "quantityEulerBaseSampler": self.quantity_tex[current_quantity_tex_ndx], "weakBCSampler": self.weak_boundary_cond_tex, "BCCellsSampler": self.boundary_cond_cells_tex, "manning": self.manning_tex, "q_out": (self.quantity_tex[quantity_out_tex_ndx], GL_WRITE_ONLY), # "rhs_out": (self.derivative_tex, GL_WRITE_ONLY), # "source_impl_out": (self.source_impl_tex, GL_WRITE_ONLY), "alpha_image_1": (self.alpha_tex[0], GL_READ_WRITE), "alpha_image_2": (self.alpha_tex[1], GL_READ_WRITE), "activeTilesSampler": self._active_tiles_map_tex[0], "tileIndirection": self._tile_indirection_tex, } if self._debugging: samplers.update( { "debug_out": (self.debug_tex[0], GL_READ_WRITE), "debug_out2": (self.debug_tex[1], GL_READ_WRITE), } ) if self.infiltration_timings is not None: # Note that this code implies that an infiltration source term can be # there at the first compute_derivative and not at the second. So # Runge-Kutta should be correct. samplers["infiltrationZonesSampler"] = self.infiltration_zones_texture #samplers["infiltrationValuesSampler"] = self.infiltration_zones_values_array if self._track_flows_at_border: # Texture 1 is for RK (0 is Euler) samplers["flows_at_border"] = self._flows_at_border_tex[1] # FIXME This will erase Eulere's step results. # samplers["rhs_texture"] = self._rhs_tex wire_program(program, uniforms, samplers,ssbos={"GlobalStateBlock" : self.state_buffer_ssbo_id}) self._handle_dry_up_iterations(program)
[docs] def tile_packer(self) -> TilePacker: return self._tile_packer
[docs] def read_debug_texture(self, tex=0): assert self._debugging, "You forgot to enable debugging" assert tex in (0,1), "I only use two debugging textures" #return read_texture2(self.debug_tex[tex], GL_RGBA32F) psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.debug_tex[tex], GL_RGBA32F))
[docs] def read_infiltration_map(self): psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.infiltration_zones_texture, GL_R32I, width=self.width, height=self.height))
[docs] def read_flows_on_borders(self, which:int): """ Read flows on borders. Each cell of the returned arrays contains: (q_left, q_right, q_top, q_bottom). Used only while debugging :param which: 0 for Euler, 1 for Runge Kutta. """ assert which in [0,1], "You must choose either Euler substep (zero) or Runge Kutta sub step (1)" assert self._track_flows_at_border, "You must enable this functionality first" return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self._flows_at_border_tex[which], GL_RGBA32F))
[docs] def read_rhs(self): """ Read RHS at Euler step or RK step (make sure you know which one is valid when). Each cell of the returned arrays contains: (q_left, q_right, q_top, q_bottom). Used only while debugging """ assert self._track_flows_at_border, "You must enable this functionality first" return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self._rhs_tex, GL_RGBA32F))
[docs] def read_quantity_result(self, current_quantity) -> np.array: """ Returns the matrix of quantities (h,qx,qy), unpacked. Used for debugging. Prefer the ResultsStore. :param current_quantity: is 0,1 for the flip flop textures and 2 for the intermediate Euler result (if one uses Runge-Kutta). """ assert current_quantity in (0,1,2), f"Bad quantity index : {current_quantity}" psw, psh = self._tile_packer.packed_size() return self._tile_packer.unpack_and_deshuffle_array( read_texture(self.integration_fb, current_quantity, psw, psh, GL_RGB32F))
# def read_weak_bc_map(self) -> np.array: # psw, psh = self._tile_packer.packed_size() # return self._tile_packer.unpack_and_deshuffle_array( # read_texture2(self.weak_boundary_cond_tex, GL_R32UI) # )
[docs] def read_tile_packed_quantity_result(self, current_quantity): """ Returns the matrix of quantities (h,qx,qy), "tile-packed". """ assert current_quantity in (0,1) psw, psh = self._tile_packer.packed_size() return read_texture(self.integration_fb, current_quantity, psw, psh, GL_RGB32F)
[docs] def read_bathymetry(self): return self._tile_packer.unpack_and_deshuffle_array( read_texture2(self.bathymetry_tex, GL_R32F))
[docs] def read_alpha(self, current_quantity): """Returns the matrix of alpha-correction (dry ups), "tile-_un_packed".""" assert current_quantity in (0, 1) return self._tile_packer.unpack_and_deshuffle_array( self.read_tile_packed_alpha(current_quantity))
[docs] def read_tile_packed_alpha(self, current_quantity): """ Returns the matrix of alpha-correction (dry ups), "tile-packed". """ assert current_quantity in (0,1) psw, psh = self._tile_packer.packed_size() tex = read_texture2(self.alpha_tex[current_quantity], GL_RGBA32F) # Alpha texture is stored as red/green (two components) # but somehow pyopengl can't read into that format, therefore # I ask RGBA and then I slice out what I need. return tex[:,:,0:2]
[docs] def read_active_cells_map(self): """ Read the active cells map. NOTE! Beware that this map is "packed" (as in tile packing). """ ntx, nty = self._tile_packer.packed_size_in_tiles() active_tiles = read_texture2(self._active_tiles_map_tex[0], GL_R8UI, width=ntx, height=nty) return active_tiles
[docs] def read_global_state(self) -> GLSimulationGlobalState: """Read the global state variables of the simulation, as reported by the shaders in the GPU. Be aware that this implies reading information from the GPU which usually leads to a huge drop in performance (up to 50% !!!). :return: simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code """ assert self.state_buffer_ssbo_id is not None, "The SSBO is not configured" glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) # GL_INVALID_VALUE is generated if offset or size is negative, or if # offset+size is greater than the value of GL_BUFFER_SIZE for the buffer # object. res = glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, GLOBALS_BUFFER_SIZE +self.nb_infiltration_zones*4).tobytes() glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE) parsed_ssbo = struct.unpack(GLOBALS_STRUCT_PACK_FORMAT+"f"*self.nb_infiltration_zones, res) ( simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code, total_active_tiles_hi, total_active_tiles_lo, ) = parsed_ssbo[0 : len(GLOBALS_STRUCT_PACK_FORMAT)] infiltration_zones = parsed_ssbo[len(GLOBALS_STRUCT_PACK_FORMAT):] total_active_tiles = total_active_tiles_hi * 0x100000000 + total_active_tiles_lo # print(simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles) #print(f"Infiltration zones: {infiltration_zones}") #r= [simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, infiltration_zones, nb_active_tiles, status_code] assert nb_active_tiles >= 0 r = GLSimulationGlobalState( simulation_time, time_step, active_infiltration_line, nb_dryup_iterations, nb_active_tiles, status_code, total_active_tiles, infiltration_zones ) #print(self._current_step, r.simulation_time, r.time_step, r.status_code) return r
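# --- Illustration (hypothetical helper, not used by the simulator) ---
# The CPU-side decoding performed by read_global_state, without any GPU access: the global
# state block is a packed struct (GLOBALS_STRUCT_PACK_FORMAT, i.e. "dfLLLLLL", followed by
# one float per infiltration zone) and the 64-bit active-tile counter arrives split into
# two 32-bit halves. `struct` and GLOBALS_STRUCT_PACK_FORMAT come from the top of this module.
def _example_decode_global_state():
    fmt = GLOBALS_STRUCT_PACK_FORMAT + "f" * 2       # pretend there are 2 infiltration zones
    raw = struct.pack(fmt, 12.5, 0.001, 1, 0, 42, 0, 1, 2, 0.3, 0.7)
    fields = struct.unpack(fmt, raw)
    sim_time, time_step, infil_line, dryups, nb_active_tiles, status, hi, lo = fields[:8]
    total_active_tiles = hi * 0x100000000 + lo       # recombine the 64-bit counter
    assert total_active_tiles == 0x100000002
    return sim_time, time_step, total_active_tiles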
[docs] def _clear_global_variables_buffer(self, sim_time, time_step): data = [0] * len(GLOBALS_STRUCT_PACK_FORMAT) + [0] * self.nb_infiltration_zones data[0] = sim_time data[1] = time_step data_buffer = struct.pack( GLOBALS_STRUCT_PACK_FORMAT + "f" * self.nb_infiltration_zones, *data ) glBindBuffer(GL_SHADER_STORAGE_BUFFER, self.state_buffer_ssbo_id) glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, len(data_buffer), data_buffer) glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)
[docs] def reset_globals(self, sim_time, time_step, step_num, h, qx, qy): """ :param time_step: the duration of the time step (in case you have a fixed time step) :param step_num: The step number of the next computed simulation step. """ self._current_step = step_num self._clear_global_variables_buffer(sim_time, time_step) self.set_buffers(quantity_arrays=[np.dstack([h, qx, qy]) for i in range(3)])
[docs] def compute_max_step_size2(self): self.compute_time_step_for_tiles_phase1(0) self.compute_time_step_for_tiles_phase2(0)
# def apply_parallel_reduction(self, program, frame_buffer, textures_ids, # buffer_attachments = [GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1], # debug=False, read_result=True): # """ Apply a parallel reduction algorithm using the `program`. # The computation will be applied only on active stripes. # IT IS NOT SAFE to use this if the data that is present *outside* # the region defined by the stripes is not "neutral" as far as # the computation done in the `program` are concerned. This is # du to rounding errors that may leed the program to consider # pixels outside the stripe area. # FIXME That's dirty, one should make sure this function is safe # to use. But I'm afraid it won't be possible since sooner or later # there will be odd-sized stripes. # FIXME The current way of using the parallel execution might not # be safe to a change in the structure of stripes. Double check that! # """ # assert len(textures_ids) == 2 # assert len(buffer_attachments) == 2 # _, internal_format = textures_formats[textures_ids[0]] # assert textures_formats[textures_ids[0]][1] == internal_format, \ # "I expect both the texture to have the same internal format" # glUseProgram(program) # source_texture_ndx = 0 # half_width, half_height = self.width, self.height # while max(half_width, half_height) > 1: # with time_it(f"tiles={half_width*2}x{half_height*2}"): # if debug: # # Debugging code # #print(f"Debug: width={half_width}-height={half_height}, buffer:{currentMinStepSizeTexture}") # p = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], half_width, half_height, internal_format) # s = np.argwhere( p > 0) # p2 = read_texture(frame_buffer, buffer_attachments[1-source_texture_ndx], half_width, half_height, internal_format) # s2 = np.argwhere( p2 > 0) # # The ceil operation is very important to handle # # odd dimensions. Note that if a dimension is a prime number, then # # we never get even-sized half textures ! For example # # 17 will give : 9, 5, 3, 1 ! # # IMPORTANT This computation allows us to do the parallel reduction # # without cleaning the 2 buffers we use for it. Without that, the # # values picked at borders of buffers (quarter buffers) may be off # # by one texel and pick trashed data coming from previous iteration. # # This has nothing to do with the use of stripes but much to do # # with the fact that we have quarter buffers of width/height which # # are sometimes odd, sometimes even. # # set_uniform(program, "factor_x", half_width/ceil(half_width/2)) # # set_uniform(program, "factor_y", half_height/ceil(half_height/2)) # # if half_width % 2 == 0: # # factor_x = 2.0 # # else: # # factor_x = (half_width-2)/(ceil(half_width/2)-1) # # #factor_x = (half_width-1)/half_width/2.0 # # if half_height % 2 == 0: # # factor_y = 2.0 # # else: # # factor_y = (half_height-2)/(ceil(half_height/2)-1) # # #factor_y = (half_height-1)/half_height/2.0 # # factor_x = factor_y = 2.0 # # This one is tricky. In principle, factor shoud be two # # as we're doing the reduction by a factor two. # # Issues occur when width/height are odd. # # In that case, on the second iteration of the reduction # # you may end up collecting data out of the source # # area because the destination area dimension has been # # rounded up (and you'll multiply coordinates by two). # # FIXME A cleaner way to handle this would be to # # bound the texture look up inside the source rectangle. # # Another way would be to sitck the "quarter" destination # # area in the bottom right corner instead of top left. 
# # This way, we'd use OpenGL's texture coordinates clipping # # to our advantage. # factor_x = half_width/(ceil(half_width/2)) # factor_y = half_height/(ceil(half_height/2)) # set_uniform(program, "factor_x", factor_x) # set_uniform(program, "factor_y", factor_y) # old_half_width = half_width # old_half_height = half_height # # half_width = max(1,floor(half_width/2)) # # half_height = max(1,floor(half_height/2)) # half_width = ceil(half_width/2) # half_height = ceil(half_height/2) # # Source texture # glActiveTexture(GL_TEXTURE0) # activate texture unit # glBindTexture(GL_TEXTURE_RECTANGLE, textures_ids[source_texture_ndx]) # set_uniform(program, "minStepSizeSampler", 0) # wire texture unit 0 to texture sampler # # Destination texture # glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer) # # glDrawBuffer - specify which color buffers of the FB are to be drawn into # # (this one sets only ONE buffer so you'll have access to only one "gl_FragColor" # # or "out" varaible; for more use glDrawBuffer_s_) # #glDrawBuffer(GL_COLOR_ATTACHMENTS[1 - source_texture_ndx]) # glDrawBuffer(buffer_attachments[1 - source_texture_ndx]) # if True: # #draw_iso_quads(program, half_width, half_height) # self._draw_single_quad(program, x=0, y=0, w=half_width,h=half_height) # else: # # FIXME CRITICAL Dirty and needless but to remove it I have to get the rounding issues # # right. I have already spent countless hours on these issues and I have yet to # # find a 100% working solution. This is a super TOUGH problem. # # I clear the destiantion texture so that when I'll use it as a asource # # at the next iteration, I'll be able to query "off by one" texels without # # issues. A first idea would be to clear only the texels that can be # # a problem; not the whole texture. # # Second idea: just do parallel reduction without stripes. # clear_texture(textures_ids[1-source_texture_ndx]) # # Fortran==column major, like OpenGL. # # FIXME But since this is a diagonal matrix, do we care ??? # # FIXME Do we have to set it again and again ? # pm = np.asfortranarray(np.diag([1,1,0,1]).astype(np.float32)) # set_uniform(program, "uModelViewProjectionMatrix", pm) # # Bottom/left quarter of the texture. But that will be expanded # # to the full texture in the shader because it multiplies every # # texture coord by 2. # glViewport(0, 0, half_width, half_height) # stripes = self._stripes_on_nap[max(half_width, half_height)] # glBindVertexArray(stripes.get_vao_id()) # glDrawArrays(GL_TRIANGLES, 0, stripes.get_vao_len()) # glBindVertexArray( GL_NONE) # # Swap buffers # source_texture_ndx = 1 - source_texture_ndx # if debug: # # debugging code # p2 = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], half_width, half_height, internal_format) # s2 = np.argwhere( p2 > 0) # # Read the result # assert half_width == 1 and half_height == 1 # if read_result: # # FIXME This is costly !!!! Find another way # with time_it("-- Get min step size 1pixel"): # # Read the bottom left texel (only one !) # result = read_texture(frame_buffer, buffer_attachments[source_texture_ndx], 1, 1, internal_format) # # Note that pyopengl returns a numpy array # # item() translates numpy float to python float # result = result[0,0].item() # else: # result = None # # # Now lets read the result # # glBindFramebuffer(GL_FRAMEBUFFER, frame_buffer) # # # glReadBuffer — select a color buffer source for pixels # # glReadBuffer(GL_COLOR_ATTACHMENTS[source_texture_ndx]) # # assert half_width == 1 and half_height == 1 # # # Read the bottom left texel (only one !) 
# # # Note that pyopengl returns a numpy array # # with time_it("-- Get min step size 1pixel"): # # result = glReadPixels(0, 0, 1, 1, GL_RED, GL_FLOAT)[0,0].item() # item() translates numpy float to python float # return result, source_texture_ndx
[docs] def plot_progress(self, current_quantity, zoom=1.0, mean=0.5, flip=True, gl_window_viewport=(0,0,100,100)): self._gl_window_viewport = glGetIntegerv(GL_VIEWPORT) # Used int tests program = self._texture_viewer_program2 uniforms = { "zoom": float(zoom), "mean": float(mean), # FragCoord (screen) to texture coord "textureCoordinateTransform": ( float(self.nbx/self._gl_window_viewport[2]), float(self.nby/self._gl_window_viewport[3])) } textures= { "quantityTexture": self.quantity_tex[current_quantity], "tileIndirection" : self._tile_indirection_tex, } glUseProgram(program) glBindFramebuffer(GL_FRAMEBUFFER, GL_NONE) # draw on screen glClear(GL_COLOR_BUFFER_BIT) self._draw_active_tiles(program, uniforms, textures, gl_window_viewport) if flip: pygame.display.flip()
#gl_timings_results_full() #gl_timings_results(label)
[docs] def set_border(array, value): array[0,:] = value array[ array.shape[0]-1,:] = value array[:, 0] = value array[:, array.shape[1]-1] = value return array
[docs] def swimming_pool_with_hole_in_water(): width, height = 50, 50 array_shape = (height, width) bathymetry = np.zeros(array_shape) set_border(bathymetry, 10) active_meshes = np.ones(array_shape, dtype=bool) water_height = np.ones(array_shape) set_border(water_height, 0) water_height[20:50-20, 20:50-20] = 0 quantity = np.dstack([water_height, np.zeros(array_shape), np.zeros(array_shape) ]) manning = np.zeros(array_shape) glsim = GLSimulation( width, height, dx=0.01, dy=0.01) glsim.set_buffers( quantity_arrays=[quantity]*3, bathymetry_array=bathymetry, source_cont_array=np.zeros(array_shape), source_impl_array=np.zeros((height, width ,3)), manning_array=manning, active_meshes_array=active_meshes) return glsim
[docs] def convert_wolf_rk_ponderation(wolf2d: prev_sim2D): """ Convert the ponderation value from prev_sim2D to the one used in the GLSimulation. """ ponderation = wolf2d.parameters._scheme_rk if ponderation >= 1.0 or ponderation <= 0.0: logging.error(f"Unsupported ponderation value {ponderation}. I expect 'ponderation' between zero and one. I'll replace by R-K with 0.3/0.7.") ponderation = 0.3 return ponderation
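# Examples of the clamping above: a Wolf ponderation of 0.3 is kept as-is, while values
# such as 0.0, 1.0 or 1.5 fall outside the open interval (0, 1) and are replaced by the
# default Runge-Kutta ponderation of 0.3 (i.e. a 0.3/0.7 weighting of the two sub-steps).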
[docs] def get_epsilon(wolf2d: prev_sim2D): """ Get the epsilon value used in the GLSimulation. Based on the spatial resolution of the simulation, we take the minimum of dx and dy to the power of 4. The result is then compared to 0.000_001 and the minimum of the two values is returned. """ return min( min(wolf2d.dx, wolf2d.dy)**4, 0.000_001)
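# Worked example for get_epsilon: with dx = dy = 0.01 the spatial term is 0.01**4 = 1e-8,
# which is below the 1e-6 cap, so epsilon = 1e-8; with dx = dy = 0.5 the spatial term is
# 0.0625, the cap wins, and epsilon = 1e-6.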
[docs] def get_report_frequency(m: prev_sim2D) -> SimulationDuration: if m.parameters._writing_mode == 1: return SimulationDuration.from_seconds(m.parameters._writing_frequency) elif m.parameters._writing_mode == 0: return SimulationDuration.from_steps(int(m.parameters._writing_frequency)) else: raise Exception(f"Unrecognized value for 'ntypefreq' in prev_sim2D : {m.parameters._writing_mode}")
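# Illustration of the mapping above: _writing_mode == 1 means "report every
# _writing_frequency seconds of simulated time" (SimulationDuration.from_seconds), while
# _writing_mode == 0 means "report every _writing_frequency computed steps"
# (SimulationDuration.from_steps).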
[docs] def plot_evolution_of_simulation_base(sim: SimpleSimulation, rs: ResultsStore): #dx, dy = wolf2d.myparam.dxfin, wolf2d.myparam.dyfin dx, dy = sim.param_dx, sim.param_dy r = [] times = [] clock_times = [] for i in tqdm.tqdm(range(1,rs.nb_results)): t,h,qx,qy,dryup_niter,clock_t = rs.get_named_result(["t","h","qx","qy","dryup_niter","clock_t"],i) msk = h > 0 x_discharge = dx * qx[msk] y_discharge = dy * qy[msk] q = np.sqrt(x_discharge**2 + y_discharge**2) r.append( [np.sum(h[msk])*dx*dy, np.sum(q), dryup_niter]) times.append(t) clock_times.append(clock_t) rs.close() r = np.array(r) fig, axs = plt.subplots(2,2) axs[0,0].plot(times, r[:,0]) axs[0,0].set_xlabel("Sim. Time (s)") axs[0,0].set_ylabel("Water (m³)") axs[0,1].plot(times, r[:,1]) axs[0,1].set_xlabel("Sim. Time (s)") axs[0,1].set_ylabel("Q (m³/s)") #print((r[1:,2]-r[:-1,2]).tolist()) clock_times = np.array(clock_times) axs[1,0].bar(times, np.hstack([clock_times[1:][0], clock_times[1:] - clock_times[:-1]]), width=times[-1] /(1.5*rs.nb_results)) axs[1,0].set_xlabel("Clock Time (s)") axs[1,0].set_ylabel("Sim. Time (sec./recd)") # dry ups tiles * loops * RK or euler axs[1,1].bar(times, [r[0,2]] + (r[1:,2]-r[:-1,2]).tolist(), width=times[-1] /(1.5*rs.nb_results)) axs[1,1].set_xlabel("Sim. Time (s)") axs[1,1].set_ylabel("Dried ups tiles × loops") fig.tight_layout() plt.show()
[docs] def plot_evolution_of_simulation(path_to_model: Path, path_to_results_store: Path): # FIXME Put this in another package: the simulator should not know about the ResultStore import matplotlib.pyplot as plt # wolf2d: prev_sim2D =prev_sim2D(dir=str(path_to_model), splash=False, in_gui=False) rs = ResultsStore(path_to_results_store, "r") #dx, dy = wolf2d.dx_fine, wolf2d.dy_fine dx, dy = 1, 1 r = [] times = [] clock_times = [] for i in tqdm.tqdm(range(1,rs.nb_results)): t,h,qx,qy,dryup_niter,clock_t = rs.get_named_result(["t","h","qx","qy","dryup_niter","clock_t"],i) msk = h > 0 x_discharge = dx * qx[msk] y_discharge = dy * qy[msk] q = np.sqrt(x_discharge**2 + y_discharge**2) r.append( [np.sum(h[msk])*dx*dy, np.sum(q), dryup_niter]) times.append(t) clock_times.append(clock_t) rs.close() r = np.array(r) fig, axs = plt.subplots(2,2) axs[0,0].plot(times, r[:,0]) axs[0,0].set_xlabel("Sim. Time (s)") axs[0,0].set_ylabel("Water (m³)") axs[0,1].plot(times, r[:,1]) axs[0,1].set_xlabel("Sim. Time (s)") axs[0,1].set_ylabel("Q (m³/s)") #print((r[1:,2]-r[:-1,2]).tolist()) clock_times = np.array(clock_times) axs[1,0].bar(times, np.hstack([clock_times[1:][0], clock_times[1:] - clock_times[:-1]]), width=times[-1] /(1.5*rs.nb_results)) axs[1,0].set_xlabel("Clock Time (s)") axs[1,0].set_ylabel("Sim. Time (sec./recd)") # dry ups tiles * loops * RK or euler axs[1,1].bar(times, [r[0,2]] + (r[1:,2]-r[:-1,2]).tolist(), width=times[-1] /(1.5*rs.nb_results)) axs[1,1].set_xlabel("Sim. Time (s)") axs[1,1].set_ylabel("Dried ups tiles × loops") fig.tight_layout() plt.show()