Source code for wolfhece.rain_SPWMI

"""
Author: HECE - University of Liege, Pierre Archambeau
Date: 2024

Copyright (c) 2024 University of Liege. All rights reserved.

This script and its content are protected by copyright law. Unauthorized
copying or distribution of this file, via any medium, is strictly prohibited.
"""

from os.path import join,exists,normpath
import requests
import pandas as pd
import numpy as np
from calendar import month, monthrange
import datetime as dt
import matplotlib.pyplot as plt
from sympy import lowergamma
import time
import re

from .PyTranslate import _

STATIONS_MI_RAIN="""92150015	PETIGNY Barrage
92880015	CUL-DES-SARTS
91370015	BOUSSU-EN-FAGNE
10430015	WAVRE
10810015	BOUSVAL
60480015	RACHAMPS-NOVILLE
61280015	ORTHO
61680015	SAINT-HUBERT aéro
68580015	TAILLES
70030015	SART-TILMAN
70160015	OUFFET
70180015	MEAN
70480015	EREZEE
70870015	MARCHE
10570015	LOUVAIN LA NEUVE
15790015	PERWEZ
15840015	HELECINE
18980015	UCCLE
19540015	TUBIZE
19970015	SOIGNIES
23480015	LILLOIS
23880015	SENEFFE
24590015	DERGNEAU
28920015	ENGHIEN
29930015	CHIEVRES
32770015	KAIN
34760015	MOUSCRON
35340015	WASMUEL
35720015	TRIVIERES
36280015	ROISIN
36470015	ROUVEROY
36490015	BLAREGNIES
37170015	PERUWELZ
38850015	COMINES Barrage-Ecl
52840015	GEMMENICH
55780015	WAREMME
55960015	AWANS
56490015	BATTICE
57570015	LANAYE
64970015	TERNELL
65290015	MONT-RIGI
65380015	SPA aerodrome
65500015	JALHAY
66570015	LOUVEIGNE
67120015	COO INF.
67120115	COO INF.
67180015	COO SUP.
67180115	COO SUP.
68480015	VIELSALM
69580015	ROBERTVILLE
69670015	BUTGENBACH
69670115	BUTGENBACH
71680015	LANDENNE
72280015	MODAVE
72960015	VEDRIN
73350015	MORNIMONT Bar-Ecluse
73690015	CHATELET
73950015	MONCEAU Bar-Ecluse
74850015	SOLRE S/S Bar-Ecluse
75770015	MOMIGNIES
76290015	LIGNY
76780015	GERPINNES
78650015	PLATE TAILLE
78880015	SENZEILLES
79670015	SIVRY
80630015	ANSEREMME
81280015	SAINT-GERARD
81380015	CRUPET
81570015	CINEY
81890015	FLORENNES
83480015	DAVERDISSE
83880015	LIBIN
84680015	BEAURAING
85180015	ROCHEFORT
85380015	NASSOGNE
86770015	GEDINNE
86870015	CROIX-SCAILLE
94360015	VRESSE
94690015	BOUILLON
95740015	FRATIN
95880015	MEIX-LE-TIGE
95960015	ARLON
95960115	ARLON
96170015	SUGNY
96320015	BERTRIX
96520015	STRAIMONT
96980015	NAMOUSSART
97430015	TORGNY
97810015	ATHUS
97940015	AUBANGE
97970015	SELANGE
98160015	ORVAL
99150015	STEFFESHAUSEN
99220015	SANKT-VITH
99480015	BASTOGNE
"""
STATS_HOURS_IRM=np.asarray([1,2,3,6,12,24,2*24,3*24,4*24,5*24,7*24,10*24,15*24,20*24,25*24,30*24],dtype=np.int32)
STATS_MINUTES_IRM=np.asarray(STATS_HOURS_IRM)*60
class SPW_MI_pluvioraphs():
    """Management of the SPW-MI rain gauge data through the former
    "voies-hydrauliques.be" website
    http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/

    An illustrative usage sketch follows the class definition.
    """

    def __init__(self) -> None:
        # Build two lookup dictionaries keyed on the code / name strings
        self.code2name = {}
        self.name2code = {}
        self.db_rains = None

        for mypluvio in STATIONS_MI_RAIN.splitlines():
            mycode, myname = mypluvio.split("\t")
            self.code2name[mycode] = myname
            self.name2code[myname.lower()] = mycode

    def get_names(self):
        return list(self.name2code.keys())

    def get_codes(self):
        return list(self.code2name.keys())

    def get_rain_fromweb(self, fromyear, toyear, code='', name='', filterna=True):
        rain = []
        for curyear in range(fromyear, toyear+1):
            rain.append(self.get_yearrain_fromweb(curyear, code, name, filterna))
            print(curyear)

        try:
            return pd.concat(rain)
        except:
            return None

    def get_yearrain_fromweb(self, year=2021, code='', name='', filterna=True):
        rain = []
        for curmonth in range(1, 13):
            rain.append(self.get_monthrain_fromweb(curmonth, year, code, name))

        try:
            rain = pd.concat(rain)
            if filterna:
                rain[rain.isna()] = 0.
            return rain
        except:
            return None

    def get_monthrain_fromweb(self, month=7, year=2021, code='', name='', mysleep=.2):
        """Retrieves hourly data from the SPW-MI VH website

        The data are read for the whole requested month
        http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/
        """
        station = code
        if name != "":
            station = self.name2code[name.lower()]

        # number of days in the requested month
        nbdays = monthrange(year, month)[1]

        url = "http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/annuaires/stathorairetab.do?code="+station+"&mois="+str(month)+"&annee="+str(year)
        res = requests.get(url)
        html_tables = pd.read_html(res.content, match='.+')

        startdate = dt.date(year, month, 1)
        enddate = startdate + pd.DateOffset(months=1)

        try:
            # parse the HTML table that contains the rainfall data
            rain = html_tables[12].to_numpy()[0:24, 1:nbdays+1].astype('float').reshape(24*nbdays, order='F')
            rain = pd.Series(rain, index=pd.date_range(startdate, enddate, inclusive='left', freq='1H'))
        except:
            rain = np.zeros(nbdays*24)
            rain = pd.Series(rain, index=pd.date_range(startdate, enddate, inclusive='left', freq='1H'))

        time.sleep(mysleep)

        return rain

    def compute_stats_Q(self, rain, listhours):
        """Computes the "maximum cumulative depth" statistics by convolution,
        based on a vector of durations expressed in hours

        Unit: mm
        """
        mymaxQ = np.zeros(len(listhours), dtype=np.float64)
        k = 0
        for locstat in listhours:
            a = np.ones(locstat)
            mymaxQ[k] = np.max(np.convolve(rain, a, 'same'))
            k += 1

        return mymaxQ

    def compute_stats_i(self, rain, listhours):
        """Computes the "maximum mean intensity" statistics by convolution,
        based on a vector of durations expressed in hours

        Unit: mm/h
        """
        mymeanQ = self.compute_stats_Q(rain, listhours) / np.asarray(listhours, dtype=np.float64)
        return mymeanQ

    def plot(self, rain:pd.Series, toshow=False, xbounds=None, ticks='M', ExistingFig=None):
        if ExistingFig is None:
            fig, ax = plt.subplots(1, 1, figsize=(15, 8))
        else:
            fig = ExistingFig[0]
            ax = ExistingFig[1]

        x = rain.index
        if not xbounds is None:
            minyear = xbounds[0].year
            maxyear = xbounds[1].year+1
            if ticks == 'M':
                xticks = [xbounds[0]+pd.DateOffset(months=k) for k in range(int((xbounds[1]-xbounds[0]).days/30)+1)]
            elif ticks == 'D':
                xticks = [xbounds[0]+pd.DateOffset(days=k) for k in range(int((xbounds[1]-xbounds[0]).days))]
            elif ticks == '2H':
                xticks = [xbounds[0]+pd.DateOffset(hours=k) for k in range(0, int((xbounds[1]-xbounds[0]).days*24), 2)]
            elif ticks == 'H':
                xticks = [xbounds[0]+pd.DateOffset(hours=k) for k in range(int((xbounds[1]-xbounds[0]).days*24))]
        else:
            minyear = x[0].year
            maxyear = x[-1].year+1
            xticks = [dt.datetime(minyear, 1, 1)+pd.DateOffset(months=k) for k in range(0, (maxyear-minyear)*12+1, 3)]

        ax.fill_between(rain.index, rain.values, step='pre', alpha=0.7)
        ax.step(rain.index, rain.values, where='pre', label=rain.name)

        ax.set_xticks(xticks)
        if ticks == 'M':
            ax.set_xticklabels([curtick.strftime('%b/%Y') for curtick in xticks], rotation=45, fontsize=8)
        elif ticks == 'D':
            ax.set_xticklabels([curtick.strftime('%d/%m/%Y') for curtick in xticks], rotation=45, fontsize=8)
        elif ticks == '2H' or ticks == 'H':
            ax.set_xticklabels([curtick.strftime('%d/%m %H:%M') for curtick in xticks], rotation=45, fontsize=8)

        ax.set_xlabel('Date')
        ax.set_ylabel('Précipitation moyenne horaire [mm/h]')
        ax.legend()

        if not xbounds is None:
            ax.set_xlim(xbounds)

        if toshow:
            plt.show()

        return fig, ax

    def plot_periodic(self, rain:pd.Series, origin:dt.datetime, offset_in_months, toshow=False):
        """Comparison of several years of the same series over a common one-year horizon"""
        fig, ax = plt.subplots(1, 1, figsize=(15, 8))

        end = rain.index[-1]
        startdate = origin
        enddate = origin
        offset = pd.DateOffset(months=offset_in_months)

        while enddate <= end:
            enddate = startdate + offset
            locrain = rain[startdate:enddate]
            if len(locrain) > 0:
                i1 = (locrain.index[0]-startdate).days*24
                x = np.arange(i1, i1+len(locrain.values))
                # ax.fill_between(x,locrain.values,step='pre',alpha=0.5)
                ax.step(x, locrain.values, where='pre')
            startdate = enddate

        xticks = [k*30*24 for k in range(offset_in_months+1)]
        ax.set_xticks(xticks)
        ax.set_xticklabels([str(k*30) for k in range(offset_in_months+1)])

        if toshow:
            plt.show()

    def saveas(self, rain:pd.Series, filename:str):
        rain.to_csv(filename, header=['Data'])

    def fromcsv(self, stationame='', stationcode=0, filename:str='', fromdate:dt.datetime=None, todate:dt.datetime=None):
        myname = filename
        if stationame != '':
            myname = stationame
            filename = self.name2code[stationame.lower()]+'.csv'
            filename = join('spw', filename)
        elif stationcode > 0:
            myname = self.code2name[str(stationcode)]
            filename = str(stationcode)+'.csv'
            filename = join('spw', filename)

        if exists(filename):
            mydata = pd.read_csv(filename, header=0, index_col=0, parse_dates=True).squeeze("columns")
            mydata.name = myname.upper()
        else:
            return

        if fromdate is None and todate is None:
            return mydata
        elif fromdate is None:
            return mydata[:todate]
        elif todate is None:
            return mydata[fromdate:]
        else:
            return mydata[fromdate:todate]

    def from_xlsx_SPW(self, dir='', stationame='', stationcode=0, fromdate:dt.datetime=None, todate:dt.datetime=None, create_db=False):
        """Reads several Excel files into as many rain series

        Returns a dictionary with the series"""
        from os import listdir

        if dir == '' and self.db_rains is None:
            return None

        def read_xls_rainSPW(dir, filename=''):
            """Reads one Excel file"""
            if filename == '':
                return None

            myrains = pd.read_excel(join(dir, filename))
            myrains = myrains.dropna(how='all').dropna(how='all', axis=1)
            myrains.columns = myrains.iloc[0]
            myrains = myrains.iloc[1:]
            myrains = myrains.set_index('Date')

            # collapse multiple spaces to get clean column headers
            newnames = [re.sub(' +', ' ', curstat) for curstat in myrains.keys()]
            myrains.columns = newnames

            return myrains

        def split_series(mydataframe:pd.DataFrame):
            """Splits the global dataframe into individual pandas Series"""
            myseries = {}
            for curcol in mydataframe.keys():
                locser = mydataframe[curcol].squeeze()
                # first non-NaN value
                first = locser.first_valid_index()
                # last non-NaN value
                last = locser.last_valid_index()
                # fill the remaining NaNs with 0.
                myseries[int(curcol.split()[0])] = locser[first:last].fillna(0.)

            return myseries

        if self.db_rains is None:
            filenames = []
            for file in listdir(dir):
                if file.endswith(".xlsx"):
                    filenames.append(file)

            myrains = pd.concat([read_xls_rainSPW(dir, filename) for filename in filenames], sort=True)
            myser = split_series(myrains)

            if create_db:
                self.db_rains = myser
        else:
            myser = self.db_rains

        if stationame != '' or stationcode != 0:
            if stationame != '':
                stationcode = int(self.name2code[stationame.lower()])

            if stationcode in myser.keys():
                mydata = myser[stationcode]
                if fromdate is None and todate is None:
                    return mydata
                elif fromdate is None:
                    return mydata[:todate]
                elif todate is None:
                    return mydata[fromdate:]
                else:
                    return mydata[fromdate:todate]
            else:
                return None
        else:
            return myser

    def import_all(self, dirout, fromyear=2002, toyear=2021):
        dirout = normpath(dirout)
        for curstation in self.code2name.keys():
            myflow = self.get_rain_fromweb(fromyear, toyear, curstation)
            if not myflow is None:
                self.saveas(myflow, join(dirout, curstation+'.csv'))
            print(curstation)

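# Illustrative sketch (not part of the original module): how the web scraping and the
# IRM-style duration statistics are meant to be combined. The station name 'Jalhay' and
# the period below are arbitrary assumptions; the function is not called at import time.
def _example_monthly_stats(station_name='Jalhay', year=2021, month=7):
    """Download one month of hourly rain for a station and return the maximum
    cumulative depths [mm] and the maximum mean intensities [mm/h] for the
    standard IRM durations."""
    pluvios = SPW_MI_pluvioraphs()
    rain = pluvios.get_monthrain_fromweb(month=month, year=year, name=station_name)
    max_depths = pluvios.compute_stats_Q(rain, STATS_HOURS_IRM)       # [mm]
    max_intensities = pluvios.compute_stats_i(rain, STATS_HOURS_IRM)  # [mm/h]
    return max_depths, max_intensities
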
if __name__=="__main__":
    # example
    my = SPW_MI_pluvioraphs()
    myrain = my.get_monthrain_fromweb(name="Jalhay")
    mystats = my.compute_stats_Q(myrain, STATS_HOURS_IRM)
    print(mystats)
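    # Additional illustrative usage (assumptions: the default period of the call above,
    # i.e. July 2021, and write access to the current directory); not part of the
    # original example.
    myintens = my.compute_stats_i(myrain, STATS_HOURS_IRM)   # maximum mean intensities [mm/h]
    print(myintens)
    fig, ax = my.plot(myrain, toshow=True, ticks='D',
                      xbounds=(dt.datetime(2021, 7, 1), dt.datetime(2021, 8, 1)))
    my.saveas(myrain, 'jalhay_2021_07.csv')                  # hypothetical output file name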