Source code for wolfhece.flow_SPWMI

"""
Author: HECE - University of Liege, Pierre Archambeau
Date: 2024

Copyright (c) 2024 University of Liege. All rights reserved.

This script and its content are protected by copyright law. Unauthorized
copying or distribution of this file, via any medium, is strictly prohibited.
"""

from operator import mod
import requests
import pandas as pd
import numpy as np
from calendar import monthrange
from datetime import timedelta, date
import matplotlib.pyplot as plt
import datetime as dt
from os.path import join,normpath,exists
import re
import time

from .PyTranslate import _

# List of the SPW-MI stations as of 05/2022 - one "<code><TAB><name>" entry per line

[docs]
STATIONS_MI_FLOW="""6228	CHAUDFONTAINE
1951	TUBIZE
2341	CLABECQ
2371	RONQUIERES
2473	OISQUERCQ
2483	RONQUIERES Bief Aval
2536	GOUY
2537	GOUYCanal
2707	LESSINESBiefAmont
2713	PAPIGNIESBiefAval
2952	IRCHONWELZ
2971	ATHDENDREORIENTALE
3274	KAIN Avnt Bar-Ecl
3282	TOURNAI
3561	BOUSSOIT
3643	HYON
3778	SAINT-DENIS
3884	COMINES Aval Bar-Ecl
3886	COMINES Amont
3891	PLOEGSTEERT
5291	KELMIS
5436	LIXHE Aval
5447	LIXHE Bief Amont
5572	BERGILERS Amont
5771	HACCOURT
5796	MAREXHE
5804	ANGLEUR GR
5806	ANGLEUR GR
5826	SAUHEID
5857	MERy
5904	COMBLAIN-AU-PONT
5921	TABREUX
5922	HAMOIR
5953	DURBUY
5962	HOTTON
5991	NISRAMONT
6021	MABOMPRe
6122	ORTHO
6228	CHAUDFONTAINE
6387	EUPEN
6517	POLLEUR
6526	BELLEHEID
6621	MARTINRIV
6651	REMOUCHAMPS
6671	TARGNON
6732	STAVELOT
6753	LASNENVILLE
6803	CHEVRON
6832	TROIS-PONTS
6933	MALMEDY
6946	BEVERCE
6971	WIRTZFELD
6981	BULLINGEN
6991	MALMEDY
7117	IVOZ-RAMET
7132	AMAY
7137	AMPSIN
7139	HUYUS
7141	HUY
7228	MODAVE
7242	MOHA
7244	HUCCORGNE
7319	SALZINNES
7394	MONCEAU_Aval_Bar-EcL
7396	MONCEAU_AmBar-Ecl
7466	FONT-VALMON_Am B-E
7474	LABUISSIERE__Av B-E
7487	SOLRE
7711	JAMIOUL
7781	WALCOURT
7784	WALCOURT
7812	WALCOURT-VOGENEE
7831	SILENRIEUX
7843	BOUSSU-LEZ-WALCOURT
7863	SILENRIEUX
7883	SOUMOY
7891	CERFONTAINE
7944	WIHERIES
7978	BERSILLIES-L'ABBAYE
8017	PROFONDEVILLE
8022	LUSTIN
8059	DINA
8067	ANSEREMME Monia
8134	YVOIR
8163	WARNANT
8166	SOSOYE
8181	FOY
8221	GENDRON
8231	HOUYET
8341	DAVERDISSE
8527	JEMELLE
8622	HASTIERE
8661	FELENNE
8702	CHOOZ
9021	TREIGNES
9071	COUVIN
9081	NISMES
9111	MARIEMBOURG
9201	COUVIN Ry de Rome
9221	PETIGNY Ry de Rome
9223	PETIGNY Ermitage
9224	PETIGNY Fd Serpents
9232	BRULY RY PERNELLE
9434	MEMBRE Pont
9435	MEMBRE Amont
9461	BOUILLON
9531	LACUISINE
9541	CHINY
9561	TINTIGNY
9571	SAINTE-MARIE
9651	STRAIMONT
9741	TORGNY
9914	REULAND
9926	SCHOENBERG
"""



[docs]
# Durations expressed in hours: sub-daily steps first, then whole numbers of days (1 to 30)
STATS_HOURS_IRM=np.asarray([1,2,3,6,12]+[24*nbdays for nbdays in (1,2,3,4,5,7,10,15,20,25,30)],dtype=np.int32)


[docs]
# Same durations as STATS_HOURS_IRM, converted to minutes
STATS_MINUTES_IRM=60*np.asarray(STATS_HOURS_IRM)



[docs]
def daterange(date1, date2):
    """
    Yield every day from date1 to date2, both bounds included

    Nothing is yielded when date2 is earlier than date1.
    """
    nbdays = int((date2 - date1).days) + 1
    yield from (date1 + timedelta(days=offset) for offset in range(nbdays))



[docs]
def is_bissextile(years):
    """Return True when the given year is a leap year (Gregorian rule), False otherwise"""
    divisible_by_4 = years%4==0
    century = years%100==0
    divisible_by_400 = years%400==0
    return bool((divisible_by_4 and not century) or divisible_by_400)



[docs]
class SPW_MI_flows():
    """
    Access to the SPW-MI discharge and water-level records published on the former
    "voies-hydrauliques.be" website
    http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/
    """

    def __init__(self) -> None:
        """Build the two lookup dictionaries (code -> name and name -> code) from STATIONS_MI_FLOW"""
        self.code2name={}
        self.name2code={}
        self.db_flows=None

        # (suffix appended to the station code, prefix put in front of the lower-cased name)
        #   '1002' -> discharge series, looked up by the plain name
        #   '1011' -> water-level series, looked up by 'h_' + name
        variables=(('1002',''),('1011','h_'))

        for entry in STATIONS_MI_FLOW.splitlines():
            basecode,label=entry.split("\t")

            for suffix,prefix in variables:
                fullcode=basecode+suffix
                self.code2name[fullcode]=label
                # a name listed several times in the table keeps only its last code
                self.name2code[prefix+label.lower()]=fullcode


[docs]
    def get_names(self):
        """Nom des stations"""
        return list(self.name2code.keys())



[docs]
    def get_namesQ(self):
        """Nom des stations de débit"""
        mylistN = self.get_names()
        mylistQ = self.get_codes()
        myQ=[]
        for curQ,curN in zip(mylistQ,mylistN):
            if mod(int(curQ),2)==0:
                myQ.append(curN)
        return myQ



[docs]
    def get_namesH(self):
        """Nom des stations de hauteur"""
        mylistN = self.get_names()
        mylistQ = self.get_codes()
        myQ=[]
        for curQ,curN in zip(mylistQ,mylistN):
            if mod(int(curQ),2)!=0:
                myQ.append(curN)
        return myQ



[docs]
    def get_codes(self):
        """Code des stations"""
        return list(self.code2name.keys())



[docs]
    def get_codesQ(self):
        """Code des stations pour la variable Débit [m³/s]"""
        mylistQ = self.get_codes()
        myQ=[]
        for curQ in mylistQ:
            if mod(int(curQ),2)==0:
                myQ.append(curQ)
        return myQ



[docs]
    def get_codesH(self):
        """Code des stations pour la variable Hauteur [m]"""
        mylistQ = self.get_codes()
        myQ=[]
        for curQ in mylistQ:
            if mod(int(curQ),2)!=0:
                myQ.append(curQ)
        return myQ



[docs]
    def get_dailyflow_fromweb(self,year=2021,code='',name=''):
        """
        Download the daily values of one station for one calendar year

        :param year: calendar year to download
        :param code: full station code (string); used when name is empty
        :param name: station name (case-insensitive); takes precedence over code
        :return: pandas Series indexed by day (1 January to 31 December), or None
                 when the expected table cannot be extracted from the page
        :raises KeyError: when name is not a known station name
        """
        from io import BytesIO

        station=code
        if name!="":
            station=self.name2code[name.lower()]

        url="http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/annuaires/statjourtab.do?code="+station+ "&annee="+str(year)

        res=requests.get(url)
        # the payload is wrapped in a file-like object: passing literal HTML to read_html is deprecated
        html_tables = pd.read_html(BytesIO(res.content), match='.+')

        try:
            # even codes read table 12 of the page, odd codes table 13
            # NOTE(review): these indices depend on the page layout - confirm against the live site
            table_index = 12 if int(station)%2==0 else 13

            # rows = days of the month (up to 31), columns 1..12 = months
            table=html_tables[table_index].to_numpy()[0:31,1:13].astype('float')

            # keep only the days that exist in each month, month after month
            data=[]
            for month in range(1,13):
                nbdays=monthrange(year,month)[1]
                data.extend(table[:nbdays,month-1].tolist())

            startdate = dt.date(year,1,1)
            # the original used pd.DateOffset(year=1), which *sets* the year to 1 instead of adding one year
            enddate = dt.date(year+1,1,1)
            return pd.Series(data,index=pd.date_range(startdate,enddate,inclusive='left',freq='1D'))
        except Exception:
            # best effort: an unexpected page content is reported as "no data"
            return None



[docs]
    def get_flow_fromweb(self,fromyear,toyear,code='',name='',filterna=True):
        """Récupération de plusieurs années"""
        flow=[]
        for curyear in range(fromyear,toyear+1):
            flow.append(self.get_yearflow_fromweb(curyear,code,name,filterna))

        try:
            return pd.concat(flow)
        except:
            return None



[docs]
    def get_yearflow_fromweb(self,year=2021,code='',name='',filterna=True):
        """Récupération d'une année complète"""
        flow=[]
        for curmonth in range(1,13):
            flow.append(self.get_hourlyflow_fromweb(curmonth,year,code,name))

        try:
            flow = pd.concat(flow)

            if filterna:
                flow[flow.isna()]=0.

            return flow

        except:
            return None



[docs]
    def get_hourlyflow_fromweb(self,month='',year='',code='',name='',mysleep=0.2):
        """
        Download the hourly values of one station for one month from the SPW-MI VH website
        http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/

        :param month: month number (1-12)
        :param year: calendar year
        :param code: full station code (string); used when name is empty
        :param name: station name (case-insensitive); takes precedence over code
        :param mysleep: pause in seconds applied after a failed extraction
        :return: pandas Series with one value per hour of the month, or None when
                 the expected table cannot be extracted from the page
        :raises KeyError: when name is not a known station name
        """
        from io import BytesIO

        station=code
        if name!="":
            station=self.name2code[name.lower()]

        nbdays = monthrange(year, month)[1]

        url="http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/annuaires/stathorairetab.do?code="+station+"&mois="+str(month)+"&annee="+str(year)

        res=requests.get(url)
        # the payload is wrapped in a file-like object: passing literal HTML to read_html is deprecated
        html_tables = pd.read_html(BytesIO(res.content), match='.+')

        try:
            # even codes read table 12 of the page, odd codes table 13
            # NOTE(review): these indices depend on the page layout - confirm against the live site
            table_index = 12 if int(station)%2==0 else 13

            # rows = 24 hours, columns 1..nbdays = days; column-major flattening chains the days
            flow = html_tables[table_index].to_numpy()[0:24,1:nbdays+1].astype('float').reshape(24*nbdays,order='F')

            startdate = dt.date(year,month,1)
            enddate = startdate+pd.DateOffset(months=1)
            # a Timedelta is used as frequency: the '1H' string alias is deprecated in recent pandas
            return pd.Series(flow,index=pd.date_range(startdate,enddate,inclusive='left',freq=pd.Timedelta(hours=1)))
        except Exception:
            # best effort: an unexpected page content is reported as "no data"
            # NOTE(review): as before, the pause only happens on this failure path - confirm
            # whether a pause after every request was intended
            time.sleep(mysleep)
            return None



[docs]
    def plot_years(self,name,years=np.arange(2008,2022),fromcsv=False):
        """
        Plot one or several years for a single station
        When several years are given they are superimposed, the X axis spanning one year only

        :param name: station name
        :param years: sequence of years to plot
        :param fromcsv: when True the data are read with fromcsv, otherwise downloaded
                        year by year with get_yearflow_fromweb
        :return: (fig, ax) matplotlib objects; years without data are skipped, and a
                 series whose length is not one of the supported ones is not drawn
        """
        days=np.linspace(0,365,365)
        daysbis=np.linspace(0,366,366)
        hours=np.linspace(0,365*24,365*24)
        hoursbis=np.linspace(0,366*24,366*24)

        # abscissa for each supported series length (daily/hourly, regular/leap year,
        # hourly series possibly one value short)
        xaxis={365:days,
               366:daysbis,
               365*24:hours,
               366*24:hoursbis,
               365*24-1:hours[:-1],
               366*24-1:hoursbis[:-1]}

        myflows=None
        if fromcsv:
            myflows=self.fromcsv(stationame=name,fromdate=dt.datetime(years[0],1,1,1),todate=dt.datetime(years[-1]+1,1,1,0))

        fig,ax = plt.subplots(1,1,figsize=(10,8))

        lastlen=None
        for curyear in years:
            if fromcsv:
                # fromcsv returns None when the file does not exist
                myflow=None if myflows is None else myflows[dt.datetime(curyear,1,1,1):dt.datetime(curyear+1,1,1,0)]
            else:
                myflow=self.get_yearflow_fromweb(year=curyear,name=name)

            if myflow is None:
                # no data for this year: nothing to draw
                continue

            lastlen=len(myflow)
            if lastlen in xaxis:
                ax.plot(xaxis[lastlen],myflow,'.',label='Year:{:.0f}'.format(curyear))

        # the tick spacing follows the time step of the last series found (daily or hourly)
        if lastlen is not None and lastlen<=366:
            ax.set_xticks(np.arange(0, 366, 31),['Jan','Feb','Mrch','April','May','June','July','August','Sep','Oct','Nov','Dec'])
        else:
            ax.set_xticks(np.arange(0, 366*24, 31*24),['Jan','Feb','Mrch','April','May','June','July','August','Sep','Oct','Nov','Dec'])

        ax.set_xlabel(_('Time (days)'))
        ax.set_ylabel(_('Flow  (m3/s) '))
        ax.set_title(name,loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig,ax



[docs]
    def plot_hydrolyears(self,name,years=np.arange(2008,2022),startmonth=10):
        """
        Plot one or several hydrological years for a single station, from the csv files
        When several years are given they are superimposed, the X axis spanning one year only

        :param name: station name
        :param years: sequence of hydrological years; when startmonth > 1, year Y runs
                      from startmonth of Y-1 to startmonth of Y
        :param startmonth: first month of the hydrological year (1-12)
        :return: (fig, ax) matplotlib objects; nothing is drawn when the csv file is
                 missing or when a yearly series has an unsupported length
        """
        from calendar import month_name

        hours=np.linspace(0,365*24,365*24)
        hoursbis=np.linspace(0,366*24,366*24)

        # abscissa for each supported series length (regular/leap year, possibly one value short)
        xaxis={365*24:hours,
               366*24:hoursbis,
               365*24-1:hours[:-1],
               366*24-1:hoursbis[:-1]}

        # month labels, starting at the first month of the hydrological year
        monthnames=[month_name[x] for x in range(startmonth,13)]+[month_name[x] for x in range(1,startmonth)]

        # a year starting after January begins in the previous calendar year
        if startmonth>1:
            firstmonth,shift=startmonth,1
        else:
            firstmonth,shift=1,0

        myflows=self.fromcsv(stationame=name,fromdate=dt.datetime(years[0]-shift,firstmonth,1,1),todate=dt.datetime(years[-1]+1-shift,firstmonth,1,0))

        fig,ax = plt.subplots(1,1,figsize=(10,8))

        # fromcsv returns None when the file does not exist: the axes are then left empty
        if myflows is not None:
            for curyear in years:
                myflow=myflows[dt.datetime(curyear-shift,firstmonth,1,1):dt.datetime(curyear+1-shift,firstmonth,1,0)]

                nbvalues=len(myflow)
                if nbvalues in xaxis:
                    ax.plot(xaxis[nbvalues],myflow,'.',label='Year:{:.0f}'.format(curyear))

        ax.set_xticks(np.arange(0, 366*24, 31*24),monthnames)

        ax.set_xlabel(_('Time (days)'))
        ax.set_ylabel(_('Flow  (m3/s) '))
        ax.set_title(name,loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig,ax



[docs]
    def plot_hydrolyears_HQ(self,name,years=np.arange(2008,2022),startmonth=10):
        """
        Plot discharge against water level for one or several hydrological years of a
        single station, from the csv files
        When several years are given they are superimposed on the same axes

        :param name: station name; the water levels are read from the 'h_' + name series
        :param years: sequence of hydrological years; when startmonth > 1, year Y runs
                      from startmonth of Y-1 to startmonth of Y
        :param startmonth: first month of the hydrological year (1-12)
        :return: (fig, ax) matplotlib objects; nothing is drawn when one of the two
                 csv files is missing
        """
        # a year starting after January begins in the previous calendar year
        shift = 1 if startmonth>1 else 0

        fromdate=dt.datetime(years[0]-shift,startmonth,1,1)
        todate=dt.datetime(years[-1]+1-shift,startmonth,1,0)

        myflowsq=self.fromcsv(stationame=name,fromdate=fromdate,todate=todate)
        myflowsh=self.fromcsv(stationame='h_'+name,fromdate=fromdate,todate=todate)

        fig,ax = plt.subplots(1,1,figsize=(10,8))

        # fromcsv returns None when a file does not exist: the axes are then left empty
        if myflowsq is not None and myflowsh is not None:
            for curyear in years:
                yearstart=dt.datetime(curyear-shift,startmonth,1,1)
                yearend=dt.datetime(curyear+1-shift,startmonth,1,0)

                myflowq=myflowsq[yearstart:yearend]
                myflowh=myflowsh[yearstart:yearend]

                ax.plot(myflowh,myflowq,'.',label='Year:{:.0f}'.format(curyear))

        ax.set_xlabel(_('Water depth [m]'))
        ax.set_ylabel(_('Flow  [m3/s] '))
        ax.set_title(name,loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig,ax



[docs]
    def saveas(self,flow:pd.Series,filename:str):
        """Sauvegarde d'une series pandas dans un fichier .csv"""
        flow.to_csv(filename,header=['Data'])



[docs]
    def fromcsv(self,stationame='',stationcode=0,filename:str='',fromdate:dt.datetime=None,todate:dt.datetime=None):
        """
        Lecture depuis un fichier csv créé depuis un import précédent
        Les fichiers doivent être disponibles depuis un sous-répertoire spw
        """
        myname=filename
        if stationame!='':
            myname=stationame
            filename = self.name2code[stationame.lower()]+'.csv'
            filename = join('spw',filename)
        elif stationcode>0:
            myname = self.code2name(stationcode)
            filename = str(stationcode)+'.csv'
            filename = join('spw',filename)

        if exists(filename):
            mydata= pd.read_csv(filename,header=0,index_col=0,parse_dates=True).squeeze("columns")
            mydata.name=myname.upper()
        else:
            return

        if fromdate is None and todate is None:
            return mydata
        elif fromdate is None:
            return mydata[:todate]
        elif todate is None:
            return mydata[fromdate:]
        else:
            return mydata[fromdate:todate]



[docs]
    def from_xlsx_SPW(self,dir='',stationame='',stationcode=0,fromdate:dt.datetime=None,todate:dt.datetime=None,create_db=False):
        """Lecture de plusieurs fichiers Excel en autant de séries de débit/hauteur
        Renvoi d'un dictionnaire avec les séries
        Le paramètre create_db permet de conserver un pointeur vers les séries complètes afin d'éviter à devoir relire les fichiers
        pour des traitements sur plusieurs stations ou répétitifs
        """

        from os import listdir

        if dir =='' and self.db_flows is None:
            return None

        def read_xls_rainSPW(dir,filename=''):
            """Lecture du fichier Excel"""
            if filename=='':
                return None

            myflow = pd.read_excel(join(dir,filename))
            myflow = myflow.dropna(how='all').dropna(how='all', axis=1)
            myflow.columns=myflow.iloc[0]
            myflow=myflow.iloc[1:]
            myflow=myflow.set_index('Date')

            #On supprimme les espaces multiples pour avoir une en-tête de colonne correcte
            newnames = [re.sub(' +',' ',curstat) for curstat in myflow.keys()]
            myflow.columns=newnames

            return myflow

        def split_series(mydataframe:pd.DataFrame):
            """Split du dataframe général en séries pandas"""
            myseries={}
            for curcol in mydataframe.keys():
                locser=mydataframe[curcol].squeeze()

                #on recherche la première valeur non NaN
                first = locser.first_valid_index()
                #on recherche la dernière valeur non NaN
                last = locser.last_valid_index()
                #on remplit les NaN avec 0.
                myseries[int(curcol.split()[0])] = locser[first:last].fillna(0.)
            return myseries

        if self.db_flows is None:
            filenames=[]
            for file in listdir(dir):
                if file.endswith(".xlsx"):
                    filenames.append(file)

            myrains=pd.concat([read_xls_rainSPW(dir,filename) for filename in filenames],sort=True)
            myser =split_series(myrains)

            if create_db:
                self.db_flows = myser
        else:
            myser = self.db_flows

        if stationame!='' or stationcode!='':
            if stationame!='':
                stationcode = int(self.name2code[stationame.lower()])

            if stationcode in myser.keys():
                mydata = myser[stationcode]

                if fromdate is None and todate is None:
                    return mydata
                elif fromdate is None:
                    return mydata[:todate]
                elif todate is None:
                    return mydata[fromdate:]
                else:
                    return mydata[fromdate:todate]
            else:
                return None
        else:
            return myser



[docs]
    def import_all(self,dirout,fromyear=2002,toyear=2021,fromstation=0):
        """
        Import de tout ce qui est possible depuis le site web http://voies-hydrauliques.wallonie.be/
        Si des données sont manquantes et/ou inaccessibles, la gestion d'erreur "Try/Except" ne doit normalement pas faire planter le code
        Le résultat est écrit dans des fichiers .csv dans le répertoire passé en argument
        Il est possible de restreindre le téléchargement entre deux années passées en argument
        Il est également possible de redémarrer le téléchargement depuis un index de station si l'opération s'est interrompue
        :param
        """
        dirout=normpath(dirout)
        mystations = list(self.code2name.keys())
        for curstation in mystations[fromstation:]:
            myflow = self.get_flow_fromweb(fromyear,toyear,curstation)
            if not myflow is None:
                self.saveas(myflow,join(dirout,curstation+'.csv'))
            print(curstation)



if __name__=="__main__":
    # Example: download every station into a local directory, then fetch one year of a single station
    my = SPW_MI_flows()

    my.import_all(r'D:\Programmation2\wolf_oo\Sources\Python\PyPi\spw')
    # the name must be one of the entries of STATIONS_MI_FLOW, otherwise the lookup raises
    # KeyError (the former example used "Jalhay", which is not in the table)
    myflow=my.get_yearflow_fromweb(name="Chaudfontaine")