# Source code for wolfhece.flow_SPWMI

"""
Author: HECE - University of Liege, Pierre Archambeau
Date: 2024

Copyright (c) 2024 University of Liege. All rights reserved.

This script and its content are protected by copyright law. Unauthorized
copying or distribution of this file, via any medium, is strictly prohibited.
"""

import datetime as dt
import logging
import re
import time
from calendar import monthrange
from datetime import timedelta, date
from operator import mod
from os.path import join,normpath,exists

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

from .PyTranslate import _

# SPW-MI stations as of 05/2022.
# One record per line: 4-digit station number, a tab, then the station name.
# The tab is written as the escape sequence "\t" so that it survives editors
# that convert tabs to spaces; SPW_MI_flows.__init__ splits each line on "\t".
STATIONS_MI_FLOW = """6228\tCHAUDFONTAINE
1951\tTUBIZE
2341\tCLABECQ
2371\tRONQUIERES
2473\tOISQUERCQ
2483\tRONQUIERES Bief Aval
2536\tGOUY
2537\tGOUYCanal
2707\tLESSINESBiefAmont
2713\tPAPIGNIESBiefAval
2952\tIRCHONWELZ
2971\tATHDENDREORIENTALE
3274\tKAIN Avnt Bar-Ecl
3282\tTOURNAI
3561\tBOUSSOIT
3643\tHYON
3778\tSAINT-DENIS
3884\tCOMINES Aval Bar-Ecl
3886\tCOMINES Amont
3891\tPLOEGSTEERT
5291\tKELMIS
5436\tLIXHE Aval
5447\tLIXHE Bief Amont
5572\tBERGILERS Amont
5771\tHACCOURT
5796\tMAREXHE
5804\tANGLEUR GR
5806\tANGLEUR GR
5826\tSAUHEID
5857\tMERy
5904\tCOMBLAIN-AU-PONT
5921\tTABREUX
5922\tHAMOIR
5953\tDURBUY
5962\tHOTTON
5991\tNISRAMONT
6021\tMABOMPRe
6122\tORTHO
6228\tCHAUDFONTAINE
6387\tEUPEN
6517\tPOLLEUR
6526\tBELLEHEID
6621\tMARTINRIV
6651\tREMOUCHAMPS
6671\tTARGNON
6732\tSTAVELOT
6753\tLASNENVILLE
6803\tCHEVRON
6832\tTROIS-PONTS
6933\tMALMEDY
6946\tBEVERCE
6971\tWIRTZFELD
6981\tBULLINGEN
6991\tMALMEDY
7117\tIVOZ-RAMET
7132\tAMAY
7137\tAMPSIN
7139\tHUYUS
7141\tHUY
7228\tMODAVE
7242\tMOHA
7244\tHUCCORGNE
7319\tSALZINNES
7394\tMONCEAU_Aval_Bar-EcL
7396\tMONCEAU_AmBar-Ecl
7466\tFONT-VALMON_Am B-E
7474\tLABUISSIERE__Av B-E
7487\tSOLRE
7711\tJAMIOUL
7781\tWALCOURT
7784\tWALCOURT
7812\tWALCOURT-VOGENEE
7831\tSILENRIEUX
7843\tBOUSSU-LEZ-WALCOURT
7863\tSILENRIEUX
7883\tSOUMOY
7891\tCERFONTAINE
7944\tWIHERIES
7978\tBERSILLIES-L'ABBAYE
8017\tPROFONDEVILLE
8022\tLUSTIN
8059\tDINA
8067\tANSEREMME Monia
8134\tYVOIR
8163\tWARNANT
8166\tSOSOYE
8181\tFOY
8221\tGENDRON
8231\tHOUYET
8341\tDAVERDISSE
8527\tJEMELLE
8622\tHASTIERE
8661\tFELENNE
8702\tCHOOZ
9021\tTREIGNES
9071\tCOUVIN
9081\tNISMES
9111\tMARIEMBOURG
9201\tCOUVIN Ry de Rome
9221\tPETIGNY Ry de Rome
9223\tPETIGNY Ermitage
9224\tPETIGNY Fd Serpents
9232\tBRULY RY PERNELLE
9434\tMEMBRE Pont
9435\tMEMBRE Amont
9461\tBOUILLON
9531\tLACUISINE
9541\tCHINY
9561\tTINTIGNY
9571\tSAINTE-MARIE
9651\tSTRAIMONT
9741\tTORGNY
9914\tREULAND
9926\tSCHOENBERG
"""

# Aggregation durations, in hours: 1 h to 24 h, then 2 to 30 days
STATS_HOURS_IRM = np.asarray(
    [1, 2, 3, 6, 12, 24, 2*24, 3*24, 4*24, 5*24, 7*24, 10*24, 15*24, 20*24, 25*24, 30*24],
    dtype=np.int32,
)

# Same durations expressed in minutes
STATS_MINUTES_IRM = STATS_HOURS_IRM * 60
def daterange(date1, date2):
    """
    Yield the successive days from ``date1`` to ``date2``, both included.

    Nothing is yielded when ``date2`` is more than one day before ``date1``... or
    exactly one day before: the count is ``(date2 - date1).days + 1``.

    :param date1: first day produced
    :param date2: last day produced
    """
    nbdays = int((date2 - date1).days) + 1
    yield from (date1 + timedelta(shift) for shift in range(nbdays))
def is_bissextile(years):
    """
    Tell whether a year is a leap year.

    A year is a leap year when it is a multiple of 400, or a multiple of 4
    that is not a multiple of 100.

    :param years: the year to test
    :return: True for a leap year, False otherwise
    """
    if years % 400 == 0:
        return True
    if years % 100 == 0:
        return False
    return bool(years % 4 == 0)
class SPW_MI_flows():
    """
    Access to SPW-MI discharge and water-level series.

    Series can be obtained from three sources:

    - the legacy "voies-hydrauliques.be" archive
      http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/
    - ``.csv`` files written by a previous import (see ``fromcsv``)
    - Excel files (see ``from_xlsx_SPW``)

    A station code is the string made of the station number followed by
    ``'1002'`` for the discharge or ``'1011'`` for the water level. Everywhere
    in this class a code whose integer value is even is handled as a discharge
    code, any other code as a water-level code.
    """

    # Common prefix of the yearbook pages queried on the archive site
    _URL_ARCHIVE = "http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/annuaires/"

    _logger = logging.getLogger(__name__)

    def __init__(self) -> None:
        """
        Build the two lookup dictionaries from ``STATIONS_MI_FLOW``.

        - ``code2name`` : full code (discharge or level) -> station name
        - ``name2code`` : lower-case name -> discharge code, and
          ``'h_'`` + lower-case name -> level code

        When several stations share the same name, the last one listed wins
        in ``name2code``.
        """
        self.code2name = {}
        self.name2code = {}
        self.db_flows = None  # optional cache filled by from_xlsx_SPW(create_db=True)

        for mystation in STATIONS_MI_FLOW.splitlines():
            if not mystation.strip():
                # tolerate blank lines in the table
                continue

            mycode, myname = mystation.split("\t")

            # Discharge code
            mycodeQ = mycode + '1002'
            self.code2name[mycodeQ] = myname
            self.name2code[myname.lower()] = mycodeQ

            # Water-level code
            mycodeH = mycode + '1011'
            self.code2name[mycodeH] = myname
            self.name2code['h_' + myname.lower()] = mycodeH

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _is_flow_code(code) -> bool:
        """Return True when ``code`` is a discharge code (even integer value)."""
        return int(code) % 2 == 0

    def _resolve_station(self, code, name):
        """Return the code associated with ``name`` if a name is given, else ``code``."""
        if name != "":
            return self.name2code[name.lower()]
        return code

    def _archive_table(self, html_tables, station):
        """
        Pick the table of values among the tables parsed from an archive page.

        The values are read from the table at index 12 for a discharge code and
        at index 13 for a water-level code.
        """
        index = 12 if self._is_flow_code(station) else 13
        return html_tables[index].to_numpy()

    @staticmethod
    def _select_period(series, fromdate, todate):
        """Restrict ``series`` to [fromdate, todate]; a None bound is left open."""
        if fromdate is None and todate is None:
            return series
        return series[fromdate:todate]

    @staticmethod
    def _year_abscissa(nbvalues, allow_daily):
        """
        Return the abscissa used to draw a one-year series of ``nbvalues`` values.

        Recognised lengths are 365 or 366 values (only if ``allow_daily``),
        365*24 or 366*24 values, and the same hourly lengths minus one value.
        None is returned for any other length.
        """
        for nbdays in (365, 366):
            if allow_daily and nbvalues == nbdays:
                return np.linspace(0, nbdays, nbdays)

            nbhours = nbdays * 24
            if nbvalues == nbhours:
                return np.linspace(0, nbhours, nbhours)
            if nbvalues == nbhours - 1:
                return np.linspace(0, nbhours, nbhours)[:-1]
        return None

    def _plot_one_year(self, ax, myflow, curyear, allow_daily):
        """Draw one yearly series on ``ax``; series of unexpected length are not drawn."""
        abscissa = self._year_abscissa(len(myflow), allow_daily)
        if abscissa is not None:
            ax.plot(abscissa, myflow, '.', label='Year:{:.0f}'.format(curyear))

    @staticmethod
    def _hydrol_bounds(year, startmonth):
        """
        Return (first, last) timestamps of the hydrological year labelled ``year``.

        With ``startmonth`` > 1 the year runs from ``startmonth`` of ``year - 1``
        to ``startmonth`` of ``year``; otherwise it is the calendar year.
        The first timestamp is at 01:00 and the last one at 00:00.
        """
        year = int(year)
        if startmonth > 1:
            return dt.datetime(year - 1, startmonth, 1, 1), dt.datetime(year, startmonth, 1, 0)
        return dt.datetime(year, 1, 1, 1), dt.datetime(year + 1, 1, 1, 0)

    @staticmethod
    def _read_xlsx_SPW(dir, filename=''):
        """Read one Excel file and return a dataframe indexed by the 'Date' column."""
        if filename == '':
            return None

        myflow = pd.read_excel(join(dir, filename))
        myflow = myflow.dropna(how='all').dropna(how='all', axis=1)

        # The first remaining row holds the column headers
        myflow.columns = myflow.iloc[0]
        myflow = myflow.iloc[1:]
        myflow = myflow.set_index('Date')

        # Collapse runs of spaces to get clean column headers
        myflow.columns = [re.sub(' +', ' ', curstat) for curstat in myflow.keys()]
        return myflow

    @staticmethod
    def _split_series(mydataframe: pd.DataFrame):
        """
        Split a dataframe into one series per column.

        The key of each series is the integer found in the first word of the
        column header. Each series is cut between its first and last valid
        values, and the NaN left in between are replaced by 0.
        """
        myseries = {}
        for curcol in mydataframe.keys():
            locser = mydataframe[curcol].squeeze()
            first = locser.first_valid_index()
            last = locser.last_valid_index()
            myseries[int(curcol.split()[0])] = locser[first:last].fillna(0.)
        return myseries

    # ------------------------------------------------------------------
    # Stations
    # ------------------------------------------------------------------
    def get_names(self):
        """Return every key of ``name2code`` (discharge and 'h_' level names)."""
        return list(self.name2code.keys())

    def get_namesQ(self):
        """Return the names associated with a discharge code."""
        return [name for name, code in self.name2code.items() if self._is_flow_code(code)]

    def get_namesH(self):
        """Return the names associated with a water-level code."""
        return [name for name, code in self.name2code.items() if not self._is_flow_code(code)]

    def get_codes(self):
        """Return every station code (discharge and level)."""
        return list(self.code2name.keys())

    def get_codesQ(self):
        """Return the codes of the discharge variable [m³/s]."""
        return [code for code in self.code2name if self._is_flow_code(code)]

    def get_codesH(self):
        """Return the codes of the water-level variable [m]."""
        return [code for code in self.code2name if not self._is_flow_code(code)]

    # ------------------------------------------------------------------
    # Download from the archive site
    # ------------------------------------------------------------------
    def get_dailyflow_fromweb(self, year=2021, code='', name=''):
        """
        Download the daily values of one year.

        :param year: year to download
        :param code: station code, used when ``name`` is empty
        :param name: station name (case-insensitive); has priority over ``code``
        :return: series with one value per day of ``year``, or None if the page
                 could not be interpreted
        """
        station = self._resolve_station(code, name)

        url = self._URL_ARCHIVE + "statjourtab.do?code=" + station + "&annee=" + str(year)
        res = requests.get(url)
        html_tables = pd.read_html(res.content, match='.+')

        try:
            # 31 rows (days) x 12 columns (months); column 0 is skipped
            table = self._archive_table(html_tables, station)[0:31, 1:13].astype('float')

            # Keep, month after month, only the days that exist in that month
            data = []
            for month in range(1, 13):
                nbdays = monthrange(year, month)[1]
                data.extend(table[:nbdays, month - 1].tolist())

            index = pd.date_range(dt.date(year, 1, 1), dt.date(year + 1, 1, 1), inclusive='left', freq='1D')
            return pd.Series(data, index=index)
        except Exception as err:
            self._logger.info("No daily data for station %s, year %s (%s)", station, year, err)
            return None

    def get_flow_fromweb(self, fromyear, toyear, code='', name='', filterna=True):
        """
        Download the hourly values of several years.

        :param fromyear: first year (included)
        :param toyear: last year (included)
        :param code: station code, used when ``name`` is empty
        :param name: station name; has priority over ``code``
        :param filterna: replace NaN by 0 in each yearly series
        :return: concatenated series, or None if no year could be retrieved
        """
        flow = [self.get_yearflow_fromweb(curyear, code, name, filterna)
                for curyear in range(fromyear, toyear + 1)]
        flow = [curflow for curflow in flow if curflow is not None]

        if not flow:
            return None
        return pd.concat(flow)

    def get_yearflow_fromweb(self, year=2021, code='', name='', filterna=True):
        """
        Download the hourly values of a whole year, month by month.

        :param year: year to download
        :param code: station code, used when ``name`` is empty
        :param name: station name; has priority over ``code``
        :param filterna: replace NaN by 0
        :return: concatenated series of the available months, or None if no
                 month could be retrieved
        """
        flow = [self.get_hourlyflow_fromweb(curmonth, year, code, name)
                for curmonth in range(1, 13)]
        flow = [curflow for curflow in flow if curflow is not None]

        if not flow:
            return None

        flow = pd.concat(flow)
        if filterna:
            flow = flow.fillna(0.)
        return flow

    def get_hourlyflow_fromweb(self, month='', year='', code='', name='', mysleep=0.2):
        """
        Download the hourly values of one month from the SPW-MI archive site
        http://voies-hydrauliques.wallonie.be/opencms/opencms/fr/hydro/Archive/

        :param month: month number (1-12)
        :param year: year
        :param code: station code, used when ``name`` is empty
        :param name: station name; has priority over ``code``
        :param mysleep: pause [s] applied when the page could not be interpreted
        :return: hourly series of the month, or None if the page could not be
                 interpreted
        """
        station = self._resolve_station(code, name)

        nbdays = monthrange(year, month)[1]

        url = (self._URL_ARCHIVE + "stathorairetab.do?code=" + station
               + "&mois=" + str(month) + "&annee=" + str(year))
        res = requests.get(url)
        html_tables = pd.read_html(res.content, match='.+')

        try:
            # 24 rows (hours) x nbdays columns; column-major flattening gives
            # the chronological order
            values = (self._archive_table(html_tables, station)[0:24, 1:nbdays + 1]
                      .astype('float')
                      .reshape(24 * nbdays, order='F'))

            startdate = dt.datetime(year, month, 1)
            enddate = startdate + timedelta(days=nbdays)
            index = pd.date_range(startdate, enddate, inclusive='left', freq=timedelta(hours=1))
            return pd.Series(values, index=index)
        except Exception as err:
            self._logger.info("No hourly data for station %s, %s/%s (%s)", station, month, year, err)

        # NOTE(review): the pause is only reached when the download failed;
        # confirm whether it was also meant to throttle successful requests.
        time.sleep(mysleep)
        return None

    # ------------------------------------------------------------------
    # Plots
    # ------------------------------------------------------------------
    def plot_years(self, name, years=np.arange(2008, 2022), fromcsv=False):
        """
        Plot one or several years of a single station.

        All years are superimposed: the X axis spans one year only.

        :param name: station name
        :param years: years to draw
        :param fromcsv: read the data with ``fromcsv`` instead of downloading them
        :return: (fig, ax)
        """
        monthlabels = ['Jan', 'Feb', 'Mrch', 'April', 'May', 'June', 'July', 'August', 'Sep', 'Oct', 'Nov', 'Dec']

        myflows = None
        if fromcsv:
            myflows = self.fromcsv(stationame=name,
                                   fromdate=dt.datetime(int(years[0]), 1, 1, 1),
                                   todate=dt.datetime(int(years[-1]) + 1, 1, 1, 0))

        fig, ax = plt.subplots(1, 1, figsize=(10, 8))

        hourly = False  # time step of the last series found, used to place the ticks
        for curyear in years:
            if not fromcsv:
                myflow = self.get_yearflow_fromweb(year=curyear, name=name)
            elif myflows is None:
                myflow = None
            else:
                myflow = myflows[dt.datetime(int(curyear), 1, 1, 1):dt.datetime(int(curyear) + 1, 1, 1, 0)]

            if myflow is None:
                # nothing available for this year: skip it instead of failing
                self._logger.warning("No data for station %s, year %s", name, curyear)
                continue

            self._plot_one_year(ax, myflow, curyear, allow_daily=True)
            hourly = len(myflow) > 366

        if hourly:
            ax.set_xticks(np.arange(0, 366 * 24, 31 * 24), monthlabels)
        else:
            ax.set_xticks(np.arange(0, 366, 31), monthlabels)

        ax.set_xlabel(_('Time (days)'))
        ax.set_ylabel(_('Flow (m3/s) '))
        ax.set_title(name, loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig, ax

    def plot_hydrolyears(self, name, years=np.arange(2008, 2022), startmonth=10):
        """
        Plot one or several hydrological years of a single station.

        Data are read with ``fromcsv``. All years are superimposed: the X axis
        spans one year only.

        :param name: station name
        :param years: hydrological years to draw
        :param startmonth: first month of the hydrological year
        :return: (fig, ax)
        """
        from calendar import month_name

        # Month names in the order of the hydrological year
        monthnames = [month_name[x] for x in range(startmonth, 13)]
        monthnames += [month_name[x] for x in range(1, startmonth)]

        myflows = self.fromcsv(stationame=name,
                               fromdate=self._hydrol_bounds(years[0], startmonth)[0],
                               todate=self._hydrol_bounds(years[-1], startmonth)[1])

        fig, ax = plt.subplots(1, 1, figsize=(10, 8))
        for curyear in years:
            first, last = self._hydrol_bounds(curyear, startmonth)
            self._plot_one_year(ax, myflows[first:last], curyear, allow_daily=False)

        ax.set_xticks(np.arange(0, 366 * 24, 31 * 24), monthnames)

        ax.set_xlabel(_('Time (days)'))
        ax.set_ylabel(_('Flow (m3/s) '))
        ax.set_title(name, loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig, ax

    def plot_hydrolyears_HQ(self, name, years=np.arange(2008, 2022), startmonth=10):
        """
        Plot discharge against water level for one or several hydrological years
        of a single station.

        Both series are read with ``fromcsv`` (``name`` for the discharge,
        ``'h_' + name`` for the level). All years are drawn on the same axes.

        :param name: station name
        :param years: hydrological years to draw
        :param startmonth: first month of the hydrological year
        :return: (fig, ax)
        """
        fromdate = self._hydrol_bounds(years[0], startmonth)[0]
        todate = self._hydrol_bounds(years[-1], startmonth)[1]

        myflowsq = self.fromcsv(stationame=name, fromdate=fromdate, todate=todate)
        myflowsh = self.fromcsv(stationame='h_' + name, fromdate=fromdate, todate=todate)

        fig, ax = plt.subplots(1, 1, figsize=(10, 8))
        for curyear in years:
            first, last = self._hydrol_bounds(curyear, startmonth)
            ax.plot(myflowsh[first:last], myflowsq[first:last], '.', label='Year:{:.0f}'.format(curyear))

        ax.set_xlabel(_('Water depth [m]'))
        ax.set_ylabel(_('Flow [m3/s] '))
        ax.set_title(name, loc='center')
        ax.legend().set_draggable(True)
        ax.grid()

        return fig, ax

    # ------------------------------------------------------------------
    # Files
    # ------------------------------------------------------------------
    def saveas(self, flow: pd.Series, filename: str):
        """Write a pandas series to a .csv file, the value column being named 'Data'."""
        flow.to_csv(filename, header=['Data'])

    def fromcsv(self, stationame='', stationcode=0, filename: str = '', fromdate: dt.datetime = None, todate: dt.datetime = None):
        """
        Read a series from a .csv file written by a previous import.

        When a station name or code is given, the file is ``<code>.csv`` in the
        ``spw`` sub-directory of the current directory; otherwise ``filename``
        is used as is.

        :param stationame: station name; has priority over ``stationcode``
        :param stationcode: full station code, used when ``stationame`` is empty
        :param filename: file to read when no station is given
        :param fromdate: first date to keep (None = from the beginning)
        :param todate: last date to keep (None = up to the end)
        :return: the series, named after the station in upper case, or None if
                 the file does not exist
        """
        myname = filename
        if stationame != '':
            myname = stationame
            filename = join('spw', self.name2code[stationame.lower()] + '.csv')
        elif int(stationcode) > 0:
            # code2name is keyed by the code as a string
            codekey = str(stationcode)
            myname = self.code2name.get(codekey, codekey)
            filename = join('spw', codekey + '.csv')

        if not exists(filename):
            return None

        mydata = pd.read_csv(filename, header=0, index_col=0, parse_dates=True).squeeze("columns")
        mydata.name = myname.upper()

        return self._select_period(mydata, fromdate, todate)

    def from_xlsx_SPW(self, dir='', stationame='', stationcode=0, fromdate: dt.datetime = None, todate: dt.datetime = None, create_db=False):
        """
        Read every .xlsx file of a directory into discharge/level series.

        :param dir: directory holding the Excel files; may be empty once the
                    series have been cached with ``create_db``
        :param stationame: station name; has priority over ``stationcode``
        :param stationcode: integer key of the wanted series
        :param fromdate: first date to keep (None = from the beginning)
        :param todate: last date to keep (None = up to the end)
        :param create_db: keep the complete series in ``self.db_flows`` so that
                          later calls do not read the files again
        :return: the requested series (None if unknown), or the dictionary of
                 all series when no station is given; None when there is
                 neither a directory nor cached series
        """
        from os import listdir

        if dir == '' and self.db_flows is None:
            return None

        if self.db_flows is None:
            filenames = [curfile for curfile in listdir(dir) if curfile.endswith(".xlsx")]
            myrains = pd.concat([self._read_xlsx_SPW(dir, filename) for filename in filenames], sort=True)
            myser = self._split_series(myrains)

            if create_db:
                self.db_flows = myser
        else:
            myser = self.db_flows

        # No station requested (defaults are '' and 0): return everything
        if stationame == '' and not stationcode:
            return myser

        if stationame != '':
            stationcode = self.name2code[stationame.lower()]
        stationcode = int(stationcode)

        if stationcode not in myser:
            return None

        return self._select_period(myser[stationcode], fromdate, todate)

    def import_all(self, dirout, fromyear=2002, toyear=2021, fromstation=0):
        """
        Download everything available from http://voies-hydrauliques.wallonie.be/

        Each station code is downloaded in turn and written to
        ``<dirout>/<code>.csv``; codes for which nothing could be retrieved are
        skipped. The code is printed once it has been processed.

        :param dirout: output directory
        :param fromyear: first year to download (included)
        :param toyear: last year to download (included)
        :param fromstation: index, in the list of codes, of the first station to
                            process; lets an interrupted download be resumed
        """
        dirout = normpath(dirout)

        mystations = list(self.code2name.keys())
        for curstation in mystations[fromstation:]:
            myflow = self.get_flow_fromweb(fromyear, toyear, curstation)
            if myflow is not None:
                self.saveas(myflow, join(dirout, curstation + '.csv'))
            print(curstation)
if __name__=="__main__":
    # Example of use
    my = SPW_MI_flows()

    # Download every station to a local directory (one .csv file per code)
    my.import_all(r'D:\Programmation2\wolf_oo\Sources\Python\PyPi\spw')

    # Hourly discharge of one year for one station, looked up by name.
    # The name must be one of those listed in STATIONS_MI_FLOW, otherwise the
    # lookup raises KeyError.
    myflow=my.get_yearflow_fromweb(name="Chaudfontaine")