"""
Author: HECE - University of Liege, Pierre Archambeau
Date: 2024
Copyright (c) 2024 University of Liege. All rights reserved.
This script and its content are protected by copyright law. Unauthorized
copying or distribution of this file, via any medium, is strictly prohibited.
"""
from docx import Document
from docx.shared import Pt
from docx.oxml.ns import qn
from docx.shared import Inches
from docx.shared import RGBColor
from pathlib import Path
from typing import Union, List, Tuple,Literal
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from PIL import Image
import pandas as pd
from tempfile import NamedTemporaryFile, TemporaryDirectory
import logging
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from datetime import datetime
import os
import socket
import hashlib
import re
from gettext import gettext as _
[docs]
class RapidReport:
"""
Class for creating a report 'quickly'.
It can be used in Jupyter notebooks or in scripts to create a simple report in Word format.
Word document is created with the following structure:
- Main page with title, author, date and hash of the document
- Summary (automatically generated)
- Title
- Paragraph
- Figure (numbered automatically with caption)
- Bullet list
- Table
It is not a full-fledged reporting tool with advanced functionalities, but a simple way to create a report quickly, 'on-the-fly'.
Example:
```
rapport = RapidReport('Rapport de Projet', 'Alice')
rapport.add_title('Titre Principal', level=0)
rapport.add_paragraph('Ceci est un **paragraphe** introductif avec des mots en *italique* et en **gras**.')
rapport += "Tentative d'ajout de figure vie un lien incorrect.\nPassage à la ligne"
rapport.add_figure('/path/to/image.png', 'Légende de la figure.')
rapport.add_bullet_list(['Premier élément', 'Deuxième élément', 'Troisième élément'])
rapport.add_table_from_listoflists([['Nom', 'Âge'], ['Alice', '25'], ['Bob', '30']])
rapport.save('rapport.docx')
```
"""
def __init__(self, main_title:str, author:str):
self._main_title = main_title
self._author = author
self._date = None
self._content = []
self._document = None
self._filename = None
self._idx_figure = 0
self._styles={}
self._has_first_page = False
[docs]
def _define_default_styles(self):
    """
    Register the report's styles in the current document.

    Every style uses Arial (also set for the East-Asian font slot). The created
    styles are stored both as individual attributes and in ``self._styles``.
    """
    registry = self._document.styles

    def make_style(name, kind, size, alignment=None):
        # ``kind`` is the numeric style-type code given to ``add_style``
        # (1 is presumably a paragraph style, 3 a table style; confirm)
        created = registry.add_style(name, kind)
        created.font.name = 'Arial'
        created.font.size = Pt(size)
        created._element.rPr.rFonts.set(qn('w:eastAsia'), 'Arial')
        if alignment is not None:
            created.paragraph_format.alignment = alignment
        return created

    # Title style: large and bold
    self._title_style = make_style('TitleStyle', 1, 20)
    self._title_style.font.bold = True

    # Caption style: small, italic, centered
    self._caption_style = make_style('CaptionStyle', 1, 9, WD_PARAGRAPH_ALIGNMENT.CENTER)
    self._caption_style.font.italic = True

    # Body text style: justified
    self._body_text_style = make_style('BodyTextStyle', 1, 11, WD_PARAGRAPH_ALIGNMENT.JUSTIFY)

    # Bullet list style: left aligned with a small indent
    self._bullet_list_style = make_style('BulletListStyle', 1, 9, WD_PARAGRAPH_ALIGNMENT.LEFT)
    self._bullet_list_style.paragraph_format.left_indent = Inches(0.25)

    # Table and figure styles: centered
    self._table_grid_style = make_style('TableGrid', 3, 9, WD_PARAGRAPH_ALIGNMENT.CENTER)
    self._figure_style = make_style('FigureStyle', 1, 9, WD_PARAGRAPH_ALIGNMENT.CENTER)

    self._styles.update({
        'TitleStyle': self._title_style,
        'CaptionStyle': self._caption_style,
        'BodyTextStyle': self._body_text_style,
        'BulletListStyle': self._bullet_list_style,
        'TableGrid': self._table_grid_style,
        'FigureStyle': self._figure_style,
    })
[docs]
def set_font(self, fontname:str='Arial', fontsize:int=12):
""" Définir la police et la taille de la police pour les styles de texte. """
for style in self._styles.values():
style.font.name = fontname
style.font.size = Pt(fontsize)
[docs]
def fill_first_page(self, main_title:str, author:str):
    """
    Fill the first page of the document.

    Inserts, at the front of the content list, the title, the logo, the author,
    the date, the user and computer names, a hash of these values, a page break
    and the ``'summary'`` placeholder paragraph. Does nothing if the first page
    has already been filled.

    :param main_title: Main title; replaces the stored title.
    :param author: Author name; replaces the stored author.
    """
    if self._has_first_page:
        return

    # Name of the current user. os.getlogin() raises OSError when the process
    # has no controlling terminal (e.g. notebook kernels, services), so fall
    # back to getpass, and finally to a placeholder, instead of failing.
    try:
        user_name = os.getlogin()
    except OSError:
        import getpass
        try:
            user_name = getpass.getuser()
        except (OSError, KeyError, ImportError):
            user_name = 'unknown'

    # Name of the computer
    computer_name = socket.gethostname()

    logo_path = Path(__file__).parent / 'wolf_report.png'

    self._main_title = main_title
    self._author = author
    self._date = datetime.now().strftime('%d/%m/%Y')

    # Entries are inserted at consecutive indices so that they end up, in
    # this order, before any content that was already added.
    self._insert_title(self._main_title, level=0, index=0)
    self._insert_figure(logo_path, caption=None, width=2.0, index=1)
    self._insert_paragraph('Ce document a été généré automatiquement par le paquet Python "wolfhece".', index=2)
    self._insert_paragraph(' ', index=3)
    self._insert_paragraph(f'Auteur : {self._author}', index=4)
    self._insert_paragraph(f'Date : {self._date}', index=5)
    self._insert_paragraph(' ', index=6)
    self._insert_paragraph(f'Utilisateur : {user_name}', index=7)
    self._insert_paragraph(f'Ordinateur : {computer_name}', index=8)
    self._insert_paragraph(' ', index=9)

    # MD5 digest of title + author + user + computer + date
    chain_hash = hashlib.md5(self._main_title.encode() +
                             self._author.encode() +
                             user_name.encode() +
                             computer_name.encode() +
                             self._date.encode()).hexdigest()
    self._insert_paragraph('Hash du document : ' + chain_hash, index=10)
    self._insert_new_page(index=11)

    # Placeholder replaced by the generated summary when the content is parsed
    self._insert_paragraph('summary', index=12)

    self._has_first_page = True
[docs]
def _insert_title(self, title:str, level:int=1, index:int = 0):
""" Insère un titre dans le document. """
self._content.insert(index, ('title', title, level))
[docs]
def _insert_paragraph(self, paragraph_text:str, style:str='BodyTextStyle', index:int = 0):
""" Insère un paragraphe dans le document. """
self._content.insert(index, ('paragraph', paragraph_text, style))
[docs]
def add_title(self, title:str, level:int=1):
""" Ajoute un titre au document. """
self._content.append(('title', title, level))
[docs]
def _list_titles(self, level:int=None):
""" Renvoie la liste des titres du document. """
if level is None:
return [item[1] for item in self._content if item[0] == 'title']
else:
return [item[1] for item in self._content if item[0] == 'title' and item[2] == level]
[docs]
def _list_captions(self):
""" Renvoie la liste des légendes de figures du document. """
return [item[2] for item in self._content if item[0] == 'figure' if item[2]]
[docs]
def _list_index(self):
""" Renvoie la liste des index de figures du document. """
return [item[3] for item in self._content if item[0] == 'figure' if item[3]]
[docs]
def fig_exists(self, fig_name:str):
    """
    Tell whether a figure is present in the document.

    :param fig_name: Value looked up among the entries returned by ``_list_figures``.
    :return: True if found, False otherwise.
    """
    known_figures = self._list_figures()
    return fig_name in known_figures
[docs]
def get_fig_index(self, fig_name_caption:str):
    """
    Return the 1-based index of a figure, looked up by name or by caption.

    The figure list is searched first, then the caption list.

    :param fig_name_caption: Figure name or figure caption.
    :return: 1-based position in the matching list, or None if not found.
    """
    for candidates in (self._list_figures(), self._list_captions()):
        if fig_name_caption in candidates:
            return candidates.index(fig_name_caption) + 1
    return None
[docs]
def _add_summary(self):
    """
    Write the summary into the Word document.

    Two sections are produced, each under a level-1 heading: the list of
    titles, then the list of figure captions (prefixed with 'Fig. n : ').
    A page break closes the summary.
    """
    doc = self._document

    def write_section(heading, lines):
        doc.add_heading(heading, level=1).style = 'TitleStyle'
        for line in lines:
            par = doc.add_paragraph(line, style='BodyTextStyle')
            # Trailing bold run holding a tab
            tab_run = par.add_run()
            tab_run.add_tab()
            tab_run.bold = True
            par.style = 'BodyTextStyle'

    write_section(_('Summary'), self._list_titles())

    numbered_captions = [f'Fig. {number} : {caption}'
                         for number, caption in enumerate(self._list_captions(), start=1)]
    write_section(_('List of figures'), numbered_captions)

    doc.add_page_break()
[docs]
def add_paragraph(self, paragraph_text:str, style:str='BodyTextStyle'):
""" Ajoute un paragraphe au document. """
self._content.append(('paragraph', paragraph_text, style))
[docs]
def add(self, paragraph_text:str, style:str='BodyTextStyle'):
""" Ajoute un paragraphe au document. """
self.add_paragraph(paragraph_text, style=style)
def __add__(self, paragraph_text:str):
""" Surcharge de l'opérateur + pour ajouter un paragraphe. """
self.add_paragraph(paragraph_text)
return self
[docs]
def add_bullet_list(self, bullet_list: List[str], style:str='BulletListStyle'):
""" Ajoute une liste à puce au document. """
for item in bullet_list:
self.add_paragraph('- ' + item, style=style)
[docs]
def add_new_page(self):
""" Ajoute une nouvelle page au document. """
self._content.append(('newpage', '', None))
[docs]
def _insert_new_page(self, index:int = 0):
""" Insère une nouvelle page au document. """
self._content.insert(index, ('newpage', '', None))
[docs]
def add_table_from_listoflists(self, data:List[List[str]], style:str='TableGrid'):
"""
Ajoute un tableau au document.
:param data: Liste de listes contenant les données du tableau. Chaque liste est une ligne du tableau.
"""
self._content.append(('table', data, style))
[docs]
def add_table_from_dict(self, data:dict, style:str='TableGrid'):
"""
Ajoute un tableau au document.
:param data: Dictionnaire contenant les données du tableau. Les clés sont les en-têtes de colonnes.
"""
table_data = [list(data.keys())]
table_data += [list(data.values())]
self.add_table_from_listoflists(table_data, style=style)
[docs]
def add_table_as_picture(self, data:Union[List[List[str]], dict, pd.DataFrame, Figure], caption:str=None):
    """
    Add a table to the document as a picture.

    A Matplotlib figure is converted to an image and added as is. A dict or a
    list is first converted to a DataFrame; the DataFrame is then drawn as a
    Matplotlib table, converted to an image and added with a width of 4.0.

    :param data: Table data, or an already drawn Matplotlib figure.
    :param caption: Caption passed to ``add_figure``.
    """

    def fig2img(fig):
        """Convert a Matplotlib figure to a PIL Image and return it"""
        import io
        buf = io.BytesIO()
        fig.savefig(buf, bbox_inches='tight')
        buf.seek(0)
        img = Image.open(buf)
        return img

    if isinstance(data, Figure):
        # The figure belongs to the caller: convert it but do not close it
        tmp_image = fig2img(data)
        self.add_figure(tmp_image, caption)
        return

    if isinstance(data, (dict, list)):
        data = pd.DataFrame(data)

    fig, ax = plt.subplots()
    try:
        ax.axis('off')
        ax.table(cellText=data.values,
                 colLabels=data.columns,
                 loc='center',
                 cellLoc='center',
                 colColours=['#f3f3f3']*len(data.columns))
        fig.tight_layout()
        tmp_image = fig2img(fig)
    finally:
        # Figures created through pyplot stay registered until closed;
        # release this one now that it has been rendered to an image.
        plt.close(fig)

    self.add_figure(tmp_image, caption, width=4.0)
[docs]
def _apply_text_styles(self, paragraph, text):
""" Search for bold and italic styles in the text and apply them."""
text = text.replace('\n\n', 'DOUBLE_NEWLINE')
text = text.replace('\n', ' ')
text = text.replace('DOUBLE_NEWLINE', '\n')
def split_bold(text):
return text.split('**')
def split_italic(text):
return text.split('*')
splitted_bold = split_bold(text)
bold = False
for cur_text in splitted_bold:
if cur_text != '':
italic = False
spliited_italic = split_italic(cur_text)
for cur_text2 in spliited_italic:
if cur_text2 != '':
run = paragraph.add_run(cur_text2)
run.bold = bold
run .italic = italic
italic = not italic
bold = not bold
[docs]
def parse_content(self):
    """
    Walk through the content entries and write them into the Word document.

    Handled entries: 'title', 'paragraph' (the text 'summary' triggers the
    generated summary), 'figure', 'table' and 'newpage'.
    """
    # In-memory figures are rendered into this directory, which is removed
    # when the loop ends, so no temporary image is left behind.
    with TemporaryDirectory() as tmp_dir:
        for position, item in enumerate(self._content):
            kind = item[0]

            if kind == 'title':
                self._document.add_heading(item[1], level=item[2]).style = 'TitleStyle'

            elif kind == 'paragraph':
                if item[1] == 'summary':
                    self._add_summary()
                    continue
                p = self._document.add_paragraph()
                self._apply_text_styles(p, item[1])
                p.style = item[2] if item[2] else 'BodyTextStyle'

            elif kind == 'figure':
                self._write_figure(item, str(Path(tmp_dir) / f'figure_{position}.png'))

            elif kind == 'table':
                self._write_table(item[1], item[2])

            elif kind == 'newpage':
                self._document.add_page_break()

def _write_figure(self, item, tmp_path):
    """
    Write one figure entry, and its caption if any, into the Word document.

    :param item: Figure entry ``('figure', source, caption, width, number)``.
    :param tmp_path: File path (str) where an in-memory source may be rendered.
    """
    source = item[1]

    if isinstance(source, Image.Image):
        source.save(tmp_path)
        image_path = tmp_path
    elif isinstance(source, str):
        image_path = source
    elif isinstance(source, Path):
        image_path = str(source)
    elif isinstance(source, Figure):
        source.tight_layout()
        source.savefig(tmp_path)
        image_path = tmp_path
    else:
        # Unsupported source: reported below like a missing file instead of
        # failing on an undefined (or stale) path.
        image_path = None

    if image_path is not None and Path(image_path).is_file():
        self._document.add_picture(image_path, width=Inches(item[3]) if item[3] else Inches(7.0))
        self._document.paragraphs[-1].style = 'FigureStyle'
    else:
        shown = image_path if image_path is not None else repr(source)
        logging.error(f"File {shown} not found.")
        p = self._document.add_paragraph()
        run = p.add_run(f'Error: Image not found. {shown}')
        run.font.color.rgb = RGBColor(255, 0, 0)
        p.style = 'BodyTextStyle'

    if item[2]:
        # Same 'Fig. n : caption' layout as the list of figures in the summary
        caption = self._document.add_paragraph(f'Fig. {item[4]} : {item[2]}')
        caption.style = 'CaptionStyle'

def _write_table(self, data, style):
    """
    Write one table into the Word document.

    :param data: Rows of the table; each row is a sequence of cell values.
    :param style: Name of the table style.
    """
    # Rows may have different lengths: size the table on the longest one
    n_cols = max((len(row) for row in data), default=0)
    if n_cols == 0:
        logging.warning("Empty table skipped.")
        return

    table = self._document.add_table(rows=len(data), cols=n_cols)
    table.style = style
    for i, row in enumerate(data):
        for j, cell in enumerate(row):
            # Cell text must be a string; convert numbers and other values
            table.cell(i, j).text = str(cell)
[docs]
def save(self, file_path:Union[str,Path]=None):
""" Sauvegarde le document Word. """
if file_path is None:
file_path = self._filename
if file_path is None:
raise ValueError("Le chemin du fichier n'a pas été spécifié.")
self._document = Document()
self._define_default_styles()
self.fill_first_page(self._main_title, self._author)
self.parse_content()
try:
self._document.save(str(file_path))
except Exception as e:
logging.error(f"Error saving file: {e}")
if __name__ == '__main__':
    # Usage example: build a small report, save it, then check figure indices.
    # (A stray '[docs]' marker, left over from documentation extraction and
    # undefined at runtime, has been removed from this block.)
    rapport = RapidReport('Rapport de Projet', 'Alice')

    rapport.add_title('Titre Principal', level=0)
    rapport.add_paragraph('Ceci est un **paragraphe** introductif avec des mots en *italique* et en **gras**.')

    # '+=' appends a paragraph
    rapport += "Tentative d'ajout de figure vie un lien incorrect.\nPassage à la ligne"
    # The path does not exist: an error message is written in place of the image
    rapport.add_figure('/path/to/image.png', 'Légende de la figure.')
    rapport+="""
Commentraire sur la figure multilignes
ligne 2
ligne3"""

    rapport.add_bullet_list(['Premier élément', 'Deuxième élément', 'Troisième élément'])

    rapport.add_table_from_listoflists([['Nom', 'Âge'], ['Alice', '25'], ['Bob', '30']])
    rapport.add_table_from_dict({'Nom': ['Alice', 'Bob'], 'Âge': ['25', '30']})
    rapport.add_table_as_picture({'Nom': ['Alice', 'Bob'], 'Âge': ['25', '30']}, caption='Tableau de données')

    rapport.save('rapport.docx')

    # Figures can be looked up by name or by caption
    assert rapport.get_fig_index('/path/to/image.png') == 1
    assert rapport.get_fig_index('Tableau de données') == 2