# Copyright (C) 2021-2025, Mindee.# This program is licensed under the Apache License 2.0.# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.importloggingfromtypingimportAnyimportnumpyasnpfromanyasciiimportanyasciifromPILimportImage,ImageDrawfrom.fontsimportget_font__all__=["synthesize_page","synthesize_kie_page"]# Global variable to avoid multiple warningsROTATION_WARNING=Falsedef_warn_rotation(entry:dict[str,Any])->None:# pragma: no coverglobalROTATION_WARNINGifnotROTATION_WARNINGandlen(entry["geometry"])==4:logging.warning("Polygons with larger rotations will lead to inaccurate rendering")ROTATION_WARNING=Truedef_synthesize(response:Image.Image,entry:dict[str,Any],w:int,h:int,draw_proba:bool=False,font_family:str|None=None,smoothing_factor:float=0.75,min_font_size:int=6,max_font_size:int=50,)->Image.Image:iflen(entry["geometry"])==2:(xmin,ymin),(xmax,ymax)=entry["geometry"]polygon=[(xmin,ymin),(xmax,ymin),(xmax,ymax),(xmin,ymax)]else:polygon=entry["geometry"]# Calculate the bounding box of the wordx_coords,y_coords=zip(*polygon)xmin,ymin,xmax,ymax=(int(round(w*min(x_coords))),int(round(h*min(y_coords))),int(round(w*max(x_coords))),int(round(h*max(y_coords))),)word_width=xmax-xminword_height=ymax-ymin# If lines are provided instead of words, concatenate the word entriesif"words"inentry:word_text=" ".join(word["value"]forwordinentry["words"])else:word_text=entry["value"]# Find the optimal font sizetry:font_size=min(word_height,max_font_size)font=get_font(font_family,font_size)text_width,text_height=font.getbbox(word_text)[2:4]while(text_width>word_widthortext_height>word_height)andfont_size>min_font_size:font_size=max(int(font_size*smoothing_factor),min_font_size)font=get_font(font_family,font_size)text_width,text_height=font.getbbox(word_text)[2:4]exceptValueError:font=get_font(font_family,min_font_size)# Create a mask for the wordmask=Image.new("L",(w,h),0)ImageDraw.Draw(mask).polygon([(int(round(w*x)),int(round(h*y)))forx,yinpolygon],fill=255)# Draw the word textd=ImageDraw.Draw(response)try:try:d.text((xmin,ymin),word_text,font=font,fill=(0,0,0),anchor="lt")exceptUnicodeEncodeError:d.text((xmin,ymin),anyascii(word_text),font=font,fill=(0,0,0),anchor="lt")# Catch generic exceptions to avoid crashing the whole renderingexceptException:# pragma: no coverlogging.warning(f"Could not render word: {word_text}")ifdraw_proba:confidence=(entry["confidence"]if"confidence"inentryelsesum(w["confidence"]forwinentry["words"])/len(entry["words"]))p=int(255*confidence)color=(255-p,0,p)# Red to blue gradient based on probabilityd.rectangle([(xmin,ymin),(xmax,ymax)],outline=color,width=2)prob_font=get_font(font_family,20)prob_text=f"{confidence:.2f}"prob_text_width,prob_text_height=prob_font.getbbox(prob_text)[2:4]# Position the probability slightly above the bounding boxprob_x_offset=(word_width-prob_text_width)//2prob_y_offset=ymin-prob_text_height-2prob_y_offset=max(0,prob_y_offset)d.text((xmin+prob_x_offset,prob_y_offset),prob_text,font=prob_font,fill=color,anchor="lt")returnresponse
[docs]defsynthesize_page(page:dict[str,Any],draw_proba:bool=False,font_family:str|None=None,smoothing_factor:float=0.95,min_font_size:int=8,max_font_size:int=50,)->np.ndarray:"""Draw a the content of the element page (OCR response) on a blank page. Args: page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_family: family of the font smoothing_factor: factor to smooth the font size min_font_size: minimum font size max_font_size: maximum font size Returns: the synthesized page """# Draw templateh,w=page["dimensions"]response=Image.new("RGB",(w,h),color=(255,255,255))forblockinpage["blocks"]:# If lines are provided use these to get better rendering resultsiflen(block["lines"])>1:forlineinblock["lines"]:_warn_rotation(block)# pragma: no coverresponse=_synthesize(response=response,entry=line,w=w,h=h,draw_proba=draw_proba,font_family=font_family,smoothing_factor=smoothing_factor,min_font_size=min_font_size,max_font_size=max_font_size,)# Otherwise, draw each wordelse:forlineinblock["lines"]:_warn_rotation(block)# pragma: no coverforwordinline["words"]:response=_synthesize(response=response,entry=word,w=w,h=h,draw_proba=draw_proba,font_family=font_family,smoothing_factor=smoothing_factor,min_font_size=min_font_size,max_font_size=max_font_size,)returnnp.array(response,dtype=np.uint8)
defsynthesize_kie_page(page:dict[str,Any],draw_proba:bool=False,font_family:str|None=None,)->np.ndarray:"""Draw a the content of the element page (OCR response) on a blank page. Args: page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_family: family of the font smoothing_factor: factor to smooth the font size min_font_size: minimum font size max_font_size: maximum font size Returns: the synthesized page """# Draw templateh,w=page["dimensions"]response=Image.new("RGB",(w,h),color=(255,255,255))# Draw each wordforpredictionsinpage["predictions"].values():forpredictioninpredictions:_warn_rotation(prediction)# pragma: no coverresponse=_synthesize(response=response,entry=prediction,w=w,h=h,draw_proba=draw_proba,font_family=font_family,)returnnp.array(response,dtype=np.uint8)