# Copyright (C) 2021-2025, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

import os
from typing import Any

import h5py
import numpy as np
from tqdm import tqdm

from .datasets import VisionDataset
from .utils import convert_target_to_relative, crop_bboxes_from_image

# Public API of this module.
__all__ = ["SVHN"]
class SVHN(VisionDataset):
    """SVHN dataset from `"The Street View House Numbers (SVHN) Dataset" <http://ufldl.stanford.edu/housenumbers/>`_.

    .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/svhn-grid.png&src=0
        :align: center

    >>> from doctr.datasets import SVHN
    >>> train_set = SVHN(train=True, download=True)
    >>> img, target = train_set[0]

    Every entry appended to ``self.data`` depends on the selected task:

    * default (both task flags ``False``): ``(img_name, {"boxes": boxes, "labels": labels})``
    * ``detection_task=True``: ``(img_name, boxes)``
    * ``recognition_task=True``: ``(crop, label)``, one entry per crop with a non-zero
      height and width and a non-empty label that contains no space

    ``boxes`` is a float32 array, either ``(xmin, ymin, xmax, ymax)`` per box or, when
    ``use_polygons=True``, the four ``(x, y)`` corners per box (top left, top right,
    bottom right, bottom left).

    Args:
        train: whether the subset should be the training one
        use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones)
        recognition_task: whether the dataset should be used for recognition task
        detection_task: whether the dataset should be used for detection task
        **kwargs: keyword arguments from `VisionDataset`.

    Raises:
        ValueError: if `recognition_task` and `detection_task` are both True
        FileNotFoundError: if an image referenced by the annotation file is missing on disk
    """

    # (archive URL, expected SHA256, local file name) for each subset
    TRAIN = (
        "http://ufldl.stanford.edu/housenumbers/train.tar.gz",
        "4b17bb33b6cd8f963493168f80143da956f28ec406cc12f8e5745a9f91a51898",
        "svhn_train.tar",
    )

    TEST = (
        "http://ufldl.stanford.edu/housenumbers/test.tar.gz",
        "57ac9ceb530e4aa85b55d991be8fc49c695b3d71c6f6a88afea86549efde7fb5",
        "svhn_test.tar",
    )

    def __init__(
        self,
        train: bool = True,
        use_polygons: bool = False,
        recognition_task: bool = False,
        detection_task: bool = False,
        **kwargs: Any,
    ) -> None:
        # Reject the invalid flag combination up front, before the base initializer
        # (which is handed the archive URL with `extract_archive=True`) does any work.
        if recognition_task and detection_task:
            raise ValueError(
                "`recognition_task` and `detection_task` cannot be set to True simultaneously. "
                + "To get the whole dataset with boxes and labels leave both parameters to False."
            )

        url, sha256, name = self.TRAIN if train else self.TEST
        super().__init__(
            url,
            file_name=name,
            file_hash=sha256,
            extract_archive=True,
            # Recognition samples are (crop, label) pairs: no box target to convert
            pre_transforms=convert_target_to_relative if not recognition_task else None,
            **kwargs,
        )

        self.train = train
        self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = []
        np_dtype = np.float32

        tmp_root = os.path.join(self.root, "train" if train else "test")

        # Load mat data (matlab v7.3 - can not be loaded with scipy)
        with h5py.File(os.path.join(tmp_root, "digitStruct.mat"), "r") as f:
            img_refs = f["digitStruct/name"]
            box_refs = f["digitStruct/bbox"]
            for img_ref, box_ref in tqdm(
                iterable=zip(img_refs, box_refs), desc="Preparing and Loading SVHN", total=len(img_refs)
            ):
                # convert ascii matrix to string
                img_name = "".join(map(chr, f[img_ref[0]][()].flatten()))
                img_path = os.path.join(tmp_root, img_name)

                # File existence check
                if not os.path.exists(img_path):
                    raise FileNotFoundError(f"unable to locate {img_path}")

                # Unpack the information
                box = f[box_ref[0]]
                if box["left"].shape[0] == 1:
                    # Single entry: the value is read directly from the field
                    box_dict = {k: [int(vals[0][0])] for k, vals in box.items()}
                else:
                    # Several entries: each item is a reference resolved through the file
                    box_dict = {k: [int(f[v[0]][()].item()) for v in vals] for k, vals in box.items()}

                # Convert it to the right format: one (left, top, width, height) row per box
                coords: np.ndarray = np.array(
                    [box_dict["left"], box_dict["top"], box_dict["width"], box_dict["height"]], dtype=np_dtype
                ).transpose()
                # NOTE(review): labels are stringified exactly as stored. The SVHN release notes
                # describe digit "0" as being stored with label 10 - confirm whether a remap is expected.
                label_targets = list(map(str, box_dict["label"]))

                if use_polygons:
                    # (x, y) coordinates of top left, top right, bottom right, bottom left corners
                    box_targets: np.ndarray = np.stack(
                        [
                            np.stack([coords[:, 0], coords[:, 1]], axis=-1),
                            np.stack([coords[:, 0] + coords[:, 2], coords[:, 1]], axis=-1),
                            np.stack([coords[:, 0] + coords[:, 2], coords[:, 1] + coords[:, 3]], axis=-1),
                            np.stack([coords[:, 0], coords[:, 1] + coords[:, 3]], axis=-1),
                        ],
                        axis=1,
                    )
                else:
                    # x, y, width, height -> xmin, ymin, xmax, ymax
                    box_targets = np.stack(
                        [
                            coords[:, 0],
                            coords[:, 1],
                            coords[:, 0] + coords[:, 2],
                            coords[:, 1] + coords[:, 3],
                        ],
                        axis=-1,
                    )

                if recognition_task:
                    crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets)
                    for crop, label in zip(crops, label_targets):
                        # Skip empty crops as well as empty labels or labels containing a space
                        if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label:
                            self.data.append((crop, label))
                elif detection_task:
                    self.data.append((img_name, box_targets))
                else:
                    self.data.append((img_name, {"boxes": box_targets, "labels": label_targets}))

        # Stored image names are relative to the subset folder, so the root now points at it
        self.root = tmp_root

    def extra_repr(self) -> str:
        """Return the subset flag as a ``train=<bool>`` string."""
        return f"train={self.train}"