Source code for sparseml.utils.datasets.imagenette

# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
General utilities for the imagenette and imagewoof dataset implementations
for the image classification field in computer vision.
More info for the dataset can be found `here <https://github.com/fastai/imagenette>`__.
"""

import os
import tarfile
from enum import Enum

from sparseml.utils import clean_path, create_dirs
from sparsezoo.utils import download_file


# Public names exported by this module when using `from ... import *`.
__all__ = [
    "ImagenetteSize",
    "ImagenetteDownloader",
    "ImagewoofDownloader",
    "IMAGENETTE_CLASSES",
]


# Maps each integer class index (0-9) to its human readable class name.
IMAGENETTE_CLASSES = dict(
    enumerate(
        (
            "tench",
            "English springer",
            "cassette player",
            "chain saw",
            "church",
            "French horn",
            "garbage truck",
            "gas pump",
            "golf ball",
            "parachute",
        )
    )
)


class ImagenetteSize(Enum):
    """
    The available size variants of the Imagenette / Imagewoof datasets.

    | full: the original dataset, images are not resized at all
    | s320: images resized to 320px
    | s160: images resized to 160px
    """

    # original, un-resized images
    full = "full"
    # images resized to 320px
    s320 = "s320"
    # images resized to 160px
    s160 = "s160"
class ImagenetteDownloader(object):
    """
    Handles fetching and unpacking the imagenette dataset archive.
    More info on the dataset can be found
    `here <https://github.com/fastai/imagenette>`__

    :param download_root: the local path to download the files to
    :param dataset_size: which dataset size to download
    :param download: True to run the download, False otherwise.
        If False, the dataset tar must already exist under download_root.
    """

    def __init__(
        self, download_root: str, dataset_size: ImagenetteSize, download: bool
    ):
        self._download_root = clean_path(download_root)
        self._dataset_size = dataset_size
        self._download = download

        # directory name used for each supported size; scanned with == so any
        # unsupported value falls through to the ValueError below
        extract_names = (
            (ImagenetteSize.s160, "imagenette-160"),
            (ImagenetteSize.s320, "imagenette-320"),
            (ImagenetteSize.full, "imagenette"),
        )
        for size, name in extract_names:
            if dataset_size == size:
                self._extract_name = name
                break
        else:
            raise ValueError("Unknown ImagenetteSize given of {}".format(dataset_size))

        self._extracted_root = os.path.join(self._download_root, self._extract_name)

        if download:
            self._download_and_extract()
            return

        # not downloading: the previously downloaded tar must be present
        tar_path = "{}.tar".format(self._extracted_root)
        if not os.path.exists(tar_path):
            raise ValueError(
                "could not find original tar for the dataset at {}".format(tar_path)
            )

    @property
    def download_root(self) -> str:
        """
        :return: the local path to download the files to
        """
        return self._download_root

    @property
    def dataset_size(self) -> ImagenetteSize:
        """
        :return: which dataset size to download
        """
        return self._dataset_size

    @property
    def download(self) -> bool:
        """
        :return: True to run the download, False otherwise.
            If False, dataset must already exist at root.
        """
        return self._download

    @property
    def extracted_root(self) -> str:
        """
        :return: Where the specific dataset was extracted to
        """
        return self._extracted_root

    def split_root(self, train: bool) -> str:
        """
        :param train: True to get the path to the train dataset,
            False for validation
        :return: The path to the desired split for the dataset
        """
        split_name = "train" if train else "val"
        return os.path.join(self.extracted_root, split_name)

    def _download_and_extract(self):
        """
        Download the tar for the configured size and extract it into
        download_root. Does nothing besides printing a message
        if the tar file is already present on disk.
        """
        urls = (
            (
                ImagenetteSize.full,
                "https://s3.amazonaws.com/fast-ai-imageclas/imagenette.tgz",
            ),
            (
                ImagenetteSize.s320,
                "https://s3.amazonaws.com/fast-ai-imageclas/imagenette-320.tgz",
            ),
            (
                ImagenetteSize.s160,
                "https://s3.amazonaws.com/fast-ai-imageclas/imagenette-160.tgz",
            ),
        )
        for size, candidate in urls:
            if self._dataset_size == size:
                url = candidate
                break
        else:
            raise ValueError(
                "unknown imagenette size given of {}".format(self._dataset_size)
            )

        create_dirs(self._extracted_root)
        tar_path = "{}.tar".format(self._extracted_root)

        if os.path.exists(tar_path):
            print("already downloaded imagenette {}".format(self._dataset_size))
            return

        title = "downloading imagenette {}".format(self._dataset_size)
        download_file(url, tar_path, overwrite=False, progress_title=title)

        with tarfile.open(tar_path, "r:gz") as archive:
            archive.extractall(path=self.download_root)
class ImagewoofDownloader(object):
    """
    Downloader implementation for the imagewoof dataset.
    More info on the dataset can be found
    `here <https://github.com/fastai/imagenette>`__

    :param download_root: the local path to download the files to
    :param dataset_size: which dataset size to download
    :param download: True to run the download, False otherwise.
        If False, the dataset tar must already exist under download_root.
    :raises ValueError: if dataset_size is not a known ImagenetteSize, or if
        download is False and the tar for the dataset cannot be found
    """

    def __init__(
        self, download_root: str, dataset_size: ImagenetteSize, download: bool
    ):
        self._download_root = clean_path(download_root)
        self._dataset_size = dataset_size
        self._download = download

        # The archive is extracted into download_root, so the extract name
        # must match the archive name for extracted_root / split_root to
        # point at the extracted files.
        if dataset_size == ImagenetteSize.s160:
            self._extract_name = "imagewoof-160"
        elif dataset_size == ImagenetteSize.s320:
            self._extract_name = "imagewoof-320"
        elif dataset_size == ImagenetteSize.full:
            # was misspelled "imagewooof", which never matched the
            # directory produced by extracting imagewoof.tgz
            self._extract_name = "imagewoof"
        else:
            raise ValueError("Unknown ImagenetteSize given of {}".format(dataset_size))

        self._extracted_root = os.path.join(self._download_root, self._extract_name)

        if download:
            self._download_and_extract()
        else:
            # not downloading: require the previously downloaded tar
            file_path = "{}.tar".format(self._extracted_root)

            if not os.path.exists(file_path):
                raise ValueError(
                    "could not find original tar for the dataset at {}".format(
                        file_path
                    )
                )

    @property
    def download_root(self) -> str:
        """
        :return: the local path to download the files to
        """
        return self._download_root

    @property
    def dataset_size(self) -> ImagenetteSize:
        """
        :return: which dataset size to download
        """
        return self._dataset_size

    @property
    def download(self) -> bool:
        """
        :return: True to run the download, False otherwise.
            If False, dataset must already exist at root.
        """
        return self._download

    @property
    def extracted_root(self) -> str:
        """
        :return: Where the specific dataset was extracted to
        """
        return self._extracted_root

    def split_root(self, train: bool) -> str:
        """
        :param train: True to get the path to the train dataset,
            False for validation
        :return: The path to the desired split for the dataset
        """
        return os.path.join(self.extracted_root, "train" if train else "val")

    def _download_and_extract(self):
        """
        Download the imagewoof tar for the configured size and extract it
        into download_root. If the tar file already exists on disk, a message
        is printed and nothing is downloaded or extracted.

        :raises ValueError: if the configured dataset size is unknown
        """
        if self._dataset_size == ImagenetteSize.full:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof.tgz"
        elif self._dataset_size == ImagenetteSize.s320:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof-320.tgz"
        elif self._dataset_size == ImagenetteSize.s160:
            url = "https://s3.amazonaws.com/fast-ai-imageclas/imagewoof-160.tgz"
        else:
            raise ValueError(
                "unknown imagenette size given of {}".format(self._dataset_size)
            )

        create_dirs(self._extracted_root)
        file_path = "{}.tar".format(self._extracted_root)

        if os.path.exists(file_path):
            print("already downloaded imagewoof {}".format(self._dataset_size))

            return

        download_file(
            url,
            file_path,
            overwrite=False,
            progress_title="downloading imagewoof {}".format(self._dataset_size),
        )

        # the file is gzip compressed despite the .tar name used on disk
        with tarfile.open(file_path, "r:gz") as tar:
            tar.extractall(path=self.download_root)