Source code for detectree.pixel_response
"""Build pixel binary (tree/non-tree) responses."""
import glob
from os import path
import numpy as np
import rasterio as rio
from . import settings, utils
__all__ = ["PixelResponseBuilder"]
class NonBinaryResponseError(Exception):
    """Signal that a response array holds values other than the two expected ones."""
[docs]
class PixelResponseBuilder:
    """Customize how pixel responses (tree/non-tree labels) are computed."""

    # It is really not necessary to use a class for this, but we do so for the sake of
    # API consistency with the `pixel_features` module

    def __init__(self, *, tree_val=None, nontree_val=None):
        """
        Initialize the pixel response builder.

        See the `background <https://bit.ly/2KlCICO>`_ example notebook for more
        details.

        Parameters
        ----------
        tree_val : int, optional
            The value that designates tree pixels in the response images. If no
            value is provided, `settings.RESPONSE_TREE_VAL` is used.
        nontree_val : int, optional
            The value that designates non-tree pixels in the response images. If no
            value is provided, `settings.RESPONSE_NONTREE_VAL` is used.
        """
        if tree_val is None:
            tree_val = settings.RESPONSE_TREE_VAL
        self.tree_val = tree_val

        if nontree_val is None:
            nontree_val = settings.RESPONSE_NONTREE_VAL
        self.nontree_val = nontree_val

    def build_response_from_arr(self, img_binary):
        """
        Build response (flat) array from a binary (tree/non-tree) image array.

        Parameters
        ----------
        img_binary : numpy ndarray
            Two-dimensional binary (tree-non-tree) image array.

        Returns
        -------
        responses : numpy ndarray
            Flat array with the pixel responses (1 for tree pixels, 0 for non-tree
            pixels), with the same dtype as `img_binary`.

        Raises
        ------
        NonBinaryResponseError
            If `img_binary` contains any value other than `tree_val` and
            `nontree_val`.
        """
        # Compute both masks from the *untouched* input. Remapping in place and then
        # comparing again would confuse freshly written 1s/0s with original pixel
        # values (e.g., when `nontree_val` is 1 every tree pixel would be reset to 0).
        tree_mask = img_binary == self.tree_val
        nontree_mask = img_binary == self.nontree_val

        # check that the provided `img_binary` is actually binary, i.e., consists only
        # of `tree_val` and `nontree_val` values
        if not (tree_mask | nontree_mask).all():
            raise NonBinaryResponseError

        # every pixel is either tree or non-tree, so the tree mask is the response
        return tree_mask.astype(img_binary.dtype).ravel()

    def build_response_from_filepath(self, img_filepath):
        """
        Build response (flat) array from a binary (tree/non-tree) image file.

        Parameters
        ----------
        img_filepath : str, file object or pathlib.Path object
            Path to a file, URI, file object opened in binary ('rb') mode, or a Path
            object representing the binary (tree/non-tree) image to be transformed into
            the response. The value will be passed to `rasterio.open`.

        Returns
        -------
        responses : numpy ndarray
            Array with the pixel responses.

        Raises
        ------
        ValueError
            If the first band of the image contains values other than `tree_val` and
            `nontree_val`.
        """
        with rio.open(img_filepath) as src:
            # only the first band is used as the response mask
            img_binary = src.read(1)

        try:
            return self.build_response_from_arr(img_binary)
        except NonBinaryResponseError as err:
            # re-raise with a message that tells the user which file is at fault
            raise ValueError(
                f"The response mask {img_filepath} must consist of only {self.tree_val}"
                f" (tree) and {self.nontree_val} (non-tree) pixel values"
            ) from err

    def build_response(
        self,
        *,
        split_df=None,
        response_img_dir=None,
        response_img_filepaths=None,
        img_filename_pattern=None,
        method=None,
        img_cluster=None,
    ):
        """
        Build the pixel response (flat) array for a list of images.

        Parameters
        ----------
        split_df : pd.DataFrame
            Data frame with the train/test split.
        response_img_dir : str representing path to a directory, optional
            Path to the directory where the response images are located. Required if
            providing `split_df`. Otherwise `response_img_dir` might either be ignored
            if providing `response_img_filepaths`, or be used as the directory where the
            images whose filename matches `img_filename_pattern` are to be located.
        response_img_filepaths : list of image file paths, optional
            List of images to be transformed into the response. Alternatively, the same
            information can be provided by means of the `response_img_dir` and
            `img_filename_pattern` keyword arguments. Ignored if providing `split_df`.
        img_filename_pattern : str representing a file-name pattern, optional
            Filename pattern to be matched in order to obtain the list of images. If no
            value is provided, the value set in `settings.IMG_FILENAME_PATTERN` is used.
            Ignored if `split_df` or `response_img_filepaths` is provided.
        method : {'cluster-I', 'cluster-II'}, optional
            Method used in the train/test split. If no value is provided, it is
            'cluster-II' when `split_df` has an "img_cluster" column and 'cluster-I'
            otherwise. Any value other than 'cluster-I' is handled as 'cluster-II'.
        img_cluster : int, optional
            The label of the cluster of images. Only used if `method` is 'cluster-II'.

        Returns
        -------
        responses : numpy ndarray
            Array with the pixel responses.

        Raises
        ------
        ValueError
            If `split_df` is provided without `response_img_dir`, if the method is
            'cluster-II' and `img_cluster` is not provided, if none of `split_df`,
            `response_img_filepaths` and `response_img_dir` is provided, or if there
            are no response images to process.
        """
        if split_df is not None:
            if response_img_dir is None:
                raise ValueError(
                    "If `split_df` is provided, `response_img_dir` must also be"
                    " provided"
                )
            if method is None:
                if "img_cluster" in split_df:
                    method = "cluster-II"
                else:
                    method = "cluster-I"

            if method == "cluster-I":
                img_filepaths = split_df[split_df["train"]]["img_filepath"]
            else:
                if img_cluster is None:
                    raise ValueError(
                        "If `method` is 'cluster-II', `img_cluster` must be provided"
                    )
                # NOTE(review): presumably selects the training images of the given
                # cluster - confirm against `utils.get_img_filepaths`
                img_filepaths = utils.get_img_filepaths(split_df, img_cluster, True)

            # the response of each image is the file with the same base name located
            # in `response_img_dir`
            response_img_filepaths = img_filepaths.apply(
                lambda filepath: path.join(response_img_dir, path.basename(filepath))
            )
        else:
            if response_img_filepaths is None:
                if img_filename_pattern is None:
                    img_filename_pattern = settings.IMG_FILENAME_PATTERN
                if response_img_dir is None:
                    raise ValueError(
                        "Either `split_df`, `response_img_filepaths` or "
                        "`response_img_dir` must be provided"
                    )

                response_img_filepaths = glob.glob(
                    path.join(response_img_dir, img_filename_pattern)
                )
            # TODO: `response_img_filepaths`

        # no need for dask here
        values = [
            self.build_response_from_filepath(response_img_filepath)
            for response_img_filepath in response_img_filepaths
        ]
        if not values:
            # fail with an explicit message rather than an opaque numpy error
            raise ValueError("No response images were found to build the response")

        # the per-image responses are already flat, so concatenating them also works
        # when the images do not all have the same number of pixels
        return np.concatenate(values)