Skip to content

database

Dataset

A Container for managing workspace connections.

A Dataset is initialized using arcpy.da.Walk and will discover all child datasets, tables, and featureclasses. These discovered objects can be accessed by name directly (e.g. dataset['featureclass_name']) or by inspecting the property of the type they belong to (e.g. dataset.feature_classes['featureclass_name']). The benefit of the second method is that you will be able to know you are getting a FeatureClass, Table, or Dataset object.

Usage
>>> dataset = Dataset('dataset/path')
>>> fc1 = dataset.feature_classes['fc1']
>>> fc2 = dataset.feature_classes['fc2']
>>> len(fc1)
243
>>> len(fc2)
778

>>> count(dataset['fc1'][where('LENGTH > 500')])
42
>>> sum(dataset['fc2']['TOTAL'])
3204903

As you can see, the dataset container makes it incredibly easy to interact with data concisely and clearly.

Datasets also implement __contains__ which allows you to check membership from the root node:

Example
>>> 'fc1' in dataset
True
>>> 'fc6' in dataset
True
>>> list(dataset.feature_classes)
['fc1', 'fc2']
>>> list(dataset.datasets)
['ds1']
>>> list(dataset['ds1'].feature_classes)
['fc3', 'fc4', 'fc5', 'fc6']
Source code in src/arcpie/database.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
class Dataset:
    """A Container for managing workspace connections.

    A Dataset is initialized using `arcpy.da.Walk` and will discover all child datasets, tables, and featureclasses.
    These discovered objects can be accessed by name directly (e.g. `dataset['featureclass_name']`) or by inspecting the
    property of the type they belong to (e.g. dataset.feature_classes['featureclass_name']). The benefit of the second
    method is that you will be able to know you are getting a `FeatureClass`, `Table`, or `Dataset` object.

    Usage:
        ```python
        >>> dataset = Dataset('dataset/path')
        >>> fc1 = dataset.feature_classes['fc1']
        >>> fc2 = dataset.feature_classes['fc2']
        >>> len(fc1)
        243
        >>> len(fc2)
        778

        >>> count(dataset['fc1'][where('LENGTH > 500')])
        42
        >>> sum(dataset['fc2']['TOTAL'])
        3204903
        ```
    As you can see, the dataset container makes it incredibly easy to interact with data concisely and clearly.

    Datasets also implement `__contains__` which allows you to check membership from the root node. `walk` records
    every feature class it encounters, so feature classes that live inside a child dataset are reachable from the
    root as well as from the child:

    Example:
        ```python
        >>> 'fc1' in dataset
        True
        >>> 'fc6' in dataset
        True
        >>> sorted(dataset.feature_classes)
        ['fc1', 'fc2', 'fc3', 'fc4', 'fc5', 'fc6']
        >>> list(dataset.datasets)
        ['ds1']
        >>> list(dataset['ds1'].feature_classes)
        ['fc3', 'fc4', 'fc5', 'fc6']
        ```
    """
    def __init__(self, conn: str|Path) -> None:
        """Store the connection path and immediately discover its children.

        Args:
            conn: Path to the workspace/dataset to walk.
        """
        self.conn = Path(conn)
        # The mappings start as None and are populated by walk()
        self._datasets: dict[str, Dataset] | None = None
        self._feature_classes: dict[str, FeatureClass[GeometryType]] | None=None
        self._tables: dict[str, Table] | None=None
        self.walk()

    @property
    def datasets(self) -> dict[str, Dataset]:
        """A mapping of dataset names to child `Dataset` objects"""
        return self._datasets or {}

    @property
    def feature_classes(self) -> dict[str, FeatureClass[GeometryType]]:
        """A mapping of featureclass names to the `FeatureClass` objects discovered by `walk`"""
        return self._feature_classes or {}

    @property
    def tables(self) -> dict[str, Table]:
        """A mapping of table names to the `Table` objects discovered by `walk`"""
        return self._tables or {}

    def walk(self) -> None:
        """Traverse the connection/path using `arcpy.da.Walk` and discover all dataset children

        Note:
            This is called on dataset initialization and can take some time. Larger datasets can take
            a second or more to initialize.

        Note:
            If the contents of a dataset change during its lifetime, you may need to call walk again.
            Child datasets that are already registered are kept and skipped; the feature class and table
            mappings are rebuilt from scratch on every call.
        """
        self._feature_classes = {}
        for root, ds, fcs in Walk(str(self.conn), datatype=['FeatureClass']):
            root = Path(root)
            if ds:
                self._datasets = self._datasets or {}
                self._datasets.update({d: Dataset(root / d) for d in ds if d not in self})
            # Register feature classes at every level, not only at levels without child datasets;
            # otherwise a workspace holding both would silently lose its own feature classes.
            self._feature_classes.update({fc: FeatureClass(root / fc) for fc in fcs if fc not in self})
        self._tables = {}
        for root, _, tbls in Walk(str(self.conn), datatype=['Table']):
            root = Path(root)
            self._tables.update({tbl: Table(root / tbl) for tbl in tbls if tbl not in self})

    def __getitem__(self, key: str) -> FeatureClass[GeometryType] | Table | Dataset:
        """Return the child registered under `key`.

        Tables are searched first, then feature classes, then datasets.

        Raises:
            KeyError: If `key` is not registered in any of the three mappings.
        """
        # Test membership rather than truthiness: children that define `__len__` (the usage example
        # calls `len(fc1)`) are falsy when empty and must still count as existing children.
        for mapping in (self.tables, self.feature_classes, self.datasets):
            if key in mapping:
                return mapping[key]
        raise KeyError(f'{key} is not a child of {self.conn.stem}')

    def __contains__(self, key: str) -> bool:
        """Return True when `key` names a registered table, feature class, or dataset."""
        return key in self.tables or key in self.feature_classes or key in self.datasets

datasets property

A mapping of dataset names to child Dataset objects

feature_classes property

A mapping of featureclass names to FeatureClass objects in the dataset root

tables property

A mapping of table names to Table objects in the dataset root

walk()

Traverse the connection/path using arcpy.da.Walk and discover all dataset children

Note

This is called on dataset initialization and can take some time. Larger datasets can take a second or more to initialize.

Note

If the contents of a dataset change during its lifetime, you may need to call walk again. All children that are already initialized will be skipped and only new children will be initialized

Source code in src/arcpie/database.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def walk(self) -> None:
    """Traverse the connection/path using `arcpy.da.Walk` and discover all dataset children

    Note:
        This is called on dataset initialization and can take some time. Larger datasets can take
        a second or more to initialize.

    Note:
        If the contents of a dataset change during its lifetime, you may need to call walk again.
        Child datasets that are already registered are kept and skipped; the feature class and table
        mappings are rebuilt from scratch on every call.
    """
    self._feature_classes = {}
    for root, ds, fcs in Walk(str(self.conn), datatype=['FeatureClass']):
        root = Path(root)
        if ds:
            self._datasets = self._datasets or {}
            self._datasets.update({d: Dataset(root / d) for d in ds if d not in self})
        # Register feature classes at every level, not only at levels without child datasets;
        # otherwise a workspace holding both would silently lose its own feature classes.
        self._feature_classes.update({fc: FeatureClass(root / fc) for fc in fcs if fc not in self})
    self._tables = {}
    for root, _, tbls in Walk(str(self.conn), datatype=['Table']):
        root = Path(root)
        self._tables.update({tbl: Table(root / tbl) for tbl in tbls if tbl not in self})