This documentation is for an older version (1.4.7) of Dagster. You can view the version of this page from our latest release below.
from typing import Mapping, NamedTuple, Optional, Sequence, Union, cast
import dagster._check as check
from dagster._annotations import PublicAttr, experimental, public
from dagster._serdes.serdes import (
whitelist_for_serdes,
)
# ########################
# ##### TABLE RECORD
# ########################
@experimental
@whitelist_for_serdes
class TableRecord(
    NamedTuple(
        "TableRecord",
        [("data", PublicAttr[Mapping[str, Union[str, int, float, bool]]])],
    )
):
    """A single record of a table.

    Keys of ``data`` may be any strings. Values are documented as strings,
    integers, floats, or bools; note that the runtime check below additionally
    lets ``None`` values through.

    Args:
        data (Mapping[str, Union[str, int, float, bool]]): The field values of
            this record, keyed by field name.
    """

    def __new__(cls, data: Mapping[str, Union[str, int, float, bool]]):
        # `type(None)` is part of the accepted value types even though the
        # error message only advertises the four scalar types.
        accepted_value_types = (str, float, int, bool, type(None))
        check.dict_param(
            data,
            "data",
            value_type=accepted_value_types,
            additional_message="Record fields must be one of types: (str, float, int, bool)",
        )
        # The mapping is stored as passed in; validation above does not copy it.
        return super(TableRecord, cls).__new__(cls, data=data)
# ########################
# ##### TABLE SCHEMA
# ########################
@whitelist_for_serdes
class TableSchema(
    NamedTuple(
        "TableSchema",
        [
            ("columns", PublicAttr[Sequence["TableColumn"]]),
            ("constraints", PublicAttr["TableConstraints"]),
        ],
    )
):
    """Representation of a schema for tabular data.

    Schema is composed of two parts:

    - A required list of columns (`TableColumn`). Each column specifies a
      `name`, `type`, set of `constraints`, and (optional) `description`. `type`
      defaults to `string` if unspecified. Column constraints
      (`TableColumnConstraints`) consist of boolean properties `unique` and
      `nullable`, as well as a list of strings `other` containing string
      descriptions of all additional constraints (e.g. `"<= 5"`).
    - An optional list of table-level constraints (`TableConstraints`). A
      table-level constraint cannot be expressed in terms of a single column,
      e.g. col a > col b. Presently, all table-level constraints must be
      expressed as strings under the `other` attribute of a `TableConstraints`
      object.

    .. code-block:: python

        # example schema
        TableSchema(
            constraints = TableConstraints(
                other = [
                    "foo > bar",
                ],
            ),
            columns = [
                TableColumn(
                    name = "foo",
                    type = "string",
                    description = "Foo description",
                    constraints = TableColumnConstraints(
                        nullable = False,
                        other = [
                            "starts with the letter 'a'",
                        ],
                    ),
                ),
                TableColumn(
                    name = "bar",
                    type = "string",
                ),
                TableColumn(
                    name = "baz",
                    type = "custom_type",
                    constraints = TableColumnConstraints(
                        unique = True,
                    )
                ),
            ],
        )

    Args:
        columns (List[TableColumn]): The columns of the table.
        constraints (Optional[TableConstraints]): The constraints of the table.
            When omitted, the module-level ``_DEFAULT_TABLE_CONSTRAINTS`` is used.
    """

    def __new__(
        cls,
        columns: Sequence["TableColumn"],
        constraints: Optional["TableConstraints"] = None,
    ):
        return super(TableSchema, cls).__new__(
            cls,
            # Every entry must be a TableColumn instance.
            columns=check.sequence_param(columns, "columns", of_type=TableColumn),
            # `None` falls back to the shared default constraints object.
            constraints=check.opt_inst_param(
                constraints, "constraints", TableConstraints, default=_DEFAULT_TABLE_CONSTRAINTS
            ),
        )

    @public
    @staticmethod
    def from_name_type_dict(name_type_dict: Mapping[str, str]) -> "TableSchema":
        """Constructs a TableSchema from a dictionary whose keys are column names and values are the
        names of data types of those columns.

        Args:
            name_type_dict (Mapping[str, str]): Maps each column name to the name
                of that column's data type.

        Returns:
            TableSchema: A schema with one `TableColumn` per dictionary entry, in
            the dictionary's iteration order, and default table constraints.
        """
        return TableSchema(
            columns=[
                TableColumn(name=name, type=type_str) for name, type_str in name_type_dict.items()
            ]
        )
# ########################
# ##### TABLE CONSTRAINTS
# ########################
@whitelist_for_serdes
class TableConstraints(
    NamedTuple(
        "TableConstraints",
        [("other", PublicAttr[Sequence[str]])],
    )
):
    """Container for constraints that apply to a table as a whole.

    Only a single property, `other`, is currently supported. It holds free-form
    strings, each describing one table-level constraint, i.e. a constraint
    defined in terms of multiple columns (e.g. col_A > col_B) or in terms of
    rows.

    Args:
        other (List[str]): Descriptions of arbitrary table-level constraints.
    """

    def __new__(
        cls,
        other: Sequence[str],
    ):
        # Validate first: `other` must be a sequence whose items are all strings.
        checked_other = check.sequence_param(other, "other", of_type=str)
        return super(TableConstraints, cls).__new__(cls, other=checked_other)
# Shared "no table-level constraints" instance. TableSchema.__new__ passes it as
# the `default=` fallback when the caller supplies no `constraints`.
_DEFAULT_TABLE_CONSTRAINTS = TableConstraints(other=[])
# ########################
# ##### TABLE COLUMN
# ########################
@whitelist_for_serdes
class TableColumn(
    NamedTuple(
        "TableColumn",
        [
            ("name", PublicAttr[str]),
            ("type", PublicAttr[str]),
            ("description", PublicAttr[Optional[str]]),
            ("constraints", PublicAttr["TableColumnConstraints"]),
        ],
    )
):
    """Descriptor for a table column. The only property that must be specified
    by the user is `name`. If no `type` is specified, `string` is assumed. If
    no `constraints` are specified, the column is assumed to be nullable
    (i.e. `nullable = True`) and have no other constraints beyond the data type.

    Args:
        name (str): The name of the column.
        type (Optional[str]): The type of the column. Can be an arbitrary
            string. Defaults to `"string"`.
        description (Optional[str]): Description of this column. Defaults to `None`.
        constraints (Optional[TableColumnConstraints]): Column-level constraints.
            If unspecified, column is nullable with no constraints.
    """

    def __new__(
        cls,
        name: str,
        type: str = "string",  # noqa: A002
        description: Optional[str] = None,
        constraints: Optional["TableColumnConstraints"] = None,
    ):
        return super(TableColumn, cls).__new__(
            cls,
            name=check.str_param(name, "name"),
            type=check.str_param(type, "type"),
            description=check.opt_str_param(description, "description"),
            # A default is always supplied, so the checked value is never None;
            # the cast records that for the type checker.
            constraints=cast(
                "TableColumnConstraints",
                check.opt_inst_param(
                    constraints,
                    "constraints",
                    TableColumnConstraints,
                    default=_DEFAULT_TABLE_COLUMN_CONSTRAINTS,
                ),
            ),
        )
# ########################
# ##### TABLE COLUMN CONSTRAINTS
# ########################
@whitelist_for_serdes
class TableColumnConstraints(
    NamedTuple(
        "TableColumnConstraints",
        [
            ("nullable", PublicAttr[bool]),
            ("unique", PublicAttr[bool]),
            ("other", PublicAttr[Optional[Sequence[str]]]),
        ],
    )
):
    """Constraints that apply to a single table column.

    Nullability and uniqueness each have a dedicated boolean property. Any
    constraint that these two cannot express is described with a free-form
    string under the `other` property.

    Args:
        nullable (Optional[bool]): If true, this column can hold null values.
            Defaults to `True`.
        unique (Optional[bool]): If true, all values in this column must be unique.
            Defaults to `False`.
        other (List[str]): Descriptions of arbitrary column-level constraints
            not expressible by the predefined properties. Defaults to `None`.
    """

    def __new__(
        cls,
        nullable: bool = True,
        unique: bool = False,
        other: Optional[Sequence[str]] = None,
    ):
        # Validate each argument up front, in declaration order.
        checked_nullable = check.bool_param(nullable, "nullable")
        checked_unique = check.bool_param(unique, "unique")
        checked_other = check.opt_sequence_param(other, "other")
        return super(TableColumnConstraints, cls).__new__(
            cls,
            nullable=checked_nullable,
            unique=checked_unique,
            other=checked_other,
        )
# Shared default built from the constructor defaults (nullable=True, unique=False,
# other=None). TableColumn.__new__ passes it as the `default=` fallback when the
# caller supplies no `constraints`.
_DEFAULT_TABLE_COLUMN_CONSTRAINTS = TableColumnConstraints()