You are viewing an outdated version of the documentation.

This documentation is for an older version (1.4.7) of Dagster. You can view the version of this page from our latest release below.

Source code for dagster._core.definitions.metadata.table

from typing import Mapping, NamedTuple, Optional, Sequence, Union, cast

import dagster._check as check
from dagster._annotations import PublicAttr, experimental, public
from dagster._serdes.serdes import (
    whitelist_for_serdes,
)

# ########################
# ##### TABLE RECORD
# ########################


[docs]@experimental @whitelist_for_serdes class TableRecord( NamedTuple("TableRecord", [("data", PublicAttr[Mapping[str, Union[str, int, float, bool]]])]) ): """Represents one record in a table. Field keys are arbitrary strings-- field values must be strings, integers, floats, or bools. """ def __new__(cls, data: Mapping[str, Union[str, int, float, bool]]): check.dict_param( data, "data", value_type=(str, float, int, bool, type(None)), additional_message="Record fields must be one of types: (str, float, int, bool)", ) return super(TableRecord, cls).__new__(cls, data=data)
# ######################## # ##### TABLE SCHEMA # ########################
[docs]@whitelist_for_serdes class TableSchema( NamedTuple( "TableSchema", [ ("columns", PublicAttr[Sequence["TableColumn"]]), ("constraints", PublicAttr["TableConstraints"]), ], ) ): """Representation of a schema for tabular data. Schema is composed of two parts: - A required list of columns (`TableColumn`). Each column specifies a `name`, `type`, set of `constraints`, and (optional) `description`. `type` defaults to `string` if unspecified. Column constraints (`TableColumnConstraints`) consist of boolean properties `unique` and `nullable`, as well as a list of strings `other` containing string descriptions of all additional constraints (e.g. `"<= 5"`). - An optional list of table-level constraints (`TableConstraints`). A table-level constraint cannot be expressed in terms of a single column, e.g. col a > col b. Presently, all table-level constraints must be expressed as strings under the `other` attribute of a `TableConstraints` object. .. code-block:: python # example schema TableSchema( constraints = TableConstraints( other = [ "foo > bar", ], ), columns = [ TableColumn( name = "foo", type = "string", description = "Foo description", constraints = TableColumnConstraints( required = True, other = [ "starts with the letter 'a'", ], ), ), TableColumn( name = "bar", type = "string", ), TableColumn( name = "baz", type = "custom_type", constraints = TableColumnConstraints( unique = True, ) ), ], ) Args: columns (List[TableColumn]): The columns of the table. constraints (Optional[TableConstraints]): The constraints of the table. """ def __new__( cls, columns: Sequence["TableColumn"], constraints: Optional["TableConstraints"] = None, ): return super(TableSchema, cls).__new__( cls, columns=check.sequence_param(columns, "columns", of_type=TableColumn), constraints=check.opt_inst_param( constraints, "constraints", TableConstraints, default=_DEFAULT_TABLE_CONSTRAINTS ), )
[docs] @public @staticmethod def from_name_type_dict(name_type_dict: Mapping[str, str]): """Constructs a TableSchema from a dictionary whose keys are column names and values are the names of data types of those columns. """ return TableSchema( columns=[ TableColumn(name=name, type=type_str) for name, type_str in name_type_dict.items() ] )
# ######################## # ##### TABLE CONSTRAINTS # ########################
[docs]@whitelist_for_serdes class TableConstraints( NamedTuple( "TableConstraints", [ ("other", PublicAttr[Sequence[str]]), ], ) ): """Descriptor for "table-level" constraints. Presently only one property, `other` is supported. This contains strings describing arbitrary table-level constraints. A table-level constraint is a constraint defined in terms of multiple columns (e.g. col_A > col_B) or in terms of rows. Args: other (List[str]): Descriptions of arbitrary table-level constraints. """ def __new__( cls, other: Sequence[str], ): return super(TableConstraints, cls).__new__( cls, other=check.sequence_param(other, "other", of_type=str), )
_DEFAULT_TABLE_CONSTRAINTS = TableConstraints(other=[]) # ######################## # ##### TABLE COLUMN # ########################
[docs]@whitelist_for_serdes class TableColumn( NamedTuple( "TableColumn", [ ("name", PublicAttr[str]), ("type", PublicAttr[str]), ("description", PublicAttr[Optional[str]]), ("constraints", PublicAttr["TableColumnConstraints"]), ], ) ): """Descriptor for a table column. The only property that must be specified by the user is `name`. If no `type` is specified, `string` is assumed. If no `constraints` are specified, the column is assumed to be nullable (i.e. `required = False`) and have no other constraints beyond the data type. Args: name (List[str]): Descriptions of arbitrary table-level constraints. type (Optional[str]): The type of the column. Can be an arbitrary string. Defaults to `"string"`. description (Optional[str]): Description of this column. Defaults to `None`. constraints (Optional[TableColumnConstraints]): Column-level constraints. If unspecified, column is nullable with no constraints. """ def __new__( cls, name: str, type: str = "string", # noqa: A002 description: Optional[str] = None, constraints: Optional["TableColumnConstraints"] = None, ): return super(TableColumn, cls).__new__( cls, name=check.str_param(name, "name"), type=check.str_param(type, "type"), description=check.opt_str_param(description, "description"), constraints=cast( "TableColumnConstraints", check.opt_inst_param( constraints, "constraints", TableColumnConstraints, default=_DEFAULT_TABLE_COLUMN_CONSTRAINTS, ), ), )
# ######################## # ##### TABLE COLUMN CONSTRAINTS # ########################
[docs]@whitelist_for_serdes class TableColumnConstraints( NamedTuple( "TableColumnConstraints", [ ("nullable", PublicAttr[bool]), ("unique", PublicAttr[bool]), ("other", PublicAttr[Optional[Sequence[str]]]), ], ) ): """Descriptor for a table column's constraints. Nullability and uniqueness are specified with boolean properties. All other constraints are described using arbitrary strings under the `other` property. Args: nullable (Optional[bool]): If true, this column can hold null values. unique (Optional[bool]): If true, all values in this column must be unique. other (List[str]): Descriptions of arbitrary column-level constraints not expressible by the predefined properties. """ def __new__( cls, nullable: bool = True, unique: bool = False, other: Optional[Sequence[str]] = None, ): return super(TableColumnConstraints, cls).__new__( cls, nullable=check.bool_param(nullable, "nullable"), unique=check.bool_param(unique, "unique"), other=check.opt_sequence_param(other, "other"), )
_DEFAULT_TABLE_COLUMN_CONSTRAINTS = TableColumnConstraints()