Module table.pick.column in plugin tabular v0.5.2
Pick one column from a table, returning an array.
Author(s) | Markus Binsteiner (markus@frkl.io) |
Tags | tabular |
Python class | kiara_plugin.tabular.modules.table.PickColumnModule |
Module configuration options
Configuration class: kiara_plugin.tabular.modules.table.PickColumnModuleConfig
Name | Description | Type | Required? | Default |
---|---|---|---|---|
column_name | A hardcoded column name to cut. | anyOf: [{'type': 'string'}, {'type': 'null'}] | false | null |
constants | Value constants for this module. | object | false | null |
defaults | Value defaults for this module. | object | false | null |
Module source code
class PickColumnModule(KiaraModule):
    """Pick one column from a table, returning an array."""

    _module_type_name = "table.pick.column"
    _config_cls = PickColumnModuleConfig

    def create_inputs_schema(
        self,
    ) -> ValueMapSchema:
        """Describe the inputs of this module.

        A ``table`` input is always declared. A ``column_name`` string input
        is declared as well, unless the module configuration already carries
        a non-empty ``column_name``.
        """
        schema: Dict[str, Any] = {"table": {"type": "table", "doc": "A table."}}

        # A configured column name makes the user-facing input unnecessary.
        if self.get_config_value("column_name"):
            return schema

        schema["column_name"] = {
            "type": "string",
            "doc": "The name of the column to extract.",
        }
        return schema

    def create_outputs_schema(
        self,
    ) -> ValueMapSchema:
        """Describe the single ``array`` output holding the picked column."""
        return {"array": {"type": "array", "doc": "The column."}}

    def process(self, inputs: ValueMap, outputs: ValueMap) -> None:
        """Extract the requested column from the input table.

        The column name comes from the module configuration when set there,
        otherwise from the ``column_name`` input. The resulting column is
        stored in the ``array`` output.

        Raises:
            KiaraProcessingException: if no column name is available, or if
                the name is not among the table's column names.
        """
        # Function-scope import; within this method it is only referenced by
        # the type annotation on the arrow table below.
        import pyarrow as pa

        configured: Union[str, None] = self.get_config_value("column_name")
        # The input is only consulted when the config provides no usable name.
        wanted = configured or inputs.get_value_data("column_name")

        if not wanted:
            raise KiaraProcessingException(
                "Could not cut column from table: column_name not provided or empty string."
            )

        source: Value = inputs.get_value_obj("table")
        # Column names are read from the value's metadata property, before
        # the table data itself is accessed.
        metadata: KiaraTableMetadata = source.get_property_data("metadata.table")
        known_columns = metadata.table.column_names

        if wanted not in known_columns:
            listing = ", ".join(known_columns)
            raise KiaraProcessingException(
                f"Invalid column name '{wanted}'. Available column names: {listing}"
            )

        arrow_table: pa.Table = source.data.arrow_table
        outputs.set_value("array", arrow_table.column(wanted))