src/ast/mod.rs — 37 changes: 31 additions & 6 deletions
@@ -4654,22 +4654,34 @@ pub enum Statement {
/// Legacy copy-style options.
options: Vec<CopyLegacyOption>,
},
/// ClickHouse:
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
///
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
///
/// Databricks:
/// ```sql
/// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])]
/// ```
/// See Databricks <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
OptimizeTable {
/// Table name to optimize.
name: ObjectName,
/// Whether the `TABLE` keyword was present (ClickHouse uses `OPTIMIZE TABLE`, Databricks uses `OPTIMIZE`).
has_table_keyword: bool,
/// Optional cluster identifier (ClickHouse).
on_cluster: Option<Ident>,
/// Optional partition spec (ClickHouse).
partition: Option<Partition>,
/// Whether `FINAL` was specified (ClickHouse).
include_final: bool,
/// Optional deduplication settings (ClickHouse).
deduplicate: Option<Deduplicate>,
/// Optional WHERE predicate (Databricks).
predicate: Option<Expr>,
/// Optional ZORDER BY columns (Databricks).
zorder: Option<Vec<Expr>>,
},
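
For a sense of how the extended variant surfaces to users of the crate, here is a minimal sketch using the public `Parser::parse_sql` entry point; the field names come from this diff, but the exact statement and assertions are illustrative:

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::DatabricksDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "OPTIMIZE t WHERE c3 = 1 ZORDER BY (c1, c2)";
    let stmt = Parser::parse_sql(&DatabricksDialect {}, sql)
        .expect("parse failed")
        .remove(0);
    if let Statement::OptimizeTable { has_table_keyword, predicate, zorder, .. } = stmt {
        assert!(!has_table_keyword);                  // Databricks form omits TABLE
        assert!(predicate.is_some());                 // WHERE predicate captured
        assert_eq!(zorder.map(|z| z.len()), Some(2)); // two ZORDER BY columns
    }
}
```
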
/// ```sql
/// LISTEN
@@ -6243,12 +6255,19 @@ impl fmt::Display for Statement {
}
Statement::OptimizeTable {
name,
has_table_keyword,
on_cluster,
partition,
include_final,
deduplicate,
predicate,
zorder,
} => {
write!(f, "OPTIMIZE")?;
if *has_table_keyword {
write!(f, " TABLE")?;
}
write!(f, " {name}")?;
if let Some(on_cluster) = on_cluster {
write!(f, " ON CLUSTER {on_cluster}")?;
}
@@ -6261,6 +6280,12 @@
if let Some(deduplicate) = deduplicate {
write!(f, " {deduplicate}")?;
}
if let Some(predicate) = predicate {
write!(f, " WHERE {predicate}")?;
}
if let Some(zorder) = zorder {
write!(f, " ZORDER BY ({})", display_comma_separated(zorder))?;
}
Ok(())
}
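
A quick round-trip check of the Display logic above; a sketch assuming each dialect re-parses the exact text it prints:

```rust
use sqlparser::dialect::{ClickHouseDialect, DatabricksDialect, Dialect};
use sqlparser::parser::Parser;

fn roundtrip(dialect: &dyn Dialect, sql: &str) {
    let stmt = &Parser::parse_sql(dialect, sql).expect("parse failed")[0];
    // Display should reproduce the input, including whether TABLE appears.
    assert_eq!(stmt.to_string(), sql);
}

fn main() {
    roundtrip(&ClickHouseDialect {}, "OPTIMIZE TABLE t ON CLUSTER c FINAL");
    roundtrip(&DatabricksDialect {}, "OPTIMIZE t ZORDER BY (c1)");
}
```
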
Statement::LISTEN { channel } => {
src/parser/mod.rs — 91 changes: 81 additions & 10 deletions
@@ -697,8 +697,10 @@ impl<'a> Parser<'a> {
self.parse_install()
}
Keyword::LOAD => self.parse_load(),
// `OPTIMIZE` is clickhouse/databricks specific
// ClickHouse: https://clickhouse.tech/docs/en/sql-reference/statements/optimize/
// Databricks: https://docs.databricks.com/en/sql/language-manual/delta-optimize.html
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | DatabricksDialect | GenericDialect) => {
self.parse_optimize_table()
}
// `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment
@@ -3332,25 +3334,35 @@ impl<'a> Parser<'a> {
/// Syntax:
///
/// ```sql
/// -- BigQuery style
/// [field_name] field_type
/// -- Databricks/Hive style (colon separator)
/// field_name: field_type
/// ```
///
/// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
/// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
/// [databricks]: https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html
fn parse_struct_field_def(
&mut self,
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
// Look beyond the next item to infer whether both field name
// and type are specified.
// Supports both:
// - `field_name field_type` (BigQuery style)
// - `field_name: field_type` (Databricks/Hive style)
let is_named_field = matches!(
(self.peek_nth_token(0).token, self.peek_nth_token(1).token),
(Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon)
);

let field_name = if is_named_field {
let name = self.parse_identifier()?;
// Consume optional colon separator (Databricks/Hive style)
let _ = self.consume_token(&Token::Colon);
Some(name)
} else {
None
};

let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
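
Both spellings should now yield the same named field. A minimal usage sketch, assuming the Databricks `STRUCT` data-type support enabled in a later hunk of this diff:

```rust
use sqlparser::dialect::DatabricksDialect;
use sqlparser::parser::Parser;

fn main() {
    // Space-separated (BigQuery style) and colon-separated (Databricks/Hive
    // style) field definitions are both accepted.
    for sql in [
        "SELECT CAST(NULL AS STRUCT<name STRING>)",
        "SELECT CAST(NULL AS STRUCT<name: STRING>)",
    ] {
        assert!(
            Parser::parse_sql(&DatabricksDialect {}, sql).is_ok(),
            "failed to parse: {sql}"
        );
    }
}
```
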
@@ -7878,14 +7890,41 @@ impl<'a> Parser<'a> {
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(Parser::parse_column_def_for_partition)?;
self.expect_token(&Token::RParen)?;
Ok(HiveDistributionStyle::PARTITIONED { columns })
} else {
Ok(HiveDistributionStyle::NONE)
}
}

/// Parse a column definition within a `PARTITIONED BY` clause.
///
/// Databricks allows a partition column to omit its type when it references
/// a column already defined in the table specification; a partition column
/// that is new to the table still carries a type:
/// ```sql
/// CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)
/// CREATE TABLE t (col1 STRING) PARTITIONED BY (col2 INT)
/// ```
///
/// See [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html)
fn parse_column_def_for_partition(&mut self) -> Result<ColumnDef, ParserError> {
let name = self.parse_identifier()?;

// Check if the next token indicates there's no type specified
// (comma or closing paren means end of this column definition)
let data_type = match self.peek_token().token {
Token::Comma | Token::RParen => DataType::Unspecified,
_ => self.parse_data_type()?,
};

Ok(ColumnDef {
name,
data_type,
options: vec![],
})
}
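
A sketch of the observable behavior, assuming `DataType::Unspecified` and the `CreateTable` AST shape of recent crate versions, and that the generic dialect routes `PARTITIONED BY` through `parse_hive_distribution` as above:

```rust
use sqlparser::ast::{DataType, HiveDistributionStyle, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)";
    let stmt = Parser::parse_sql(&GenericDialect {}, sql)
        .expect("parse failed")
        .remove(0);
    if let Statement::CreateTable(create) = stmt {
        if let HiveDistributionStyle::PARTITIONED { columns } = create.hive_distribution {
            // `col1` only references a column defined earlier, so no type is attached.
            assert_eq!(columns[0].data_type, DataType::Unspecified);
        }
    }
}
```
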

/// Parse Hive formats.
pub fn parse_hive_formats(&mut self) -> Result<Option<HiveFormat>, ParserError> {
let mut hive_format: Option<HiveFormat> = None;
@@ -11781,7 +11820,8 @@ impl<'a> Parser<'a> {
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | DatabricksDialect | GenericDialect) =>
{
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
@@ -18204,13 +18244,24 @@ impl<'a> Parser<'a> {
}
}

/// ClickHouse:
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
///
/// Databricks:
/// ```sql
/// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])]
/// ```
/// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html)
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
// Check for TABLE keyword (ClickHouse uses it, Databricks does not)
let has_table_keyword = self.parse_keyword(Keyword::TABLE);

let name = self.parse_object_name(false)?;

// ClickHouse-specific options
let on_cluster = self.parse_optional_on_cluster()?;

let partition = if self.parse_keyword(Keyword::PARTITION) {
Expand All @@ -18224,6 +18275,7 @@ impl<'a> Parser<'a> {
};

let include_final = self.parse_keyword(Keyword::FINAL);

let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
if self.parse_keyword(Keyword::BY) {
Some(Deduplicate::ByExpression(self.parse_expr()?))
Expand All @@ -18234,12 +18286,31 @@ impl<'a> Parser<'a> {
None
};

// Databricks-specific options
let predicate = if self.parse_keyword(Keyword::WHERE) {
Some(self.parse_expr()?)
} else {
None
};

let zorder = if self.parse_keywords(&[Keyword::ZORDER, Keyword::BY]) {
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(|p| p.parse_expr())?;
self.expect_token(&Token::RParen)?;
Some(columns)
} else {
None
};

Ok(Statement::OptimizeTable {
name,
has_table_keyword,
on_cluster,
partition,
include_final,
deduplicate,
predicate,
zorder,
})
}
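
A test sketch in the style of the crate's integration suite; `clickhouse()`, `databricks()`, and `verified_stmt` are assumed to be the existing `TestedDialects` helpers used elsewhere in the tests:

```rust
#[test]
fn parse_optimize_table_both_dialects() {
    // ClickHouse form: TABLE keyword plus cluster/partition/dedup options.
    clickhouse().verified_stmt(
        "OPTIMIZE TABLE t ON CLUSTER cluster PARTITION ID 'partition_id' FINAL DEDUPLICATE",
    );
    // Databricks form: bare OPTIMIZE with WHERE and ZORDER BY instead.
    databricks().verified_stmt("OPTIMIZE t WHERE c3 = 1 ZORDER BY (c1, c2)");
}
```
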
