src/ast/mod.rs — 37 changes: 31 additions & 6 deletions
@@ -4654,22 +4654,34 @@ pub enum Statement {
/// Legacy copy-style options.
options: Vec<CopyLegacyOption>,
},
/// ClickHouse:
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
///
/// See ClickHouse <https://clickhouse.com/docs/en/sql-reference/statements/optimize>
///
/// Databricks:
/// ```sql
/// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])]
/// ```
/// See Databricks <https://docs.databricks.com/en/sql/language-manual/delta-optimize.html>
OptimizeTable {
/// Table name to optimize.
name: ObjectName,
/// Whether the `TABLE` keyword was present (ClickHouse uses `OPTIMIZE TABLE`, Databricks uses `OPTIMIZE`).
has_table_keyword: bool,
/// Optional cluster identifier (ClickHouse).
on_cluster: Option<Ident>,
/// Optional partition spec (ClickHouse).
partition: Option<Partition>,
/// Whether `FINAL` was specified (ClickHouse).
include_final: bool,
/// Optional deduplication settings (ClickHouse).
deduplicate: Option<Deduplicate>,
/// Optional WHERE predicate (Databricks).
predicate: Option<Expr>,
/// Optional ZORDER BY columns (Databricks).
zorder: Option<Vec<Expr>>,
},
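
For a sense of how the extended variant surfaces to users of the crate, here is a minimal sketch using the public `Parser::parse_sql` entry point; the field names come from this diff, but the exact statement and assertions are illustrative:

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::DatabricksDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "OPTIMIZE t WHERE c3 = 1 ZORDER BY (c1, c2)";
    let stmt = Parser::parse_sql(&DatabricksDialect {}, sql)
        .expect("parse failed")
        .remove(0);
    if let Statement::OptimizeTable { has_table_keyword, predicate, zorder, .. } = stmt {
        assert!(!has_table_keyword);                  // Databricks form omits TABLE
        assert!(predicate.is_some());                 // WHERE predicate captured
        assert_eq!(zorder.map(|z| z.len()), Some(2)); // two ZORDER BY columns
    }
}
```
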
/// ```sql
/// LISTEN
@@ -6243,12 +6255,19 @@ impl fmt::Display for Statement {
}
Statement::OptimizeTable {
name,
has_table_keyword,
on_cluster,
partition,
include_final,
deduplicate,
predicate,
zorder,
} => {
write!(f, "OPTIMIZE")?;
if *has_table_keyword {
write!(f, " TABLE")?;
}
write!(f, " {name}")?;
if let Some(on_cluster) = on_cluster {
write!(f, " ON CLUSTER {on_cluster}")?;
}
@@ -6261,6 +6280,12 @@
if let Some(deduplicate) = deduplicate {
write!(f, " {deduplicate}")?;
}
if let Some(predicate) = predicate {
write!(f, " WHERE {predicate}")?;
}
if let Some(zorder) = zorder {
write!(f, " ZORDER BY ({})", display_comma_separated(zorder))?;
}
Ok(())
}
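
A quick round-trip check of the Display logic above; a sketch assuming each dialect re-parses the exact text it prints:

```rust
use sqlparser::dialect::{ClickHouseDialect, DatabricksDialect, Dialect};
use sqlparser::parser::Parser;

fn roundtrip(dialect: &dyn Dialect, sql: &str) {
    let stmt = &Parser::parse_sql(dialect, sql).expect("parse failed")[0];
    // Display should reproduce the input, including whether TABLE appears.
    assert_eq!(stmt.to_string(), sql);
}

fn main() {
    roundtrip(&ClickHouseDialect {}, "OPTIMIZE TABLE t ON CLUSTER c FINAL");
    roundtrip(&DatabricksDialect {}, "OPTIMIZE t ZORDER BY (c1)");
}
```
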
Statement::LISTEN { channel } => {
src/parser/mod.rs — 91 changes: 81 additions & 10 deletions
@@ -697,8 +697,10 @@ impl<'a> Parser<'a> {
self.parse_install()
}
Keyword::LOAD => self.parse_load(),
// `OPTIMIZE` is clickhouse/databricks specific
// ClickHouse: https://clickhouse.tech/docs/en/sql-reference/statements/optimize/
// Databricks: https://docs.databricks.com/en/sql/language-manual/delta-optimize.html
Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | DatabricksDialect | GenericDialect) => {
self.parse_optimize_table()
}
// `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment
@@ -3332,25 +3334,35 @@ impl<'a> Parser<'a> {
/// Syntax:
///
/// ```sql
/// -- BigQuery style
/// [field_name] field_type
/// -- Databricks/Hive style (colon separator)
/// field_name: field_type
/// ```
///
/// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type
/// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple
/// [databricks]: https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html
fn parse_struct_field_def(
&mut self,
) -> Result<(StructField, MatchedTrailingBracket), ParserError> {
// Look beyond the next item to infer whether both field name
// and type are specified.
// Supports both:
// - `field_name field_type` (BigQuery style)
// - `field_name: field_type` (Databricks/Hive style)
let is_named_field = matches!(
(self.peek_nth_token(0).token, self.peek_nth_token(1).token),
(Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon)
);

let field_name = if is_named_field {
let name = self.parse_identifier()?;
// Consume optional colon separator (Databricks/Hive style)
let _ = self.consume_token(&Token::Colon);
Some(name)
} else {
None
};

let (field_type, trailing_bracket) = self.parse_data_type_helper()?;
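
Both spellings should now yield the same named field. A minimal usage sketch, assuming the Databricks `STRUCT` data-type support enabled in a later hunk of this diff:

```rust
use sqlparser::dialect::DatabricksDialect;
use sqlparser::parser::Parser;

fn main() {
    // Space-separated (BigQuery style) and colon-separated (Databricks/Hive
    // style) field definitions are both accepted.
    for sql in [
        "SELECT CAST(NULL AS STRUCT<name STRING>)",
        "SELECT CAST(NULL AS STRUCT<name: STRING>)",
    ] {
        assert!(
            Parser::parse_sql(&DatabricksDialect {}, sql).is_ok(),
            "failed to parse: {sql}"
        );
    }
}
```
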
@@ -7878,14 +7890,41 @@ impl<'a> Parser<'a> {
pub fn parse_hive_distribution(&mut self) -> Result<HiveDistributionStyle, ParserError> {
if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) {
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(Parser::parse_column_def_for_partition)?;
self.expect_token(&Token::RParen)?;
Ok(HiveDistributionStyle::PARTITIONED { columns })
} else {
Ok(HiveDistributionStyle::NONE)
}
}

/// Parse a column definition within a `PARTITIONED BY` clause.
///
/// Databricks allows a partition column to omit its type when it references
/// a column already defined in the table specification; a partition column
/// that is new to the table still carries a type:
/// ```sql
/// CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)
/// CREATE TABLE t (col1 STRING) PARTITIONED BY (col2 INT)
/// ```
///
/// See [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html)
fn parse_column_def_for_partition(&mut self) -> Result<ColumnDef, ParserError> {
let name = self.parse_identifier()?;

// Check if the next token indicates there's no type specified
// (comma or closing paren means end of this column definition)
let data_type = match self.peek_token().token {
Token::Comma | Token::RParen => DataType::Unspecified,
_ => self.parse_data_type()?,
};

Ok(ColumnDef {
name,
data_type,
options: vec![],
})
}
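
A sketch of the observable behavior, assuming `DataType::Unspecified` and the `CreateTable` AST shape of recent crate versions, and that the generic dialect routes `PARTITIONED BY` through `parse_hive_distribution` as above:

```rust
use sqlparser::ast::{DataType, HiveDistributionStyle, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)";
    let stmt = Parser::parse_sql(&GenericDialect {}, sql)
        .expect("parse failed")
        .remove(0);
    if let Statement::CreateTable(create) = stmt {
        if let HiveDistributionStyle::PARTITIONED { columns } = create.hive_distribution {
            // `col1` only references a column defined earlier, so no type is attached.
            assert_eq!(columns[0].data_type, DataType::Unspecified);
        }
    }
}
```
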

/// Parse Hive formats.
pub fn parse_hive_formats(&mut self) -> Result<Option<HiveFormat>, ParserError> {
let mut hive_format: Option<HiveFormat> = None;
@@ -11781,7 +11820,8 @@ impl<'a> Parser<'a> {
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | DatabricksDialect | GenericDialect) =>
{
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
@@ -18204,13 +18244,24 @@ impl<'a> Parser<'a> {
}
}

/// ClickHouse:
/// ```sql
/// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]]
/// ```
/// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize)
///
/// Databricks:
/// ```sql
/// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])]
/// ```
/// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html)
pub fn parse_optimize_table(&mut self) -> Result<Statement, ParserError> {
// Check for TABLE keyword (ClickHouse uses it, Databricks does not)
let has_table_keyword = self.parse_keyword(Keyword::TABLE);

let name = self.parse_object_name(false)?;

// ClickHouse-specific options
let on_cluster = self.parse_optional_on_cluster()?;

let partition = if self.parse_keyword(Keyword::PARTITION) {
Expand All @@ -18224,6 +18275,7 @@ impl<'a> Parser<'a> {
};

let include_final = self.parse_keyword(Keyword::FINAL);

let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) {
if self.parse_keyword(Keyword::BY) {
Some(Deduplicate::ByExpression(self.parse_expr()?))
Expand All @@ -18234,12 +18286,31 @@ impl<'a> Parser<'a> {
None
};

// Databricks-specific options
let predicate = if self.parse_keyword(Keyword::WHERE) {
Some(self.parse_expr()?)
} else {
None
};

let zorder = if self.parse_keywords(&[Keyword::ZORDER, Keyword::BY]) {
self.expect_token(&Token::LParen)?;
let columns = self.parse_comma_separated(|p| p.parse_expr())?;
self.expect_token(&Token::RParen)?;
Some(columns)
} else {
None
};

Ok(Statement::OptimizeTable {
name,
has_table_keyword,
on_cluster,
partition,
include_final,
deduplicate,
predicate,
zorder,
})
}
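
A test sketch in the style of the crate's integration suite; `clickhouse()`, `databricks()`, and `verified_stmt` are assumed to be the existing `TestedDialects` helpers used elsewhere in the tests:

```rust
#[test]
fn parse_optimize_table_both_dialects() {
    // ClickHouse form: TABLE keyword plus cluster/partition/dedup options.
    clickhouse().verified_stmt(
        "OPTIMIZE TABLE t ON CLUSTER cluster PARTITION ID 'partition_id' FINAL DEDUPLICATE",
    );
    // Databricks form: bare OPTIMIZE with WHERE and ZORDER BY instead.
    databricks().verified_stmt("OPTIMIZE t WHERE c3 = 1 ZORDER BY (c1, c2)");
}
```
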
