From 03431947833b4e3f3fc79b09fc626e0f30508a2b Mon Sep 17 00:00:00 2001 From: lilfetz22 <50301466+lilfetz22@users.noreply.github.com> Date: Mon, 5 Jan 2026 10:03:41 -0500 Subject: [PATCH] fix(cmd-config-generate): fix config output for Microsoft Windows UTF-8 encoding (#1400) Resolves: #702 * docs(cmd-config-generate): add Windows PowerShell specific `generate-config` usage example * test(cmd-config-generate): adds UTF-8 encoding test for platform specific output --- docs/api/commands.rst | 32 ++++++-- .../cli/commands/generate_config.py | 35 +++++++-- src/semantic_release/cli/util.py | 2 +- tests/const.py | 2 +- tests/e2e/cmd_config/test_generate_config.py | 78 +++++++++++++++++++ 5 files changed, 133 insertions(+), 16 deletions(-) diff --git a/docs/api/commands.rst b/docs/api/commands.rst index cdf9be45c..dd31a4e1a 100644 --- a/docs/api/commands.rst +++ b/docs/api/commands.rst @@ -473,16 +473,36 @@ Release corresponding to this version. Generate default configuration for semantic-release, to help you get started quickly. You can inspect the defaults, write to a file and then edit according to -your needs. -For example, to append the default configuration to your pyproject.toml -file, you can use the following command:: +your needs. For example, to append the default configuration to your ``pyproject.toml`` +file, you can use the following command (in POSIX-Compliant shells): - $ semantic-release generate-config -f toml --pyproject >> pyproject.toml +.. code-block:: bash + + semantic-release generate-config --pyproject >> pyproject.toml + +On Windows PowerShell, the redirection operators (``>``/``>>``) default to UTF-16LE, +which can introduce NUL characters. Prefer one of the following to keep UTF-8: + +.. 
code-block:: console + + # Two file-output piping options in PowerShell (Out-File or Set-Content) + + # Example for appending to pyproject.toml using Out-File: + semantic-release generate-config --pyproject | Out-File -Append -Encoding utf8 pyproject.toml + + # Example for writing to a releaserc.toml file using Set-Content: + semantic-release generate-config -f toml | Set-Content -Encoding utf8 releaserc.toml If your project doesn't already leverage TOML files for configuration, it might better -suit your project to use JSON instead:: +suit your project to use JSON instead: + +.. code-block:: bash + + # POSIX-Compliant shell example + semantic-release generate-config -f json | tee releaserc.json - $ semantic-release generate-config -f json + # Windows PowerShell example + semantic-release generate-config -f json | Out-File -Encoding utf8 releaserc.json If you would like to add JSON configuration to a shared file, e.g. ``package.json``, you can then simply add the output from this command as a **top-level** key to the file. diff --git a/src/semantic_release/cli/commands/generate_config.py b/src/semantic_release/cli/commands/generate_config.py index a6bf36013..7d498b31e 100644 --- a/src/semantic_release/cli/commands/generate_config.py +++ b/src/semantic_release/cli/commands/generate_config.py @@ -1,6 +1,8 @@ from __future__ import annotations import json +import sys +from typing import Literal import click import tomlkit @@ -31,7 +33,9 @@ "'semantic_release'" ), ) -def generate_config(fmt: str = "toml", is_pyproject_toml: bool = False) -> None: +def generate_config( + fmt: Literal["toml", "json"], is_pyproject_toml: bool = False +) -> None: """ Generate default configuration for semantic-release, to help you get started quickly. 
You can inspect the defaults, write to a file and then edit according to @@ -42,14 +46,29 @@ def generate_config(fmt: str = "toml", is_pyproject_toml: bool = False) -> None: """ # due to possible IntEnum values (which are not supported by tomlkit.dumps, see sdispater/tomlkit#237), # we must ensure the transformation of the model to a dict uses json serializable values - config = RawConfig().model_dump(mode="json", exclude_none=True) + config_dct = { + "semantic_release": RawConfig().model_dump(mode="json", exclude_none=True) + } - config_dct = {"semantic_release": config} - if is_pyproject_toml and fmt == "toml": - config_dct = {"tool": config_dct} + if is_pyproject_toml: + output = tomlkit.dumps({"tool": config_dct}) - if fmt == "toml": - click.echo(tomlkit.dumps(config_dct)) + elif fmt == "toml": + output = tomlkit.dumps(config_dct) elif fmt == "json": - click.echo(json.dumps(config_dct, indent=4)) + output = json.dumps(config_dct, indent=4) + + else: + raise ValueError(f"Unsupported format: {fmt}") + + # Write output directly to stdout buffer as UTF-8 bytes + # This ensures consistent UTF-8 output on all platforms, especially Windows where + # shell redirection (>, >>) defaults to the system encoding (e.g., UTF-16LE or cp1252) + # By writing to sys.stdout.buffer, we bypass the encoding layer and guarantee UTF-8. 
+ try: + sys.stdout.buffer.write(f"{output.strip()}\n".encode("utf-8")) # noqa: UP012; allow explicit encoding declaration + sys.stdout.buffer.flush() + except (AttributeError, TypeError): + # Fallback for environments without buffer (shouldn't happen in standard Python) + click.echo(output) diff --git a/src/semantic_release/cli/util.py b/src/semantic_release/cli/util.py index 37d249c1a..4696a7270 100644 --- a/src/semantic_release/cli/util.py +++ b/src/semantic_release/cli/util.py @@ -75,7 +75,7 @@ def load_raw_config_file(config_file: Path | str) -> dict[Any, Any]: while trying to read the specified configuration file """ logger.info("Loading configuration from %s", config_file) - raw_text = (Path() / config_file).resolve().read_text(encoding="utf-8") + raw_text = (Path() / config_file).resolve().read_text(encoding="utf-8-sig") try: logger.debug("Trying to parse configuration %s in TOML format", config_file) return parse_toml(raw_text) diff --git a/tests/const.py b/tests/const.py index 8ff979f2f..186a23013 100644 --- a/tests/const.py +++ b/tests/const.py @@ -39,7 +39,7 @@ class RepoActionStep(str, Enum): SUCCESS_EXIT_CODE = 0 CHANGELOG_SUBCMD = Cli.SubCmds.CHANGELOG.name.lower() -GENERATE_CONFIG_SUBCMD = Cli.SubCmds.GENERATE_CONFIG.name.lower() +GENERATE_CONFIG_SUBCMD = Cli.SubCmds.GENERATE_CONFIG.name.lower().replace("_", "-") PUBLISH_SUBCMD = Cli.SubCmds.PUBLISH.name.lower() VERSION_SUBCMD = Cli.SubCmds.VERSION.name.lower() diff --git a/tests/e2e/cmd_config/test_generate_config.py b/tests/e2e/cmd_config/test_generate_config.py index 4a21f0be7..a9db934ea 100644 --- a/tests/e2e/cmd_config/test_generate_config.py +++ b/tests/e2e/cmd_config/test_generate_config.py @@ -1,11 +1,15 @@ from __future__ import annotations import json +import subprocess +import sys +from sys import executable as python_interpreter from typing import TYPE_CHECKING import pytest import tomlkit +import semantic_release from semantic_release.cli.config import RawConfig from tests.const import 
GENERATE_CONFIG_SUBCMD, MAIN_PROG_NAME, VERSION_SUBCMD @@ -19,6 +23,9 @@ from tests.conftest import RunCliFn from tests.fixtures.example_project import ExProjectDir +# Constant +NULL_BYTE = b"\x00" + @pytest.fixture def raw_config_dict() -> dict[str, Any]: @@ -157,3 +164,74 @@ def test_generate_config_pyproject_toml( # Evaluate: Check that the version command in noop mode ran successfully # which means PSR loaded the configuration successfully assert_successful_exit_code(result, cli_cmd) + + +@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific encoding check") +@pytest.mark.parametrize( + "console_executable", + ( + "C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe", + # "C:\\Windows\\System32\\cmd.exe", # CMD.exe does not support specifying encoding for output + ), +) +@pytest.mark.usefixtures(repo_w_no_tags_conventional_commits.__name__) +def test_generate_config_toml_utf8_bytes_windows( + console_executable: str, + example_project_dir: ExProjectDir, + run_cli: RunCliFn, +) -> None: + """ + Given an example project directory + When generating a TOML configuration file via Powershell redirection + Then the emitted file contains only UTF-8 bytes and no NUL bytes + """ + if "powershell.exe" not in console_executable.lower(): + pytest.skip("Only PowerShell is currently supported for this test") + + output_file = example_project_dir / "releaserc.toml" + psr_cmd = [ + python_interpreter, + "-m", + semantic_release.__name__, + GENERATE_CONFIG_SUBCMD, + "-f", + "toml", + ] + + redirection_cmd = ( + f"{str.join(' ', psr_cmd)} | Out-File -Encoding utf8 {output_file}" + ) + + # Act: Generate the config file via subprocess call to PowerShell + proc = subprocess.run( # noqa: S602, not a security concern in testing & required for redirection + redirection_cmd, + executable=console_executable, + shell=True, + stdin=None, + capture_output=True, + check=True, + ) + + config_as_bytes = output_file.read_bytes() + assert config_as_bytes, "Generated 
config file is empty!" + assert ( + NULL_BYTE not in config_as_bytes + ), f"Generated config file '{output_file}' contains NUL bytes!" + assert not proc.stderr + assert not proc.stdout + + # Act: Validate that the generated config is a valid configuration for PSR + cli_cmd = [ + MAIN_PROG_NAME, + "--noop", + "--strict", + "-c", + str(output_file), + VERSION_SUBCMD, + "--print", + ] + result = run_cli(cli_cmd[1:]) + + # Evaluate: Check that the version command in noop mode ran successfully + # which means PSR loaded the configuration successfully + assert_successful_exit_code(result, cli_cmd)