From a90870da4c67ee5bcac078174c0bed424d322d6c Mon Sep 17 00:00:00 2001 From: Luca Burelli Date: Wed, 26 Mar 2025 16:22:55 +0100 Subject: [PATCH] cmake: yaml: improve escaping of strings with backslashes and quotes Switching the intermediate file format from JSON to YAML has a very significant benefit: the file is now loaded via yaml_load(), which internally calls Python to parse the file into the JSON format that CMake expects. This means that the file contents are now automatically escaped properly for JSON; it is a huge improvement over the previous implementation, which was escaping everything given as input to to_yaml(). With the removal of the now-redundant call in to_yaml(), escaping is applied exactly once per value or list, when it is passed to yaml_set(). This allows to convert the logic in zephyr_string(ESCAPE ...) to a more robust "escape everything" approach. These changes fix the handling of strings with backslashes and different types of quotes passed either directly or via generator expression. The existing tests are updated to cover these cases. Two other small changes are made in this commit: - a small check in internal_yaml_list_append() is removed, as the same issue is already detected by the caller yaml_set() logic. - the to_yaml() function is modified to initialize the YAML output variable at the top level, which is the expected behavior. This resulted in genex temp files sometimes having duplicate lines. Signed-off-by: Luca Burelli --- cmake/modules/extensions.cmake | 28 +++++----- cmake/modules/yaml.cmake | 16 ++---- tests/cmake/yaml/CMakeLists.txt | 90 ++++++++++++++++++++++++++++++++- 3 files changed, 109 insertions(+), 25 deletions(-) diff --git a/cmake/modules/extensions.cmake b/cmake/modules/extensions.cmake index 32d49930206..843590d0638 100644 --- a/cmake/modules/extensions.cmake +++ b/cmake/modules/extensions.cmake @@ -3051,15 +3051,14 @@ endfunction() # This function extends the CMake string function by providing additional # manipulation options for the argument: # -# ESCAPE: Ensure that any single '\', except '\"', in the input string is -# escaped with the escape char '\'. For example the string 'foo\bar' -# will be escaped so that it becomes 'foo\\bar'. -# Backslashes which are already escaped will not be escaped further, -# for example 'foo\\bar' will not be modified. -# This is useful for handling of windows path separator in strings or -# when strings contains newline escapes such as '\n' and this can -# cause issues when writing to a file where a '\n' is desired in the -# string instead of a newline. +# ESCAPE: Ensure that every character of the input arguments is considered +# by CMake as a literal by prefixing the escape character '\' where +# appropriate. This is useful for handling Windows path separators in +# strings, or when it is desired to write "\n" as an actual string of +# four characters instead of a single newline. +# Note that this operation must be performed exactly once during the +# lifetime of a string, or previous escape characters will be treated +# as literals and escaped further. # # SANITIZE: Ensure that the output string does not contain any special # characters. Special characters, such as -, +, =, $, etc. are @@ -3092,10 +3091,13 @@ function(zephyr_string) string(TOUPPER ${work_string} work_string) endif() elseif(ZEPHYR_STRING_ESCAPE) - # If a single '\' is discovered, such as 'foo\bar', then it must be escaped like: 'foo\\bar' - # \\1 and \\2 are keeping the match patterns, the \\\\ --> \\ meaning an escaped '\', - # which then becomes a single '\' in the final string. - string(REGEX REPLACE "([^\\][\\])([^\\\"])" "\\1\\\\\\2" work_string "${ZEPHYR_STRING_UNPARSED_ARGUMENTS}") + # Escape every instance of '\' or '"' in the arguments. + # Note that: + # - backslashes must be replaced first to avoid duplicating the '\' in \" + # - "\\\\" is seen by CMake as \\, meaning an escaped '\', which then + # becomes a single '\' in the final string. + string(REGEX REPLACE "\\\\" "\\\\\\\\" work_string "${ZEPHYR_STRING_UNPARSED_ARGUMENTS}") + string(REGEX REPLACE "\"" "\\\\\"" work_string "${work_string}") endif() set(${return_arg} ${work_string} PARENT_SCOPE) diff --git a/cmake/modules/yaml.cmake b/cmake/modules/yaml.cmake index 6be9d52c93a..76451068757 100644 --- a/cmake/modules/yaml.cmake +++ b/cmake/modules/yaml.cmake @@ -96,13 +96,6 @@ function(internal_yaml_list_append var genex key) internal_yaml_list_initializer(subjson TRUE) if(${arraylength} GREATER 0) math(EXPR arraystop "${arraylength} - 1") - list(GET ARG_YAML_LIST 0 entry_0) - if(entry_0 STREQUAL MAP) - message(FATAL_ERROR "${function}(GENEX ${argument} ) is not valid at this position.\n" - "Syntax is 'LIST MAP \"key1: value1.1, ...\" MAP \"key1: value1.2, ...\"" - ) - endif() - foreach(i RANGE 0 ${arraystop}) string(JSON item GET "${json_content}" ${key} ${i}) list(APPEND subjson ${item}) @@ -544,9 +537,7 @@ function(yaml_save) endif() endfunction() -function(to_yaml in_json level yaml mode) - zephyr_string(ESCAPE json "${in_json}") - +function(to_yaml json level yaml mode) if(mode STREQUAL "DIRECT") # Direct output mode, no genexes: write a standard YAML set(expand_lists TRUE) @@ -569,7 +560,10 @@ function(to_yaml in_json level yaml mode) message(FATAL_ERROR "to_yaml(... ${mode} ) is malformed.") endif() - if(level GREATER 0) + if(level EQUAL 0) + # Top-level call, initialize the YAML output variable + set(${yaml} "" PARENT_SCOPE) + else() math(EXPR level_dec "${level} - 1") set(indent_${level} "${indent_${level_dec}} ") endif() diff --git a/tests/cmake/yaml/CMakeLists.txt b/tests/cmake/yaml/CMakeLists.txt index 150f44a0510..43f93a28b15 100644 --- a/tests/cmake/yaml/CMakeLists.txt +++ b/tests/cmake/yaml/CMakeLists.txt @@ -242,6 +242,46 @@ function(test_setting_list_strings) endforeach() endfunction() +function(test_setting_list_escaped_strings) + yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml + NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create + ) + + set(new_value "back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path") + yaml_set(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create + KEY cmake test set key-list-string LIST ${new_value} + ) + + yaml_save(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create) + + # Read-back the yaml and verify the value. + yaml_load(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml + NAME ${CMAKE_CURRENT_FUNCTION}_readback + ) + + yaml_length(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string) + + test_assert(TEST 5 EQUAL ${readback} + COMMENT "readback yaml list length does not match original." + ) + + yaml_get(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string) + + foreach(a e IN ZIP_LISTS readback new_value) + # cmake_parse_arguments() breaks horribly when given strings that include + # escapes, making it impossible to use test_assert() here. Or, for that + # matter, even trying to print them in message(). Thanks, CMake! + if("${e}" STREQUAL "${a}") + message("${CMAKE_CURRENT_FUNCTION}(): Passed") + else() + message(FATAL_ERROR + "${CMAKE_CURRENT_FUNCTION}(): Failed\n" + "Test: list values mismatch." + ) + endif() + endforeach() +endfunction() + function(test_append_list_strings) yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create @@ -653,6 +693,51 @@ function(test_verify_genexes) endforeach() endfunction() +function(test_setting_escaped_genexes) + set(file ${CMAKE_BINARY_DIR}/test_setting_escaped_genexes.yaml) + yaml_create(FILE ${file} + NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create + ) + + set_property(TARGET app PROPERTY escaped_list + "back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path") + yaml_set(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create + KEY cmake test set key-list-string-genex + GENEX LIST "$" + ) + + yaml_save(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create) +endfunction() + +function(test_verify_escaped_genexes) + set(file ${CMAKE_BINARY_DIR}/test_setting_escaped_genexes.yaml) + yaml_load(FILE ${file} + NAME ${CMAKE_CURRENT_FUNCTION}_readback + ) + + set(expected "back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path") + list(LENGTH expected exp_len) + + yaml_get(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string-genex) + yaml_length(act_len NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string-genex) + + test_assert(TEST ${exp_len} EQUAL ${act_len} + COMMENT "yaml list length does not match expectation." + ) + + foreach(a e IN ZIP_LISTS readback expected) + # See comment in test_setting_list_escaped_strings() for why test_assert() is not used here. + if("${e}" STREQUAL "${a}") + message("${CMAKE_CURRENT_FUNCTION}(): Passed") + else() + message(FATAL_ERROR + "${CMAKE_CURRENT_FUNCTION}(): Failed\n" + "Test: key-list-string-genex list values mismatch." + ) + endif() + endforeach() +endfunction() + function(test_set_remove_int) yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create @@ -728,6 +813,7 @@ test_save_new_file() test_setting_int() test_setting_string() test_setting_list_strings() +test_setting_list_escaped_strings() test_setting_list_int() test_setting_map_list_entry() test_setting_map_list_entry_windows() @@ -749,6 +835,7 @@ test_fail_missing_filename() if(NOT CMAKE_SCRIPT_MODE_FILE) # create a file containing genexes test_setting_genexes() + test_setting_escaped_genexes() # Spawn a new CMake instance to re-run the whole test suite in script mode # and verify the genex values once the associated target is built. @@ -756,6 +843,7 @@ if(NOT CMAKE_SCRIPT_MODE_FILE) COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_LIST_FILE} ) else() - # verify the contents of the created genex file + # verify the contents of the created genex files test_verify_genexes() + test_verify_escaped_genexes() endif()