Browse Source

cmake: yaml: improve escaping of strings with backslashes and quotes

Switching the intermediate file format from JSON to YAML has a very
significant benefit: the file is now loaded via yaml_load(), which
internally calls Python to parse the file into the JSON format that
CMake expects. This means that the file contents are now automatically
escaped properly for JSON; it is a huge improvement over the previous
implementation, which was escaping everything given as input to
to_yaml().

With the removal of the now-redundant call in to_yaml(), escaping is
applied exactly once per value or list, when it is passed to yaml_set().
This allows to convert the logic in zephyr_string(ESCAPE ...) to a more
robust "escape everything" approach.

These changes fix the handling of strings with backslashes and different
types of quotes passed either directly or via generator expression. The
existing tests are updated to cover these cases.

Two other small changes are made in this commit:

- a small check in internal_yaml_list_append() is removed, as the same
  issue is already detected by the caller yaml_set() logic.

- the to_yaml() function is modified to initialize the YAML output
  variable at the top level, which is the expected behavior. This
  resulted in genex temp files sometimes having duplicate lines.

Signed-off-by: Luca Burelli <l.burelli@arduino.cc>
pull/88410/head
Luca Burelli 3 months ago committed by Benjamin Cabé
parent
commit
a90870da4c
  1. 28
      cmake/modules/extensions.cmake
  2. 16
      cmake/modules/yaml.cmake
  3. 90
      tests/cmake/yaml/CMakeLists.txt

28
cmake/modules/extensions.cmake

@ -3051,15 +3051,14 @@ endfunction() @@ -3051,15 +3051,14 @@ endfunction()
# This function extends the CMake string function by providing additional
# manipulation options for the <mode> argument:
#
# ESCAPE: Ensure that any single '\', except '\"', in the input string is
# escaped with the escape char '\'. For example the string 'foo\bar'
# will be escaped so that it becomes 'foo\\bar'.
# Backslashes which are already escaped will not be escaped further,
# for example 'foo\\bar' will not be modified.
# This is useful for handling of windows path separator in strings or
# when strings contains newline escapes such as '\n' and this can
# cause issues when writing to a file where a '\n' is desired in the
# string instead of a newline.
# ESCAPE: Ensure that every character of the input arguments is considered
# by CMake as a literal by prefixing the escape character '\' where
# appropriate. This is useful for handling Windows path separators in
# strings, or when it is desired to write "\n" as an actual string of
# four characters instead of a single newline.
# Note that this operation must be performed exactly once during the
# lifetime of a string, or previous escape characters will be treated
# as literals and escaped further.
#
# SANITIZE: Ensure that the output string does not contain any special
# characters. Special characters, such as -, +, =, $, etc. are
@ -3092,10 +3091,13 @@ function(zephyr_string) @@ -3092,10 +3091,13 @@ function(zephyr_string)
string(TOUPPER ${work_string} work_string)
endif()
elseif(ZEPHYR_STRING_ESCAPE)
# If a single '\' is discovered, such as 'foo\bar', then it must be escaped like: 'foo\\bar'
# \\1 and \\2 are keeping the match patterns, the \\\\ --> \\ meaning an escaped '\',
# which then becomes a single '\' in the final string.
string(REGEX REPLACE "([^\\][\\])([^\\\"])" "\\1\\\\\\2" work_string "${ZEPHYR_STRING_UNPARSED_ARGUMENTS}")
# Escape every instance of '\' or '"' in the <input> arguments.
# Note that:
# - backslashes must be replaced first to avoid duplicating the '\' in \"
# - "\\\\" is seen by CMake as \\, meaning an escaped '\', which then
# becomes a single '\' in the final string.
string(REGEX REPLACE "\\\\" "\\\\\\\\" work_string "${ZEPHYR_STRING_UNPARSED_ARGUMENTS}")
string(REGEX REPLACE "\"" "\\\\\"" work_string "${work_string}")
endif()
set(${return_arg} ${work_string} PARENT_SCOPE)

16
cmake/modules/yaml.cmake

@ -96,13 +96,6 @@ function(internal_yaml_list_append var genex key) @@ -96,13 +96,6 @@ function(internal_yaml_list_append var genex key)
internal_yaml_list_initializer(subjson TRUE)
if(${arraylength} GREATER 0)
math(EXPR arraystop "${arraylength} - 1")
list(GET ARG_YAML_LIST 0 entry_0)
if(entry_0 STREQUAL MAP)
message(FATAL_ERROR "${function}(GENEX ${argument} ) is not valid at this position.\n"
"Syntax is 'LIST MAP \"key1: value1.1, ...\" MAP \"key1: value1.2, ...\""
)
endif()
foreach(i RANGE 0 ${arraystop})
string(JSON item GET "${json_content}" ${key} ${i})
list(APPEND subjson ${item})
@ -544,9 +537,7 @@ function(yaml_save) @@ -544,9 +537,7 @@ function(yaml_save)
endif()
endfunction()
function(to_yaml in_json level yaml mode)
zephyr_string(ESCAPE json "${in_json}")
function(to_yaml json level yaml mode)
if(mode STREQUAL "DIRECT")
# Direct output mode, no genexes: write a standard YAML
set(expand_lists TRUE)
@ -569,7 +560,10 @@ function(to_yaml in_json level yaml mode) @@ -569,7 +560,10 @@ function(to_yaml in_json level yaml mode)
message(FATAL_ERROR "to_yaml(... ${mode} ) is malformed.")
endif()
if(level GREATER 0)
if(level EQUAL 0)
# Top-level call, initialize the YAML output variable
set(${yaml} "" PARENT_SCOPE)
else()
math(EXPR level_dec "${level} - 1")
set(indent_${level} "${indent_${level_dec}} ")
endif()

90
tests/cmake/yaml/CMakeLists.txt

@ -242,6 +242,46 @@ function(test_setting_list_strings) @@ -242,6 +242,46 @@ function(test_setting_list_strings)
endforeach()
endfunction()
function(test_setting_list_escaped_strings)
yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml
NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
)
set(new_value "back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path")
yaml_set(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
KEY cmake test set key-list-string LIST ${new_value}
)
yaml_save(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create)
# Read-back the yaml and verify the value.
yaml_load(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml
NAME ${CMAKE_CURRENT_FUNCTION}_readback
)
yaml_length(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string)
test_assert(TEST 5 EQUAL ${readback}
COMMENT "readback yaml list length does not match original."
)
yaml_get(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string)
foreach(a e IN ZIP_LISTS readback new_value)
# cmake_parse_arguments() breaks horribly when given strings that include
# escapes, making it impossible to use test_assert() here. Or, for that
# matter, even trying to print them in message(). Thanks, CMake!
if("${e}" STREQUAL "${a}")
message("${CMAKE_CURRENT_FUNCTION}(): Passed")
else()
message(FATAL_ERROR
"${CMAKE_CURRENT_FUNCTION}(): Failed\n"
"Test: list values mismatch."
)
endif()
endforeach()
endfunction()
function(test_append_list_strings)
yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml
NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
@ -653,6 +693,51 @@ function(test_verify_genexes) @@ -653,6 +693,51 @@ function(test_verify_genexes)
endforeach()
endfunction()
function(test_setting_escaped_genexes)
set(file ${CMAKE_BINARY_DIR}/test_setting_escaped_genexes.yaml)
yaml_create(FILE ${file}
NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
)
set_property(TARGET app PROPERTY escaped_list
"back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path")
yaml_set(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
KEY cmake test set key-list-string-genex
GENEX LIST "$<TARGET_PROPERTY:app,escaped_list>"
)
yaml_save(NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create)
endfunction()
function(test_verify_escaped_genexes)
set(file ${CMAKE_BINARY_DIR}/test_setting_escaped_genexes.yaml)
yaml_load(FILE ${file}
NAME ${CMAKE_CURRENT_FUNCTION}_readback
)
set(expected "back\\slash" "dou\"ble" "sin'gle" "co:lon" "win:\\path")
list(LENGTH expected exp_len)
yaml_get(readback NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string-genex)
yaml_length(act_len NAME ${CMAKE_CURRENT_FUNCTION}_readback KEY cmake test set key-list-string-genex)
test_assert(TEST ${exp_len} EQUAL ${act_len}
COMMENT "yaml list length does not match expectation."
)
foreach(a e IN ZIP_LISTS readback expected)
# See comment in test_setting_list_escaped_strings() for why test_assert() is not used here.
if("${e}" STREQUAL "${a}")
message("${CMAKE_CURRENT_FUNCTION}(): Passed")
else()
message(FATAL_ERROR
"${CMAKE_CURRENT_FUNCTION}(): Failed\n"
"Test: key-list-string-genex list values mismatch."
)
endif()
endforeach()
endfunction()
function(test_set_remove_int)
yaml_create(FILE ${CMAKE_BINARY_DIR}/${CMAKE_CURRENT_FUNCTION}_test_create.yaml
NAME ${CMAKE_CURRENT_FUNCTION}_yaml-create
@ -728,6 +813,7 @@ test_save_new_file() @@ -728,6 +813,7 @@ test_save_new_file()
test_setting_int()
test_setting_string()
test_setting_list_strings()
test_setting_list_escaped_strings()
test_setting_list_int()
test_setting_map_list_entry()
test_setting_map_list_entry_windows()
@ -749,6 +835,7 @@ test_fail_missing_filename() @@ -749,6 +835,7 @@ test_fail_missing_filename()
if(NOT CMAKE_SCRIPT_MODE_FILE)
# create a file containing genexes
test_setting_genexes()
test_setting_escaped_genexes()
# Spawn a new CMake instance to re-run the whole test suite in script mode
# and verify the genex values once the associated target is built.
@ -756,6 +843,7 @@ if(NOT CMAKE_SCRIPT_MODE_FILE) @@ -756,6 +843,7 @@ if(NOT CMAKE_SCRIPT_MODE_FILE)
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_LIST_FILE}
)
else()
# verify the contents of the created genex file
# verify the contents of the created genex files
test_verify_genexes()
test_verify_escaped_genexes()
endif()

Loading…
Cancel
Save