From 5a30a3aba065f6e683bc429da44437676662e113 Mon Sep 17 00:00:00 2001 From: David Malcolm <dmalcolm@redhat.com> Date: Thu, 20 Feb 2025 17:10:53 -0500 Subject: [PATCH] =?UTF-8?q?sarif-replay:=20improve=20error=20for=20unescap?= =?UTF-8?q?ed=20braces=20in=20messages=20(=C2=A73.11.5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spotted via https://github.com/llvm/llvm-project/issues/128024 gcc/ChangeLog: * libsarifreplay.cc (sarif_replayer::make_plain_text_within_result_message): Capture which json::string was used. When reporting on unescaped "{" or "}" in SARIF message strings, use that string rather than the message object, and refer the user to §3.11.5 ("Messages with placeholders") rather than §3.11.11 ("arguments"). Ideally we'd place the error at the precise character, but that can't be done without reworking json-parsing.cc's lexer::lex_string, which is too invasive for stage 4. (sarif_replayer::get_plain_text_from_mfms): Capture which json::string was used. (sarif_replayer::lookup_plain_text_within_result_message): Likewise. gcc/testsuite/ChangeLog: * sarif-replay.dg/2.1.0-invalid/3.11.11-malformed-placeholder.sarif: Rename to... * sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif: ...this. Update expected subsection in error message, and expected underline in quoted JSON. Signed-off-by: David Malcolm <dmalcolm@redhat.com> --- gcc/libsarifreplay.cc | 43 +++++++++++++------ ...er.sarif => 3.11.5-unescaped-braces.sarif} | 4 +- 2 files changed, 33 insertions(+), 14 deletions(-) rename gcc/testsuite/sarif-replay.dg/2.1.0-invalid/{3.11.11-malformed-placeholder.sarif => 3.11.5-unescaped-braces.sarif} (84%) diff --git a/gcc/libsarifreplay.cc b/gcc/libsarifreplay.cc index cc051dcd485c..ce42bdace3ca 100644 --- a/gcc/libsarifreplay.cc +++ b/gcc/libsarifreplay.cc @@ -272,12 +272,14 @@ private: const char * lookup_plain_text_within_result_message (const json::object *tool_component_obj, const json::object &message_obj, - const json::object *rule_obj); + const json::object *rule_obj, + const json::string *&out_js_str); // "multiformatMessageString" object (§3.12). const char * get_plain_text_from_mfms (json::value &mfms_val, - const property_spec_ref &prop); + const property_spec_ref &prop, + const json::string *&out_js_str); // "run" object (§3.14) enum status @@ -1367,13 +1369,17 @@ make_plain_text_within_result_message (const json::object *tool_component_obj, const json::object &message_obj, const json::object *rule_obj) { + const json::string *js_str = nullptr; const char *original_text = lookup_plain_text_within_result_message (tool_component_obj, message_obj, - rule_obj); + rule_obj, + js_str); if (!original_text) return label_text::borrow (nullptr); + gcc_assert (js_str); + /* Look up any arguments for substituting into placeholders. */ const property_spec_ref arguments_prop ("message", "arguments", "3.11.11"); const json::array *arguments @@ -1425,7 +1431,9 @@ make_plain_text_within_result_message (const json::object *tool_component_obj, } else { - report_invalid_sarif (message_obj, arguments_prop, + const spec_ref msgs_with_placeholders ("3.11.5"); + gcc_assert (js_str); + report_invalid_sarif (*js_str, msgs_with_placeholders, "unescaped '%c' within message string", ch); return label_text::borrow (nullptr); @@ -1450,11 +1458,14 @@ make_plain_text_within_result_message (const json::object *tool_component_obj, /* Handle a value that should be a multiformatMessageString object (§3.12). Complain using prop if MFMS_VAL is not an object. - Return get the "text" value (or nullptr, and complain). */ + Return get the "text" value (or nullptr, and complain). + If the result is non-null, write the json::string that was actually used + to OUT_JS_STR. */ const char * sarif_replayer::get_plain_text_from_mfms (json::value &mfms_val, - const property_spec_ref &prop) + const property_spec_ref &prop, + const json::string *&out_js_str) { auto mfms_obj = require_object (mfms_val, prop); if (!mfms_obj) @@ -1465,6 +1476,7 @@ sarif_replayer::get_plain_text_from_mfms (json::value &mfms_val, auto text_jstr = get_required_property<json::string> (*mfms_obj, text_prop); if (!text_jstr) return nullptr; + out_js_str = text_jstr; return text_jstr->get_string (); } @@ -1479,13 +1491,17 @@ sarif_replayer::get_plain_text_from_mfms (json::value &mfms_val, is the value of result.message (§3.27.11). MESSAGE_OBJ is "theMessage" - RULE_OBJ is "theRule". */ + RULE_OBJ is "theRule". + + If the result is non-null, write the json::string that was actually used + to OUT_JS_STR. */ const char * sarif_replayer:: lookup_plain_text_within_result_message (const json::object *tool_component_obj, const json::object &message_obj, - const json::object *rule_obj) + const json::object *rule_obj, + const json::string *&out_js_str) { // rule_obj can be NULL @@ -1493,8 +1509,11 @@ lookup_plain_text_within_result_message (const json::object *tool_component_obj, Use the text or markdown property of theMessage as appropriate. */ if (const json::string *str = get_optional_property<json::string> (message_obj, PROP_message_text)) - // TODO: check language - return str->get_string (); + { + // TODO: check language + out_js_str = str; + return str->get_string (); + } if (rule_obj) if (auto message_id_jstr @@ -1507,7 +1526,7 @@ lookup_plain_text_within_result_message (const json::object *tool_component_obj, = get_optional_property<json::object> (*rule_obj, message_strings)) if (json::value *mfms = message_strings_obj->get (message_id)) - return get_plain_text_from_mfms (*mfms, message_strings); + return get_plain_text_from_mfms (*mfms, message_strings, out_js_str); /* Look up by theMessage.id within theComponent.globalMessageStrings (§3.19.22). */ @@ -1519,7 +1538,7 @@ lookup_plain_text_within_result_message (const json::object *tool_component_obj, = get_optional_property<json::object> (*tool_component_obj, prop_gms)) if (auto mfms = global_message_strings->get (message_id)) - return get_plain_text_from_mfms (*mfms, prop_gms); + return get_plain_text_from_mfms (*mfms, prop_gms, out_js_str); } } diff --git a/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.11-malformed-placeholder.sarif b/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif similarity index 84% rename from gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.11-malformed-placeholder.sarif rename to gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif index 72da185de0dc..29460e19c698 100644 --- a/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.11-malformed-placeholder.sarif +++ b/gcc/testsuite/sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif @@ -3,7 +3,7 @@ "runs": [{ "tool": { "driver": { "name": "example" } }, "results": [ - { "message": { "text" : "before {} after" }, /* { dg-error "unescaped '\\\{' within message string \\\[SARIF v2.1.0 §3.11.11\\\]" } */ + { "message": { "text" : "before {} after" }, /* { dg-error "unescaped '\\\{' within message string \\\[SARIF v2.1.0 §3.11.5\\\]" } */ "locations": [] } ] }] @@ -11,5 +11,5 @@ /* { dg-begin-multiline-output "" } 6 | { "message": { "text" : "before {} after" }, - | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + | ^~~~~~~~~~~~~~~~~ { dg-end-multiline-output "" } */ -- GitLab