Patch summary (gh-149277): make the SyntaxError position reported for an
invalid numeric literal point at the first invalid character instead of the
last valid digit.

  * Lib/test/test_grammar.py: adds test_end_of_numerical_literals_offset,
    which compiles each malformed literal in "eval" mode and checks
    SyntaxError.offset. The expected values are 1-based columns of the first
    invalid character (e.g. "9spam" -> 2, the 's'; "1jz" -> 3, the 'z').
  * Misc/NEWS.d/next/Core_and_Builtins/: adds the changelog entry.
  * Parser/lexer/lexer.c: verify_end_of_number() no longer calls
    tok_backup(tok, c) before _PyTokenizer_syntaxerror() in the branch taken
    when c is an ASCII potential-identifier character.
    NOTE(review): presumably the error position is derived from the
    tokenizer's current read position, so not backing up makes it include the
    offending character -- confirm against _PyTokenizer_syntaxerror().

diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
index ebcd98a0a37776..b0e0ca1585ea86 100644
--- a/Lib/test/test_grammar.py
+++ b/Lib/test/test_grammar.py
@@ -140,6 +140,26 @@ def test_bad_numerical_literals(self):
         check("1e2_", "invalid decimal literal")
         check("1e+", "invalid decimal literal")
 
+    def test_end_of_numerical_literals_offset(self):
+        # gh-149277: verify the error caret points at the first invalid
+        # character, not the last valid digit.
+        cases = [
+            ("0xfg", 4),
+            ("0x9g", 4),
+            ("0b1z", 4),
+            ("0o7q", 4),
+            ("9spam", 2),
+            ("0xfspam", 4),
+            ("1.0x", 4),
+            ("1e3w", 4),
+            ("1jz", 3),
+        ]
+        for source, expected_offset in cases:
+            with self.subTest(source=source):
+                with self.assertRaises(SyntaxError) as cm:
+                    compile(source, "", "eval")
+                self.assertEqual(cm.exception.offset, expected_offset)
+
     def test_end_of_numerical_literals(self):
         def check(test, error=False):
             with self.subTest(expr=test):
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-05-06-18-16-33.gh-issue-149277.4uVfSK.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-06-18-16-33.gh-issue-149277.4uVfSK.rst
new file mode 100644
index 00000000000000..0c7e4fbba02f25
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-05-06-18-16-33.gh-issue-149277.4uVfSK.rst
@@ -0,0 +1,2 @@
+Fix the :exc:`SyntaxError` caret position for invalid numeric literals to
+point at the first invalid character instead of the last valid one.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 7f25afec302c22..dad8d800617b12 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -352,7 +352,6 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
     }
     else /* In future releases, only error will remain. */
     if (c < 128 && is_potential_identifier_char(c)) {
-        tok_backup(tok, c);
         _PyTokenizer_syntaxerror(tok, "invalid %s literal", kind);
         return 0;
     }