Skip to content

Commit cf7da9e

Browse files
committed
bug #61199 [JsonPath] Fix parsing invalid Unicode codepoints (nicolas-grekas)
This PR was merged into the 7.3 branch.

Discussion
----------

[JsonPath] Fix parsing invalid Unicode codepoints

| Q             | A
| ------------- | ---
| Branch?       | 7.3
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Issues        | -
| License       | MIT

Commits
-------

66c8a1a [JsonPath] Fix parsing invalid Unicode codepoints
2 parents ba445f4 + 66c8a1a commit cf7da9e

File tree

2 files changed

+24
-20
lines changed

2 files changed

+24
-20
lines changed

src/Symfony/Component/JsonPath/JsonCrawler.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\JsonPath;
1313

1414
use Symfony\Component\JsonPath\Exception\InvalidArgumentException;
15+
use Symfony\Component\JsonPath\Exception\InvalidJsonPathException;
1516
use Symfony\Component\JsonPath\Exception\InvalidJsonStringInputException;
1617
use Symfony\Component\JsonPath\Exception\JsonCrawlerException;
1718
use Symfony\Component\JsonPath\Tokenizer\JsonPathToken;
@@ -83,7 +84,7 @@ private function evaluate(JsonPath $query): array
8384
return $this->evaluateTokensOnDecodedData($tokens, $data);
8485
} catch (InvalidArgumentException $e) {
8586
throw $e;
86-
} catch (\Throwable $e) {
87+
} catch (InvalidJsonPathException $e) {
8788
throw new JsonCrawlerException($query, $e->getMessage(), previous: $e);
8889
}
8990
}
@@ -329,7 +330,7 @@ private function evaluateBracket(string $expr, mixed $value): array
329330
return \array_key_exists($key, $value) ? [$value[$key]] : [];
330331
}
331332

332-
throw new \LogicException(\sprintf('Unsupported bracket expression "%s".', $expr));
333+
throw new InvalidJsonPathException(\sprintf('Unsupported bracket expression "%s".', $expr));
333334
}
334335

335336
private function evaluateFilter(string $expr, mixed $value): array

src/Symfony/Component/JsonPath/JsonPathUtils.php

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ public static function unescapeString(string $str, string $quoteChar): string
117117
't' => "\t",
118118
'u' => self::unescapeUnicodeSequence($str, $i),
119119
$quoteChar => $quoteChar,
120-
default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
120+
default => throw new JsonCrawlerException('', \sprintf('Invalid escape sequence "\\%s" in %s-quoted string.', $str[$i + 1], "'" === $quoteChar ? 'single' : 'double')),
121121
};
122122

123123
++$i;
@@ -132,30 +132,33 @@ public static function unescapeString(string $str, string $quoteChar): string
132132
private static function unescapeUnicodeSequence(string $str, int &$i): string
133133
{
134134
if (!isset($str[$i + 5]) || !ctype_xdigit(substr($str, $i + 2, 4))) {
135-
throw new JsonCrawlerException('', 'Invalid unicode escape sequence');
135+
throw new JsonCrawlerException('', 'Invalid unicode escape sequence.');
136136
}
137137

138-
$hex = substr($str, $i + 2, 4);
138+
$codepoint = hexdec(substr($str, $i + 2, 4));
139139

140-
$codepoint = hexdec($hex);
141140
// looks like a valid Unicode codepoint, string length is sufficient and it starts with \u
142-
if (0xD800 <= $codepoint && $codepoint <= 0xDBFF && isset($str[$i + 11]) && '\\' === $str[$i + 6] && 'u' === $str[$i + 7]) {
143-
$lowHex = substr($str, $i + 8, 4);
144-
if (ctype_xdigit($lowHex)) {
145-
$lowSurrogate = hexdec($lowHex);
146-
if (0xDC00 <= $lowSurrogate && $lowSurrogate <= 0xDFFF) {
147-
$codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
148-
$i += 10; // skip surrogate pair
149-
150-
return mb_chr($codepoint, 'UTF-8');
151-
}
152-
}
141+
if (0xD800 <= $codepoint
142+
&& $codepoint <= 0xDBFF
143+
&& isset($str[$i + 11])
144+
&& '\\' === $str[$i + 6]
145+
&& 'u' === $str[$i + 7]
146+
&& ctype_xdigit($lowSurrogate = substr($str, $i + 8, 4))
147+
&& 0xDC00 <= ($lowSurrogate = hexdec($lowSurrogate))
148+
&& $lowSurrogate <= 0xDFFF
149+
) {
150+
$codepoint = 0x10000 + (($codepoint & 0x3FF) << 10) + ($lowSurrogate & 0x3FF);
151+
$i += 10; // skip surrogate pair
152+
} else {
153+
// single Unicode character or invalid surrogate, skip the sequence
154+
$i += 4;
153155
}
154156

155-
// single Unicode character or invalid surrogate, skip the sequence
156-
$i += 4;
157+
if (false === $chr = mb_chr($codepoint, 'UTF-8')) {
158+
throw new JsonCrawlerException('', \sprintf('Invalid Unicode codepoint: U+%04X.', $codepoint));
159+
}
157160

158-
return mb_chr($codepoint, 'UTF-8');
161+
return $chr;
159162
}
160163

161164
/**

0 commit comments

Comments
 (0)