WebKit Bugzilla
Attachment 362241 Details for
Bug 193909
: Implement serializing in MIME type parser
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch
bug-193909-20190217211201.patch (text/plain), 16.60 KB, created by
Rob Buis
on 2019-02-17 12:12:02 PST
(
hide
)
Description:
Patch
Filename:
MIME Type:
Creator:
Rob Buis
Created:
2019-02-17 12:12:02 PST
Size:
16.60 KB
patch
obsolete
>Subversion Revision: 241591 >diff --git a/Source/WebCore/ChangeLog b/Source/WebCore/ChangeLog >index 07fc22ab260888b67dc4628ba27b53113da3d9aa..741493665731737ae4c9e9c232fdd36eb3a829c0 100644 >--- a/Source/WebCore/ChangeLog >+++ b/Source/WebCore/ChangeLog >@@ -1,3 +1,43 @@ >+2019-02-17 Rob Buis <rbuis@igalia.com> >+ >+ Implement serializing in MIME type parser >+ https://bugs.webkit.org/show_bug.cgi?id=193909 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ Further testing showed the MIME parser needs these fixes: >+ - stripWhitespace is wrong for removing HTTP whitespace, use >+ stripLeadingAndTrailingHTTPSpaces instead. >+ - HTTP Token code points checking for Rfc2045 and Mimesniff were >+ mixed up, use the dedicated isValidHTTPToken for Mimesniff mode. >+ - Quoted Strings were not unescaped/escaped, this seems ok for >+ serializing but is wrong when gettings individual parameter values. >+ Implement [1] and [2] Step 2.4 to properly unescape and escape. >+ >+ This change also tries to avoid hard to read uses of find. >+ >+ Test: ParsedContentType.Serialize >+ >+ [1] https://fetch.spec.whatwg.org/#collect-an-http-quoted-string >+ [2] https://mimesniff.spec.whatwg.org/#serializing-a-mime-type >+ >+ * platform/network/ParsedContentType.cpp: >+ (WebCore::consumeCodePoints): more spec conforming name. >+ (WebCore::isNotQuoteOrBackslash): >+ (WebCore::collectHTTPQuotedString): >+ (WebCore::substringForRange): >+ (WebCore::containsNonTokenCharacters): >+ (WebCore::parseQuotedString): >+ (WebCore::ParsedContentType::parseContentType): >+ (WebCore::ParsedContentType::create): >+ (WebCore::ParsedContentType::setContentType): >+ (WebCore::containsNonQuoteStringTokenCharacters): >+ (WebCore::ParsedContentType::setContentTypeParameter): >+ (WebCore::ParsedContentType::serialize const): >+ (WebCore::parseToken): Deleted. >+ (WebCore::isNonTokenCharacter): Deleted. >+ (WebCore::isNonQuotedStringTokenCharacter): Deleted. >+ > 2019-02-15 Zalan Bujtas <zalan@apple.com> > > [LFC] Out-of-flow box is never a float box >diff --git a/Source/WebCore/platform/network/ParsedContentType.cpp b/Source/WebCore/platform/network/ParsedContentType.cpp >index ef1c5cd0e78e83eb46750fff044f72760bae1ea2..89155abbd497cd0f94ccaa575ca1c4dd64a52b0e 100644 >--- a/Source/WebCore/platform/network/ParsedContentType.cpp >+++ b/Source/WebCore/platform/network/ParsedContentType.cpp >@@ -56,7 +56,7 @@ static bool isTokenCharacter(UChar c) > > using CharacterMeetsCondition = bool (*)(UChar); > >-static Optional<SubstringRange> parseToken(const String& input, unsigned& startIndex, CharacterMeetsCondition characterMeetsCondition, Mode mode, bool skipTrailingWhitespace = false) >+static Optional<SubstringRange> consumeCodePoints(const String& input, unsigned& startIndex, CharacterMeetsCondition characterMeetsCondition, Mode mode, bool skipTrailingWhitespace = false) > { > unsigned inputLength = input.length(); > unsigned tokenStart = startIndex; >@@ -80,8 +80,49 @@ static Optional<SubstringRange> parseToken(const String& input, unsigned& startI > return SubstringRange(tokenStart, tokenEnd - tokenStart); > } > >-static bool containsNonTokenCharacters(const String& input, SubstringRange range) >+static bool isNotQuoteOrBackslash(UChar ch) > { >+ return ch != '"' && ch != '\\'; >+} >+ >+static String collectHTTPQuotedString(const String& input, unsigned& startIndex) >+{ >+ ASSERT(input[startIndex] == '"'); >+ unsigned inputLength = input.length(); >+ unsigned& position = startIndex; >+ position++; >+ StringBuilder builder; >+ while (true) { >+ unsigned positionStart = position; >+ consumeCodePoints(input, position, isNotQuoteOrBackslash, Mode::MimeSniff); >+ builder.append(input.substring(positionStart, position - positionStart)); >+ if (position >= inputLength) >+ break; >+ UChar quoteOrBackslash = input[position++]; >+ if (quoteOrBackslash == '\\') { >+ if (position >= inputLength) { >+ builder.append(quoteOrBackslash); >+ break; >+ } >+ builder.append(input[position++]); >+ } else { >+ ASSERT(quoteOrBackslash == '"'); >+ break; >+ } >+ >+ } >+ return builder.toString(); >+} >+ >+static String substringForRange(const String& string, const SubstringRange& range) >+{ >+ return string.substring(range.first, range.second); >+} >+ >+static bool containsNonTokenCharacters(const String& input, SubstringRange range, Mode mode) >+{ >+ if (mode == Mode::MimeSniff) >+ return !isValidHTTPToken(substringForRange(input, range)); > for (unsigned index = 0; index < range.second; ++index) { > if (!isTokenCharacter(input[range.first + index])) > return true; >@@ -89,7 +130,7 @@ static bool containsNonTokenCharacters(const String& input, SubstringRange range > return false; > } > >-static Optional<SubstringRange> parseQuotedString(const String& input, unsigned& startIndex, Mode mode) >+static Optional<SubstringRange> parseQuotedString(const String& input, unsigned& startIndex) > { > unsigned inputLength = input.length(); > unsigned quotedStringStart = startIndex + 1; >@@ -98,20 +139,14 @@ static Optional<SubstringRange> parseQuotedString(const String& input, unsigned& > if (quotedStringEnd >= inputLength) > return WTF::nullopt; > >- if (input[quotedStringEnd++] != '"' || quotedStringEnd >= inputLength) { >- if (mode == Mode::Rfc2045) >- return WTF::nullopt; >- return SubstringRange(quotedStringStart, quotedStringEnd - quotedStringStart); >- } >+ if (input[quotedStringEnd++] != '"' || quotedStringEnd >= inputLength) >+ return WTF::nullopt; > > bool lastCharacterWasBackslash = false; > char currentCharacter; > while ((currentCharacter = input[quotedStringEnd++]) != '"' || lastCharacterWasBackslash) { >- if (quotedStringEnd >= inputLength) { >- if (mode == Mode::Rfc2045) >- return WTF::nullopt; >- break; >- } >+ if (quotedStringEnd >= inputLength) >+ return WTF::nullopt; > if (currentCharacter == '\\' && !lastCharacterWasBackslash) { > lastCharacterWasBackslash = true; > continue; >@@ -124,11 +159,6 @@ static Optional<SubstringRange> parseQuotedString(const String& input, unsigned& > return SubstringRange(quotedStringStart, quotedStringEnd - quotedStringStart); > } > >-static String substringForRange(const String& string, const SubstringRange& range) >-{ >- return string.substring(range.first, range.second); >-} >- > // From http://tools.ietf.org/html/rfc2045#section-5.1: > // > // content := "Content-Type" ":" type "/" subtype >@@ -208,8 +238,8 @@ bool ParsedContentType::parseContentType(Mode mode) > } > > unsigned contentTypeStart = index; >- auto typeRange = parseToken(m_contentType, index, isNotForwardSlash, mode); >- if (!typeRange || containsNonTokenCharacters(m_contentType, *typeRange)) { >+ auto typeRange = consumeCodePoints(m_contentType, index, isNotForwardSlash, mode); >+ if (!typeRange || containsNonTokenCharacters(m_contentType, *typeRange, mode)) { > LOG_ERROR("Invalid Content-Type, invalid type value."); > return false; > } >@@ -219,8 +249,8 @@ bool ParsedContentType::parseContentType(Mode mode) > return false; > } > >- auto subTypeRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); >- if (!subTypeRange || containsNonTokenCharacters(m_contentType, *subTypeRange)) { >+ auto subTypeRange = consumeCodePoints(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); >+ if (!subTypeRange || containsNonTokenCharacters(m_contentType, *subTypeRange, mode)) { > LOG_ERROR("Invalid Content-Type, invalid subtype value."); > return false; > } >@@ -236,7 +266,7 @@ bool ParsedContentType::parseContentType(Mode mode) > index = semiColonIndex + 1; > while (true) { > skipSpaces(m_contentType, index); >- auto keyRange = parseToken(m_contentType, index, isNotSemicolonOrEqualSign, mode); >+ auto keyRange = consumeCodePoints(m_contentType, index, isNotSemicolonOrEqualSign, mode); > if (mode == Mode::Rfc2045 && (!keyRange || index >= contentTypeLength)) { > LOG_ERROR("Invalid Content-Type parameter name."); > return false; >@@ -262,22 +292,27 @@ bool ParsedContentType::parseContentType(Mode mode) > String parameterName = substringForRange(m_contentType, *keyRange); > > // Should we tolerate spaces here? >+ String parameterValue; > Optional<SubstringRange> valueRange; > if (m_contentType[index] == '"') { >- valueRange = parseQuotedString(m_contentType, index, mode); >- if (mode == Mode::MimeSniff) >- parseToken(m_contentType, index, isNotSemicolon, mode); >+ if (mode == Mode::MimeSniff) { >+ parameterValue = collectHTTPQuotedString(m_contentType, index); >+ consumeCodePoints(m_contentType, index, isNotSemicolon, mode); >+ } else >+ valueRange = parseQuotedString(m_contentType, index); > } else >- valueRange = parseToken(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); >+ valueRange = consumeCodePoints(m_contentType, index, isNotSemicolon, mode, mode == Mode::MimeSniff); > >- if (!valueRange) { >- if (mode == Mode::MimeSniff) >- continue; >- LOG_ERROR("Invalid Content-Type, invalid parameter value."); >- return false; >+ if (parameterValue.isNull()) { >+ if (!valueRange) { >+ if (mode == Mode::MimeSniff) >+ continue; >+ LOG_ERROR("Invalid Content-Type, invalid parameter value."); >+ return false; >+ } >+ parameterValue = substringForRange(m_contentType, *valueRange); > } > >- String parameterValue = substringForRange(m_contentType, *valueRange); > // Should we tolerate spaces here? > if (mode == Mode::Rfc2045 && index < contentTypeLength && m_contentType[index++] != ';') { > LOG_ERROR("Invalid Content-Type, invalid character at the end of key/value parameter."); >@@ -295,7 +330,7 @@ bool ParsedContentType::parseContentType(Mode mode) > > Optional<ParsedContentType> ParsedContentType::create(const String& contentType, Mode mode) > { >- ParsedContentType parsedContentType(mode == Mode::Rfc2045 ? contentType : contentType.stripWhiteSpace()); >+ ParsedContentType parsedContentType(mode == Mode::Rfc2045 ? contentType : stripLeadingAndTrailingHTTPSpaces(contentType)); > if (!parsedContentType.parseContentType(mode)) > return WTF::nullopt; > return { WTFMove(parsedContentType) }; >@@ -328,26 +363,27 @@ size_t ParsedContentType::parameterCount() const > > void ParsedContentType::setContentType(const SubstringRange& contentRange, Mode mode) > { >- m_mimeType = substringForRange(m_contentType, contentRange).stripWhiteSpace(); >+ m_mimeType = substringForRange(m_contentType, contentRange); > if (mode == Mode::MimeSniff) >- m_mimeType = m_mimeType.convertToASCIILowercase(); >-} >- >-static bool isNonTokenCharacter(UChar ch) >-{ >- return !isTokenCharacter(ch); >+ m_mimeType = stripLeadingAndTrailingHTTPSpaces(m_mimeType).convertToASCIILowercase(); >+ else >+ m_mimeType = m_mimeType.stripWhiteSpace(); > } > >-static bool isNonQuotedStringTokenCharacter(UChar ch) >+static bool containsNonQuoteStringTokenCharacters(const String& input) > { >- return !isQuotedStringTokenCharacter(ch); >+ for (unsigned index = 0; index < input.length(); ++index) { >+ if (!isQuotedStringTokenCharacter(input[index])) >+ return true; >+ } >+ return false; > } > > void ParsedContentType::setContentTypeParameter(const String& keyName, const String& keyValue, Mode mode) > { > String name = keyName; > if (mode == Mode::MimeSniff) { >- if (m_parameterValues.contains(keyName) || keyName.find(isNonTokenCharacter) != notFound || keyValue.find(isNonQuotedStringTokenCharacter) != notFound) >+ if (m_parameterValues.contains(keyName) || !isValidHTTPToken(keyName) || containsNonQuoteStringTokenCharacters(keyValue)) > return; > name = name.convertToASCIILowercase(); > } >@@ -364,9 +400,14 @@ String ParsedContentType::serialize() const > builder.append(name); > builder.append('='); > String value = m_parameterValues.get(name); >- if (value.isEmpty() || value.find(isNonTokenCharacter) != notFound) { >+ if (value.isEmpty() || !isValidHTTPToken(value)) { > builder.append('"'); >- builder.append(value); >+ for (unsigned index = 0; index < value.length(); ++index) { >+ auto ch = value[index]; >+ if (ch == '\\' || ch =='"') >+ builder.append('\\'); >+ builder.append(ch); >+ } > builder.append('"'); > } else > builder.append(value); >diff --git a/Tools/ChangeLog b/Tools/ChangeLog >index ffe05ffb2b7e206f351df28bfee1dd8fecc0801a..c1cfe10a6ea6c83ead196c32915fb49d346df0ef 100644 >--- a/Tools/ChangeLog >+++ b/Tools/ChangeLog >@@ -1,3 +1,16 @@ >+2019-02-17 Rob Buis <rbuis@igalia.com> >+ >+ Implement serializing in MIME type parser >+ https://bugs.webkit.org/show_bug.cgi?id=193909 >+ >+ Reviewed by NOBODY (OOPS!). >+ >+ Add tests involving leading and trailing whitespace, non-token >+ characters and quoted strings. >+ >+ * TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp: >+ (TestWebKitAPI::TEST): >+ > 2019-02-15 Zalan Bujtas <zalan@apple.com> > > [LFC] Out-of-flow box is never a float box >diff --git a/Tools/TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp b/Tools/TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp >index e084462e35d5164082905cc73ffefdabe978720c..8457f7ceb9b20dfb45fdf3138ca45e0c736b7e98 100644 >--- a/Tools/TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp >+++ b/Tools/TestWebKitAPI/Tests/WebCore/ParsedContentType.cpp >@@ -149,6 +149,8 @@ TEST(ParsedContentType, Serialize) > EXPECT_STREQ(serializeIfValid("text/"), "NOTVALID"); > EXPECT_STREQ(serializeIfValid("text/\0"), "NOTVALID"); > EXPECT_STREQ(serializeIfValid("text/ "), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("{/}"), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("x/x ;x=x"), "x/x;x=x"); > EXPECT_STREQ(serializeIfValid("text/plain"), "text/plain"); > EXPECT_STREQ(serializeIfValid("text/plain\0"), "text/plain"); > EXPECT_STREQ(serializeIfValid(" text/plain"), "text/plain"); >@@ -156,11 +158,15 @@ TEST(ParsedContentType, Serialize) > EXPECT_STREQ(serializeIfValid("\ntext/plain"), "text/plain"); > EXPECT_STREQ(serializeIfValid("\rtext/plain"), "text/plain"); > EXPECT_STREQ(serializeIfValid("\btext/plain"), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("\x0Btext/plain"), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("\u000Btext/plain"), "NOTVALID"); > EXPECT_STREQ(serializeIfValid("text/plain "), "text/plain"); > EXPECT_STREQ(serializeIfValid("text/plain\t"), "text/plain"); > EXPECT_STREQ(serializeIfValid("text/plain\n"), "text/plain"); > EXPECT_STREQ(serializeIfValid("text/plain\r"), "text/plain"); > EXPECT_STREQ(serializeIfValid("text/plain\b"), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("text/plain\x0B"), "NOTVALID"); >+ EXPECT_STREQ(serializeIfValid("text/plain\u000B"), "NOTVALID"); > EXPECT_STREQ(serializeIfValid(" text/plain "), "text/plain"); > EXPECT_STREQ(serializeIfValid("\ttext/plain\t"), "text/plain"); > EXPECT_STREQ(serializeIfValid("\ntext/plain\n"), "text/plain"); >@@ -236,11 +242,12 @@ TEST(ParsedContentType, Serialize) > EXPECT_STREQ(serializeIfValid("text/plain;test=\""), "text/plain;test=\"\""); > EXPECT_STREQ(serializeIfValid("text/plain;test=\"value\"foo"), "text/plain;test=value"); > EXPECT_STREQ(serializeIfValid("text/plain;test=\"value\"foo;foo=bar"), "text/plain;test=value;foo=bar"); >- EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\ue\""), "text/plain;test=\"val\\ue\""); >+ EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\ue\""), "text/plain;test=value"); > EXPECT_STREQ(serializeIfValid("text/plain;test=\"val\\\"ue\""), "text/plain;test=\"val\\\"ue\""); > EXPECT_STREQ(serializeIfValid("text/plain;test='value'"), "text/plain;test='value'"); > EXPECT_STREQ(serializeIfValid("text/plain;test='value"), "text/plain;test='value"); > EXPECT_STREQ(serializeIfValid("text/plain;test=value'"), "text/plain;test=value'"); >+ EXPECT_STREQ(serializeIfValid("text/plain;test={value}"), "text/plain;test=\"{value}\""); > } > > } // namespace TestWebKitAPI
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Flags:
ews-watchlist
:
commit-queue-
Actions:
View
|
Formatted Diff
|
Diff
Attachments on
bug 193909
:
361430
|
361627
|
361686
| 362241 |
362244