summaryrefslogtreecommitdiff
path: root/dev-qt/qtbase/files/qtbase-6.5.2-CVE-2023-38197.patch
blob: 220e94d9ca2f068796daf9aa87b53886df502e61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
Upstream: https://codereview.qt-project.org/c/qt/qtbase/+/490550
Upstream: https://lists.qt-project.org/pipermail/development/2023-July/044166.html

From c216c3d9859a20b3aeec985512e89316423fc3a8 Mon Sep 17 00:00:00 2001
From: Axel Spoerl <axel.spoerl@qt.io>
Date: Fri, 30 Jun 2023 12:43:59 +0200
Subject: [PATCH] QXmlStreamReader: Raise error on unexpected tokens

QXmlStreamReader accepted multiple DOCTYPE elements, containing DTD
fragments in the XML prolog, and in the XML body.
Well-formed but invalid XML files - with multiple DTD fragments in
prolog and body, combined with recursive entity expansions - have
caused infinite loops in QXmlStreamReader.

This patch implements a token check in QXmlStreamReader.
A stream is allowed to start with an XML prolog. StartDocument
and DOCTYPE elements are only allowed in this prolog, which
may also contain ProcessingInstruction and Comment elements.
As soon as anything else is seen, the prolog ends.
After that, the prolog-specific elements are treated as unexpected.
Furthermore, the prolog can contain at most one DOCTYPE element.

Update the documentation to reflect the new behavior.
Add an autotest that checks the new error cases are correctly detected,
and no error is raised for legitimate input.

The original OSS-Fuzz files (see bug reports) are not included in this
patch for file size reasons. They have been tested manually. Each of
them has more than one DOCTYPE element, causing infinite loops in
recursive entity expansions. The newly implemented functionality
detects those invalid DTD fragments. By raising an error, it aborts
stream reading before an infinite loop occurs.

Thanks to OSS-Fuzz for finding this.

Fixes: QTBUG-92113
Fixes: QTBUG-95188
Change-Id: I0a082b9188b2eee50b396c4d5b1c9e1fd237bbdd
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
(cherry picked from commit c4301be7d5f94852e1b17f2c2989d5ca807855d4)
---
 src/corelib/serialization/qxmlstream.cpp           | 145 +++++++++++++++++++--
 src/corelib/serialization/qxmlstream_p.h           |  11 ++
 .../qxmlstream/tokenError/dtdInBody.xml            |  20 +++
 .../qxmlstream/tokenError/multipleDtd.xml          |  20 +++
 .../qxmlstream/tokenError/wellFormed.xml           |  15 +++
 .../serialization/qxmlstream/tst_qxmlstream.cpp    |  39 ++++++
 6 files changed, 242 insertions(+), 8 deletions(-)
 create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
 create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
 create mode 100644 tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml

diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp
index 6e34d4da6e5a..cf46d690f122 100644
--- a/src/corelib/serialization/qxmlstream.cpp
+++ b/src/corelib/serialization/qxmlstream.cpp
@@ -185,7 +185,7 @@ WRAP(indexOf, QLatin1StringView)
     addData() or by waiting for it to arrive on the device().
 
     \value UnexpectedElementError The parser encountered an element
-    that was different to those it expected.
+    or token that was different to those it expected.
 
 */
 
@@ -322,13 +322,34 @@ QXmlStreamEntityResolver *QXmlStreamReader::entityResolver() const
 
   QXmlStreamReader is a well-formed XML 1.0 parser that does \e not
   include external parsed entities. As long as no error occurs, the
-  application code can thus be assured that the data provided by the
-  stream reader satisfies the W3C's criteria for well-formed XML. For
-  example, you can be certain that all tags are indeed nested and
-  closed properly, that references to internal entities have been
-  replaced with the correct replacement text, and that attributes have
-  been normalized or added according to the internal subset of the
-  DTD.
+  application code can thus be assured, that
+  \list
+  \li the data provided by the stream reader satisfies the W3C's
+      criteria for well-formed XML,
+  \li tokens are provided in a valid order.
+  \endlist
+
+  Unless QXmlStreamReader raises an error, it guarantees the following:
+  \list
+  \li All tags are nested and closed properly.
+  \li References to internal entities have been replaced with the
+      correct replacement text.
+  \li Attributes have been normalized or added according to the
+      internal subset of the \l DTD.
+  \li Tokens of type \l StartDocument happen before all others,
+      aside from comments and processing instructions.
+  \li At most one DOCTYPE element (a token of type \l DTD) is present.
+  \li If present, the DOCTYPE appears before all other elements,
+      aside from StartDocument, comments and processing instructions.
+  \endlist
+
+  In particular, once any token of type \l StartElement, \l EndElement,
+  \l Characters, \l EntityReference or \l EndDocument is seen, no
+  tokens of type StartDocument or DTD will be seen. If one is present in
+  the input stream, out of order, an error is raised.
+
+  \note The token types \l Comment and \l ProcessingInstruction may appear
+  anywhere in the stream.
 
   If an error occurs while parsing, atEnd() and hasError() return
   true, and error() returns the error that occurred. The functions
@@ -659,6 +680,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::readNext()
         d->token = -1;
         return readNext();
     }
+    d->checkToken();
     return d->type;
 }
 
@@ -743,6 +765,11 @@ static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray(
     "ProcessingInstruction"
 );
 
+static constexpr auto QXmlStreamReader_XmlContextString = qOffsetStringArray(
+    "Prolog",
+    "Body"
+);
+
 /*!
     \property  QXmlStreamReader::namespaceProcessing
     \brief the namespace-processing flag of the stream reader.
@@ -777,6 +804,15 @@ QString QXmlStreamReader::tokenString() const
     return QLatin1StringView(QXmlStreamReader_tokenTypeString.at(d->type));
 }
 
+/*!
+   \internal
+   \return \param loc (Prolog/Body) as a string.
+ */
+static constexpr QLatin1StringView contextString(QXmlStreamReaderPrivate::XmlContext ctxt)
+{
+    return QLatin1StringView(QXmlStreamReader_XmlContextString.at(static_cast<int>(ctxt)));
+}
+
 #endif // QT_NO_XMLSTREAMREADER
 
 QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack()
@@ -864,6 +900,8 @@ void QXmlStreamReaderPrivate::init()
 
     type = QXmlStreamReader::NoToken;
     error = QXmlStreamReader::NoError;
+    currentContext = XmlContext::Prolog;
+    foundDTD = false;
 }
 
 /*
@@ -3838,6 +3876,97 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader)
     }
 }
 
+static constexpr bool isTokenAllowedInContext(QXmlStreamReader::TokenType type,
+                                               QXmlStreamReaderPrivate::XmlContext loc)
+{
+    switch (type) {
+    case QXmlStreamReader::StartDocument:
+    case QXmlStreamReader::DTD:
+        return loc == QXmlStreamReaderPrivate::XmlContext::Prolog;
+
+    case QXmlStreamReader::StartElement:
+    case QXmlStreamReader::EndElement:
+    case QXmlStreamReader::Characters:
+    case QXmlStreamReader::EntityReference:
+    case QXmlStreamReader::EndDocument:
+        return loc == QXmlStreamReaderPrivate::XmlContext::Body;
+
+    case QXmlStreamReader::Comment:
+    case QXmlStreamReader::ProcessingInstruction:
+        return true;
+
+    case QXmlStreamReader::NoToken:
+    case QXmlStreamReader::Invalid:
+        return false;
+    }
+
+    // GCC 8.x does not treat __builtin_unreachable() as constexpr
+#if !defined(Q_CC_GNU_ONLY) || (Q_CC_GNU >= 900)
+    Q_UNREACHABLE_RETURN(false);
+#else
+    return false;
+#endif
+}
+
+/*!
+   \internal
+   \brief QXmlStreamReader::isValidToken
+   \return \c true if \param type is a valid token type.
+   \return \c false if \param type is an unexpected token,
+   which indicates a non-well-formed or invalid XML stream.
+ */
+bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type)
+{
+    // Don't change currentContext, if Invalid or NoToken occur in the prolog
+    if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken)
+        return false;
+
+    // If a token type gets rejected in the body, there is no recovery
+    const bool result = isTokenAllowedInContext(type, currentContext);
+    if (result || currentContext == XmlContext::Body)
+        return result;
+
+    // First non-Prolog token observed => switch context to body and check again.
+    currentContext = XmlContext::Body;
+    return isTokenAllowedInContext(type, currentContext);
+}
+
+/*!
+   \internal
+   Checks token type and raises an error, if it is invalid
+   in the current context (prolog/body).
+ */
+void QXmlStreamReaderPrivate::checkToken()
+{
+    Q_Q(QXmlStreamReader);
+
+    // The token type must be consumed, to keep track if the body has been reached.
+    const XmlContext context = currentContext;
+    const bool ok = isValidToken(type);
+
+    // Do nothing if an error has been raised already (going along with an unexpected token)
+    if (error != QXmlStreamReader::Error::NoError)
+        return;
+
+    if (!ok) {
+        raiseError(QXmlStreamReader::UnexpectedElementError,
+                   QObject::tr("Unexpected token type %1 in %2.")
+                   .arg(q->tokenString(), contextString(context)));
+        return;
+    }
+
+    if (type != QXmlStreamReader::DTD)
+        return;
+
+    // Raise error on multiple DTD tokens
+    if (foundDTD) {
+        raiseError(QXmlStreamReader::UnexpectedElementError,
+                   QObject::tr("Found second DTD token in %1.").arg(contextString(context)));
+    } else {
+        foundDTD = true;
+    }
+}
+
 /*!
  \fn bool QXmlStreamAttributes::hasAttribute(QAnyStringView qualifiedName) const
 
diff --git a/src/corelib/serialization/qxmlstream_p.h b/src/corelib/serialization/qxmlstream_p.h
index 070424a9f523..f09adaa37e66 100644
--- a/src/corelib/serialization/qxmlstream_p.h
+++ b/src/corelib/serialization/qxmlstream_p.h
@@ -297,6 +297,17 @@ public:
     QStringDecoder decoder;
     bool atEnd;
 
+    enum class XmlContext
+    {
+        Prolog,
+        Body,
+    };
+
+    XmlContext currentContext = XmlContext::Prolog;
+    bool foundDTD = false;
+    bool isValidToken(QXmlStreamReader::TokenType type);
+    void checkToken();
+
     /*!
       \sa setType()
      */
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
new file mode 100644
index 000000000000..1c3ca4e2711f
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml
@@ -0,0 +1,20 @@
+<!DOCTYPE TEST [
+   <!ELEMENT TESTATTRIBUTE (CASE+)>
+   <!ELEMENT CASE (CLASS, FUNCTION)>
+   <!ELEMENT CLASS (#PCDATA)>
+
+   <!-- adding random ENTITY statement, as this is typical DTD content -->
+   <!ENTITY unite "&#x222a;">
+
+   <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<TEST>
+  <CASE>
+    <CLASS>tst_QXmlStream</CLASS>
+  </CASE>
+  <!-- invalid DTD in XML body follows -->
+  <!DOCTYPE DTDTEST [
+    <!ELEMENT RESULT (CASE+)>
+    <!ATTLIST RESULT OUTPUT CDATA #REQUIRED>
+  ]>
+</TEST>
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
new file mode 100644
index 000000000000..cd398c0f9fde
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml
@@ -0,0 +1,20 @@
+<!DOCTYPE TEST [
+   <!ELEMENT TESTATTRIBUTE (CASE+)>
+   <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
+   <!ELEMENT CLASS (#PCDATA)>
+
+   <!-- adding random ENTITY statements, as this is typical DTD content -->
+   <!ENTITY iff "&hArr;">
+
+   <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<!-- invalid second DTD follows -->
+<!DOCTYPE SECOND [
+   <!ELEMENT SECONDATTRIBUTE (#PCDATA)>
+   <!ENTITY on "&#8728;">
+]>
+<TEST>
+  <CASE>
+    <CLASS>tst_QXmlStream</CLASS>
+  </CASE>
+</TEST>
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
new file mode 100644
index 000000000000..1b61a3f06225
--- /dev/null
+++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml
@@ -0,0 +1,15 @@
+<!DOCTYPE TEST [
+   <!ELEMENT TESTATTRIBUTE (CASE+)>
+   <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)>
+   <!ELEMENT CLASS (#PCDATA)>
+
+   <!-- adding random ENTITY statements, as this is typical DTD content -->
+   <!ENTITY unite "&#x222a;">
+
+   <!ATTLIST CASE CLASS CDATA #REQUIRED>
+]>
+<TEST>
+  <CASE>
+    <CLASS>tst_QXmlStream</CLASS>
+  </CASE>
+</TEST>
diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
index 2a340e11bff5..30f54999e7c8 100644
--- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
+++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
@@ -590,6 +590,9 @@ private slots:
 
     void entityExpansionLimit() const;
 
+    void tokenErrorHandling_data() const;
+    void tokenErrorHandling() const;
+
 private:
     static QByteArray readFile(const QString &filename);
 
@@ -1855,5 +1858,41 @@ void tst_QXmlStream::test_fastScanName() const
     QCOMPARE(reader.error(), errorType);
 }
 
+void tst_QXmlStream::tokenErrorHandling_data() const
+{
+    QTest::addColumn<QString>("fileName");
+    QTest::addColumn<QXmlStreamReader::Error>("expectedError");
+    QTest::addColumn<QString>("errorKeyWord");
+
+    constexpr auto invalid = QXmlStreamReader::Error::UnexpectedElementError;
+    constexpr auto valid = QXmlStreamReader::Error::NoError;
+    QTest::newRow("DtdInBody") << "dtdInBody.xml" << invalid << "DTD";
+    QTest::newRow("multipleDTD") << "multipleDtd.xml" << invalid << "second DTD";
+    QTest::newRow("wellFormed") << "wellFormed.xml" << valid << "";
+}
+
+void tst_QXmlStream::tokenErrorHandling() const
+{
+    QFETCH(const QString, fileName);
+    QFETCH(const QXmlStreamReader::Error, expectedError);
+    QFETCH(const QString, errorKeyWord);
+
+    const QDir dir(QFINDTESTDATA("tokenError"));
+    QFile file(dir.absoluteFilePath(fileName));
+
+    // Cross-compiling: File will be on host only
+    if (!file.exists())
+        QSKIP("Testfile not found.");
+
+    file.open(QIODevice::ReadOnly);
+    QXmlStreamReader reader(&file);
+    while (!reader.atEnd())
+        reader.readNext();
+
+    QCOMPARE(reader.error(), expectedError);
+    if (expectedError != QXmlStreamReader::Error::NoError)
+        QVERIFY(reader.errorString().contains(errorKeyWord));
+}
+
 #include "tst_qxmlstream.moc"
 // vim: et:ts=4:sw=4:sts=4
-- 
2.16.3