1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
|
From c76bcd0feed005aaf9db28a76f4883f3ae98295b Mon Sep 17 00:00:00 2001
From: Gavin Smith <gavinsmith0123@gmail.com>
Date: Mon, 23 Oct 2023 19:51:00 +0100
Subject: [PATCH 1/5] * tp/Texinfo/XS/xspara.c (get_utf8_codepoint): Wrapper
for mbrtowc/btowc. [_WIN32]: Do not call btowc, as it was tested to be very
slow on MinGW. Report from Eli Zaretskii.
---
ChangeLog | 7 ++++++
tp/Texinfo/XS/xspara.c | 48 +++++++++++++++++++++++-------------------
2 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e619109f5b..c4379ec56b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-10-23 Gavin Smith <gavinsmith0123@gmail.com>
+
+ * tp/Texinfo/XS/xspara.c (get_utf8_codepoint):
+ Wrapper for mbrtowc/btowc.
+ [_WIN32]: Do not call btowc, as it was tested to be very slow
+ on MinGW. Report from Eli Zaretskii.
+
2023-10-18 Gavin Smith <gavinsmith0123@gmail.com>
Texinfo 7.1
diff --git a/tp/Texinfo/XS/xspara.c b/tp/Texinfo/XS/xspara.c
index 7c6895a7ff..e1cddcdc2a 100644
--- a/tp/Texinfo/XS/xspara.c
+++ b/tp/Texinfo/XS/xspara.c
@@ -684,6 +684,30 @@ xspara_end (void)
/* characters triggering an end of sentence */
#define end_sentence_characters ".?!"
+/* Wrapper for mbrtowc. Set *PWC and return length of codepoint in bytes. */
+size_t
+get_utf8_codepoint (wchar_t *pwc, const char *mbs, size_t n)
+{
+#ifdef _WIN32
+ /* Use the above implementation of mbrtowc. Do not use btowc as
+ does not exist as standard on MS-Windows, and was tested to be
+ very slow on MinGW. */
+ return mbrtowc (pwc, mbs, n, NULL);
+#else
+ if (!PRINTABLE_ASCII(*mbs))
+ {
+ return mbrtowc (pwc, mbs, n, NULL);
+ }
+ else
+ {
+ /* Functionally the same as mbrtowc but (tested) slightly quicker. */
+ *pwc = btowc (*mbs);
+ return 1;
+ }
+#endif
+}
+
+
/* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end
of the line start a new one. TRANSPARENT means that the letters in WORD
are ignored for the purpose of deciding whether a full stop ends a sentence
@@ -730,18 +754,7 @@ xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
if (!strchr (end_sentence_characters
after_punctuation_characters, *p))
{
- if (!PRINTABLE_ASCII(*p))
- {
- wchar_t wc = L'\0';
- mbrtowc (&wc, p, len, NULL);
- state.last_letter = wc;
- break;
- }
- else
- {
- state.last_letter = btowc (*p);
- break;
- }
+ get_utf8_codepoint (&state.last_letter, p, len);
}
}
}
@@ -1013,16 +1026,7 @@ xspara_add_text (char *text, int len)
}
/************** Not a white space character. *****************/
- if (!PRINTABLE_ASCII(*p))
- {
- char_len = mbrtowc (&wc, p, len, NULL);
- }
- else
- {
- /* Functonally the same as mbrtowc but (tested) slightly quicker. */
- char_len = 1;
- wc = btowc (*p);
- }
+ char_len = get_utf8_codepoint (&wc, p, len);
if ((long) char_len == 0)
break; /* Null character. Shouldn't happen. */
--
2.42.1
|