-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[libc++] Fix wchar.h to work with modules #146630
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libcxx Author: Matt (matts1) ChangesTextual headers are guaranteed to be safe to include multiple times. Function definitions are not safe to be included multiple times. Although the function definitions are guarded by a header guard, header guards do not work properly with modules. Consider: module a: #include <wchar.h> module c:
When precompling A, the AST now contains all function definitions in wchar.h. The same goes for B. When compiling C, both A and B provide definitions for these functions, resulting in ODR violations. When this occurs, you get an ODR violation while attempting to compile the module std. This is because function definitions cannot be textual. To solve this, we split the module into a textual and non-textual component. Full diff: https://github.com/llvm/llvm-project/pull/146630.diff 4 Files Affected:
diff --git a/libcxx/include/__mbstate_t.h b/libcxx/include/__mbstate_t.h
index c23ea7113ca70..1e1cf7b27c799 100644
--- a/libcxx/include/__mbstate_t.h
+++ b/libcxx/include/__mbstate_t.h
@@ -43,8 +43,15 @@
# include <bits/types/mbstate_t.h> // works on most Unixes
#elif __has_include(<sys/_types/_mbstate_t.h>)
# include <sys/_types/_mbstate_t.h> // works on Darwin
+// include_next works differently for module builders.
#elif __has_include_next(<wchar.h>)
+# define _LIBCPP_INCLUDE_NEXT_WCHAR
# include_next <wchar.h> // use the C standard provider of mbstate_t if present
+# undef _LIBCPP_INCLUDE_NEXT_WCHAR
+# ifdef _LIBCPP_WCHAR_NOT_FOUND
+# undef _LIBCPP_WCHAR_NOT_FOUND
+# include <uchar.h>
+# endif
#elif __has_include_next(<uchar.h>)
# include_next <uchar.h> // Try <uchar.h> in absence of <wchar.h> for mbstate_t
#else
diff --git a/libcxx/include/__wchar.h b/libcxx/include/__wchar.h
new file mode 100644
index 0000000000000..7d9a738e24386
--- /dev/null
+++ b/libcxx/include/__wchar.h
@@ -0,0 +1,106 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Lack of header guards is intentional.
+// This file should only ever be included by wchar.h, which provides its own
+// header guard. This prevents macro hiding issues with modules where cwchar
+// complains that _LIBCPP_WCHAR_H isn't present.
+
+#include <__config>
+#include <__mbstate_t.h> // provide mbstate_t
+#include <stddef.h> // provide size_t
+
+// include_next doesn't work with modules.
+#if __has_include_next(<wchar.h>)
+# define _LIBCPP_INCLUDE_NEXT_WCHAR
+# include_next <wchar.h>
+# undef _LIBCPP_INCLUDE_NEXT_WCHAR
+#endif
+
+// Determine whether we have const-correct overloads for wcschr and friends.
+# if defined(_WCHAR_H_CPLUSPLUS_98_CONFORMANCE_)
+# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
+# elif defined(__GLIBC_PREREQ)
+# if __GLIBC_PREREQ(2, 10)
+# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
+# endif
+# elif defined(_LIBCPP_MSVCRT)
+# if defined(_CRT_CONST_CORRECT_OVERLOADS)
+# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
+# endif
+# endif
+
+# if _LIBCPP_HAS_WIDE_CHARACTERS
+# if defined(__cplusplus) && !defined(_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS) && defined(_LIBCPP_PREFERRED_OVERLOAD)
+extern "C++" {
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcschr(const wchar_t* __s, wchar_t __c) {
+ return (wchar_t*)wcschr(__s, __c);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcschr(const wchar_t* __s, wchar_t __c) {
+ return __libcpp_wcschr(__s, __c);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcschr(wchar_t* __s, wchar_t __c) {
+ return __libcpp_wcschr(__s, __c);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcspbrk(const wchar_t* __s1, const wchar_t* __s2) {
+ return (wchar_t*)wcspbrk(__s1, __s2);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
+wcspbrk(const wchar_t* __s1, const wchar_t* __s2) {
+ return __libcpp_wcspbrk(__s1, __s2);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcspbrk(wchar_t* __s1, const wchar_t* __s2) {
+ return __libcpp_wcspbrk(__s1, __s2);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsrchr(const wchar_t* __s, wchar_t __c) {
+ return (wchar_t*)wcsrchr(__s, __c);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcsrchr(const wchar_t* __s, wchar_t __c) {
+ return __libcpp_wcsrchr(__s, __c);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsrchr(wchar_t* __s, wchar_t __c) {
+ return __libcpp_wcsrchr(__s, __c);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsstr(const wchar_t* __s1, const wchar_t* __s2) {
+ return (wchar_t*)wcsstr(__s1, __s2);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
+wcsstr(const wchar_t* __s1, const wchar_t* __s2) {
+ return __libcpp_wcsstr(__s1, __s2);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsstr(wchar_t* __s1, const wchar_t* __s2) {
+ return __libcpp_wcsstr(__s1, __s2);
+}
+
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) {
+ return (wchar_t*)wmemchr(__s, __c, __n);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
+wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) {
+ return __libcpp_wmemchr(__s, __c, __n);
+}
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wmemchr(wchar_t* __s, wchar_t __c, size_t __n) {
+ return __libcpp_wmemchr(__s, __c, __n);
+}
+}
+# endif
+
+# if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__))
+extern "C" {
+size_t mbsnrtowcs(
+ wchar_t* __restrict __dst, const char** __restrict __src, size_t __nmc, size_t __len, mbstate_t* __restrict __ps);
+size_t wcsnrtombs(
+ char* __restrict __dst, const wchar_t** __restrict __src, size_t __nwc, size_t __len, mbstate_t* __restrict __ps);
+} // extern "C"
+# endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__)
+# endif // _LIBCPP_HAS_WIDE_CHARACTERS
+
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index f878e15d70b1a..bc7dc8d3efa10 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2420,6 +2420,9 @@ module std_uchar_h [system] {
module std_wchar_h [system] {
// <wchar.h> supports being included multiple times with different pre-defined macros
textual header "wchar.h"
+ // Parts of wchar.h contain function definitions, so cannot be included
+ // multiple times.
+ header "__wchar.h"
}
module std_wctype_h [system] {
header "wctype.h"
diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h
index a932dd266b862..0056069ba0c4d 100644
--- a/libcxx/include/wchar.h
+++ b/libcxx/include/wchar.h
@@ -94,7 +94,17 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len,
*/
-#if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+// #include_next has different semantics for module builds.
+// include_next<wchar.h> only works if the current filename is wchar.h,
+// otherwise it just does a regular #include.
+// To solve this, if _LIBCPP_INCLUDE_NEXT_WCHAR is defined, fake an include_next.
+#ifdef _LIBCPP_INCLUDE_NEXT_WCHAR
+# if __has_include_next(<wchar.h>)
+# include_next <wchar.h>
+# elif defined(_LIBCPP_INCLUDE_NEXT_WCHAR)
+# define _LIBCPP_WCHAR_NOT_FOUND
+# endif
+#elif defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
# include <__cxx03/wchar.h>
#else
# include <__config>
@@ -116,92 +126,12 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len,
# include_next <wchar.h>
# endif
-# ifndef _LIBCPP_WCHAR_H
-# define _LIBCPP_WCHAR_H
-
-# include <__mbstate_t.h> // provide mbstate_t
-# include <stddef.h> // provide size_t
-
-// Determine whether we have const-correct overloads for wcschr and friends.
-# if defined(_WCHAR_H_CPLUSPLUS_98_CONFORMANCE_)
-# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
-# elif defined(__GLIBC_PREREQ)
-# if __GLIBC_PREREQ(2, 10)
-# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
-# endif
-# elif defined(_LIBCPP_MSVCRT)
-# if defined(_CRT_CONST_CORRECT_OVERLOADS)
-# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1
-# endif
-# endif
-
-# if _LIBCPP_HAS_WIDE_CHARACTERS
-# if defined(__cplusplus) && !defined(_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS) && defined(_LIBCPP_PREFERRED_OVERLOAD)
-extern "C++" {
-inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcschr(const wchar_t* __s, wchar_t __c) {
- return (wchar_t*)wcschr(__s, __c);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcschr(const wchar_t* __s, wchar_t __c) {
- return __libcpp_wcschr(__s, __c);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcschr(wchar_t* __s, wchar_t __c) {
- return __libcpp_wcschr(__s, __c);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcspbrk(const wchar_t* __s1, const wchar_t* __s2) {
- return (wchar_t*)wcspbrk(__s1, __s2);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
-wcspbrk(const wchar_t* __s1, const wchar_t* __s2) {
- return __libcpp_wcspbrk(__s1, __s2);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcspbrk(wchar_t* __s1, const wchar_t* __s2) {
- return __libcpp_wcspbrk(__s1, __s2);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsrchr(const wchar_t* __s, wchar_t __c) {
- return (wchar_t*)wcsrchr(__s, __c);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcsrchr(const wchar_t* __s, wchar_t __c) {
- return __libcpp_wcsrchr(__s, __c);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsrchr(wchar_t* __s, wchar_t __c) {
- return __libcpp_wcsrchr(__s, __c);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsstr(const wchar_t* __s1, const wchar_t* __s2) {
- return (wchar_t*)wcsstr(__s1, __s2);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
-wcsstr(const wchar_t* __s1, const wchar_t* __s2) {
- return __libcpp_wcsstr(__s1, __s2);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsstr(wchar_t* __s1, const wchar_t* __s2) {
- return __libcpp_wcsstr(__s1, __s2);
-}
-
-inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) {
- return (wchar_t*)wmemchr(__s, __c, __n);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t*
-wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) {
- return __libcpp_wmemchr(__s, __c, __n);
-}
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wmemchr(wchar_t* __s, wchar_t __c, size_t __n) {
- return __libcpp_wmemchr(__s, __c, __n);
-}
-}
-# endif
-
-# if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__))
-extern "C" {
-size_t mbsnrtowcs(
- wchar_t* __restrict __dst, const char** __restrict __src, size_t __nmc, size_t __len, mbstate_t* __restrict __ps);
-size_t wcsnrtombs(
- char* __restrict __dst, const wchar_t** __restrict __src, size_t __nwc, size_t __len, mbstate_t* __restrict __ps);
-} // extern "C"
-# endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__)
-# endif // _LIBCPP_HAS_WIDE_CHARACTERS
-# endif // _LIBCPP_WCHAR_H
+// Place the header guard here to make it visible to cwchar.
+#if !defined(_LIBCPP_WCHAR_H)
+# define _LIBCPP_WCHAR_H
+// This section is not safe to include multiple times, so it goes in a seperate
+// file which is marked as non textual in the modulemap.
+# include <__wchar.h>
+#endif
#endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
|
FYI @atetubou |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Textual headers are guaranteed to be safe to include multiple times. Function definitions are not safe to be included multiple times. Although the function definitions are guarded by a header guard, header guards do not work properly with modules. Consider: module a: #include <wchar.h> module b: #include <wchar.h> module c: #include <a> #include <b> When precompling A, the AST now contains all function definitions in wchar.h. The same goes for B. When compiling C, both A and B provide definitions for these functions, resulting in ODR violations. When this occurs, you get an ODR violation while attempting to compile the module std. This is because function definitions cannot be textual. To solve this, we split the module into a textual and non-textual component.
|
||
# if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__)) | ||
extern "C" { | ||
size_t mbsnrtowcs( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This and below are not definition, but they also need to be here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These ones don't strictly need to be here, but unless something relies on macros that can be set by the includer, it should prefer to be non-textual (this relies on macros set in __config).
So if we're going to the effort of moving the definitions, we might as well move this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would be a lot cleaner if we just moved the overrides into something like __wchar/overrides.h
and make that a module. Then you can include it in wchar.h
and you don't need the weird macro dance.
I'd also like to see a test for this.
Yep, I've been working on a test locally. I'm not sure I understand your The problem is that both the module I'd love a solution that avoids the macro dance - it is very gross, but I'm not currently seeing one. |
Textual headers are guaranteed to be safe to include multiple times. Function definitions are not safe to be included multiple times.
Although the function definitions are guarded by a header guard, header guards do not work properly with modules. Consider:
module a: #include <wchar.h>
module b: #include <wchar.h>
module c:
When precompling A, the AST now contains all function definitions in wchar.h. The same goes for B.
When compiling C, both A and B provide definitions for these functions, resulting in ODR violations.
When this occurs, you get an ODR violation while attempting to compile the module std. This is because function definitions cannot be textual.
To solve this, we split the module into a textual and non-textual component.