From 54077632072544a068691eec502cbe9366e8fcdf Mon Sep 17 00:00:00 2001 From: Matt Stark Date: Mon, 30 Jun 2025 13:39:12 +1000 Subject: [PATCH] Fix wchar.h to work with modules. Textual headers are guaranteed to be safe to include multiple times. Function definitions are not safe to be included multiple times. Although the function definitions are guarded by a header guard, header guards do not work properly with modules. Consider: module a: #include module b: #include module c: #include #include When precompiling A, the AST now contains all function definitions in wchar.h. The same goes for B. When compiling C, both A and B provide definitions for these functions, resulting in ODR violations. When this occurs, you get an ODR violation while attempting to compile the module std. This is because function definitions cannot be textual. To solve this, we split the module into a textual and non-textual component. --- libcxx/include/__mbstate_t.h | 7 ++ libcxx/include/__wchar.h | 105 +++++++++++++++++++++++++++++ libcxx/include/module.modulemap.in | 3 + libcxx/include/wchar.h | 104 +++++----------------------- 4 files changed, 132 insertions(+), 87 deletions(-) create mode 100644 libcxx/include/__wchar.h diff --git a/libcxx/include/__mbstate_t.h b/libcxx/include/__mbstate_t.h index c23ea7113ca70..1e1cf7b27c799 100644 --- a/libcxx/include/__mbstate_t.h +++ b/libcxx/include/__mbstate_t.h @@ -43,8 +43,15 @@ # include // works on most Unixes #elif __has_include() # include // works on Darwin +// include_next works differently for module builders. 
#elif __has_include_next() +# define _LIBCPP_INCLUDE_NEXT_WCHAR # include_next // use the C standard provider of mbstate_t if present +# undef _LIBCPP_INCLUDE_NEXT_WCHAR +# ifdef _LIBCPP_WCHAR_NOT_FOUND +# undef _LIBCPP_WCHAR_NOT_FOUND +# include +# endif #elif __has_include_next() # include_next // Try in absence of for mbstate_t #else diff --git a/libcxx/include/__wchar.h b/libcxx/include/__wchar.h new file mode 100644 index 0000000000000..a5ebc32da7ebb --- /dev/null +++ b/libcxx/include/__wchar.h @@ -0,0 +1,105 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Lack of header guards is intentional. +// This file should only ever be included by wchar.h, which provides its own +// header guard. This prevents macro hiding issues with modules where cwchar +// complains that _LIBCPP_WCHAR_H isn't present. + +#include <__config> +#include <__mbstate_t.h> // provide mbstate_t +#include // provide size_t + +// include_next doesn't work with modules. +#if __has_include_next() +# define _LIBCPP_INCLUDE_NEXT_WCHAR +# include_next +# undef _LIBCPP_INCLUDE_NEXT_WCHAR +#endif + +// Determine whether we have const-correct overloads for wcschr and friends. 
+#if defined(_WCHAR_H_CPLUSPLUS_98_CONFORMANCE_) +# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 +#elif defined(__GLIBC_PREREQ) +# if __GLIBC_PREREQ(2, 10) +# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 +# endif +#elif defined(_LIBCPP_MSVCRT) +# if defined(_CRT_CONST_CORRECT_OVERLOADS) +# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 +# endif +#endif + +#if _LIBCPP_HAS_WIDE_CHARACTERS +# if defined(__cplusplus) && !defined(_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS) && defined(_LIBCPP_PREFERRED_OVERLOAD) +extern "C++" { +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcschr(const wchar_t* __s, wchar_t __c) { + return (wchar_t*)wcschr(__s, __c); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcschr(const wchar_t* __s, wchar_t __c) { + return __libcpp_wcschr(__s, __c); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcschr(wchar_t* __s, wchar_t __c) { + return __libcpp_wcschr(__s, __c); +} + +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcspbrk(const wchar_t* __s1, const wchar_t* __s2) { + return (wchar_t*)wcspbrk(__s1, __s2); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* +wcspbrk(const wchar_t* __s1, const wchar_t* __s2) { + return __libcpp_wcspbrk(__s1, __s2); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcspbrk(wchar_t* __s1, const wchar_t* __s2) { + return __libcpp_wcspbrk(__s1, __s2); +} + +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsrchr(const wchar_t* __s, wchar_t __c) { + return (wchar_t*)wcsrchr(__s, __c); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcsrchr(const wchar_t* __s, wchar_t __c) { + return __libcpp_wcsrchr(__s, __c); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsrchr(wchar_t* __s, wchar_t __c) { + return __libcpp_wcsrchr(__s, __c); +} + +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsstr(const wchar_t* __s1, const wchar_t* __s2) { + return (wchar_t*)wcsstr(__s1, __s2); +} +inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* +wcsstr(const wchar_t* __s1, const wchar_t* __s2) { + return __libcpp_wcsstr(__s1, __s2); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsstr(wchar_t* __s1, const wchar_t* __s2) { + return __libcpp_wcsstr(__s1, __s2); +} + +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) { + return (wchar_t*)wmemchr(__s, __c, __n); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* +wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) { + return __libcpp_wmemchr(__s, __c, __n); +} +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wmemchr(wchar_t* __s, wchar_t __c, size_t __n) { + return __libcpp_wmemchr(__s, __c, __n); +} +} +# endif + +# if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__)) +extern "C" { +size_t mbsnrtowcs( + wchar_t* __restrict __dst, const char** __restrict __src, size_t __nmc, size_t __len, mbstate_t* __restrict __ps); +size_t wcsnrtombs( + char* __restrict __dst, const wchar_t** __restrict __src, size_t __nwc, size_t __len, mbstate_t* __restrict __ps); +} // extern "C" +# endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__) +#endif // _LIBCPP_HAS_WIDE_CHARACTERS diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index f878e15d70b1a..bc7dc8d3efa10 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -2420,6 +2420,9 @@ module std_uchar_h [system] { module std_wchar_h [system] { // supports being included multiple times with different pre-defined macros textual header "wchar.h" + // Parts of wchar.h contain function definitions, so cannot be included + // multiple times. 
+ header "__wchar.h" } module std_wctype_h [system] { header "wctype.h" diff --git a/libcxx/include/wchar.h b/libcxx/include/wchar.h index a932dd266b862..633430093df5a 100644 --- a/libcxx/include/wchar.h +++ b/libcxx/include/wchar.h @@ -94,7 +94,17 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len, */ -#if defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) +// #include_next has different semantics for module builds. +// include_next only works if the current filename is wchar.h, +// otherwise it just does a regular #include. +// To solve this, if _LIBCPP_INCLUDE_NEXT_WCHAR is defined, fake an include_next. +#ifdef _LIBCPP_INCLUDE_NEXT_WCHAR +# if __has_include_next() +# include_next +# elif defined(_LIBCPP_INCLUDE_NEXT_WCHAR) +# define _LIBCPP_WCHAR_NOT_FOUND +# endif +#elif defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) # include <__cxx03/wchar.h> #else # include <__config> @@ -116,92 +126,12 @@ size_t wcsrtombs(char* restrict dst, const wchar_t** restrict src, size_t len, # include_next # endif -# ifndef _LIBCPP_WCHAR_H +// Place the header guard here to make it visible to cwchar. +# if !defined(_LIBCPP_WCHAR_H) # define _LIBCPP_WCHAR_H - -# include <__mbstate_t.h> // provide mbstate_t -# include // provide size_t - -// Determine whether we have const-correct overloads for wcschr and friends. 
-# if defined(_WCHAR_H_CPLUSPLUS_98_CONFORMANCE_) -# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 -# elif defined(__GLIBC_PREREQ) -# if __GLIBC_PREREQ(2, 10) -# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 -# endif -# elif defined(_LIBCPP_MSVCRT) -# if defined(_CRT_CONST_CORRECT_OVERLOADS) -# define _LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS 1 -# endif -# endif - -# if _LIBCPP_HAS_WIDE_CHARACTERS -# if defined(__cplusplus) && !defined(_LIBCPP_WCHAR_H_HAS_CONST_OVERLOADS) && defined(_LIBCPP_PREFERRED_OVERLOAD) -extern "C++" { -inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcschr(const wchar_t* __s, wchar_t __c) { - return (wchar_t*)wcschr(__s, __c); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcschr(const wchar_t* __s, wchar_t __c) { - return __libcpp_wcschr(__s, __c); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcschr(wchar_t* __s, wchar_t __c) { - return __libcpp_wcschr(__s, __c); -} - -inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcspbrk(const wchar_t* __s1, const wchar_t* __s2) { - return (wchar_t*)wcspbrk(__s1, __s2); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* -wcspbrk(const wchar_t* __s1, const wchar_t* __s2) { - return __libcpp_wcspbrk(__s1, __s2); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcspbrk(wchar_t* __s1, const wchar_t* __s2) { - return __libcpp_wcspbrk(__s1, __s2); -} - -inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsrchr(const wchar_t* __s, wchar_t __c) { - return (wchar_t*)wcsrchr(__s, __c); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* wcsrchr(const wchar_t* __s, wchar_t __c) { - return __libcpp_wcsrchr(__s, __c); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsrchr(wchar_t* __s, wchar_t __c) { - return __libcpp_wcsrchr(__s, __c); -} - -inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wcsstr(const wchar_t* __s1, const wchar_t* __s2) { - return (wchar_t*)wcsstr(__s1, __s2); -} -inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* -wcsstr(const wchar_t* __s1, const wchar_t* __s2) { - return __libcpp_wcsstr(__s1, __s2); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wcsstr(wchar_t* __s1, const wchar_t* __s2) { - return __libcpp_wcsstr(__s1, __s2); -} - -inline _LIBCPP_HIDE_FROM_ABI wchar_t* __libcpp_wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) { - return (wchar_t*)wmemchr(__s, __c, __n); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD const wchar_t* -wmemchr(const wchar_t* __s, wchar_t __c, size_t __n) { - return __libcpp_wmemchr(__s, __c, __n); -} -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD wchar_t* wmemchr(wchar_t* __s, wchar_t __c, size_t __n) { - return __libcpp_wmemchr(__s, __c, __n); -} -} -# endif - -# if defined(__cplusplus) && (defined(_LIBCPP_MSVCRT_LIKE) || defined(__MVS__)) -extern "C" { -size_t mbsnrtowcs( - wchar_t* __restrict __dst, const char** __restrict __src, size_t __nmc, size_t __len, mbstate_t* __restrict __ps); -size_t wcsnrtombs( - char* __restrict __dst, const wchar_t** __restrict __src, size_t __nwc, size_t __len, mbstate_t* __restrict __ps); -} // extern "C" -# endif // __cplusplus && (_LIBCPP_MSVCRT || __MVS__) -# endif // _LIBCPP_HAS_WIDE_CHARACTERS -# endif // _LIBCPP_WCHAR_H +// This section is not safe to include multiple times, so it goes in a separate +// file which is marked as non-textual in the modulemap. +# include <__wchar.h> +# endif #endif // defined(__cplusplus) && __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)