Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MSVC WCHAR -> utf8 (https://github.com/madler/pigz/issues/62) #3

Merged
merged 1 commit into from
Feb 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 100 additions & 7 deletions src/pigz.c
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,9 @@
*/

#if defined(_MSC_VER)
//#include <wchar.h>
//#include <mbstring.h>
#pragma execution_character_set("utf-8")
//#include <cstdint>
#include <BaseTsd.h>
//#include <winbase.h>
Expand Down Expand Up @@ -395,10 +398,80 @@ typedef SSIZE_T ssize_t;
#endif

#ifdef _MSC_VER

//https://www.nu42.com/2017/02/unicode-windows-command-line.html
//https://raw.githubusercontent.com/gypified/libmp3lame/master/frontend/main.c
static wchar_t *mbsToUnicode(const char *mbstr, int code_page) {
int n = MultiByteToWideChar(code_page, 0, mbstr, -1, NULL, 0);
wchar_t* wstr = malloc( n*sizeof(wstr[0]) );
if ( wstr !=0 ) {
n = MultiByteToWideChar(code_page, 0, mbstr, -1, wstr, n);
if ( n==0 ) {
free( wstr );
wstr = 0;
}
}
return wstr;
}

static char *unicodeToMbs(const wchar_t *wstr, int code_page) {
int n = 1+WideCharToMultiByte(code_page, 0, wstr, -1, 0, 0, 0, 0);
char* mbstr = malloc( n*sizeof(mbstr[0]) );
if ( mbstr !=0 ) {
n = WideCharToMultiByte(code_page, 0, wstr, -1, mbstr, n, 0, 0);
if( n == 0 ){
free( mbstr );
mbstr = 0;
}
}
return mbstr;
}

wchar_t *utf8ToUnicode(const char *mbstr) {
return mbsToUnicode(mbstr, CP_UTF8);
}

char *unicodeToUtf8(const wchar_t *wstr) {
return unicodeToMbs(wstr, CP_UTF8);
}

# define chown(p,o,g) 0
# define utimes(p,t) 0
# define lstat(p,s) stat(p,s)
# define _exit(s) exit(s)

int lstat(const char *path, struct stat *buf){
//https://mail.gnome.org/archives/gtk-devel-list/2011-September/msg00177.html
wchar_t *wstr = utf8ToUnicode(path);
int rc = _wstat(wstr, buf);
#ifdef PIGZ_DEBUG
printf("lstat(%s)=%d\n", path, rc );
#endif
free(wstr);
return rc;
}

int openUTF8(const char *path, int flags, int mode) {
wchar_t *wstr = utf8ToUnicode(path);
int rc = _wopen(wstr, flags, mode);
#ifdef PIGZ_DEBUG
printf("openUTF8(%s, %d, %d)=%d\n", path, flags, mode, rc );
#endif
free(wstr);
return rc;
}

int unlinkUTF8(const char *path) {
wchar_t *wstr = utf8ToUnicode(path);
int rc = _wunlink(wstr);
#ifdef PIGZ_DEBUG
printf("unlinkUTF8(%s)=%d\n", path, rc );
#endif
free(wstr);
return rc;
}
#else
# define openUTF8(p,f,m) open(p,f,m)
# define unlinkUTF8(p) unlink(p)
#endif

#ifdef __MINGW32__
Expand Down Expand Up @@ -868,7 +941,7 @@ local void cut_short(int sig) {
Trace(("termination by user"));
}
if (g.outd != -1 && g.outd != 1) {
unlink(g.outf);
unlinkUTF8(g.outf);
RELEASE(g.outf);
g.outd = -1;
}
Expand Down Expand Up @@ -3870,7 +3943,7 @@ local void process(char *path) {
}

// open input file
g.ind = open(g.inf, O_RDONLY, 0);
g.ind = openUTF8(g.inf, O_RDONLY, 0);
if (g.ind < 0)
throw(errno, "read error on %s (%s)", g.inf, strerror(errno));

Expand Down Expand Up @@ -3964,7 +4037,7 @@ local void process(char *path) {
memcpy(g.outf, g.inf, pre);
memcpy(g.outf + pre, to, len);
strcpy(g.outf + pre + len, sufx);
g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY |
g.outd = openUTF8(g.outf, O_CREAT | O_TRUNC | O_WRONLY |
(g.force ? 0 : O_EXCL), 0600);

// if exists and not -f, give user a chance to overwrite
Expand All @@ -3980,7 +4053,7 @@ local void process(char *path) {
reply = ch == 'y' || ch == 'Y' ? 1 : 0;
} while (ch != EOF && ch != '\n' && ch != '\r');
if (reply == 1)
g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY,
g.outd = openUTF8(g.outf, O_CREAT | O_TRUNC | O_WRONLY,
0600);
}

Expand Down Expand Up @@ -4019,7 +4092,7 @@ local void process(char *path) {
if (g.outd != -1 && g.outd != 1) {
close(g.outd);
g.outd = -1;
unlink(g.outf);
unlinkUTF8(g.outf);
RELEASE(g.outf);
}
}
Expand All @@ -4046,7 +4119,7 @@ local void process(char *path) {
if (g.ind != 0) {
copymeta(g.inf, g.outf);
if (!g.keep)
unlink(g.inf);
unlinkUTF8(g.inf);
}
if (g.decode && (g.headis & 2) != 0 && g.stamp)
touch(g.outf, g.stamp);
Expand Down Expand Up @@ -4396,7 +4469,11 @@ local void cut_yarn(int err) {
#endif

// Process command line arguments.
#ifndef _MSC_VER
int main(int argc, char **argv) {
#else
int c_main(int argc, char *argv[]) {
#endif
int n; // general index
int nop; // index before which "-" means stdin
int done; // number of named files processed
Expand Down Expand Up @@ -4536,3 +4613,19 @@ int main(int argc, char **argv) {
log_dump();
return g.ret;
}

#ifdef _MSC_VER
int wmain(int argc, wchar_t* argv[]) { //convert each argument to UTF8
char **utf8_argv;
int ret;
utf8_argv = calloc(argc, sizeof(char*));
for (int i = 0; i < argc; ++i)
utf8_argv[i] = unicodeToUtf8(argv[i]);
SetConsoleOutputCP(CP_UTF8);
ret = c_main(argc, utf8_argv);
for (int i = 0; i < argc; ++i)
free( utf8_argv[i] );
free( utf8_argv );
return ret;
}
#endif
11 changes: 7 additions & 4 deletions src/win/msvc.bat
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ nmake -f win32\Makefile.msc
cd ..\thread
nmake clean VC-static
cd ..
rm *.obj

cl -c ../pigz.c /Idirent /Ithread /Izlib -DNOZOPFLI
cl -c ../yarn.c /Idirent /Ithread /Izlib
cl -c ../try.c /Idirent /Ithread /Izlib
cl -o pigz yarn.obj try.obj pigz.obj /Idirent /Ithread /Izlib /link shell32.lib zlib/zlib.lib thread/libpthreadVC3.lib /NODEFAULTLIB:MSVCRT

cl -c ../pigz.c /utf-8 /Idirent /Ithread /Izlib -DNOZOPFLI
cl -c ../yarn.c /utf-8 /Idirent /Ithread /Izlib
cl -c ../try.c /utf-8 /Idirent /Ithread /Izlib
cl /Fepigz pigz.obj yarn.obj try.obj /utf-8 /Idirent /Ithread /Izlib /link shell32.lib zlib/zlib.lib thread/libpthreadVC3.lib /NODEFAULTLIB:MSVCRT
REM pigz -f -k Описание.pdf 测试.obj fx.pdf
REM pigz -d -f -k Описание.pdf.gz 测试.obj.gz