X-Recipient: archive-cygwin AT delorie DOT com X-SWARE-Spam-Status: No, hits=1.0 required=5.0 tests=BAYES_50,DKIM_SIGNED,DKIM_VALID,RCVD_IN_DNSWL_NONE,TW_VZ,TW_ZB,TW_ZW,T_FILL_THIS_FORM X-Spam-Check-By: sourceware.org X-RZG-AUTH: :Ln4Re0+Ic/6oZXR1YgKryK8brksyK8dozXDwHXjf9hj/zDJRaPAn/CKsdwc= X-RZG-CLASS-ID: mo00 From: Bruno Haible To: cygwin AT cygwin DOT com Subject: wctob function overwrites caller-owned register Date: Sun, 2 May 2010 12:43:15 +0200 User-Agent: KMail/1.9.9 MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline Message-Id: <201005021243.16515.bruno@clisp.org> Mailing-List: contact cygwin-help AT cygwin DOT com; run by ezmlm List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: cygwin-owner AT cygwin DOT com Mail-Followup-To: cygwin AT cygwin DOT com Delivered-To: mailing list cygwin AT cygwin DOT com In Cygwin 1.7.2, the wctob() function clobbers the %ebx register, which belongs to the caller. The effects are random behaviour and crashes in the caller. How to reproduce: ================= Compile this program, consisting of 2 parts, without optimization. It works fine. 
================================= bugpart1.c ================================= #include #include #include #include #include #include #include #define ASSERT(expr) \ do \ { \ if (!(expr)) \ { \ fprintf (stderr, "%s:%d: assertion failed\n", \ __FILE__, __LINE__); \ fflush (stderr); \ abort (); \ } \ } \ while (0) #define BUFSIZE 10 void dumpbuf(wchar_t buf[BUFSIZE]) { int i; printf ("buf ="); for (i = 0; i < BUFSIZE; i++) printf(" %04X", buf[i]); printf ("\n"); fflush (stdout); } void dumpstate(const char *prefix, mbstate_t *statep) { int i; printf ("%s = ", prefix); for (i = 0; i < sizeof (mbstate_t); i++) printf("%02X", ((unsigned char *)statep)[i]); printf ("\n"); fflush (stdout); } void step1 (wchar_t buf[BUFSIZE]) { size_t i; for (i = 0; i < BUFSIZE; i++) buf[i] = (wchar_t) 0xBADFACE; } void step2 (mbstate_t *statep) { memset (statep, '\0', sizeof (mbstate_t)); } void step3 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input) { wchar_t wc; size_t ret; wc = (wchar_t) 0xBADFACE; ret = mbrtowc (&wc, input + 1, 1, statep); ASSERT (ret == (size_t)(-2)); ASSERT (wc == (wchar_t) 0xBADFACE); ASSERT (!mbsinit (statep)); input[1] = '\0'; dumpbuf(buf); dumpstate("state",statep); } void step4 (wchar_t buf[BUFSIZE], mbstate_t *statep, mbstate_t *temp_statep, char *input) { const char *src; size_t ret; src = input + 2; *temp_statep = *statep; ret = mbsrtowcs (NULL, &src, 2, temp_statep); ASSERT (ret == 4); ASSERT (src == input + 2); ASSERT (!mbsinit (statep)); dumpbuf(buf); dumpstate("state",statep); dumpstate("temps",temp_statep); } extern void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input); int main (int argc, char *argv[]) { if (setlocale (LC_ALL, "fr_FR.UTF-8") == NULL) return 1; { wchar_t buf[BUFSIZE]; mbstate_t state; mbstate_t temp_state; step1 (buf); /* Locale encoding is UTF-8. 
*/ { char input[] = "B\303\274\303\237er"; step2 (&state); dumpbuf(buf); dumpstate("state",&state); step3 (buf, &state, input); #if 1 step4 (buf, &state, &temp_state, input); #else { const char *src; size_t ret; src = input + 2; temp_state = state; ret = mbsrtowcs (NULL, &src, 2, &temp_state); ASSERT (ret == 4); ASSERT (src == input + 2); ASSERT (!mbsinit (&state)); dumpbuf(buf); dumpstate("state",&state); dumpstate("temps",&temp_state); } #endif #if 1 step5 (buf, &state, input); #else { const char *src; size_t ret; src = input + 2; ret = mbsrtowcs (buf, &src, 2, &state); ASSERT (ret == 2); ASSERT (src == input + 5); dumpbuf(buf); dumpstate("state",&state); ASSERT (wctob (buf[0]) == EOF); ASSERT (wctob (buf[1]) == EOF); ASSERT (buf[2] == (wchar_t) 0xBADFACE); ASSERT (mbsinit (&state)); } #endif } } return 0; } ================================= bugpart2.c ================================= #include #include #include #include #include #include #include #define ASSERT(expr) \ do \ { \ if (!(expr)) \ { \ fprintf (stderr, "%s:%d: assertion failed\n", \ __FILE__, __LINE__); \ fflush (stderr); \ abort (); \ } \ } \ while (0) #define BUFSIZE 10 extern void dumpbuf(wchar_t buf[BUFSIZE]); extern void dumpstate(const char *prefix, mbstate_t *statep); void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input) { const char *src; size_t ret; src = input + 2; ret = mbsrtowcs (buf, &src, 2, statep); ASSERT (ret == 2); ASSERT (src == input + 5); dumpbuf(buf); dumpstate("state",statep); ASSERT (wctob (buf[0]) == EOF); ASSERT (wctob (buf[1]) == EOF); ASSERT (buf[2] == (wchar_t) 0xBADFACE); ASSERT (mbsinit (statep)); } ============================================================================== $ gcc -c bugpart1.c -Wall $ gcc -c bugpart2.c -Wall $ gcc bugpart1.o bugpart2.o $ ./a.exe buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE state = 0000000000000000 buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE state = 01000000C3000000 buf = FACE FACE FACE FACE FACE FACE 
FACE FACE FACE FACE state = 01000000C3000000 temps = 00000000C3000000 buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE state = 00000000C3000000 Then compile bugpart2 with optimization. The program crashes: $ gcc -c bugpart2.c -Wall -O $ gcc bugpart1.o bugpart2.o $ ./a.exe buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE state = 0000000000000000 buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE state = 01000000C3000000 buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE state = 01000000C3000000 temps = 00000000C3000000 buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE state = 00000000C3000000 bugpart2.c:38: assertion failed bash: [5528: 1] tcsetattr: Inappropriate ioctl for device Aborted (core dumped) Known facts: - When GCC optimizes, it allocates variables in registers. In this case, in bugpart2, the variable 'buf' gets tied to register %ebx. - %ebx is a callee-saved register; see the value of CALL_USED_REGISTERS in gcc-4.5.0/gcc/config/i386/i386.h. Then single-step through bugpart2 (with gdb's 'nexti' command), while looking at the values of the saved registers. The gcc generated code is correct. Here it is, with comments on the right-hand side: ------------------------------------------------------------------------------------- _step5: pushl %ebp movl %esp, %ebp pushl %edi pushl %esi pushl %ebx subl $44, %esp movl 8(%ebp), %ebx buf movl 12(%ebp), %edi statep movl 16(%ebp), %esi input leal 2(%esi), %eax input+2 movl %eax, -16(%ebp) src movl %edi, 12(%esp) movl $2, 8(%esp) leal -16(%ebp), %eax movl %eax, 4(%esp) movl %ebx, (%esp) %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 call _mbsrtowcs call mbsrtowcs %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 cmpl $2, %eax ret == 2 je L2 ... L2: leal 5(%esi), %eax input+5 cmpl %eax, -16(%ebp) == src je L3 ... 
L3: movl %ebx, (%esp) buf %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 call _dumpbuf %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 movl %edi, 4(%esp) statep movl $LC2, (%esp) %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 call _dumpstate %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 movzwl (%ebx), %eax buf[0] movl %eax, (%esp) %ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08 call _wctob %ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08 cmpl $-1, %eax je L4 ... L4: movzwl 2(%ebx), %eax buf[1] movl %eax, (%esp) %ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08 call _wctob %ebx=0x228084 %esi=0x22ccf8 %edi=0x22cd08 cmpl $-1, %eax je L5 ... L5: cmpw $-1330, 4(%ebx) buf[2] == 0xbadface je L6 .p2align 4,,6 ... L6: movl %edi, (%esp) statep call _mbsinit testl %eax, %eax jne L8 ... L8: addl $44, %esp popl %ebx popl %esi popl %edi popl %ebp ret ------------------------------------------------------------------------------------- You can see that across each call to wctob, %ebx is clobbered. Origin of the bug: ================== This is the code in wctob.c: int wctob (wint_t c) { mbstate_t mbs; int retval = 0; unsigned char pwc; /* Put mbs in initial state. 
*/ memset (&mbs, '\0', sizeof (mbs)); _REENT_CHECK_MISC(_REENT); retval = __wctomb (_REENT, &pwc, c, __locale_charset (), &mbs); if (c == EOF || retval != 1) return WEOF; else return (int)pwc; } And this is its disassembly: ------------------------------------------------------------------------------- 0x6110d510 : push %ebp 0x6110d511 : mov %esp,%ebp 0x6110d513 : sub $0x38,%esp 0x6110d516 : mov %ebx,-0xc(%ebp) save %ebx 0x6110d519 : lea -0x18(%ebp),%ebx &mbs 0x6110d51c : mov %esi,-0x8(%ebp) save %esi 0x6110d51f : mov %edi,-0x4(%ebp) save %edi 0x6110d522 : mov 0x8(%ebp),%edi c 0x6110d525 : movl $0x8,0x8(%esp) 0x6110d52d : movl $0x0,0x4(%esp) 0x6110d535 : mov %ebx,(%esp) 0x6110d538 : call 0x61107d30 call memset 0x6110d53d : mov 0x6115da24,%esi 0x6110d543 : call 0x61103a50 <__locale_charset> 0x6110d548 : mov %ebx,0x10(%esp) 0x6110d54c : mov %eax,0xc(%esp) 0x6110d550 : movzwl %di,%eax 0x6110d553 : mov %eax,0x8(%esp) 0x6110d557 : lea -0xd(%ebp),%eax &pwc 0x6110d55a : mov %eax,0x4(%esp) 0x6110d55e : mov %fs:0x4,%eax 0x6110d564 : sub $0x3000,%eax 0x6110d569 : mov %eax,(%esp) _REENT 0x6110d56c : call *%esi call __wctomb 0x6110d56e : add $0x1,%edi 0x6110d571 : je 0x6110d578 0x6110d573 : sub $0x1,%eax 0x6110d576 : je 0x6110d590 0x6110d578 : mov $0xffffffff,%eax 0x6110d57d : mov -0xc(%ebp),%ebx restore %ebx 0x6110d580 : mov -0x8(%ebp),%esi restore %esi 0x6110d583 : mov -0x4(%ebp),%edi restore %edi 0x6110d586 : mov %ebp,%esp 0x6110d588 : pop %ebp 0x6110d589 : ret 0x6110d590 : movzbl -0xd(%ebp),%eax 0x6110d594 : jmp 0x6110d57d ------------------------------------------------------------------------------- You can see that the area where %ebx is saved is in the bytes %ebp-12..%ebp-9. And in %ebp-13 you have the 'pwc' variable. The bug is that you are passing a 1-byte buffer to a function which will write up to MB_CUR_MAX bytes into this buffer. Of course it will clobber the memory area next to the 1-byte buffer, and this is the %ebx save area! This code dates back to 2002. 
When Cygwin did not support multibyte encodings, MB_CUR_MAX was effectively 1 always. But now, for the UTF-8 encoding at least, MB_CUR_MAX is effectively 4. Bruno -- Problem reports: http://cygwin.com/problems.html FAQ: http://cygwin.com/faq/ Documentation: http://cygwin.com/docs.html Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple