Mail Archives: cygwin/2010/05/02/06:43:52
X-Recipient: | archive-cygwin AT delorie DOT com
|
X-SWARE-Spam-Status: | No, hits=1.0 required=5.0 tests=BAYES_50,DKIM_SIGNED,DKIM_VALID,RCVD_IN_DNSWL_NONE,TW_VZ,TW_ZB,TW_ZW,T_FILL_THIS_FORM
|
X-Spam-Check-By: | sourceware.org
|
X-RZG-AUTH: | :Ln4Re0+Ic/6oZXR1YgKryK8brksyK8dozXDwHXjf9hj/zDJRaPAn/CKsdwc=
|
X-RZG-CLASS-ID: | mo00
|
From: | Bruno Haible <bruno AT clisp DOT org>
|
To: | cygwin AT cygwin DOT com
|
Subject: | wctob function overwrites caller-owned register
|
Date: | Sun, 2 May 2010 12:43:15 +0200
|
User-Agent: | KMail/1.9.9
|
MIME-Version: | 1.0
|
Message-Id: | <201005021243.16515.bruno@clisp.org>
|
Mailing-List: | contact cygwin-help AT cygwin DOT com; run by ezmlm
|
List-Id: | <cygwin.cygwin.com>
|
List-Subscribe: | <mailto:cygwin-subscribe AT cygwin DOT com>
|
List-Archive: | <http://sourceware.org/ml/cygwin/>
|
List-Post: | <mailto:cygwin AT cygwin DOT com>
|
List-Help: | <mailto:cygwin-help AT cygwin DOT com>, <http://sourceware.org/ml/#faqs>
|
Sender: | cygwin-owner AT cygwin DOT com
|
Mail-Followup-To: | cygwin AT cygwin DOT com
|
Delivered-To: | mailing list cygwin AT cygwin DOT com
|
In Cygwin 1.7.2, the wctob() function clobbers the %ebx register, which
belongs to the caller. The effects are random behaviour and crashes in
the caller.
How to reproduce:
=================
Compile this program, consisting of 2 parts, without optimization.
It works fine.
================================= bugpart1.c =================================
#include <wchar.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
/* Statement macro: when EXPR is false, print "<file>:<line>: assertion failed"
   to stderr, flush stderr and abort.  EXPR is evaluated exactly once.  */
#define ASSERT(expr) \
  do \
    { \
      if (expr) \
        break; \
      fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
      fflush (stderr); \
      abort (); \
    } \
  while (0)
#define BUFSIZE 10
/* Print the BUFSIZE elements of BUF to stdout on one line, in the form
   "buf = XXXX XXXX ...", each element as upper-case hexadecimal padded to
   at least four digits, then flush stdout.  */
void dumpbuf(wchar_t buf[BUFSIZE])
{
  size_t i;

  printf ("buf =");
  for (i = 0; i < BUFSIZE; i++)
    {
      /* %X takes an unsigned int; wchar_t is not guaranteed to promote to
         that type, so convert explicitly.  */
      printf (" %04X", (unsigned int) buf[i]);
    }
  printf ("\n");
  fflush (stdout);
}
/* Print "<prefix> = " followed by every byte of *STATEP as two upper-case
   hexadecimal digits and a newline to stdout, then flush stdout.  */
void dumpstate(const char *prefix, mbstate_t *statep)
{
  unsigned char *byte = (unsigned char *) statep;
  unsigned char *end = byte + sizeof (mbstate_t);

  printf ("%s = ", prefix);
  while (byte != end)
    {
      printf ("%02X", *byte);
      byte++;
    }
  printf ("\n");
  fflush (stdout);
}
/* Store the value 0xBADFACE, converted to wchar_t, in each of the BUFSIZE
   elements of BUF.  */
void step1 (wchar_t buf[BUFSIZE])
{
  const wchar_t marker = (wchar_t) 0xBADFACE;
  wchar_t *slot;

  for (slot = buf; slot != buf + BUFSIZE; slot++)
    *slot = marker;
}
/* Overwrite every byte of *STATEP with zero.  */
void step2 (mbstate_t *statep)
{
  memset (statep, 0, sizeof *statep);
}
/* Pass the single byte input[1] to mbrtowc with conversion state *STATEP
   and require that it is reported as an incomplete character: the return
   value is (size_t) -2, the output wide character is left unchanged, and
   *STATEP is no longer an initial state.  Then set input[1] to NUL and dump
   BUF and *STATEP.  */
void step3 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input)
{
  const wchar_t untouched = (wchar_t) 0xBADFACE;
  wchar_t decoded = untouched;
  size_t result = mbrtowc (&decoded, &input[1], 1, statep);

  ASSERT (result == (size_t)(-2));
  ASSERT (decoded == untouched);
  ASSERT (mbsinit (statep) == 0);

  input[1] = '\0';

  dumpbuf (buf);
  dumpstate ("state", statep);
}
/* Copy *STATEP into *TEMP_STATEP, then call mbsrtowcs with a NULL
   destination on the bytes starting at input + 2, using the copy as the
   conversion state.  Require a return value of 4, an unchanged source
   pointer, and that *STATEP itself is still not an initial state.  Finally
   dump BUF, *STATEP and *TEMP_STATEP.  */
void step4 (wchar_t buf[BUFSIZE], mbstate_t *statep, mbstate_t *temp_statep, char *input)
{
  const char *cursor = &input[2];
  size_t count;

  *temp_statep = *statep;
  count = mbsrtowcs (NULL, &cursor, 2, temp_statep);

  ASSERT (count == 4);
  ASSERT (cursor == &input[2]);
  ASSERT (mbsinit (statep) == 0);

  dumpbuf (buf);
  dumpstate ("state", statep);
  dumpstate ("temps", temp_statep);
}
extern void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input);
/* Driver for the reproduction.  Selects the "fr_FR.UTF-8" locale, then runs
   step1 through step5 on a local buffer, two conversion states and the byte
   string "B\303\274\303\237er", dumping the buffer and state in between.
   Returns 1 if the locale cannot be set, 0 otherwise.  argc and argv are
   unused.  */
int
main (int argc, char *argv[])
{
if (setlocale (LC_ALL, "fr_FR.UTF-8") == NULL)
return 1;
{
wchar_t buf[BUFSIZE];
mbstate_t state;
mbstate_t temp_state;
step1 (buf);
/* Locale encoding is UTF-8. */
{
/* Writable array: step3 stores a NUL into input[1].  */
char input[] = "B\303\274\303\237er";
step2 (&state);
dumpbuf(buf); dumpstate("state",&state);
step3 (buf, &state, input);
/* "#if 1" selects the call to step4; the #else branch holds the same
   statements written inline and is compiled out.  */
#if 1
step4 (buf, &state, &temp_state, input);
#else
{
const char *src;
size_t ret;
src = input + 2;
temp_state = state;
ret = mbsrtowcs (NULL, &src, 2, &temp_state);
ASSERT (ret == 4);
ASSERT (src == input + 2);
ASSERT (!mbsinit (&state));
dumpbuf(buf); dumpstate("state",&state); dumpstate("temps",&temp_state);
}
#endif
/* "#if 1" selects the call to step5, which is only declared (extern) in
   this file; the #else branch is an inline variant of the same checks and
   is compiled out.  */
#if 1
step5 (buf, &state, input);
#else
{
const char *src;
size_t ret;
src = input + 2;
ret = mbsrtowcs (buf, &src, 2, &state);
ASSERT (ret == 2);
ASSERT (src == input + 5);
dumpbuf(buf); dumpstate("state",&state);
ASSERT (wctob (buf[0]) == EOF);
ASSERT (wctob (buf[1]) == EOF);
ASSERT (buf[2] == (wchar_t) 0xBADFACE);
ASSERT (mbsinit (&state));
}
#endif
}
}
return 0;
}
================================= bugpart2.c =================================
#include <wchar.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
/* Statement macro: when EXPR is false, print "<file>:<line>: assertion failed"
   to stderr, flush stderr and abort.  EXPR is evaluated exactly once.  */
#define ASSERT(expr) \
  do \
    { \
      if (expr) \
        break; \
      fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
      fflush (stderr); \
      abort (); \
    } \
  while (0)
#define BUFSIZE 10
extern void dumpbuf(wchar_t buf[BUFSIZE]);
extern void dumpstate(const char *prefix, mbstate_t *statep);
/* Convert up to 2 wide characters from the bytes starting at input + 2 into
   BUF with mbsrtowcs, using *STATEP as the conversion state, then check the
   outcome: 2 characters produced, the source pointer advanced to input + 5,
   wctob yielding EOF for both characters, buf[2] unchanged, and *STATEP back
   in an initial state.  BUF and *STATEP are dumped after the conversion.  */
void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input)
{
const char *src;
size_t ret;
src = input + 2;
ret = mbsrtowcs (buf, &src, 2, statep);
ASSERT (ret == 2);
ASSERT (src == input + 5);
dumpbuf(buf); dumpstate("state",statep);
/* BUF is read again after each wctob call, so these checks rely on wctob
   leaving the caller's view of BUF intact.  */
ASSERT (wctob (buf[0]) == EOF);
ASSERT (wctob (buf[1]) == EOF);
/* buf[2] must still compare equal to (wchar_t) 0xBADFACE.  */
ASSERT (buf[2] == (wchar_t) 0xBADFACE);
ASSERT (mbsinit (statep));
}
==============================================================================
$ gcc -c bugpart1.c -Wall
$ gcc -c bugpart2.c -Wall
$ gcc bugpart1.o bugpart2.o
$ ./a.exe
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 0000000000000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
temps = 00000000C3000000
buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE
state = 00000000C3000000
Then compile bugpart2 with optimization. The program crashes:
$ gcc -c bugpart2.c -Wall -O
$ gcc bugpart1.o bugpart2.o
$ ./a.exe
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 0000000000000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
temps = 00000000C3000000
buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE
state = 00000000C3000000
bugpart2.c:38: assertion failed
bash: [5528: 1] tcsetattr: Inappropriate ioctl for device
Aborted (core dumped)
Known facts:
- When GCC optimizes, it allocates variables in registers.
In this case, in bugpart2, the variable 'buf' gets tied to register %ebx.
- %ebx is a callee-saved register; see the value of CALL_USED_REGISTERS in
gcc-4.5.0/gcc/config/i386/i386.h.
Then single-step through bugpart2 (with gdb's 'nexti' command),
while looking at the values of the saved registers. The gcc
generated code is correct. Here it is, with comments on the
right-hand side:
-------------------------------------------------------------------------------------
_step5:
pushl %ebp
movl %esp, %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $44, %esp
movl 8(%ebp), %ebx buf
movl 12(%ebp), %edi statep
movl 16(%ebp), %esi input
leal 2(%esi), %eax input+2
movl %eax, -16(%ebp) src
movl %edi, 12(%esp)
movl $2, 8(%esp)
leal -16(%ebp), %eax
movl %eax, 4(%esp)
movl %ebx, (%esp)
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
call _mbsrtowcs call mbsrtowcs
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
cmpl $2, %eax ret == 2
je L2
...
L2:
leal 5(%esi), %eax input+5
cmpl %eax, -16(%ebp) == src
je L3
...
L3:
movl %ebx, (%esp) buf
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
call _dumpbuf
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
movl %edi, 4(%esp) statep
movl $LC2, (%esp)
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
call _dumpstate
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
movzwl (%ebx), %eax buf[0]
movl %eax, (%esp)
%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
call _wctob
%ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08
cmpl $-1, %eax
je L4
...
L4:
movzwl 2(%ebx), %eax buf[1]
movl %eax, (%esp)
%ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08
call _wctob
%ebx=0x228084 %esi=0x22ccf8 %edi=0x22cd08
cmpl $-1, %eax
je L5
...
L5:
cmpw $-1330, 4(%ebx) buf[2] == 0xbadface
je L6
.p2align 4,,6
...
L6:
movl %edi, (%esp) statep
call _mbsinit
testl %eax, %eax
jne L8
...
L8:
addl $44, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
-------------------------------------------------------------------------------------
You can see that across each call to wctob, %ebx is clobbered.
Origin of the bug:
==================
This is the code in wctob.c:
/* Convert the wide character C to a single byte by calling __wctomb with a
   freshly zeroed conversion state.  Yields the byte when __wctomb returns 1
   and C does not compare equal to EOF; otherwise yields WEOF.  */
int
wctob (wint_t c)
{
mbstate_t mbs;
int retval = 0;
/* NOTE(review): the output buffer is a single byte.  Per the analysis in
   this report, __wctomb may store up to MB_CUR_MAX bytes through the
   pointer it is given, which overruns this object.  */
unsigned char pwc;
/* Put mbs in initial state. */
memset (&mbs, '\0', sizeof (mbs));
_REENT_CHECK_MISC(_REENT);
retval = __wctomb (_REENT, &pwc, c, __locale_charset (), &mbs);
/* NOTE(review): C is a wint_t compared against EOF, and WEOF is returned
   from a function returning int; the standard failure value for wctob is
   EOF and the wide sentinel is WEOF, so the two look swapped -- confirm.  */
if (c == EOF || retval != 1)
return WEOF;
else
return (int)pwc;
}
And this is its disassembly:
-------------------------------------------------------------------------------
0x6110d510 <wctob>: push %ebp
0x6110d511 <wctob+1>: mov %esp,%ebp
0x6110d513 <wctob+3>: sub $0x38,%esp
0x6110d516 <wctob+6>: mov %ebx,-0xc(%ebp) save %ebx
0x6110d519 <wctob+9>: lea -0x18(%ebp),%ebx &mbs
0x6110d51c <wctob+12>: mov %esi,-0x8(%ebp) save %esi
0x6110d51f <wctob+15>: mov %edi,-0x4(%ebp) save %edi
0x6110d522 <wctob+18>: mov 0x8(%ebp),%edi c
0x6110d525 <wctob+21>: movl $0x8,0x8(%esp)
0x6110d52d <wctob+29>: movl $0x0,0x4(%esp)
0x6110d535 <wctob+37>: mov %ebx,(%esp)
0x6110d538 <wctob+40>: call 0x61107d30 <memset> call memset
0x6110d53d <wctob+45>: mov 0x6115da24,%esi
0x6110d543 <wctob+51>: call 0x61103a50 <__locale_charset>
0x6110d548 <wctob+56>: mov %ebx,0x10(%esp)
0x6110d54c <wctob+60>: mov %eax,0xc(%esp)
0x6110d550 <wctob+64>: movzwl %di,%eax
0x6110d553 <wctob+67>: mov %eax,0x8(%esp)
0x6110d557 <wctob+71>: lea -0xd(%ebp),%eax &pwc
0x6110d55a <wctob+74>: mov %eax,0x4(%esp)
0x6110d55e <wctob+78>: mov %fs:0x4,%eax
0x6110d564 <wctob+84>: sub $0x3000,%eax
0x6110d569 <wctob+89>: mov %eax,(%esp) _REENT
0x6110d56c <wctob+92>: call *%esi call __wctomb
0x6110d56e <wctob+94>: add $0x1,%edi
0x6110d571 <wctob+97>: je 0x6110d578 <wctob+104>
0x6110d573 <wctob+99>: sub $0x1,%eax
0x6110d576 <wctob+102>: je 0x6110d590 <wctob+128>
0x6110d578 <wctob+104>: mov $0xffffffff,%eax
0x6110d57d <wctob+109>: mov -0xc(%ebp),%ebx restore %ebx
0x6110d580 <wctob+112>: mov -0x8(%ebp),%esi restore %esi
0x6110d583 <wctob+115>: mov -0x4(%ebp),%edi restore %edi
0x6110d586 <wctob+118>: mov %ebp,%esp
0x6110d588 <wctob+120>: pop %ebp
0x6110d589 <wctob+121>: ret
0x6110d590 <wctob+128>: movzbl -0xd(%ebp),%eax
0x6110d594 <wctob+132>: jmp 0x6110d57d <wctob+109>
-------------------------------------------------------------------------------
You can see that the area where %ebx is saved is in the bytes %ebp-12..%ebp-9.
And in %ebp-13 you have the 'pwc' variable.
The bug is that you are passing a 1-byte buffer to a function which will
write up to MB_CUR_MAX bytes into this buffer. Of course it will clobber the
memory area next to the 1-byte buffer, and this is the %ebx save area!
This code dates back to 2002. When Cygwin did not support multibyte encodings,
MB_CUR_MAX was effectively 1 always. But now, for the UTF-8 encoding at least,
MB_CUR_MAX is effectively 4.
Bruno
--
Problem reports: http://cygwin.com/problems.html
FAQ: http://cygwin.com/faq/
Documentation: http://cygwin.com/docs.html
Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple
- Raw text -