delorie.com/archives/browse.cgi   search  
Mail Archives: cygwin/2010/05/02/06:43:52

X-Recipient: archive-cygwin AT delorie DOT com
X-SWARE-Spam-Status: No, hits=1.0 required=5.0 tests=BAYES_50,DKIM_SIGNED,DKIM_VALID,RCVD_IN_DNSWL_NONE,TW_VZ,TW_ZB,TW_ZW,T_FILL_THIS_FORM
X-Spam-Check-By: sourceware.org
X-RZG-AUTH: :Ln4Re0+Ic/6oZXR1YgKryK8brksyK8dozXDwHXjf9hj/zDJRaPAn/CKsdwc=
X-RZG-CLASS-ID: mo00
From: Bruno Haible <bruno AT clisp DOT org>
To: cygwin AT cygwin DOT com
Subject: wctob function overwrites caller-owned register
Date: Sun, 2 May 2010 12:43:15 +0200
User-Agent: KMail/1.9.9
MIME-Version: 1.0
Message-Id: <201005021243.16515.bruno@clisp.org>
Mailing-List: contact cygwin-help AT cygwin DOT com; run by ezmlm
List-Id: <cygwin.cygwin.com>
List-Subscribe: <mailto:cygwin-subscribe AT cygwin DOT com>
List-Archive: <http://sourceware.org/ml/cygwin/>
List-Post: <mailto:cygwin AT cygwin DOT com>
List-Help: <mailto:cygwin-help AT cygwin DOT com>, <http://sourceware.org/ml/#faqs>
Sender: cygwin-owner AT cygwin DOT com
Mail-Followup-To: cygwin AT cygwin DOT com
Delivered-To: mailing list cygwin AT cygwin DOT com

In Cygwin 1.7.2, the wctob() function clobbers the %ebx register, which
belongs to the caller. The effects are random behaviour and crashes in
the caller.

How to reproduce:
=================

Compile this program, consisting of 2 parts, without optimization.
It works fine.

================================= bugpart1.c =================================
#include <wchar.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>

/* Minimal assertion macro.  When EXPR evaluates to false, print the source
   file name and line number of the failing check to stderr, flush stderr,
   and terminate the program with abort ().
   The body is wrapped in do { ... } while (0) so that an invocation acts
   as a single statement; the call site supplies the trailing semicolon.  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n",                      \
                   __FILE__, __LINE__);                                      \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)

#define BUFSIZE 10

/* Print the BUFSIZE elements of BUF on one line of stdout, in the form
   "buf = XXXX XXXX ...", each element as upper-case hex padded to at least
   four digits.  stdout is flushed afterwards so the line is not lost if
   the program aborts later.  */
void dumpbuf(wchar_t buf[BUFSIZE])
{
  int i;
  printf ("buf =");
  for (i = 0; i < BUFSIZE; i++)
    /* %X requires an unsigned int argument.  The underlying type of
       wchar_t differs between platforms, so convert explicitly instead
       of relying on the default argument promotions.  */
    printf(" %04X", (unsigned int) buf[i]);
  printf ("\n");
  fflush (stdout);
}

/* Print "PREFIX = " followed by the raw bytes of *STATEP, each as two
   upper-case hex digits, then a newline.  stdout is flushed afterwards.  */
void dumpstate(const char *prefix, mbstate_t *statep)
{
  const unsigned char *bytes = (const unsigned char *) statep;
  /* size_t matches the type of sizeof, which avoids comparing a signed
     index against an unsigned bound.  */
  size_t i;
  printf ("%s = ", prefix);
  for (i = 0; i < sizeof (mbstate_t); i++)
    /* %X requires an unsigned int argument.  */
    printf("%02X", (unsigned int) bytes[i]);
  printf ("\n");
  fflush (stdout);
}

/* Fill every one of the BUFSIZE elements of BUF with the sentinel value
   (wchar_t) 0xBADFACE, so that later steps can tell which elements have
   been overwritten.  */
void step1 (wchar_t buf[BUFSIZE])
{
        wchar_t *cursor = buf;
        wchar_t *const end = buf + BUFSIZE;

        while (cursor != end)
          *cursor++ = (wchar_t) 0xBADFACE;
}

/* Reset the conversion state *STATEP by setting all of its bytes to zero.  */
void step2 (mbstate_t *statep)
{
  memset (statep, 0, sizeof *statep);
}

/* Feed the single byte INPUT[1] to mbrtowc () using the state *STATEP and
   check that it is treated as an incomplete multibyte character.  Then
   overwrite INPUT[1] with '\0' and dump BUF and the state.  */
void step3 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input)
{
        wchar_t wc = (wchar_t) 0xBADFACE;
        size_t ret = mbrtowc (&wc, input + 1, 1, statep);

        /* (size_t)(-2) is mbrtowc's "incomplete character" result.  */
        ASSERT (ret == (size_t)(-2));
        /* No wide character may have been stored yet.  */
        ASSERT (wc == (wchar_t) 0xBADFACE);
        /* The pending byte must be remembered in the state.  */
        ASSERT (!mbsinit (statep));

        input[1] = '\0';

        dumpbuf(buf);
        dumpstate("state",statep);
}

/* Counting pass: call mbsrtowcs () with a NULL destination on the text at
   INPUT + 2, using a copy of *STATEP placed in *TEMP_STATEP.  Checks the
   returned count, that the source pointer did not move, and that *STATEP
   itself is still in a non-initial state, then dumps BUF and both states.  */
void step4 (wchar_t buf[BUFSIZE], mbstate_t *statep, mbstate_t *temp_statep, char *input)
{
        const char *src = input + 2;
        size_t ret;

        /* Convert with a scratch copy so that *statep is left untouched.  */
        *temp_statep = *statep;
        ret = mbsrtowcs (NULL, &src, 2, temp_statep);

        ASSERT (ret == 4);
        ASSERT (src == input + 2);
        ASSERT (!mbsinit (statep));

        dumpbuf(buf);
        dumpstate("state",statep);
        dumpstate("temps",temp_statep);
}

extern void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input);

/* Driver for the reproduction.  Selects the fr_FR.UTF-8 locale, then runs
   step1 .. step5 on a short UTF-8 string, dumping the wide-character
   buffer and the conversion state along the way.
   Returns 1 if the locale cannot be set, 0 when all steps complete
   (a failed ASSERT aborts the program instead).  */
int
main (int argc, char *argv[])
{
  /* The test needs a UTF-8 locale; give up if it is not available.  */
  if (setlocale (LC_ALL, "fr_FR.UTF-8") == NULL)
    return 1;

    {
      wchar_t buf[BUFSIZE];
      mbstate_t state;
      mbstate_t temp_state;

      /* Pre-fill buf with the 0xBADFACE sentinel.  */
      step1 (buf);

      /* Locale encoding is UTF-8.  */
      {
        /* 'B', then two 2-byte sequences (0xC3 0xBC and 0xC3 0x9F),
           then "er".  */
        char input[] = "B\303\274\303\237er";
        /* Zero the conversion state.  */
        step2 (&state);
        dumpbuf(buf); dumpstate("state",&state);

        /* Consume input[1] (the first 0xC3) as an incomplete character.  */
        step3 (buf, &state, input);

        /* "#if 1" selects the out-of-line step4; the #else branch is the
           same code written inline, kept for experimentation.  */
#if 1
        step4 (buf, &state, &temp_state, input);
#else
       {
        const char *src;
        size_t ret;
        src = input + 2;
        temp_state = state;
        ret = mbsrtowcs (NULL, &src, 2, &temp_state);
        ASSERT (ret == 4);
        ASSERT (src == input + 2);
        ASSERT (!mbsinit (&state));
        dumpbuf(buf); dumpstate("state",&state); dumpstate("temps",&temp_state);
       }
#endif

        /* "#if 1" selects step5 from the second source file (bugpart2.c);
           the #else branch is the same code written inline.  */
#if 1
        step5 (buf, &state, input);
#else
       {
        const char *src;
        size_t ret;
        src = input + 2;
        ret = mbsrtowcs (buf, &src, 2, &state);
        ASSERT (ret == 2);
        ASSERT (src == input + 5);
        dumpbuf(buf); dumpstate("state",&state);
        ASSERT (wctob (buf[0]) == EOF);
        ASSERT (wctob (buf[1]) == EOF);
        ASSERT (buf[2] == (wchar_t) 0xBADFACE);
        ASSERT (mbsinit (&state));
       }
#endif

      }
    }

  return 0;
}
================================= bugpart2.c =================================
#include <wchar.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>

#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n",                      \
                   __FILE__, __LINE__);                                      \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0) /* Reports file:line on stderr and aborts when expr is false; no trailing ';' here, the call site supplies it. */

#define BUFSIZE 10

extern void dumpbuf(wchar_t buf[BUFSIZE]);
extern void dumpstate(const char *prefix, mbstate_t *statep);

void step5 (wchar_t buf[BUFSIZE], mbstate_t *statep, char *input)
{ /* Converts at most 2 wide characters from input + 2 into buf using *statep, then checks buf and the state. */
        const char *src;
        size_t ret;
        src = input + 2;
        ret = mbsrtowcs (buf, &src, 2, statep);
        ASSERT (ret == 2); /* exactly two wide characters were stored */
        ASSERT (src == input + 5); /* src advanced by the three bytes consumed */
        dumpbuf(buf); dumpstate("state",statep);
        ASSERT (wctob (buf[0]) == EOF); /* the converted characters are expected to have no single-byte form */
        ASSERT (wctob (buf[1]) == EOF);
        ASSERT (buf[2] == (wchar_t) 0xBADFACE); /* sentinel must be intact; this is line 38 of bugpart2.c, the check reported as failing in the optimized run */
        ASSERT (mbsinit (statep)); /* state is back to initial after the complete characters */
}
==============================================================================
$ gcc -c bugpart1.c -Wall
$ gcc -c bugpart2.c -Wall
$ gcc bugpart1.o bugpart2.o
$ ./a.exe
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 0000000000000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
temps = 00000000C3000000
buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE
state = 00000000C3000000

Then compile bugpart2 with optimization. The program crashes:

$ gcc -c bugpart2.c -Wall -O
$ gcc bugpart1.o bugpart2.o
$ ./a.exe
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 0000000000000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
buf = FACE FACE FACE FACE FACE FACE FACE FACE FACE FACE
state = 01000000C3000000
temps = 00000000C3000000
buf = 00FC 00DF FACE FACE FACE FACE FACE FACE FACE FACE
state = 00000000C3000000
bugpart2.c:38: assertion failed
bash: [5528: 1] tcsetattr: Inappropriate ioctl for device
Aborted (core dumped)

Known facts:
  - When GCC optimizes, it allocates variables in registers.
    In this case, in bugpart2, the variable 'buf' gets tied to register %ebx.
  - %ebx is a call-saved register (its value must be preserved across
    function calls); see the value of CALL_USED_REGISTERS in
    gcc-4.5.0/gcc/config/i386/i386.h.

Then single-step through bugpart2 (with gdb's 'nexti' command),
while looking at the values of the saved registers. The gcc
generated code is correct. Here it is, with comments on the
right-hand side:

-------------------------------------------------------------------------------------
_step5:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $44, %esp
        movl    8(%ebp), %ebx		buf
        movl    12(%ebp), %edi		statep
        movl    16(%ebp), %esi		input
        leal    2(%esi), %eax		input+2
        movl    %eax, -16(%ebp)		src
        movl    %edi, 12(%esp)
        movl    $2, 8(%esp)
        leal    -16(%ebp), %eax
        movl    %eax, 4(%esp)
        movl    %ebx, (%esp)
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        call    _mbsrtowcs		call mbsrtowcs
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        cmpl    $2, %eax		ret == 2
        je      L2
        ...
L2:
        leal    5(%esi), %eax		input+5
        cmpl    %eax, -16(%ebp)		== src
        je      L3
        ...
L3:
        movl    %ebx, (%esp)		buf
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        call    _dumpbuf
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        movl    %edi, 4(%esp)		statep
        movl    $LC2, (%esp)
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        call    _dumpstate
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        movzwl  (%ebx), %eax		buf[0]
        movl    %eax, (%esp)
					%ebx=0x22cd10 %esi=0x22ccf8 %edi=0x22cd08
        call    _wctob
					%ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08
        cmpl    $-1, %eax
        je      L4
        ...
L4:
        movzwl  2(%ebx), %eax		buf[1]
        movl    %eax, (%esp)
					%ebx=0x22cdbc %esi=0x22ccf8 %edi=0x22cd08
        call    _wctob
					%ebx=0x228084 %esi=0x22ccf8 %edi=0x22cd08
        cmpl    $-1, %eax
        je      L5
        ...
L5:
        cmpw    $-1330, 4(%ebx)		buf[2] == 0xbadface
        je      L6
        .p2align 4,,6
        ...
L6:
        movl    %edi, (%esp)		statep
        call    _mbsinit
        testl   %eax, %eax
        jne     L8
        ...
L8:
        addl    $44, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret
-------------------------------------------------------------------------------------

You can see that across each call to wctob, %ebx is clobbered.


Origin of the bug:
==================

This is the code in wctob.c:

/* Convert the wide character C to a single byte.
   Calls __wctomb with a zero-initialized conversion state and the current
   locale charset.  Returns the produced byte when the conversion yielded
   exactly one byte and C is not EOF; otherwise returns WEOF.  */
int
wctob (wint_t c)
{
  mbstate_t mbs;
  int retval = 0;
  /* NOTE(review): this object is one byte long, yet its address is passed
     to __wctomb below as the output buffer.  As the explanation following
     the disassembly describes, __wctomb may store up to MB_CUR_MAX bytes,
     so every byte after the first lands in adjacent stack memory.  A
     buffer of MB_LEN_MAX bytes would be required here.  */
  unsigned char pwc;

  /* Put mbs in initial state. */
  memset (&mbs, '\0', sizeof (mbs));

  _REENT_CHECK_MISC(_REENT);

  retval = __wctomb (_REENT, &pwc, c, __locale_charset (), &mbs);

  /* NOTE(review): c has type wint_t but is compared against EOF, and this
     int-returning function returns WEOF; the two constants look swapped.
     Presumably harmless where both convert to the same value -- confirm.  */
  if (c == EOF || retval != 1)
    return WEOF;
  else
    return (int)pwc;
}

And this is its disassembly:

-------------------------------------------------------------------------------
0x6110d510 <wctob>:     push   %ebp
0x6110d511 <wctob+1>:   mov    %esp,%ebp
0x6110d513 <wctob+3>:   sub    $0x38,%esp
0x6110d516 <wctob+6>:   mov    %ebx,-0xc(%ebp)		save %ebx
0x6110d519 <wctob+9>:   lea    -0x18(%ebp),%ebx		&mbs
0x6110d51c <wctob+12>:  mov    %esi,-0x8(%ebp)		save %esi
0x6110d51f <wctob+15>:  mov    %edi,-0x4(%ebp)		save %edi
0x6110d522 <wctob+18>:  mov    0x8(%ebp),%edi		c

0x6110d525 <wctob+21>:  movl   $0x8,0x8(%esp)
0x6110d52d <wctob+29>:  movl   $0x0,0x4(%esp)
0x6110d535 <wctob+37>:  mov    %ebx,(%esp)
0x6110d538 <wctob+40>:  call   0x61107d30 <memset>	call memset

0x6110d53d <wctob+45>:  mov    0x6115da24,%esi
0x6110d543 <wctob+51>:  call   0x61103a50 <__locale_charset>
0x6110d548 <wctob+56>:  mov    %ebx,0x10(%esp)
0x6110d54c <wctob+60>:  mov    %eax,0xc(%esp)
0x6110d550 <wctob+64>:  movzwl %di,%eax
0x6110d553 <wctob+67>:  mov    %eax,0x8(%esp)
0x6110d557 <wctob+71>:  lea    -0xd(%ebp),%eax		&pwc
0x6110d55a <wctob+74>:  mov    %eax,0x4(%esp)
0x6110d55e <wctob+78>:  mov    %fs:0x4,%eax
0x6110d564 <wctob+84>:  sub    $0x3000,%eax
0x6110d569 <wctob+89>:  mov    %eax,(%esp)		_REENT
0x6110d56c <wctob+92>:  call   *%esi			call __wctomb

0x6110d56e <wctob+94>:  add    $0x1,%edi
0x6110d571 <wctob+97>:  je     0x6110d578 <wctob+104>
0x6110d573 <wctob+99>:  sub    $0x1,%eax
0x6110d576 <wctob+102>: je     0x6110d590 <wctob+128>
0x6110d578 <wctob+104>: mov    $0xffffffff,%eax
0x6110d57d <wctob+109>: mov    -0xc(%ebp),%ebx		restore %ebx
0x6110d580 <wctob+112>: mov    -0x8(%ebp),%esi		restore %esi
0x6110d583 <wctob+115>: mov    -0x4(%ebp),%edi		restore %edi
0x6110d586 <wctob+118>: mov    %ebp,%esp
0x6110d588 <wctob+120>: pop    %ebp
0x6110d589 <wctob+121>: ret    

0x6110d590 <wctob+128>: movzbl -0xd(%ebp),%eax
0x6110d594 <wctob+132>: jmp    0x6110d57d <wctob+109>
-------------------------------------------------------------------------------

You can see that the area where %ebx is saved is in the bytes %ebp-12..%ebp-9.
And in %ebp-13 you have the 'pwc' variable.

The bug is that you are passing a 1-byte buffer to a function which will
write up to MB_CUR_MAX bytes into this buffer. Of course it will clobber the
memory area next to the 1-byte buffer, and this is the %ebx save area!

This code dates back to 2002. When Cygwin did not support multibyte encodings,
MB_CUR_MAX was effectively 1 always. But now, for the UTF-8 encoding at least,
MB_CUR_MAX is effectively 4.

Bruno

--
Problem reports:       http://cygwin.com/problems.html
FAQ:                   http://cygwin.com/faq/
Documentation:         http://cygwin.com/docs.html
Unsubscribe info:      http://cygwin.com/ml/#unsubscribe-simple

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019