delorie.com/archives/browse.cgi   search  
Mail Archives: djgpp-workers/2000/11/25/04:53:33

From: "Juan Manuel Guerrero" <ST001906 AT HRZ1 DOT HRZ DOT TU-Darmstadt DOT De>
Organization: Darmstadt University of Technology
To: eliz AT is DOT elta DOT co DOT il
Date: Sat, 25 Nov 2000 10:52:38 +0200
MIME-Version: 1.0
Subject: Patch #3 for dtou.c
CC: djgpp-workers AT delorie DOT com
X-mailer: Pegasus Mail for Windows (v2.54DE)
Message-ID: <8B9F3722CF@HRZ1.hrz.tu-darmstadt.de>
Reply-To: djgpp-workers AT delorie DOT com

On Fri, 24 Nov 2000, Eli Zaretskii wrote:

> Btw, did you test the case where CR is the last character in the file,
> with or without a Ctrl-Z after it?  I don't see anything immediately
> wrong, but the logic is complicated, so I think it's worth testing.

Thank you for the advice. The bug has been fixed.
I hope that there will be no more surprises.

Regards,
Guerrero, Juan Manuel


diff -acprNC5 djgpp.orig/src/util/dtou.c djgpp/src/util/dtou.c
*** djgpp.orig/src/util/dtou.c	Wed Nov 22 23:43:52 2000
--- djgpp/src/util/dtou.c	Sat Nov 25 06:47:42 2000
***************
*** 12,98 ****
  
  #ifndef O_BINARY
  #define O_BINARY 0
  #endif
  
  static int
! dtou(char *fname)
  {
!   int i, k, k2, sf, df, l, l2=0, err=0, isCR=0;
!   char buf[16384];
!   char tfname[FILENAME_MAX], *bn, *w;
    struct stat st;
    struct utimbuf tim1;
!   sf = open(fname, O_RDONLY|O_BINARY);
    if (sf < 1)
    {
!     perror(fname);
!     return 1;
    }
    
    fstat (sf,&st);
    tim1.actime = st.st_atime;
    tim1.modtime = st.st_mtime;
  
    strcpy (tfname, fname);
!   for (bn=w=tfname; *w; w++) 
!     if (*w=='/' || *w=='\\' || *w==':') 
        bn = w+1;  
    if (bn) *bn=0;
!   strcat (tfname,"utod.tm$");
    
!   df = open(tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644);
    if (df < 1)
    {
!     perror(tfname);
!     close(sf);
!     return 1;
    }
  
!   k2=0;
!   while ((l=read(sf, buf, 16384)) > 0)
    { 
!     int CtrlZ=0;
!     for (i=k=0; i<l; i++) 
        {
!          if (isCR && buf[i]!=0x0A) buf[k++] = 0x0D; 
!          if (buf[i]==0x0D) { isCR=1; continue; }
!          if (buf[i]==0x1A) { CtrlZ=1; break; }
!          	     else    buf[k++] = buf[i];
!          isCR = 0;
        }
!     l2=(k>0 ? write(df, buf, k) : 0);
!     if (l2<0 || CtrlZ) break;
!     if (l2!=k) { err=1; break; }
    }
  
!   if (l<0) perror (fname);
!   if (l2<0) perror (tfname);
!   if (err) fprintf (stderr,"Cannot process file %s\n",fname);
  
!   close(sf);
!   close(df);
  
!   if (l>=0 && l2>=0 && err==0)
    {
!     remove(fname);
!     rename(tfname, fname);
!     utime(fname, &tim1);
!     chown(fname, st.st_uid, st.st_gid);
!     chmod(fname, st.st_mode);
    }
!   else 
    {
!     remove(tfname);
    }
!   return 0;
  }
  
  int
  main(int argc, char **argv)
  {
!   int rv = 0;
!   for (argc--, argv++; argc; argc--, argv++)
!     rv += dtou(*argv);
!   return rv;
! }
  
--- 12,319 ----
  
  #ifndef O_BINARY
  #define O_BINARY 0
  #endif
  
+ #define IS_DIR_SEPARATOR(path) ((path) == '/' || (path) == '\\' || (path) == ':')
+ #define IS_LAST_CR_IN_BUF  (i == l - 1)
+ #define IS_LAST_CR_IN_FILE (position + i + 1 == st.st_size)
+ #define SET_FLAG(flag)         \
+ do {                           \
+   if ((flag) == 0) (flag) = 1; \
+ } while (0)
+ #define BUF_SIZE      16384
+ 
+ /* Control characters. */    
+ #define LF            0x0A
+ #define CR            0x0D
+ #define CntlZ         0x1A
+ 
+ /* Exit codes. */
+ #define NO_ERROR      0x00
+ #define IO_ERROR      0x01  /* Some I/O error occurred. */
+ 
+ 
  static int
! dtou(char *fname, int make_backup, int repair_mode, int strip_mode, int verbose, int vverbose, int preserve_timestamp)
  {
!   int i, k, sf, df, l, l2 = 0, is_CR = 0, is_nCR = 0, is_CR_sequence = 0;
!   int CntlZ_flag = 0, CR_flag = 0, nCR_flag = 0, LF_flag = 0, exit_status = NO_ERROR;
!   int buf_counter, nbufs, LF_counter, must_rewind, position, offset, whence;
!   char buf[BUF_SIZE];
!   char bfname[FILENAME_MAX], tfname[FILENAME_MAX], *bn, *w;
    struct stat st;
    struct utimbuf tim1;
! 
!   sf = open (fname, O_RDONLY|O_BINARY);
    if (sf < 1)
    {
!     perror (fname);
!     return IO_ERROR;
    }
    
    fstat (sf,&st);
    tim1.actime = st.st_atime;
    tim1.modtime = st.st_mtime;
+   nbufs = st.st_size / BUF_SIZE;
  
    strcpy (tfname, fname);
!   for (bn = w = tfname; *w; w++) 
!     if (IS_DIR_SEPARATOR (*w))
        bn = w+1;  
    if (bn) *bn=0;
!   strcat (tfname,"dtou.tm$");
!   if (make_backup)
!   {
!     strcpy (bfname, fname);
!     if (pathconf ((fname), _PC_NAME_MAX) <= 12)
!       for (i = strlen (bfname); i > -1; i--)
!         if (bfname[i] == '.')
!         {
!           bfname[i] = '\0';
!           break;
!         }
!     strcat (bfname,".d2u");
!   }
    
!   df = open (tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644);
    if (df < 1)
    {
!     perror (tfname);
!     close (sf);
!     return IO_ERROR;
    }
  
!   buf_counter = LF_counter = must_rewind = position = 0;
!   if (strip_mode)
!   {
!     offset = 0;
!     whence = SEEK_SET;
!   }
!   else
!   {
!     offset = -1;
!     whence = SEEK_CUR;
!   }
!   while ((l = read (sf, buf, BUF_SIZE)) > 0)
    { 
!     for (i = k = 0; i < l; i++) 
!     {
!       if (strip_mode)
!       {
!         if (buf[i] == LF)
!         {
!           if (!(is_CR || is_nCR)) SET_FLAG (LF_flag);
!           if (is_nCR) { SET_FLAG (nCR_flag); is_nCR = 0; }
!           if (is_CR) { SET_FLAG (CR_flag); is_CR = 0; }
!           LF_counter++;
!           offset = must_rewind = 0;
!           buf[k++] = buf[i]; continue;
!         }
!         if (is_CR_sequence)
!         {
!           if (buf[i] == CR) { buf[k++] = buf[i]; continue; }
!           else is_CR_sequence = 0;
!         }
!         if (is_nCR)
!         {
!           if (buf[i] != CR || IS_LAST_CR_IN_FILE)
!           {
!             is_CR_sequence = must_rewind = 1;
!             is_nCR = 0; break;
!           }
!           else
!             continue;
!         }
!         if (is_CR && buf[i] == CR) { is_nCR = 1; is_CR = 0; continue; }
!         if (buf[i] == CR)
!         {
!           if (IS_LAST_CR_IN_FILE) { buf[k++] = buf[i]; break; }
!           is_CR = must_rewind = 1;
!           offset = position + i;
!           continue;
!         }
!       }
!       else
        {
!         if (buf[i] == LF)
!         {
!           if (is_CR)  SET_FLAG (CR_flag);
!           if (!is_CR) SET_FLAG (LF_flag);
!           LF_counter++;
!         }
!         if (is_CR && buf[i] != LF) buf[k++] = CR;
!         if (buf[i] == CR)
!         {
!           if (IS_LAST_CR_IN_BUF)
!           {
!             if (buf_counter < nbufs)
!               must_rewind = 1;
!             else
!               buf[k++] = CR;
!           }
!           is_CR = 1; continue;
!         }
!         is_CR = 0;
        }
! 
!       if (!repair_mode)
!         if (buf[i] == CntlZ) { SET_FLAG (CntlZ_flag); break; }
! 
!       buf[k++] = buf[i];
!     }
! 
!     is_CR = 0;
!     buf_counter++;
!     position += l;
!     if (must_rewind)
!     {
!       /* Last character/s in buf is/are CR/s.
!          Push it/them back and reread it/them next time. */
!       position = lseek (sf, offset, whence);
!       must_rewind = 0;
!     }
! 
!     l2 = (k > 0 ? write (df, buf, k) : 0);
!     if (l2 < 0 || CntlZ_flag) break;
!     if (l2 != k) { exit_status = IO_ERROR; break; }
    }
  
!   if (l < 0) perror (fname);
!   if (l2 < 0) perror (tfname);
!   if (exit_status != NO_ERROR)
!     fprintf (stderr,"Cannot process file %s\n",fname);
  
!   close (sf);
!   close (df);
  
!   if (l >= 0 && l2 >= 0 && exit_status == NO_ERROR)
    {
!     int file_has_changed = CR_flag || nCR_flag || CntlZ_flag || LF_flag;
! 
!     if (verbose)
!       printf ("File: %s successfully processed.\n",fname);
!     if (vverbose)
!       printf ("File: %s\n",fname);
! 
!     if (CR_flag && vverbose) 
!       printf ("At least one CR/LF to LF transformation occurred.\n");
!     if (nCR_flag && vverbose) 
!       printf ("Warning: At least one CR sequence stripped from a LF.\n");
!     if (CntlZ_flag && vverbose) 
!       printf ("Warning: At least one Cntl-Z has been found. File truncated at line %i.\n", LF_counter);
!     if (LF_flag && vverbose) 
!       printf ("Warning: At least one LF without a preceeding CR has been found.\n");
! 
!     if (vverbose && !file_has_changed)
!       printf ("File unchanged.\n");
! 
!     if (make_backup && file_has_changed)
!       rename (fname, bfname);
!     else
!       remove (fname);
!     rename (tfname, fname);
!     chown (fname, st.st_uid, st.st_gid);
!     chmod (fname, st.st_mode);
!     if (preserve_timestamp || !file_has_changed)
!       utime (fname, &tim1);
    }
!   else
    {
!     remove (tfname);
!     if (verbose || vverbose)
!       printf ("File: %s. An I/O error occurred\n",fname);
    }
! 
!   return exit_status;
! }
! 
! void
! usage(char *progname)
! {
!   printf ("Usage: %s [-b] [-h] [-r] [-s] [-t] [-v] [-vv] files...\n\n", progname);
!   printf ("Options are:\n");
!   printf ("            -b:  A backup of the original file is made using `.d2u' as backup\n");
!   printf ("                 extension, if the file has been modified.\n");
!   printf ("            -h:  Display this help and exit.\n");
!   printf ("            -r:  Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF).\n");
!   printf ("                 Cntl-Z are ignored and will not truncate the file and\n");
!   printf ("                 CR sequences in front of LF will be left unchanged.\n");
!   printf ("            -s:  Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF)\n");
!   printf ("                 and strip a CR sequence of arbitrary length from the file,\n");
!   printf ("                 if and only if the sequence is followed by LF. CR sequences\n");
!   printf ("                 that are not followed by LF are always left unchanged.\n");
!   printf ("            -t:  The timestamp of the modified file will not be preserved.\n");
!   printf ("            -v:  Show if file processing has been successful or not.\n");
!   printf ("           -vv:  Show the kind of modifications that have been done to the file.\n");
!   printf ("The program is backward compatible with previous program versions if no options\n");
!   printf ("are given at all. In this case, an occurrence of Cntl-Z will truncate the file,\n");
!   printf ("MSDOS-style EOL (CRLF) are transformed into UNIX-style EOL (LF) and CR sequence\n");
!   printf ("stripping will not happen at all. Also the timestamp will not be alterated and\n");
!   printf ("no backup of the original file will be done.");
  }
  
  int
  main(int argc, char **argv)
  {
!   int exit_status = NO_ERROR, i, make_backup, repair_mode;
!   int strip_mode, verbose, vverbose, preserve_timestamp;
!   char* progname = strlwr(strdup(argv[0]));
  
+   if (argc < 2)
+   {
+     usage (progname);
+     exit(NO_ERROR);
+   }
+ 
+   /* Default for backward compatibility. */ 
+   make_backup = repair_mode = strip_mode = verbose = vverbose = 0;
+   preserve_timestamp = 1;
+ 
+   i = 1;
+   while ((argc > i) && (argv[i][0] == '-') && argv[i][1])
+   {
+     switch (argv[i][1])
+     {
+       case 'b':
+         make_backup = 1;
+         break;
+       case 'h':
+         usage (progname);
+         exit(NO_ERROR);
+         break;
+       case 'r':
+         repair_mode = 1;
+         strip_mode = 0;
+         break;
+       case 's':
+         strip_mode = 1;
+         repair_mode = 0;
+         break;
+       case 't':
+         preserve_timestamp = 0;
+         break;
+       case 'v':
+         if (argv[i][2] == 'v')
+         {
+           vverbose = 1;
+           verbose = 0;
+         }
+         else
+         {
+           verbose = 1;
+           vverbose = 0;
+         }
+         break;
+       default:
+         fprintf (stderr, "%s: invalid option -- %s\n", progname, &argv[i][1]);
+         fprintf (stderr, "Try `%s -h' for more information.\n", progname);
+         exit (IO_ERROR);
+         break;
+     }
+     i++;
+   }
+ 
+   for (; i < argc; i++)
+     exit_status += dtou (argv[i], make_backup, repair_mode, strip_mode, verbose, vverbose, preserve_timestamp);
+   return exit_status;
+ }
diff -acprNC5 djgpp.orig/src/util/utils.tex djgpp/src/util/utils.tex
*** djgpp.orig/src/util/utils.tex	Wed Nov 22 23:44:24 2000
--- djgpp/src/util/utils.tex	Fri Nov 24 14:35:30 2000
*************** so that they won't get mixed with the fi
*** 320,334 ****
--- 320,422 ----
  
  @c -----------------------------------------------------------------------------
  @node dtou, utod, djtar, Top
  @chapter dtou
  
+ Usage: @code{dtou} [@code{-b}] [@code{-h}] [@code{-r}] [@code{-s}] [@code{-t}]
+ [@code{-v}] [@code{-vv}] @file{files}
+ 
  Each file specified on the command line is converted from dos's CR/LF
  text file mode to unix's NL text file mode.
  
  All djgpp wildcards are supported.  Timestamps of the files are preserved.
  
+ @code{dtou} will pass an exit status of 0 to the calling context if all the
+ files have been successfully processed and an exit status greater than 0 if
+ not.  In this case, the exit status is equal to the number of unsuccessfully
+ processed files.  A file has not been successfully processed if some kind of
+ I/O error occurred.
+ 
+ @strong{Options:}
+ 
+ @table @code
+ 
+ @item -b
+ 
+ Creates a backup of the original file if the file has been modified.
+ @file{.d2u} is used as backup suffix.  On systems with LFN support, the
+ backup suffix will be appended to the file name.  If no LFN support is
+ available the backup suffix will overwrite the original file suffix.
+ 
+ @item -h
+ 
+ Displays a help text and exits.
+ 
+ @item -r
+ 
+ Repair mode.  This mode transforms MSDOS-style EOL (CR/LF) into
+ UNIX-style EOL (LF).  It ignores Cntl-Z thus it will not truncate the file.
+ CR sequences in front of LFs are left unchanged.  This mode is intended
+ for repairing files that have erroneously been transmitted in text-mode
+ instead of binary-mode during a FTP session or with Windows versions of
+ Netscape which have the bad habit of transferring files with such extensions
+ as @file{.gz}, @file{.bz2}, @file{.tgz} as text files.
+ 
+ @item -s
+ 
+ Strip mode.  It transforms MSDOS-style EOL (CR/LF) into UNIX-style EOL (LF)
+ and strips a CR sequence of arbitrary length from a file, if the sequence
+ is followed by a LF.  CR sequences that are not followed by a LF are left
+ unchanged.  This mode is intended to repair files that have been processed
+ with buggy ports of Unix software, which always blindly add a CR to a LF,
+ even if there is already a CR there.
+ 
+ @item -t
+ 
+ Timestamp.  With this option the timestamp of a modified file will not be
+ preserved.  The timestamp of an unmodified file will always be preserved.
+ 
+ @item -v
+ 
+ Verbose mode.  Prints a single line showing the file name and if file
+ processing has been successful or not.  The only case that a file is
+ considered as unsuccessfully processed is if an I/O error has occurred.
+ 
+ @item -vv
+ 
+ Very verbose mode.  Prints the file name and shows the kind of modifications
+ that have been done to the file.  All possible output looks like:
+ 
+ @example
+ File: foo.c
+ File unchanged.
+ At least one CR/LF to LF transformation occurred.
+ Warning: At least one CR sequence stripped from a LF.
+ Warning: At least one Cntl-Z has been found. File truncated
+          at line n.
+ Warning: At least one LF without a preceeding CR has been found.
+ @end example
+ 
+ Of course, not all of the above lines will appear all together.  The first
+ line showing the file name will always be printed.  If the file has not been
+ modified at all, then only the next line will be printed.  If the file has
+ been modified an appropriate combination of the lines 3 to 6 will be printed,
+ indicating what has been modified.  The above output is of some use, e.g.:
+ if @code{dtou} reports a LF without a preceding CR this will be a sure sign
+ that the file is either binary or has inconsistent EOL format for some other
+ reason.  If @code{dtou} reports that a Cntl-Z (software EOF) has been found,
+ then the file will be truncated at that line and the rest of the file will be
+ lost.
+ 
+ @end table
+ 
+ The program is backward compatible with previous program versions if no options
+ are given at all.  In this case, an occurrence of Cntl-Z will truncate the file,
+ MSDOS-style EOL (CR/LF) are transformed into UNIX-style EOL (LF) and CR sequence
+ stripping will not happen at all.  Also the timestamp will not be altered and
+ no backup of the original file will be done.
+ 
  @c -----------------------------------------------------------------------------
  @node utod, gxx, dtou, Top
  @chapter utod
  
  Each file specified on the command line is converted from unix's NL text

- Raw text -


  webmaster     delorie software   privacy  
  Copyright © 2019   by DJ Delorie     Updated Jul 2019