Mail Archives: djgpp-workers/2000/11/25/04:53:33
On Fri, 24 Nov 2000, Eli Zaretskii wrote:
> Btw, did you test the case where CR is the last character in the file,
> with or without a Ctrl-Z after it? I don't see anything immediately
> wrong, but the logic is complicated, so I think it's worth testing.
Thank you for the advice. The bug has been fixed.
I hope that there will be no more surprises.
Regards,
Guerrero, Juan Manuel
diff -acprNC5 djgpp.orig/src/util/dtou.c djgpp/src/util/dtou.c
*** djgpp.orig/src/util/dtou.c Wed Nov 22 23:43:52 2000
--- djgpp/src/util/dtou.c Sat Nov 25 06:47:42 2000
***************
*** 12,98 ****
#ifndef O_BINARY
#define O_BINARY 0
#endif
static int
! dtou(char *fname)
{
! int i, k, k2, sf, df, l, l2=0, err=0, isCR=0;
! char buf[16384];
! char tfname[FILENAME_MAX], *bn, *w;
struct stat st;
struct utimbuf tim1;
! sf = open(fname, O_RDONLY|O_BINARY);
if (sf < 1)
{
! perror(fname);
! return 1;
}
fstat (sf,&st);
tim1.actime = st.st_atime;
tim1.modtime = st.st_mtime;
strcpy (tfname, fname);
! for (bn=w=tfname; *w; w++)
! if (*w=='/' || *w=='\\' || *w==':')
bn = w+1;
if (bn) *bn=0;
! strcat (tfname,"utod.tm$");
! df = open(tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644);
if (df < 1)
{
! perror(tfname);
! close(sf);
! return 1;
}
! k2=0;
! while ((l=read(sf, buf, 16384)) > 0)
{
! int CtrlZ=0;
! for (i=k=0; i<l; i++)
{
! if (isCR && buf[i]!=0x0A) buf[k++] = 0x0D;
! if (buf[i]==0x0D) { isCR=1; continue; }
! if (buf[i]==0x1A) { CtrlZ=1; break; }
! else buf[k++] = buf[i];
! isCR = 0;
}
! l2=(k>0 ? write(df, buf, k) : 0);
! if (l2<0 || CtrlZ) break;
! if (l2!=k) { err=1; break; }
}
! if (l<0) perror (fname);
! if (l2<0) perror (tfname);
! if (err) fprintf (stderr,"Cannot process file %s\n",fname);
! close(sf);
! close(df);
! if (l>=0 && l2>=0 && err==0)
{
! remove(fname);
! rename(tfname, fname);
! utime(fname, &tim1);
! chown(fname, st.st_uid, st.st_gid);
! chmod(fname, st.st_mode);
}
! else
{
! remove(tfname);
}
! return 0;
}
int
main(int argc, char **argv)
{
! int rv = 0;
! for (argc--, argv++; argc; argc--, argv++)
! rv += dtou(*argv);
! return rv;
! }
--- 12,319 ----
#ifndef O_BINARY
#define O_BINARY 0
#endif
+ #define IS_DIR_SEPARATOR(path) ((path) == '/' || (path) == '\\' || (path) == ':')
+ #define IS_LAST_CR_IN_BUF (i == l - 1)
+ #define IS_LAST_CR_IN_FILE (position + i + 1 == st.st_size)
+ #define SET_FLAG(flag) \
+ do { \
+ if ((flag) == 0) (flag) = 1; \
+ } while (0)
+ #define BUF_SIZE 16384
+
+ /* Control characters. */
+ #define LF 0x0A
+ #define CR 0x0D
+ #define CntlZ 0x1A
+
+ /* Exit codes. */
+ #define NO_ERROR 0x00
+ #define IO_ERROR 0x01 /* Some I/O error occurred. */
+
+
static int
! dtou(char *fname, int make_backup, int repair_mode, int strip_mode, int verbose, int vverbose, int preserve_timestamp)
{
! int i, k, sf, df, l, l2 = 0, is_CR = 0, is_nCR = 0, is_CR_sequence = 0;
! int CntlZ_flag = 0, CR_flag = 0, nCR_flag = 0, LF_flag = 0, exit_status = NO_ERROR;
! int buf_counter, nbufs, LF_counter, must_rewind, position, offset, whence;
! char buf[BUF_SIZE];
! char bfname[FILENAME_MAX], tfname[FILENAME_MAX], *bn, *w;
struct stat st;
struct utimbuf tim1;
!
! sf = open (fname, O_RDONLY|O_BINARY);
if (sf < 1)
{
! perror (fname);
! return IO_ERROR;
}
fstat (sf,&st);
tim1.actime = st.st_atime;
tim1.modtime = st.st_mtime;
+ nbufs = st.st_size / BUF_SIZE;
strcpy (tfname, fname);
! for (bn = w = tfname; *w; w++)
! if (IS_DIR_SEPARATOR (*w))
bn = w+1;
if (bn) *bn=0;
! strcat (tfname,"dtou.tm$");
! if (make_backup)
! {
! strcpy (bfname, fname);
! if (pathconf ((fname), _PC_NAME_MAX) <= 12)
! for (i = strlen (bfname); i > -1; i--)
! if (bfname[i] == '.')
! {
! bfname[i] = '\0';
! break;
! }
! strcat (bfname,".d2u");
! }
! df = open (tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644);
if (df < 1)
{
! perror (tfname);
! close (sf);
! return IO_ERROR;
}
! buf_counter = LF_counter = must_rewind = position = 0;
! if (strip_mode)
! {
! offset = 0;
! whence = SEEK_SET;
! }
! else
! {
! offset = -1;
! whence = SEEK_CUR;
! }
! while ((l = read (sf, buf, BUF_SIZE)) > 0)
{
! for (i = k = 0; i < l; i++)
! {
! if (strip_mode)
! {
! if (buf[i] == LF)
! {
! if (!(is_CR || is_nCR)) SET_FLAG (LF_flag);
! if (is_nCR) { SET_FLAG (nCR_flag); is_nCR = 0; }
! if (is_CR) { SET_FLAG (CR_flag); is_CR = 0; }
! LF_counter++;
! offset = must_rewind = 0;
! buf[k++] = buf[i]; continue;
! }
! if (is_CR_sequence)
! {
! if (buf[i] == CR) { buf[k++] = buf[i]; continue; }
! else is_CR_sequence = 0;
! }
! if (is_nCR)
! {
! if (buf[i] != CR || IS_LAST_CR_IN_FILE)
! {
! is_CR_sequence = must_rewind = 1;
! is_nCR = 0; break;
! }
! else
! continue;
! }
! if (is_CR && buf[i] == CR) { is_nCR = 1; is_CR = 0; continue; }
! if (buf[i] == CR)
! {
! if (IS_LAST_CR_IN_FILE) { buf[k++] = buf[i]; break; }
! is_CR = must_rewind = 1;
! offset = position + i;
! continue;
! }
! }
! else
{
! if (buf[i] == LF)
! {
! if (is_CR) SET_FLAG (CR_flag);
! if (!is_CR) SET_FLAG (LF_flag);
! LF_counter++;
! }
! if (is_CR && buf[i] != LF) buf[k++] = CR;
! if (buf[i] == CR)
! {
! if (IS_LAST_CR_IN_BUF)
! {
! if (buf_counter < nbufs)
! must_rewind = 1;
! else
! buf[k++] = CR;
! }
! is_CR = 1; continue;
! }
! is_CR = 0;
}
!
! if (!repair_mode)
! if (buf[i] == CntlZ) { SET_FLAG (CntlZ_flag); break; }
!
! buf[k++] = buf[i];
! }
!
! is_CR = 0;
! buf_counter++;
! position += l;
! if (must_rewind)
! {
! /* Last character/s in buf is/are CR/s.
! Push it/them back and reread it/them next time. */
! position = lseek (sf, offset, whence);
! must_rewind = 0;
! }
!
! l2 = (k > 0 ? write (df, buf, k) : 0);
! if (l2 < 0 || CntlZ_flag) break;
! if (l2 != k) { exit_status = IO_ERROR; break; }
}
! if (l < 0) perror (fname);
! if (l2 < 0) perror (tfname);
! if (exit_status != NO_ERROR)
! fprintf (stderr,"Cannot process file %s\n",fname);
! close (sf);
! close (df);
! if (l >= 0 && l2 >= 0 && exit_status == NO_ERROR)
{
! int file_has_changed = CR_flag || nCR_flag || CntlZ_flag || LF_flag;
!
! if (verbose)
! printf ("File: %s successfully processed.\n",fname);
! if (vverbose)
! printf ("File: %s\n",fname);
!
! if (CR_flag && vverbose)
! printf ("At least one CR/LF to LF transformation occurred.\n");
! if (nCR_flag && vverbose)
! printf ("Warning: At least one CR sequence stripped from a LF.\n");
! if (CntlZ_flag && vverbose)
! printf ("Warning: At least one Cntl-Z has been found. File truncated at line %i.\n", LF_counter);
! if (LF_flag && vverbose)
! printf ("Warning: At least one LF without a preceeding CR has been found.\n");
!
! if (vverbose && !file_has_changed)
! printf ("File unchanged.\n");
!
! if (make_backup && file_has_changed)
! rename (fname, bfname);
! else
! remove (fname);
! rename (tfname, fname);
! chown (fname, st.st_uid, st.st_gid);
! chmod (fname, st.st_mode);
! if (preserve_timestamp || !file_has_changed)
! utime (fname, &tim1);
}
! else
{
! remove (tfname);
! if (verbose || vverbose)
! printf ("File: %s. An I/O error occurred\n",fname);
}
!
! return exit_status;
! }
!
! void
! usage(char *progname)
! {
! printf ("Usage: %s [-b] [-h] [-r] [-s] [-t] [-v] [-vv] files...\n\n", progname);
! printf ("Options are:\n");
! printf (" -b: A backup of the original file is made using `.d2u' as backup\n");
! printf (" extension, if the file has been modified.\n");
! printf (" -h: Display this help and exit.\n");
! printf (" -r: Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF).\n");
! printf (" Cntl-Z are ignored and will not truncate the file and\n");
! printf (" CR sequences in front of LF will be left unchanged.\n");
! printf (" -s: Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF)\n");
! printf (" and strip a CR sequence of arbitrary length from the file,\n");
! printf (" if and only if the sequence is followed by LF. CR sequences\n");
! printf (" that are not followed by LF are always left unchanged.\n");
! printf (" -t: The timestamp of the modified file will not be preserved.\n");
! printf (" -v: Show if file processing has been successful or not.\n");
! printf (" -vv: Show the kind of modifications that have been done to the file.\n");
! printf ("The program is backward compatible with previous program versions if no options\n");
! printf ("are given at all. In this case, an occurrence of Cntl-Z will truncate the file,\n");
! printf ("MSDOS-style EOL (CRLF) are transformed into UNIX-style EOL (LF) and CR sequence\n");
! printf ("stripping will not happen at all. Also the timestamp will not be alterated and\n");
! printf ("no backup of the original file will be done.");
}
int
main(int argc, char **argv)
{
! int exit_status = NO_ERROR, i, make_backup, repair_mode;
! int strip_mode, verbose, vverbose, preserve_timestamp;
! char* progname = strlwr(strdup(argv[0]));
+ if (argc < 2)
+ {
+ usage (progname);
+ exit(NO_ERROR);
+ }
+
+ /* Default for backward compatibility. */
+ make_backup = repair_mode = strip_mode = verbose = vverbose = 0;
+ preserve_timestamp = 1;
+
+ i = 1;
+ while ((argc > i) && (argv[i][0] == '-') && argv[i][1])
+ {
+ switch (argv[i][1])
+ {
+ case 'b':
+ make_backup = 1;
+ break;
+ case 'h':
+ usage (progname);
+ exit(NO_ERROR);
+ break;
+ case 'r':
+ repair_mode = 1;
+ strip_mode = 0;
+ break;
+ case 's':
+ strip_mode = 1;
+ repair_mode = 0;
+ break;
+ case 't':
+ preserve_timestamp = 0;
+ break;
+ case 'v':
+ if (argv[i][2] == 'v')
+ {
+ vverbose = 1;
+ verbose = 0;
+ }
+ else
+ {
+ verbose = 1;
+ vverbose = 0;
+ }
+ break;
+ default:
+ fprintf (stderr, "%s: invalid option -- %s\n", progname, &argv[i][1]);
+ fprintf (stderr, "Try `%s -h' for more information.\n", progname);
+ exit (IO_ERROR);
+ break;
+ }
+ i++;
+ }
+
+ for (; i < argc; i++)
+ exit_status += dtou (argv[i], make_backup, repair_mode, strip_mode, verbose, vverbose, preserve_timestamp);
+ return exit_status;
+ }
diff -acprNC5 djgpp.orig/src/util/utils.tex djgpp/src/util/utils.tex
*** djgpp.orig/src/util/utils.tex Wed Nov 22 23:44:24 2000
--- djgpp/src/util/utils.tex Fri Nov 24 14:35:30 2000
*************** so that they won't get mixed with the fi
*** 320,334 ****
--- 320,422 ----
@c -----------------------------------------------------------------------------
@node dtou, utod, djtar, Top
@chapter dtou
+ Usage: @code{dtou} [@code{-b}] [@code{-h}] [@code{-r}] [@code{-s}] [@code{-t}]
+ [@code{-v}] [@code{-vv}] @file{files}
+
Each file specified on the command line is converted from dos's CR/LF
text file mode to unix's NL text file mode.
All djgpp wildcards are supported. Timestamps of the files are preserved.
+ @code{dtou} will pass an exit status of 0 to the calling context if all the
+ files have been successfully processed and an exit status greater than 0 if
+ not. In the latter case, the exit status is equal to the number of
+ unsuccessfully processed files. A file has not been successfully processed if
+ some kind of I/O error occurred.
+
+ @strong{Options:}
+
+ @table @code
+
+ @item -b
+
+ Creates a backup of the original file if the file has been modified.
+ @file{.d2u} is used as backup suffix. On systems with LFN support, the
+ backup suffix will be appended to the file name. If no LFN support is
+ available the backup suffix will overwrite the original file suffix.
+
+ @item -h
+
+ Displays a help text and exits.
+
+ @item -r
+
+ Repair mode. This mode transforms MSDOS-style EOL (CR/LF) into
+ UNIX-style EOL (LF). It ignores Cntl-Z, so it will not truncate the file.
+ CR sequences in front of LFs are left unchanged. This mode is intended
+ for repairing files that have erroneously been transmitted in text-mode
+ instead of binary-mode during an FTP session or with Windows versions of
+ Netscape which have the bad habit of transferring files with such extensions
+ as @file{.gz}, @file{.bz2}, @file{.tgz} as text files.
+
+ @item -s
+
+ Strip mode. It transforms MSDOS-style EOL (CR/LF) into UNIX-style EOL (LF)
+ and strips a CR sequence of arbitrary length from a file, if the sequence
+ is followed by a LF. CR sequences that are not followed by a LF are left
+ unchanged. This mode is intended to repair files that have been processed
+ with buggy ports of Unix software, which always blindly add a CR to a LF,
+ even if there is already a CR there.
+
+ @item -t
+
+ Timestamp. With this option the timestamp of a modified file will not be
+ preserved. The timestamp of an unmodified file will always be preserved.
+
+ @item -v
+
+ Verbose mode. Prints a single line showing the file name and if file
+ processing has been successful or not. The only case that a file is
+ considered as unsuccessfully processed is if an I/O error has occurred.
+
+ @item -vv
+
+ Very verbose mode. Prints the file name and shows the kind of modifications
+ that have been done to the file. All possible output looks like:
+
+ @example
+ File: foo.c
+ File unchanged.
+ At least one CR/LF to LF transformation occurred.
+ Warning: At least one CR sequence stripped from a LF.
+ Warning: At least one Cntl-Z has been found. File truncated
+ at line n.
+ Warning: At least one LF without a preceeding CR has been found.
+ @end example
+
+ Of course, not all of the above lines will appear all together. The first
+ line showing the file name will always be printed. If the file has not been
+ modified at all, then only the next line will be printed. If the file has
+ been modified, an appropriate combination of lines 3 to 6 will be printed,
+ indicating what has been modified. The above output is of some use, e.g.:
+ if @code{dtou} reports a LF without a preceding CR, this is a sure sign
+ that the file is either binary or has inconsistent EOL format for some other
+ reason. If @code{dtou} reports that a Cntl-Z (software EOF) has been found,
+ then the file will be truncated at that line and the rest of the file will be
+ lost.
+
+ @end table
+
+ The program is backward compatible with previous program versions if no options
+ are given at all. In this case, an occurrence of Cntl-Z will truncate the file,
+ MSDOS-style EOL (CR/LF) are transformed into UNIX-style EOL (LF) and CR sequence
+ stripping will not happen at all. Also the timestamp will not be altered and
+ no backup of the original file will be done.
+
@c -----------------------------------------------------------------------------
@node utod, gxx, dtou, Top
@chapter utod
Each file specified on the command line is converted from unix's NL text
- Raw text -