From: "Juan Manuel Guerrero" Organization: Darmstadt University of Technology To: eliz AT is DOT elta DOT co DOT il Date: Sat, 25 Nov 2000 10:52:38 +0200 MIME-Version: 1.0 Content-type: text/plain; charset=US-ASCII Content-transfer-encoding: 7BIT Subject: Patch #3 for dtou.c CC: djgpp-workers AT delorie DOT com X-mailer: Pegasus Mail for Windows (v2.54DE) Message-ID: <8B9F3722CF@HRZ1.hrz.tu-darmstadt.de> Reply-To: djgpp-workers AT delorie DOT com On Fri, 24 Nov 2000, Eli Zaretskii wrote: > Btw, did you test the case where CR is the last character in the file, > with or without a Ctrl-Z after it? I don't see anything immediately > wrong, but the logic is complicated, so I think it's worth testing. Thank you for the advice. The bug has been fixed. I hope that there will be no more surprises. Regards, Guerrero, Juan Manuel diff -acprNC5 djgpp.orig/src/util/dtou.c djgpp/src/util/dtou.c *** djgpp.orig/src/util/dtou.c Wed Nov 22 23:43:52 2000 --- djgpp/src/util/dtou.c Sat Nov 25 06:47:42 2000 *************** *** 12,98 **** #ifndef O_BINARY #define O_BINARY 0 #endif static int ! dtou(char *fname) { ! int i, k, k2, sf, df, l, l2=0, err=0, isCR=0; ! char buf[16384]; ! char tfname[FILENAME_MAX], *bn, *w; struct stat st; struct utimbuf tim1; ! sf = open(fname, O_RDONLY|O_BINARY); if (sf < 1) { ! perror(fname); ! return 1; } fstat (sf,&st); tim1.actime = st.st_atime; tim1.modtime = st.st_mtime; strcpy (tfname, fname); ! for (bn=w=tfname; *w; w++) ! if (*w=='/' || *w=='\\' || *w==':') bn = w+1; if (bn) *bn=0; ! strcat (tfname,"utod.tm$"); ! df = open(tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644); if (df < 1) { ! perror(tfname); ! close(sf); ! return 1; } ! k2=0; ! while ((l=read(sf, buf, 16384)) > 0) { ! int CtrlZ=0; ! for (i=k=0; i0 ? write(df, buf, k) : 0); ! if (l2<0 || CtrlZ) break; ! if (l2!=k) { err=1; break; } } ! if (l<0) perror (fname); ! if (l2<0) perror (tfname); ! if (err) fprintf (stderr,"Cannot process file %s\n",fname); ! close(sf); ! close(df); ! 
if (l>=0 && l2>=0 && err==0) { ! remove(fname); ! rename(tfname, fname); ! utime(fname, &tim1); ! chown(fname, st.st_uid, st.st_gid); ! chmod(fname, st.st_mode); } ! else { ! remove(tfname); } ! return 0; } int main(int argc, char **argv) { ! int rv = 0; ! for (argc--, argv++; argc; argc--, argv++) ! rv += dtou(*argv); ! return rv; ! } --- 12,319 ---- #ifndef O_BINARY #define O_BINARY 0 #endif + #define IS_DIR_SEPARATOR(path) ((path) == '/' || (path) == '\\' || (path) == ':') + #define IS_LAST_CR_IN_BUF (i == l - 1) + #define IS_LAST_CR_IN_FILE (position + i + 1 == st.st_size) + #define SET_FLAG(flag) \ + do { \ + if ((flag) == 0) (flag) = 1; \ + } while (0) + #define BUF_SIZE 16384 + + /* Control characters. */ + #define LF 0x0A + #define CR 0x0D + #define CntlZ 0x1A + + /* Exit codes. */ + #define NO_ERROR 0x00 + #define IO_ERROR 0x01 /* Some I/O error occurred. */ + + static int ! dtou(char *fname, int make_backup, int repair_mode, int strip_mode, int verbose, int vverbose, int preserve_timestamp) { ! int i, k, sf, df, l, l2 = 0, is_CR = 0, is_nCR = 0, is_CR_sequence = 0; ! int CntlZ_flag = 0, CR_flag = 0, nCR_flag = 0, LF_flag = 0, exit_status = NO_ERROR; ! int buf_counter, nbufs, LF_counter, must_rewind, position, offset, whence; ! char buf[BUF_SIZE]; ! char bfname[FILENAME_MAX], tfname[FILENAME_MAX], *bn, *w; struct stat st; struct utimbuf tim1; ! ! sf = open (fname, O_RDONLY|O_BINARY); if (sf < 1) { ! perror (fname); ! return IO_ERROR; } fstat (sf,&st); tim1.actime = st.st_atime; tim1.modtime = st.st_mtime; + nbufs = st.st_size / BUF_SIZE; strcpy (tfname, fname); ! for (bn = w = tfname; *w; w++) ! if (IS_DIR_SEPARATOR (*w)) bn = w+1; if (bn) *bn=0; ! strcat (tfname,"dtou.tm$"); ! if (make_backup) ! { ! strcpy (bfname, fname); ! if (pathconf ((fname), _PC_NAME_MAX) <= 12) ! for (i = strlen (bfname); i > -1; i--) ! if (bfname[i] == '.') ! { ! bfname[i] = '\0'; ! break; ! } ! strcat (bfname,".d2u"); ! } ! 
df = open (tfname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644); if (df < 1) { ! perror (tfname); ! close (sf); ! return IO_ERROR; } ! buf_counter = LF_counter = must_rewind = position = 0; ! if (strip_mode) ! { ! offset = 0; ! whence = SEEK_SET; ! } ! else ! { ! offset = -1; ! whence = SEEK_CUR; ! } ! while ((l = read (sf, buf, BUF_SIZE)) > 0) { ! for (i = k = 0; i < l; i++) ! { ! if (strip_mode) ! { ! if (buf[i] == LF) ! { ! if (!(is_CR || is_nCR)) SET_FLAG (LF_flag); ! if (is_nCR) { SET_FLAG (nCR_flag); is_nCR = 0; } ! if (is_CR) { SET_FLAG (CR_flag); is_CR = 0; } ! LF_counter++; ! offset = must_rewind = 0; ! buf[k++] = buf[i]; continue; ! } ! if (is_CR_sequence) ! { ! if (buf[i] == CR) { buf[k++] = buf[i]; continue; } ! else is_CR_sequence = 0; ! } ! if (is_nCR) ! { ! if (buf[i] != CR || IS_LAST_CR_IN_FILE) ! { ! is_CR_sequence = must_rewind = 1; ! is_nCR = 0; break; ! } ! else ! continue; ! } ! if (is_CR && buf[i] == CR) { is_nCR = 1; is_CR = 0; continue; } ! if (buf[i] == CR) ! { ! if (IS_LAST_CR_IN_FILE) { buf[k++] = buf[i]; break; } ! is_CR = must_rewind = 1; ! offset = position + i; ! continue; ! } ! } ! else { ! if (buf[i] == LF) ! { ! if (is_CR) SET_FLAG (CR_flag); ! if (!is_CR) SET_FLAG (LF_flag); ! LF_counter++; ! } ! if (is_CR && buf[i] != LF) buf[k++] = CR; ! if (buf[i] == CR) ! { ! if (IS_LAST_CR_IN_BUF) ! { ! if (buf_counter < nbufs) ! must_rewind = 1; ! else ! buf[k++] = CR; ! } ! is_CR = 1; continue; ! } ! is_CR = 0; } ! ! if (!repair_mode) ! if (buf[i] == CntlZ) { SET_FLAG (CntlZ_flag); break; } ! ! buf[k++] = buf[i]; ! } ! ! is_CR = 0; ! buf_counter++; ! position += l; ! if (must_rewind) ! { ! /* Last character/s in buf is/are CR/s. ! Push it/them back and reread it/them next time. */ ! position = lseek (sf, offset, whence); ! must_rewind = 0; ! } ! ! l2 = (k > 0 ? write (df, buf, k) : 0); ! if (l2 < 0 || CntlZ_flag) break; ! if (l2 != k) { exit_status = IO_ERROR; break; } } ! if (l < 0) perror (fname); ! if (l2 < 0) perror (tfname); ! 
if (exit_status != NO_ERROR) ! fprintf (stderr,"Cannot process file %s\n",fname); ! close (sf); ! close (df); ! if (l >= 0 && l2 >= 0 && exit_status == NO_ERROR) { ! int file_has_changed = CR_flag || nCR_flag || CntlZ_flag || LF_flag; ! ! if (verbose) ! printf ("File: %s successfully processed.\n",fname); ! if (vverbose) ! printf ("File: %s\n",fname); ! ! if (CR_flag && vverbose) ! printf ("At least one CR/LF to LF transformation occurred.\n"); ! if (nCR_flag && vverbose) ! printf ("Warning: At least one CR sequence stripped from a LF.\n"); ! if (CntlZ_flag && vverbose) ! printf ("Warning: At least one Cntl-Z has been found. File truncated at line %i.\n", LF_counter); ! if (LF_flag && vverbose) ! printf ("Warning: At least one LF without a preceeding CR has been found.\n"); ! ! if (vverbose && !file_has_changed) ! printf ("File unchanged.\n"); ! ! if (make_backup && file_has_changed) ! rename (fname, bfname); ! else ! remove (fname); ! rename (tfname, fname); ! chown (fname, st.st_uid, st.st_gid); ! chmod (fname, st.st_mode); ! if (preserve_timestamp || !file_has_changed) ! utime (fname, &tim1); } ! else { ! remove (tfname); ! if (verbose || vverbose) ! printf ("File: %s. An I/O error occurred\n",fname); } ! ! return exit_status; ! } ! ! void ! usage(char *progname) ! { ! printf ("Usage: %s [-b] [-h] [-r] [-s] [-t] [-v] [-vv] files...\n\n", progname); ! printf ("Options are:\n"); ! printf (" -b: A backup of the original file is made using `.d2u' as backup\n"); ! printf (" extension, if the file has been modified.\n"); ! printf (" -h: Display this help and exit.\n"); ! printf (" -r: Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF).\n"); ! printf (" Cntl-Z are ignored and will not truncate the file and\n"); ! printf (" CR sequences in front of LF will be left unchanged.\n"); ! printf (" -s: Transform MSDOS-style EOF (CRLF) into UNIX-style EOL (LF)\n"); ! printf (" and strip a CR sequence of arbitrary length from the file,\n"); ! 
printf (" if and only if the sequence is followed by LF. CR sequences\n"); ! printf (" that are not followed by LF are always left unchanged.\n"); ! printf (" -t: The timestamp of the modified file will not be preserved.\n"); ! printf (" -v: Show if file processing has been successful or not.\n"); ! printf (" -vv: Show the kind of modifications that have been done to the file.\n"); ! printf ("The program is backward compatible with previous program versions if no options\n"); ! printf ("are given at all. In this case, an occurrence of Cntl-Z will truncate the file,\n"); ! printf ("MSDOS-style EOL (CRLF) are transformed into UNIX-style EOL (LF) and CR sequence\n"); ! printf ("stripping will not happen at all. Also the timestamp will not be alterated and\n"); ! printf ("no backup of the original file will be done."); } int main(int argc, char **argv) { ! int exit_status = NO_ERROR, i, make_backup, repair_mode; ! int strip_mode, verbose, vverbose, preserve_timestamp; ! char* progname = strlwr(strdup(argv[0])); + if (argc < 2) + { + usage (progname); + exit(NO_ERROR); + } + + /* Default for backward compatibility. 
*/ + make_backup = repair_mode = strip_mode = verbose = vverbose = 0; + preserve_timestamp = 1; + + i = 1; + while ((argc > i) && (argv[i][0] == '-') && argv[i][1]) + { + switch (argv[i][1]) + { + case 'b': + make_backup = 1; + break; + case 'h': + usage (progname); + exit(NO_ERROR); + break; + case 'r': + repair_mode = 1; + strip_mode = 0; + break; + case 's': + strip_mode = 1; + repair_mode = 0; + break; + case 't': + preserve_timestamp = 0; + break; + case 'v': + if (argv[i][2] == 'v') + { + vverbose = 1; + verbose = 0; + } + else + { + verbose = 1; + vverbose = 0; + } + break; + default: + fprintf (stderr, "%s: invalid option -- %s\n", progname, &argv[i][1]); + fprintf (stderr, "Try `%s -h' for more information.\n", progname); + exit (IO_ERROR); + break; + } + i++; + } + + for (; i < argc; i++) + exit_status += dtou (argv[i], make_backup, repair_mode, strip_mode, verbose, vverbose, preserve_timestamp); + return exit_status; + } diff -acprNC5 djgpp.orig/src/util/utils.tex djgpp/src/util/utils.tex *** djgpp.orig/src/util/utils.tex Wed Nov 22 23:44:24 2000 --- djgpp/src/util/utils.tex Fri Nov 24 14:35:30 2000 *************** so that they won't get mixed with the fi *** 320,334 **** --- 320,422 ---- @c ----------------------------------------------------------------------------- @node dtou, utod, djtar, Top @chapter dtou + Usage: @code{dtou} [@code{-b}] [@code{-h}] [@code{-r}] [@code{-s}] [@code{-t}] + [@code{-v}] [@code{-vv}] @file{files} + Each file specified on the command line is converted from dos's CR/LF text file mode to unix's NL text file mode. All djgpp wildcards are supported. Timestamps of the files are preserved. + @code{dtou} will pass an exit status of 0 to the calling context if all the + files have been successfully processed and an exit status greater than 0 if + not. In this case, the exit status is equal to the amount of unsuccessfully + processed files. A file has not been successfully processed if some kind of + I/O error occurred. 
+ + @strong{Options:} + + @table @code + + @item -b + + Creates a backup of the original file if the file has been modified. + @file{.d2u} is used as backup suffix. On systems with LFN support, the + backup suffix will be appended to the file name. If no LFN support is + available, the backup suffix will overwrite the original file suffix. + + @item -h + + Displays a help text and exits. + + @item -r + + Repair mode. This mode transforms MSDOS-style EOL (CR/LF) into + UNIX-style EOL (LF). It ignores Cntl-Z, thus it will not truncate the file. + CR sequences in front of LFs are left unchanged. This mode is intended + for repairing files that have erroneously been transmitted in text-mode + instead of binary-mode during an FTP session or with Windows versions of + Netscape which have the bad habit of transferring files with such extensions + as @file{.gz}, @file{.bz2}, @file{.tgz} as text files. + + @item -s + + Strip mode. It transforms MSDOS-style EOL (CR/LF) into UNIX-style EOL (LF) + and strips a CR sequence of arbitrary length from a file, if the sequence + is followed by a LF. CR sequences that are not followed by a LF are left + unchanged. This mode is intended to repair files that have been processed + with buggy ports of Unix software, which always blindly add a CR to a LF, + even if there is already a CR there. + + @item -t + + Timestamp. With this option the timestamp of a modified file will not be + preserved. The timestamp of an unmodified file will always be preserved. + + @item -v + + Verbose mode. Prints a single line showing the file name and whether file + processing has been successful or not. The only case that a file is + considered as unsuccessfully processed is if an I/O error has occurred. + + @item -vv + + Very verbose mode. Prints the file name and shows the kind of modifications + that have been done to the file. All possible output looks like: + + @example + File: foo.c + File unchanged. + At least one CR/LF to LF transformation occurred. 
+ Warning: At least one CR sequence stripped from a LF. + Warning: At least one Cntl-Z has been found. File truncated + at line n. + Warning: At least one LF without a preceeding CR has been found. + @end example + + Of course, not all of the above lines will appear all together. The first + line showing the file name will always be printed. If the file has not been + modified at all, then only the next line will be printed. If the file has + been modified, an appropriate combination of the lines 3 to 6 will be printed, + indicating what has been modified. The above output is of some use, e.g.: + if @code{dtou} reports a LF without a preceding CR, this will be a sure sign + that the file is either binary or has inconsistent EOL format for some other + reason. If @code{dtou} reports that a Cntl-Z (software EOF) has been found, + then the file will be truncated at that line and the rest of the file will be + lost. + + @end table + + The program is backward compatible with previous program versions if no options + are given at all. In this case, an occurrence of Cntl-Z will truncate the file, + MSDOS-style EOL (CR/LF) are transformed into UNIX-style EOL (LF) and CR sequence + stripping will not happen at all. Also, the timestamp will not be altered and + no backup of the original file will be done. + @c ----------------------------------------------------------------------------- @node utod, gxx, dtou, Top @chapter utod Each file specified on the command line is converted from unix's NL text