X-Authentication-Warning: delorie.com: mail set sender to djgpp-workers-bounces using -f X-Recipient: djgpp-workers AT delorie DOT com X-Authenticated: #27081556 X-Provags-ID: V01U2FsdGVkX1+4PGsacZm4SBOkRxPXdhYGH9PzAsN3pmdVxM+wdo 737Gel7di2dEew From: Juan Manuel Guerrero To: djgpp-workers AT delorie DOT com Subject: djtar and pax/posix headers Date: Fri, 28 Sep 2012 00:53:02 +0200 User-Agent: KMail/1.9.10 MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline Message-Id: <201209280053.02782.juan.guerrero@gmx.de> X-Y-GMX-Trusted: 0 Reply-To: djgpp-workers AT delorie DOT com At least the GNU tar program distributed with the Linux distribution I use already uses pax/posix as its default format, as described in: This program produces tar archives that, if extracted with djtar or other older tar programs, clutter the directory where the files are created with directories and files corresponding to the pax header data blocks. These extra directories are called ./PaxHeader.NNNNN, where NNNNN stands for some number. AFAIK the information stored in the pax headers is of no use for the plain DOS file system. I have adjusted djtar to skip these headers together with the data blocks that may follow them. Only the "normal" file header will be honored, as it used to be. If someone wants a tar program with full pax/posix support, it would be better to port the latest GNU tar or GNU pax than to try to implement this support in djtar. I tested this patch with all formats produced by GNU tar 1.26. As usual, suggestions, objections, and comments are welcome. Regards, Juan M. 
Guerrero Logging in to :pserver:anonymous AT cvs DOT delorie DOT com:2401/cvs/djgpp Index: djgpp/src/docs/kb/wc204.txi =================================================================== RCS file: /cvs/djgpp/djgpp/src/docs/kb/wc204.txi,v retrieving revision 1.201 diff -U 5 -r1.201 wc204.txi --- djgpp/src/docs/kb/wc204.txi 22 Jan 2012 23:40:28 -0000 1.201 +++ djgpp/src/docs/kb/wc204.txi 27 Sep 2012 22:37:33 -0000 @@ -1244,5 +1244,9 @@ @findex STYP_NRELOC_OVFL AT r{, new flag bit added to @code{s_flags} of @acronym{COFF} section header} The @code{s_flags} of the @acronym{COFF} section header now honors the new @code{STYP_NRELOC_OVFL} bit that signals that the section contains extended relocations and that the @code{s_nreloc} counter has overflown. The bit set in case of overflow by @code{STYP_NRELOC_OVFL} is @code{0x01000000}. +@pindex djtar AT r{, support for @code{tar} archives with @code{pax} headers} +The djtar program can now unpack @code{tar} archives that contain @code{pax} headers +conforming to @acronym{POSIX} 1003.1-2001. The @code{pax} headers are always skipped +and their contents are ignored. Index: djgpp/src/utils/djtar/untar.c =================================================================== RCS file: /cvs/djgpp/djgpp/src/utils/djtar/untar.c,v retrieving revision 1.10 diff -U 5 -r1.10 untar.c --- djgpp/src/utils/djtar/untar.c 24 Sep 2012 18:46:12 -0000 1.10 +++ djgpp/src/utils/djtar/untar.c 27 Sep 2012 22:37:34 -0000 @@ -32,30 +32,84 @@ extern int list_only; extern FILE *log_out; /*------------------------------------------------------------------------*/ +/* tar Header Block, from POSIX 1003.1-1990. */ -typedef struct { - char name[100]; - char operm[8]; - char ouid[8]; - char ogid[8]; - char osize[12]; - char otime[12]; - char ocsum[8]; - char flags[1]; - char filler[355]; +/* POSIX header. 
*/ + +typedef struct posix_header +{ /* byte offset */ + char name[100]; /* 0 */ + char mode[8]; /* 100 */ + char uid[8]; /* 108 */ + char gid[8]; /* 116 */ + char size[12]; /* 124 */ + char mtime[12]; /* 136 */ + char chksum[8]; /* 148 */ + char typeflag; /* 156 */ + char linkname[100]; /* 157 */ + char magic[6]; /* 257 */ + char version[2]; /* 263 */ + char uname[32]; /* 265 */ + char gname[32]; /* 297 */ + char devmajor[8]; /* 329 */ + char devminor[8]; /* 337 */ + char prefix[155]; /* 345 */ + char filler[12]; /* 500 */ + /* 512 */ } TARREC; + +#define NAME_FIELD_SIZE 100 +#define PREFIX_FIELD_SIZE 155 +#define FIRST_CHKSUM_OCTET 148 +#define LAST_CHKSUM_OCTET 155 + + +#define IS_USTAR_HEADER(magic) ((magic)[0] == 'u' && \ + (magic)[1] == 's' && \ + (magic)[2] == 't' && \ + (magic)[3] == 'a' && \ + (magic)[4] == 'r' && \ + (magic)[5] == '\0') + +#define IS_PAX_HEADER(h) ((((h).typeflag == XGLTYPE) || ((h).typeflag == XHDTYPE)) && \ + IS_USTAR_HEADER((h).magic)) + +#define IS_CHKSUM_OCTET(d) ((d) > (FIRST_CHKSUM_OCTET - 1) && \ + (d) < (LAST_CHKSUM_OCTET + 1)) + + +/* tar files are made in basic blocks of this size. */ +#define BLOCKSIZE 512 + + +/* Values used in typeflag field. 
*/ +#define REGTYPE '0' /* regular file */ +#define AREGTYPE '\0' /* regular file */ +#define LNKTYPE '1' /* link */ +#define SYMTYPE '2' /* reserved */ +#define CHRTYPE '3' /* character special */ +#define BLKTYPE '4' /* block special */ +#define DIRTYPE '5' /* directory */ +#define FIFOTYPE '6' /* FIFO special */ +#define CONTTYPE '7' /* reserved */ + +#define XHDTYPE 'x' /* Extended header referring to the + next file in the archive */ +#define XGLTYPE 'g' /* Global extended header */ + + static TARREC header; static int error_message_printed; static int looking_for_header; static char *changed_name; static int first_block = 1; static File_type file_type = DOS_BINARY; -static long perm, uid, gid, size; +static long mode, uid, gid, size; static long posn = 0; static time_t ftime; static struct ftime ftimes; static struct tm *tm; static int r; @@ -69,11 +123,11 @@ int should_be_written, batch_file_processing = 0; while (buf_size) { int write_errno = 0; - int dsize = 512, wsize; + int dsize = BLOCKSIZE, wsize; if (skipping) { if (skipping <= buf_size) { @@ -86,23 +140,46 @@ return 0; } else { bytes_out += buf_size; - skipping -= buf_size; + skipping -= buf_size; return 0; } } if (looking_for_header) { + char name[PREFIX_FIELD_SIZE + 1 + NAME_FIELD_SIZE + 1]; char *extension; int head_csum = 0; int i; size_t nlen; memcpy(&header, buf, sizeof header); + + /* Skip global extended and extended pax headers. */ + if (IS_PAX_HEADER(header)) + { + /* + * The pax header block is identical to a ustar header block + * except that two additional typeflag values are defined: + * x: represents extended header records for the following + * file in the archive (with its one ustar header block). + * g: represents global extended header records for the + * following files in the archive. + * + * Skip header plus all following pax data blocks. 
+ */ + buf += sizeof header; + buf_size -= sizeof header; + bytes_out += sizeof header; + sscanf(header.size, " %lo", &size); + skipping = (size + (BLOCKSIZE - 1)) & ~(BLOCKSIZE - 1); + continue; + } + if (header.name[0] == 0) { bytes_out += buf_size; /* assume everything left should be counted */ return EOF; } @@ -118,20 +195,20 @@ so we will extract them with DOS-style EOL. */ extension = strrchr(basename(header.name), '.'); if (extension && !stricmp(extension, ".bat")) batch_file_processing = 1; /* LF -> CRLF */ - sscanf(header.operm, " %lo", &perm); - sscanf(header.ouid, " %lo", &uid); - sscanf(header.ogid, " %lo", &gid); - sscanf(header.osize, " %lo", &size); - sscanf(header.otime, " %o", &ftime); - sscanf(header.ocsum, " %o", &head_csum); + sscanf(header.mode, " %lo", &mode); + sscanf(header.uid, " %lo", &uid); + sscanf(header.gid, " %lo", &gid); + sscanf(header.size, " %lo", &size); + sscanf(header.mtime, " %o", &ftime); + sscanf(header.chksum, " %o", &head_csum); for (i = 0; i < (int)(sizeof header); i++) { /* Checksum on header, but with the checksum field blanked out. */ - int j = (i > 147 && i < 156) ? ' ' : *((unsigned char *)&header + i); + int j = IS_CHKSUM_OCTET(i) ? ' ' : *((unsigned char *)&header + i); head_csum -= j; } if (head_csum && !ignore_csum) { @@ -147,55 +224,72 @@ looking_for_header = 1; bytes_out += buf_size; return EOF; } - changed_name = get_new_name(header.name, &should_be_written); + /* Accept file names as specified by + POSIX.1-1996 section 10.1.1. */ + changed_name = name; + if (header.prefix[0] && IS_USTAR_HEADER(header.magic)) + { + /* + * A new pathname shall be formed by concatenating + * prefix (up to the first NUL character), a slash + * character, and name; otherwise, name is used alone. 
+ */ + size_t len = sizeof header.prefix; + memcpy(changed_name, header.prefix, len); + changed_name[len] = '/'; + changed_name++; + } + memcpy(changed_name, header.name, sizeof header.name); + changed_name[sizeof header.name] = '\0'; + + changed_name = get_new_name(name, &should_be_written); if (v_switch) - fprintf(log_out, "%08lx %6lo ", posn, perm); + fprintf(log_out, "%08lx %6lo ", posn, mode); else fprintf(log_out, "%c%c%c%c ", - S_ISDIR(perm) ? 'd' : header.flags[0] == '2' ? 'l' : '-', - perm & S_IRUSR ? 'r' : '-', - perm & S_IWUSR ? 'w' : '-', - perm & S_IXUSR ? 'x' : '-'); + S_ISDIR(mode) ? 'd' : header.typeflag == SYMTYPE ? 'l' : '-', + mode & S_IRUSR ? 'r' : '-', + mode & S_IWUSR ? 'w' : '-', + mode & S_IXUSR ? 'x' : '-'); fprintf(log_out, "%.20s %9ld %s", ctime(&ftime) + 4, size, changed_name); #if 0 fprintf(log_out, "(out: %ld)", bytes_out); #endif - if (header.flags[0] == '2') - fprintf(log_out, " -> %s", header.filler); - else if (header.flags[0] == '1') - fprintf(log_out, " link to %s", header.filler); + if (header.typeflag == SYMTYPE) + fprintf(log_out, " -> %s", header.linkname); + else if (header.typeflag == LNKTYPE) + fprintf(log_out, " link to %s", header.linkname); fprintf(log_out, "%s\n", !should_be_written && !list_only ? "\t[ skipped ]" : ""); - posn += 512 + ((size + 511) & ~511); + posn += BLOCKSIZE + ((size + (BLOCKSIZE - 1)) & ~(BLOCKSIZE - 1)); #if 0 - fprintf(log_out, "%6lo %02x %12ld %s\n", perm, header.flags[0], size, changed_name); + fprintf(log_out, "%6lo %02x %12ld %s\n", mode, header.typeflag, size, changed_name); #endif - if (header.flags[0] == '1' || header.flags[0] == '2') + if (header.typeflag == LNKTYPE || header.typeflag == SYMTYPE) { /* Symbolic links always have zero data, but some broken tar programs claim otherwise. 
*/ size = 0; } if (should_be_written == 0) { - skipping = (size + 511) & ~511; - if (!skipping) /* an empty file or a directory */ + skipping = (size + (BLOCKSIZE - 1)) & ~(BLOCKSIZE - 1); + if (!skipping) /* an empty file or a directory */ { looking_for_header = 1; if (buf_size < (long)(sizeof header)) return 0; } continue; } else if ((changed_name[nlen = strlen(changed_name) - 1] == '/' - || header.flags[0] == '5') /* '5' flags a directory */ - && !to_stdout) + || header.typeflag == DIRTYPE) && !to_stdout) { if (changed_name != new) { memcpy(new, changed_name, nlen + 2); changed_name = new; @@ -224,11 +318,11 @@ { if (change(changed_name, "Cannot exclusively open file", 0)) goto open_file; else { - skipping = (size + 511) & ~511; + skipping = (size + (BLOCKSIZE - 1)) & ~(BLOCKSIZE - 1); continue; } } } else @@ -246,16 +340,16 @@ char tbuf[1024]; char *wbuf = buf; if (buf_size <= 0) /* this buffer exhausted */ return 0; - if (size < 512) + if (size < BLOCKSIZE) dsize = size; - else if (buf_size < 512) + else if (buf_size < BLOCKSIZE) dsize = buf_size; else - dsize = 512; + dsize = BLOCKSIZE; if (batch_file_processing && !to_tty) { /* LF -> CRLF. Note that we don't alter the original uncompressed data so as not to screw up the CRC computations. */ @@ -285,12 +379,12 @@ /* If they asked for text files to be written Unix style, or we are writing to console, remove the CR and ^Z characters from DOS text files. Note that we don't alter the original uncompressed data so as not to screw up the CRC computations. 
*/ - char *s=buf, *d=tbuf; - while (s-buf < dsize) + char *s = buf, *d = tbuf; + while (s - buf < dsize) { if (*s != '\r' && *s != 26) *d++ = *s; s++; } @@ -329,24 +423,25 @@ ftimes.ft_day = tm->tm_mday; ftimes.ft_month = tm->tm_mon + 1; ftimes.ft_year = tm->tm_year - 80; setftime(r, &ftimes); close(r); - chmod(changed_name, perm); + chmod(changed_name, mode); } batch_file_processing = 0; looking_for_header = 1; if (write_errno == ENOSPC) /* target disk full: quit early */ { bytes_out += buf_size; return EOF; } else if (write_errno) /* other error: skip this file, try next */ - skipping = (size - dsize + 511) & ~511; - else /* skip the slack garbage to the next 512-byte boundary */ - skipping = 512 - dsize; + skipping = (size - dsize + (BLOCKSIZE - 1)) & ~(BLOCKSIZE - 1); + else /* skip the slack garbage to the next BLOCKSIZE-byte boundary */ + skipping = BLOCKSIZE - dsize; } + return 0; } /*------------------------------------------------------------------------*/