From: olleo AT sics DOT se (Olle Olsson) Subject: Re: B19: WinNT _stat()/_stat_worker() return bogus st_ino 1 Oct 1998 22:49:36 -0700 Message-ID: <36138FA7.1A018084.cygnus.gnu-win32@sics.se> References: <361002D5 DOT 68716A01 AT kd1 DOT com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit To: Roger Sunshine Cc: gnu-win32 AT cygnus DOT com This explains the defective behavior when using find in situations like the following: dirMAJOR/ dirXXX/ a full directory tree dirXXX.old the "isomorphic" directory tree with older versions of the same files Here it is guaranteed that if "find dirMAJOR -name 'foo*' " is performed, then directories with the same name will appear several times. Which is really dangerous if you rely upon "find" to find *all* the relevant files. So this bug needs to be fixed. /olle Roger Sunshine wrote: > I've noticed in a few situations that du fails to correctly compute > disk usage in certain directory structures, and that find occasionally > skips directories when searching. In both cases, the root cause > appears to be that the directory structure in question has multiple > directories with the same name. Both find and du search only the > first occurrence of these directories (even though they are not hard > linked to each other). Tracing the problem reveals that both programs > attempt to avoid searching the same directory by comparing the > st_ino and st_dev fields of the stat buffer to a saved list of seen > directories. > > Looking at the implementation of _stat_worker() in syscalls.cc > and fhandler_base::fstat() in fhandler.cc reveals that the > st_ino field for the stat structure is (usually) derived from > a hash on the file name (hence the reason directories with the > same name get the same inode number). 
On Windows NT, there is > perhaps a better way to get an inode-like number - If one > opens the directory with the FILE_FLAG_BACKUP_SEMANTICS, then > you can call GetFileInformationByHandle() which returns a unique > identifier for the file (nFileIndexHigh/Low). Currently this > function is used only in the implementation of fstat (which presents > another problem - fstat() uses the [probably more correct] volume > serial number as the value for st_dev, while _stat_worker() uses > only the constant 42 [which means find -xdev will probably not > function correctly, nor would tar --one-file-system]). Additionally, > if at all possible, only the file index values should be used for > the st_ino field, as xoring the namehash would prevent two hard > linked files from having the same st_ino when they have differing > names. > > Included are some diffs to src/winsup/fhandler.cc and src/winsup/syscalls.cc that make find and du work much better > under Windows NT. Unfortunately I suspect that they do not solve the > problem for 95 (as devstudio's documentation implies that the > backup semantics flag is only applicable to NT, and since I don't have > access to 95 at the moment, I can't really test them there). > > Thanks, Roger > -- > Roger Sunshine - Phone: (512)349-5644 - Tempest Rules, NT doesn't > Senior Software Engineer - Knowledge Discovery One - sunshine AT kd1 DOT com > > ------------------------------------------------------------------------ > *** src/winsup/syscalls.cc Tue Feb 10 21:15:16 1998 > --- cdk/winsup/syscalls.cc Mon Sep 28 14:40:53 1998 > *************** > *** 1339,1345 **** > /* FIXME: Doing this is of dubious merit. */ > if (atts == -1 > && ((len = strlen (win32_name)) < 5 > ! || strcasecmp (win32_name + len - 4, ".exe"))) > { > debug_printf ("trying with .exe suffix\n"); > strcpy (tmp, win32_name); > --- 1339,1345 ---- > /* FIXME: Doing this is of dubious merit. */ > if (atts == -1 > && ((len = strlen (win32_name)) < 5 > ! 
|| strcasecmp (win32_name + len - 4, ".exe"))) > { > debug_printf ("trying with .exe suffix\n"); > strcpy (tmp, win32_name); > *************** > *** 1351,1400 **** > { > /* probably a raw device, try fstat */ > win32_name = name; > ! goto fstat; > } > > debug_printf ("%d = GetFileAttributesA (%s)\n", atts, win32_name); > - if (atts & FILE_ATTRIBUTE_DIRECTORY) > - { > - WIN32_FIND_DATA wfd; > - HANDLE handle; > - /* hmm, the number of links to a directory includes the > - number of entries in the directory, since all the things > - in the directory point to it */ > - buf->st_nlink += num_entries (win32_name); > - buf->st_dev = 42; > - buf->st_ino = hash_path_name (0, real_path.get_win32 ()); > - buf->st_mode = S_IFDIR | STD_RBITS | STD_XBITS; > - if ((atts & FILE_ATTRIBUTE_READONLY) == 0) > - buf->st_mode |= STD_WBITS; > - buf->st_uid = get_file_owner (real_path.get_win32 ()); > - buf->st_gid = get_file_group (real_path.get_win32 ()); > - if ((handle = FindFirstFile (real_path.get_win32(), &wfd)) != INVALID_HANDLE_VALUE) > - { > - buf->st_atime = to_time_t (&wfd.ftLastAccessTime); > - buf->st_mtime = to_time_t (&wfd.ftLastWriteTime); > - buf->st_ctime = to_time_t (&wfd.ftCreationTime); > - buf->st_size = wfd.nFileSizeLow; > - buf->st_blksize = S_BLKSIZE; > - buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; > - FindClose (handle); > - } > - res = 0; > - } > - else > - { > fstat: > ! int h = _open (win32_name, > ! O_RDONLY | O_BINARY | (nofollow ? O_NOSYMLINK : 0), > ! 0); > if (h >= 0) > ! { > ! res = _fstat (h, buf); > ! _close (h); > } > } > ! > done: > syscall_printf ("%d = %s (%s, %p)\n", res, caller, name, buf); > return res; > --- 1351,1372 ---- > { > /* probably a raw device, try fstat */ > win32_name = name; > ! atts = 0; > } > > debug_printf ("%d = GetFileAttributesA (%s)\n", atts, win32_name); > fstat: > ! { > ! int h = _open(win32_name, > ! (atts & FILE_ATTRIBUTE_DIRECTORY ? O_DIROPEN : O_RDONLY) | > ! O_BINARY | (nofollow ? 
O_NOSYMLINK : 0), 0); > if (h >= 0) > ! { > ! res = _fstat (h, buf); > ! _close (h); > } > } > ! > done: > syscall_printf ("%d = %s (%s, %p)\n", res, caller, name, buf); > return res; > *** src/winsup/fhandler.cc Tue Feb 10 21:14:55 1998 > --- cdk/winsup/fhandler.cc Mon Sep 28 14:43:19 1998 > *************** > *** 334,339 **** > --- 334,343 ---- > { > access_ = GENERIC_WRITE; > } > + else if ((flags & (O_RDONLY | O_WRONLY | O_RDWR)) == 0) > + { > + access_ = 0; > + } > else > { > access_ = GENERIC_READ | GENERIC_WRITE; > *************** > *** 375,381 **** > } > > /* These flags are host dependent. */ > ! shared = host_dependent.shared; > > sa.nLength = sizeof (sa); > sa.lpSecurityDescriptor = 0; > --- 379,391 ---- > } > > /* These flags are host dependent. */ > ! if (access_ & (GENERIC_READ | GENERIC_WRITE)) { > ! shared = host_dependent.shared; > ! } > ! else { > ! // No access only works with file share read > ! shared = FILE_SHARE_READ; > ! } > > sa.nLength = sizeof (sa); > sa.lpSecurityDescriptor = 0; > *************** > *** 852,859 **** > buf->st_ctime = to_time_t (&local.ftCreationTime); > buf->st_nlink = local.nNumberOfLinks; > buf->st_dev = local.dwVolumeSerialNumber; > buf->st_size = local.nFileSizeLow; > ! buf->st_ino = local.nFileIndexLow ^ namehash_; > buf->st_blksize = S_BLKSIZE; > buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; > buf->st_uid = get_file_owner (win32_path.get_win32 ()); > --- 862,870 ---- > buf->st_ctime = to_time_t (&local.ftCreationTime); > buf->st_nlink = local.nNumberOfLinks; > buf->st_dev = local.dwVolumeSerialNumber; > + buf->st_rdev = buf->st_dev; > buf->st_size = local.nFileSizeLow; > ! 
buf->st_ino = local.nFileIndexLow ^ local.nFileIndexHigh; > buf->st_blksize = S_BLKSIZE; > buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; > buf->st_uid = get_file_owner (win32_path.get_win32 ()); > *************** > *** 863,868 **** > --- 874,881 ---- > buf->st_mode &= ~S_IFMT; > if (symlink_p_) > buf->st_mode |= S_IFLNK; > + else if (local.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) > + buf->st_mode |= S_IFDIR; > else > buf->st_mode |= S_IFREG; > } > *************** > *** 870,882 **** > { > buf->st_mode = 0; > buf->st_mode |= STD_RBITS; > ! > if (! (local.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) > buf->st_mode |= STD_WBITS; > /* | S_IWGRP | S_IWOTH; we don't give write to group etc */ > > if (symlink_p_) > buf->st_mode |= S_IFLNK; > else > switch (GetFileType (get_handle ())) > { > --- 883,898 ---- > { > buf->st_mode = 0; > buf->st_mode |= STD_RBITS; > ! > if (! (local.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) > buf->st_mode |= STD_WBITS; > /* | S_IWGRP | S_IWOTH; we don't give write to group etc */ > > if (symlink_p_) > buf->st_mode |= S_IFLNK; > + else if (local.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { > + buf->st_mode |= S_IFDIR | STD_XBITS; > + } > else > switch (GetFileType (get_handle ())) > { - For help on using this list (especially unsubscribing), send a message to "gnu-win32-request AT cygnus DOT com" with one line of text: "help".