From: sunshine AT kd1 DOT com (Roger Sunshine) Subject: B19: WinNT _stat()/_stat_worker() return bogus st_ino 29 Sep 1998 15:45:32 -0700 Message-ID: <361002D5.68716A01.cygnus.gnu-win32@kd1.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------DF54E691C75119A8038D3F47" To: gnu-win32 AT cygnus DOT com Cc: sunshine AT kd1 DOT com This is a multi-part message in MIME format. --------------DF54E691C75119A8038D3F47 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit I've noticed in a few situations that du fails to correctly compute disk usage in certain directory structures, and that find occasionally skips directories when searching. In both cases, the root cause appears to be that the directory structure in question has multiple directories with the same name. Both find and du search only the first occurrence of these directories (even though they are not hard linked to each other). Tracing the problem reveals that both programs attempt to avoid searching the same directory by comparing the st_ino and st_dev fields of the stat buffer to a saved list of seen directories. Looking at the implementation of _stat_worker() in syscalls.cc and fhandler_base::fstat() in fhandler.cc reveals that the st_ino field for the stat structure is (usually) derived from a hash on the file name (hence the reason directories with the same name get the same inode number). On Windows NT, there is perhaps a better way to get an inode-like number: if one opens the directory with the FILE_FLAG_BACKUP_SEMANTICS flag, one can then call GetFileInformationByHandle(), which returns a unique identifier for the file (nFileIndexHigh/Low). Currently this function is used only in the implementation of fstat (which presents another problem - fstat() uses the [probably more correct] volume serial number as the value for st_dev, while _stat_worker() uses only the constant 42 [which means find -xdev will probably not function correctly, nor would tar --one-file-system]). 
Additionally, if at all possible, only the file index values should be used for the st_ino field, as xoring the namehash would prevent two hard linked files from having the same st_ino when they have differing names. Included are some diffs to src/winsup/fhandler.cc and src/winsup/syscalls.cc that make find and du work much better under Windows NT. Unfortunately I suspect that they do not solve the problem for 95 (as devstudio's documentation implies that the backup semantics flag is only applicable to NT, and since I don't have access to 95 at the moment, I can't really test them there). Thanks, Roger -- Roger Sunshine - Phone: (512)349-5644 - Tempest Rules, NT doesn't Senior Software Engineer - Knowledge Discovery One - sunshine AT kd1 DOT com --------------DF54E691C75119A8038D3F47 Content-Type: text/plain; charset=us-ascii; name="diffs" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="diffs" *** src/winsup/syscalls.cc Tue Feb 10 21:15:16 1998 --- cdk/winsup/syscalls.cc Mon Sep 28 14:40:53 1998 *************** *** 1339,1345 **** /* FIXME: Doing this is of dubious merit. */ if (atts == -1 && ((len = strlen (win32_name)) < 5 ! || strcasecmp (win32_name + len - 4, ".exe"))) { debug_printf ("trying with .exe suffix\n"); strcpy (tmp, win32_name); --- 1339,1345 ---- /* FIXME: Doing this is of dubious merit. */ if (atts == -1 && ((len = strlen (win32_name)) < 5 ! || strcasecmp (win32_name + len - 4, ".exe"))) { debug_printf ("trying with .exe suffix\n"); strcpy (tmp, win32_name); *************** *** 1351,1400 **** { /* probably a raw device, try fstat */ win32_name = name; ! 
goto fstat; } debug_printf ("%d = GetFileAttributesA (%s)\n", atts, win32_name); - if (atts & FILE_ATTRIBUTE_DIRECTORY) - { - WIN32_FIND_DATA wfd; - HANDLE handle; - /* hmm, the number of links to a directory includes the - number of entries in the directory, since all the things - in the directory point to it */ - buf->st_nlink += num_entries (win32_name); - buf->st_dev = 42; - buf->st_ino = hash_path_name (0, real_path.get_win32 ()); - buf->st_mode = S_IFDIR | STD_RBITS | STD_XBITS; - if ((atts & FILE_ATTRIBUTE_READONLY) == 0) - buf->st_mode |= STD_WBITS; - buf->st_uid = get_file_owner (real_path.get_win32 ()); - buf->st_gid = get_file_group (real_path.get_win32 ()); - if ((handle = FindFirstFile (real_path.get_win32(), &wfd)) != INVALID_HANDLE_VALUE) - { - buf->st_atime = to_time_t (&wfd.ftLastAccessTime); - buf->st_mtime = to_time_t (&wfd.ftLastWriteTime); - buf->st_ctime = to_time_t (&wfd.ftCreationTime); - buf->st_size = wfd.nFileSizeLow; - buf->st_blksize = S_BLKSIZE; - buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; - FindClose (handle); - } - res = 0; - } - else - { fstat: ! int h = _open (win32_name, ! O_RDONLY | O_BINARY | (nofollow ? O_NOSYMLINK : 0), ! 0); if (h >= 0) ! { ! res = _fstat (h, buf); ! _close (h); } } ! done: syscall_printf ("%d = %s (%s, %p)\n", res, caller, name, buf); return res; --- 1351,1372 ---- { /* probably a raw device, try fstat */ win32_name = name; ! atts = 0; } debug_printf ("%d = GetFileAttributesA (%s)\n", atts, win32_name); fstat: ! { ! int h = _open(win32_name, ! (atts & FILE_ATTRIBUTE_DIRECTORY ? O_DIROPEN : O_RDONLY) | ! O_BINARY | (nofollow ? O_NOSYMLINK : 0), 0); if (h >= 0) ! { ! res = _fstat (h, buf); ! _close (h); } } ! 
done: syscall_printf ("%d = %s (%s, %p)\n", res, caller, name, buf); return res; *** src/winsup/fhandler.cc Tue Feb 10 21:14:55 1998 --- cdk/winsup/fhandler.cc Mon Sep 28 14:43:19 1998 *************** *** 334,339 **** --- 334,343 ---- { access_ = GENERIC_WRITE; } + else if ((flags & (O_RDONLY | O_WRONLY | O_RDWR)) == 0) + { + access_ = 0; + } else { access_ = GENERIC_READ | GENERIC_WRITE; *************** *** 375,381 **** } /* These flags are host dependent. */ ! shared = host_dependent.shared; sa.nLength = sizeof (sa); sa.lpSecurityDescriptor = 0; --- 379,391 ---- } /* These flags are host dependent. */ ! if (access_ & (GENERIC_READ | GENERIC_WRITE)) { ! shared = host_dependent.shared; ! } ! else { ! // No access only works with file share read ! shared = FILE_SHARE_READ; ! } sa.nLength = sizeof (sa); sa.lpSecurityDescriptor = 0; *************** *** 852,859 **** buf->st_ctime = to_time_t (&local.ftCreationTime); buf->st_nlink = local.nNumberOfLinks; buf->st_dev = local.dwVolumeSerialNumber; buf->st_size = local.nFileSizeLow; ! buf->st_ino = local.nFileIndexLow ^ namehash_; buf->st_blksize = S_BLKSIZE; buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; buf->st_uid = get_file_owner (win32_path.get_win32 ()); --- 862,870 ---- buf->st_ctime = to_time_t (&local.ftCreationTime); buf->st_nlink = local.nNumberOfLinks; buf->st_dev = local.dwVolumeSerialNumber; + buf->st_rdev = buf->st_dev; buf->st_size = local.nFileSizeLow; ! buf->st_ino = local.nFileIndexLow ^ local.nFileIndexHigh; buf->st_blksize = S_BLKSIZE; buf->st_blocks = (buf->st_size + S_BLKSIZE-1) / S_BLKSIZE; buf->st_uid = get_file_owner (win32_path.get_win32 ()); *************** *** 863,868 **** --- 874,881 ---- buf->st_mode &= ~S_IFMT; if (symlink_p_) buf->st_mode |= S_IFLNK; + else if (local.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + buf->st_mode |= S_IFDIR; else buf->st_mode |= S_IFREG; } *************** *** 870,882 **** { buf->st_mode = 0; buf->st_mode |= STD_RBITS; ! if (! 
(local.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) buf->st_mode |= STD_WBITS; /* | S_IWGRP | S_IWOTH; we don't give write to group etc */ if (symlink_p_) buf->st_mode |= S_IFLNK; else switch (GetFileType (get_handle ())) { --- 883,898 ---- { buf->st_mode = 0; buf->st_mode |= STD_RBITS; ! if (! (local.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) buf->st_mode |= STD_WBITS; /* | S_IWGRP | S_IWOTH; we don't give write to group etc */ if (symlink_p_) buf->st_mode |= S_IFLNK; + else if (local.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + buf->st_mode |= S_IFDIR | STD_XBITS; + } else switch (GetFileType (get_handle ())) { --------------DF54E691C75119A8038D3F47-- - For help on using this list (especially unsubscribing), send a message to "gnu-win32-request AT cygnus DOT com" with one line of text: "help".