X-Recipient: archive-cygwin AT delorie DOT com X-Spam-Check-By: sourceware.org Date: Fri, 23 Nov 2012 14:33:32 +0100 From: Corinna Vinschen To: cygwin AT cygwin DOT com Subject: Re: Possible race in SYSV IPC (semaphores) Message-ID: <20121123133332.GU17347@calimero.vinschen.de> Reply-To: cygwin AT cygwin DOT com Mail-Followup-To: cygwin AT cygwin DOT com References: <5F8AAC04F9616747BC4CC0E803D5907D012856 AT MLBXV09 DOT nih DOT gov> <20121123113605 DOT GN17347 AT calimero DOT vinschen DOT de> <20121123131020 DOT GR17347 AT calimero DOT vinschen DOT de> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20121123131020.GR17347@calimero.vinschen.de> User-Agent: Mutt/1.5.21 (2010-09-15) Mailing-List: contact cygwin-help AT cygwin DOT com; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: cygwin-owner AT cygwin DOT com Mail-Followup-To: cygwin AT cygwin DOT com Delivered-To: mailing list cygwin AT cygwin DOT com On Nov 23 14:10, Corinna Vinschen wrote: > On Nov 23 12:36, Corinna Vinschen wrote: > > On Nov 19 21:06, Lavrentiev, Anton (NIH/NLM/NCBI) [C] wrote: > > > Hello again, > > > > > > I can now positively confirm the race condition in cygserver w.r.t. the named > > > pipe used to serialize SYSV requests through the server. The race is due to > > > that transport_layer_pipes::accept() (bool *const recoverable) (file: transport_pipes.cc) does actually _create_ the pipe when pipe_instance == 0 > > > (ironically, transport_layer_pipes::listen() does not create any OS primitives > > > at all!). > > > > > > This means that under heavy load, cygserver threads may all end up processing > > > their requests and closing all instances of the pipe (bringing pipe_instance == 0) > > > yet not being able to get to the point of accepting new request (that is, to > > > re-create the pipe). For the client (user process), this looks like the pipe > > > does not exist (during that very tiny period of time), and the following message > > > gets printed: > > > > > > Iteration 3016 > > > 1 [main] a 4872 transport_layer_pipes::connect: lost connection to cygserver, error = 2 > > > > Thanks for analyzing this situation. IIUC, that means if we create the > > pipe in listen(), and then create another pipe instance per accepting > > thread in accept(), we will always have at least one instance of the pipe, > > whatever the load, right? > > > > Are you set up to test a patch? If so, I'd propose the below patch. It > > would be nice if you could test it in your environment. > > Forget it. That doesn't work. > > Surprisingly, the next client calling CreateFile is *not* connected to > any server side of the pipe which calls ConnectNamedPipe, but apparently > the pipes are used in the order of creation. Thus, the first client > hangs waiting for the pipe instance created in the listen() method, but > since the server never calls ConnectNamedPipe on that instance, the > client will wait forever. > > Bummer. > > Back to the drawing board... Try the below one. It also connects to the listening pipe in listen(), so there's a single instance of the pipe connected and blocked for further use forever. This should avoid the race (*and* work...) Please give it a try. Corinna * cygserver.cc (main): Call listen right after creating the transport. * transport_pipes.cc (transport_layer_pipes::listen): Create first instance of the named pipe here. Connect the client side to block it for further use by the system. (transport_layer_pipes::accept): Don't handle first pipe instance here. Change debug output accordingly. Index: cygserver.cc =================================================================== RCS file: /cvs/src/src/winsup/cygserver/cygserver.cc,v retrieving revision 1.16 diff -u -p -r1.16 cygserver.cc --- cygserver.cc 10 Oct 2011 15:48:54 -0000 1.16 +++ cygserver.cc 23 Nov 2012 13:32:22 -0000 @@ -715,15 +715,15 @@ main (const int argc, char *argv[]) transport_layer_base *const transport = create_server_transport (); assert (transport); + if (transport->listen () == -1) + return 1; + process_cache cache (process_cache_size, cleanup_threads); server_submission_loop submission_loop (&request_queue, transport, &cache); request_queue.add_submission_loop (&submission_loop); - if (transport->listen () == -1) - return 1; - cache.start (); request_queue.start (); Index: transport_pipes.cc =================================================================== RCS file: /cvs/src/src/winsup/cygserver/transport_pipes.cc,v retrieving revision 1.14 diff -u -p -r1.14 transport_pipes.cc --- transport_pipes.cc 14 Feb 2012 14:12:11 -0000 1.14 +++ transport_pipes.cc 23 Nov 2012 13:32:22 -0000 @@ -107,7 +107,32 @@ transport_layer_pipes::listen () _is_listening_endpoint = true; - /* no-op */ + debug ("Try to create named pipe: %ls", _pipe_name); + + HANDLE listen_pipe = + CreateNamedPipeW (_pipe_name, + PIPE_ACCESS_DUPLEX | FILE_FLAG_FIRST_PIPE_INSTANCE, + PIPE_TYPE_BYTE | PIPE_WAIT, PIPE_UNLIMITED_INSTANCES, + 0, 0, 1000, &sec_all_nih); + if (listen_pipe != INVALID_HANDLE_VALUE) + { + HANDLE connect_pipe = + CreateFileW (_pipe_name, GENERIC_READ | GENERIC_WRITE, 0, &sec_all_nih, + OPEN_EXISTING, 0, NULL); + if (connect_pipe == INVALID_HANDLE_VALUE) + { + CloseHandle (listen_pipe); + listen_pipe = INVALID_HANDLE_VALUE; + } + } + + if (listen_pipe == INVALID_HANDLE_VALUE) + { + system_printf ("failed to create named pipe: " + "is the daemon already running?"); + return -1; + } + return 0; } @@ -125,38 +150,19 @@ transport_layer_pipes::accept (bool *con // Read: http://www.securityinternals.com/research/papers/namedpipe.php // See also the Microsoft security bulletins MS00-053 and MS01-031. - // FIXME: Remove FILE_CREATE_PIPE_INSTANCE. - - const bool first_instance = (pipe_instance == 0); - - debug ("Try to create named pipe: %ls", _pipe_name); + debug ("Try to create named pipe instance %ld: %ls", + pipe_instance + 1, _pipe_name); const HANDLE accept_pipe = - CreateNamedPipeW (_pipe_name, - (PIPE_ACCESS_DUPLEX - | (first_instance ? FILE_FLAG_FIRST_PIPE_INSTANCE : 0)), - (PIPE_TYPE_BYTE | PIPE_WAIT), - PIPE_UNLIMITED_INSTANCES, - 0, 0, 1000, - &sec_all_nih); - - const bool duplicate = (accept_pipe == INVALID_HANDLE_VALUE - && pipe_instance == 0 - && GetLastError () == ERROR_ACCESS_DENIED); + CreateNamedPipeW (_pipe_name, PIPE_ACCESS_DUPLEX, + PIPE_TYPE_BYTE | PIPE_WAIT, PIPE_UNLIMITED_INSTANCES, + 0, 0, 1000, &sec_all_nih); if (accept_pipe != INVALID_HANDLE_VALUE) InterlockedIncrement (&pipe_instance); LeaveCriticalSection (&pipe_instance_lock); - if (duplicate) - { - *recoverable = false; - system_printf ("failed to create named pipe: " - "is the daemon already running?"); - return NULL; - } - if (accept_pipe == INVALID_HANDLE_VALUE) { debug_printf ("error creating pipe (%lu).", GetLastError ()); @@ -164,8 +170,6 @@ transport_layer_pipes::accept (bool *con return NULL; } - assert (accept_pipe); - if (!ConnectNamedPipe (accept_pipe, NULL) && GetLastError () != ERROR_PIPE_CONNECTED) { -- Corinna Vinschen Please, send mails regarding Cygwin to Cygwin Project Co-Leader cygwin AT cygwin DOT com Red Hat -- Problem reports: http://cygwin.com/problems.html FAQ: http://cygwin.com/faq/ Documentation: http://cygwin.com/docs.html Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple