@@ -53,7 +53,44 @@ semaphore_init(Semaphore *semaphore)
5353 }
5454 else
5555 {
56- return semaphore_create (semaphore );
56+ bool success = semaphore_create (semaphore );
57+
58+ /*
59+ * Only the main process should unlink the semaphore at exit time.
60+ *
61+ * When we create a semaphore, ensure we put our semId in the expected
62+ * environment variable (PG_AUTOCTL_LOG_SEMAPHORE), and we assign the
63+ * current process' pid as the semaphore owner.
64+ *
65+ * When we open a pre-existing semaphore using PG_AUTOCTL_LOG_SEMAPHORE
66+ * as the semId, the semaphore owner is set to zero.
67+ *
68+ * The atexit(3) function that removes the semaphores only acts when
69+ * the owner is our current pid. That way, in case of an early failure
70+ * in execv(), the semaphore is not dropped from under the main
71+ * program.
72+ *
73+ * A typical way execv() would fail is when calling run_program() on a
74+ * pathname that does not exist.
75+ *
76+ * Per atexit(3) manual page:
77+ *
78+ * When a child process is created via fork(2), it inherits copies of
79+ * its parent's registrations. Upon a successful call to one of the
80+ * exec(3) functions, all registrations are removed.
81+ *
82+ * And that's why it's important that we don't remove the semaphore in
83+ * the atexit() cleanup function when a call to run_command() fails
84+ * early.
85+ */
86+ if (success )
87+ {
88+ IntString semIdString = intToString (semaphore -> semId );
89+
90+ setenv (PG_AUTOCTL_LOG_SEMAPHORE , semIdString .strValue , 1 );
91+ }
92+
93+ return success ;
5794 }
5895}
5996
@@ -64,15 +101,33 @@ semaphore_init(Semaphore *semaphore)
64101bool
65102semaphore_finish (Semaphore * semaphore )
66103{
67- if (env_exists (PG_AUTOCTL_LOG_SEMAPHORE ))
104+ /*
105+ * At initialization time we either create a new semaphore and register
106+ * getpid() as the owner, or we open a previously existing semaphore from
107+ * its semId as found in our environment variable PG_AUTOCTL_LOG_SEMAPHORE.
108+ *
109+ * At finish time (called from the atexit(3) registry), we remove the
110+ * semaphore only when we are the owner of it. We expect semaphore->owner
111+ * to be zero (0) or our own pid; anything else is logged as a bug.
112+ */
113+ if (semaphore -> owner == 0 )
68114 {
69115 /* not the owner: there's no semaphore closing protocol in SysV */
70116 return true;
71117 }
72- else
118+ else if ( semaphore -> owner == getpid ())
73119 {
74120 return semaphore_unlink (semaphore );
75121 }
122+ else
123+ {
124+ log_fatal ("BUG: semaphore_finish semId %d owner is %d, getpid is %d" ,
125+ semaphore -> semId ,
126+ semaphore -> owner ,
127+ getpid ());
128+
129+ return false;
130+ }
76131}
77132
78133
@@ -84,6 +139,7 @@ semaphore_create(Semaphore *semaphore)
84139{
85140 union semun semun ;
86141
142+ semaphore -> owner = getpid ();
87143 semaphore -> semId = semget (IPC_PRIVATE , 1 , 0600 );
88144
89145 if (semaphore -> semId < 0 )
@@ -122,7 +178,10 @@ semaphore_create(Semaphore *semaphore)
122178bool
123179semaphore_open (Semaphore * semaphore )
124180{
125- char semIdString [BUFSIZE ];
181+ char semIdString [BUFSIZE ] = { 0 };
182+
183+ /* ensure the owner is set to zero when we re-open an existing semaphore */
184+ semaphore -> owner = 0 ;
126185
127186 if (!get_env_copy (PG_AUTOCTL_LOG_SEMAPHORE , semIdString , BUFSIZE ))
128187 {
0 commit comments