@@ -217,8 +217,67 @@ impl RefreshCoordinator {
217217 * state = RefreshCoordinatorState :: Idle ;
218218 self . changed . notify_all ( ) ;
219219 }
220+ RefreshCoordinatorState :: Running ( active) if active. key == * key => {
221+ // Recovery path: if begin_completion() panicked, the state was
222+ // restored to Running before the unwind. Transition to Idle so
223+ // waiters are not stuck forever.
224+ * state = RefreshCoordinatorState :: Idle ;
225+ self . changed . notify_all ( ) ;
226+ }
220227 RefreshCoordinatorState :: Idle => { }
221- _ => { }
228+ RefreshCoordinatorState :: Completing ( active) => {
229+ // Mismatched key — another refresh owns this state. Log and
230+ // leave it alone; the owning refresh will clean up.
231+ error ! (
232+ "force_complete_request called with mismatched key while coordinator was Completing; caller key: {:?}, active key: {:?}" ,
233+ key,
234+ active. key
235+ ) ;
236+ }
237+ RefreshCoordinatorState :: Running ( active) => {
238+ // Mismatched key — another refresh owns this state. Log and
239+ // leave it alone; the owning refresh will clean up.
240+ error ! (
241+ "force_complete_request called with mismatched key while coordinator was Running; caller key: {:?}, active key: {:?}" ,
242+ key,
243+ active. key
244+ ) ;
245+ }
246+ }
247+ }
248+ }
249+
250+ /// Safety guard created when a refresh thread takes ownership of the `Running`
251+ /// state. If the thread exits the `Start` arm without ever constructing a
252+ /// `RefreshCompletionGuard` (e.g., because `begin_completion` panics), this
253+ /// guard calls `force_complete_request` to transition the coordinator back to
254+ /// `Idle`, preventing a permanent deadlock.
255+ struct RefreshSafetyGuard < ' a > {
256+ coordinator : & ' a RefreshCoordinator ,
257+ key : RefreshKey ,
258+ disarmed : bool ,
259+ }
260+
261+ impl < ' a > RefreshSafetyGuard < ' a > {
262+ fn new ( coordinator : & ' a RefreshCoordinator , key : RefreshKey ) -> Self {
263+ Self {
264+ coordinator,
265+ key,
266+ disarmed : false ,
267+ }
268+ }
269+
270+ /// Disarm the safety guard once a `RefreshCompletionGuard` takes over
271+ /// responsibility for the state transition.
272+ fn disarm ( & mut self ) {
273+ self . disarmed = true ;
274+ }
275+ }
276+
277+ impl Drop for RefreshSafetyGuard < ' _ > {
278+ fn drop ( & mut self ) {
279+ if !self . disarmed {
280+ self . coordinator . force_complete_request ( & self . key ) ;
222281 }
223282 }
224283}
@@ -530,6 +589,11 @@ pub fn handle_configure(context: Arc<Context>, id: u32, params: Value) {
530589 state. config . cache_directory = Some ( cache_directory) ;
531590 }
532591 state. generation += 1 ;
592+ // Reset missing-env reporting so that the next refresh
593+ // after reconfiguration can trigger it again (Fixes #395).
594+ // Done inside the write lock to avoid a TOCTOU window with
595+ // concurrent refresh threads reading the generation.
596+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
533597 trace ! (
534598 "Configuring locators with generation {}: {:?}" ,
535599 state. generation,
@@ -886,6 +950,15 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
886950 context. refresh_coordinator . wait_until_idle ( ) ;
887951 }
888952 RefreshRegistration :: Start => {
953+ // Safety guard: if anything in this arm panics
954+ // (including begin_completion), force the
955+ // coordinator back to Idle so waiters are not
956+ // stuck forever.
957+ // Move refresh_key into the guard to avoid an
958+ // extra clone of potentially large search_paths.
959+ let mut safety_guard =
960+ RefreshSafetyGuard :: new ( & context. refresh_coordinator , refresh_key) ;
961+
889962 let refresh_result = panic:: catch_unwind ( AssertUnwindSafe ( || {
890963 execute_refresh (
891964 context. as_ref ( ) ,
@@ -899,8 +972,9 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
899972 let refresh_result = execution. result . clone ( ) ;
900973 let mut completion_guard = RefreshCompletionGuard :: begin (
901974 & context. refresh_coordinator ,
902- & refresh_key ,
975+ & safety_guard . key ,
903976 ) ;
977+ safety_guard. disarm ( ) ;
904978 finish_refresh_replies ( & mut completion_guard, & refresh_result) ;
905979 report_refresh_follow_up ( execution) ;
906980 }
@@ -911,8 +985,9 @@ pub fn handle_refresh(context: Arc<Context>, id: u32, params: Value) {
911985 ) ;
912986 let mut completion_guard = RefreshCompletionGuard :: begin (
913987 & context. refresh_coordinator ,
914- & refresh_key ,
988+ & safety_guard . key ,
915989 ) ;
990+ safety_guard. disarm ( ) ;
916991 finish_refresh_errors (
917992 & mut completion_guard,
918993 "Refresh failed unexpectedly" ,
@@ -1898,4 +1973,138 @@ mod tests {
18981973 assert_eq ! ( result_config. executables, Some ( vec![ executable] ) ) ;
18991974 assert ! ( matches!( search_scope, Some ( SearchScope :: Workspace ) ) ) ;
19001975 }
1976+
1977+ /// Test for #396: force_complete_request recovers from Running state.
1978+ /// When begin_completion() cannot be reached (e.g., the thread panics before
1979+ /// constructing a RefreshCompletionGuard), force_complete_request must still
1980+ /// transition Running → Idle to unblock waiters.
1981+ #[ test]
1982+ fn test_force_complete_request_recovers_from_running_state ( ) {
1983+ let coordinator = RefreshCoordinator :: default ( ) ;
1984+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
1985+
1986+ // State → Running(key)
1987+ assert ! ( matches!(
1988+ coordinator. register_request( 1 , key. clone( ) ) ,
1989+ RefreshRegistration :: Start
1990+ ) ) ;
1991+
1992+ // Simulate recovery: force_complete_request from Running state.
1993+ coordinator. force_complete_request ( & key) ;
1994+
1995+ // Verify we're back to Idle and can start a new refresh.
1996+ assert ! ( matches!(
1997+ coordinator. register_request( 2 , key. clone( ) ) ,
1998+ RefreshRegistration :: Start
1999+ ) ) ;
2000+ }
2001+
2002+ /// Test for #396: RefreshSafetyGuard transitions Running → Idle on drop
2003+ /// when begin_completion is never reached.
2004+ #[ test]
2005+ fn test_safety_guard_recovers_running_state_on_drop ( ) {
2006+ let coordinator = Arc :: new ( RefreshCoordinator :: default ( ) ) ;
2007+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
2008+ let other_key = make_refresh_key (
2009+ 1 ,
2010+ RefreshOptions {
2011+ search_kind : Some ( PythonEnvironmentKind :: Venv ) ,
2012+ search_paths : None ,
2013+ } ,
2014+ ) ;
2015+
2016+ assert ! ( matches!(
2017+ coordinator. register_request( 1 , key. clone( ) ) ,
2018+ RefreshRegistration :: Start
2019+ ) ) ;
2020+
2021+ let ( state_tx, state_rx) = mpsc:: channel ( ) ;
2022+ let waiter = {
2023+ let coordinator = coordinator. clone ( ) ;
2024+ let other_key = other_key. clone ( ) ;
2025+ thread:: spawn ( move || {
2026+ // Different key → returns Wait (not Joined).
2027+ assert ! ( matches!(
2028+ coordinator. register_request( 2 , other_key. clone( ) ) ,
2029+ RefreshRegistration :: Wait
2030+ ) ) ;
2031+ state_tx. send ( "waiting" ) . unwrap ( ) ;
2032+ coordinator. wait_until_idle ( ) ;
2033+ state_tx. send ( "idle" ) . unwrap ( ) ;
2034+ } )
2035+ } ;
2036+
2037+ assert_eq ! ( state_rx. recv( ) . unwrap( ) , "waiting" ) ;
2038+
2039+ // Create and immediately drop the safety guard without disarming it.
2040+ // This simulates the thread dying before begin_completion.
2041+ {
2042+ let _guard = RefreshSafetyGuard :: new ( & coordinator, key. clone ( ) ) ;
2043+ }
2044+
2045+ // Waiter should be unblocked.
2046+ assert_eq ! ( state_rx. recv( ) . unwrap( ) , "idle" ) ;
2047+ waiter. join ( ) . unwrap ( ) ;
2048+ }
2049+
2050+ /// Test for #396: RefreshSafetyGuard does NOT interfere when disarmed
2051+ /// (normal path where RefreshCompletionGuard takes over).
2052+ #[ test]
2053+ fn test_safety_guard_disarmed_does_not_interfere ( ) {
2054+ let coordinator = RefreshCoordinator :: default ( ) ;
2055+ let key = make_refresh_key ( 1 , RefreshOptions :: default ( ) ) ;
2056+
2057+ assert ! ( matches!(
2058+ coordinator. register_request( 1 , key. clone( ) ) ,
2059+ RefreshRegistration :: Start
2060+ ) ) ;
2061+
2062+ {
2063+ let mut safety_guard = RefreshSafetyGuard :: new ( & coordinator, key. clone ( ) ) ;
2064+ let mut completion_guard = RefreshCompletionGuard :: begin ( & coordinator, & key) ;
2065+ safety_guard. disarm ( ) ;
2066+ let ids = completion_guard. drain_request_ids ( ) ;
2067+ assert_eq ! ( ids, vec![ 1 ] ) ;
2068+ assert ! ( completion_guard. finish_if_no_pending( ) ) ;
2069+ }
2070+
2071+ // Should be Idle — can start a new refresh.
2072+ assert ! ( matches!(
2073+ coordinator. register_request( 2 , key. clone( ) ) ,
2074+ RefreshRegistration :: Start
2075+ ) ) ;
2076+ }
2077+
2078+ /// Test for #395: configure resets MISSING_ENVS_REPORTING_STATE so that
2079+ /// subsequent refreshes can trigger missing-env reporting again.
2080+ #[ test]
2081+ fn test_configure_resets_completed_missing_env_reporting ( ) {
2082+ let _guard = MISSING_ENVS_TEST_LOCK . lock ( ) . unwrap ( ) ;
2083+
2084+ let configuration = Arc :: new ( RwLock :: new ( ConfigurationState {
2085+ generation : 1 ,
2086+ config : Configuration :: default ( ) ,
2087+ } ) ) ;
2088+
2089+ // Simulate a completed first refresh.
2090+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2091+ assert ! ( try_begin_missing_env_reporting( configuration. as_ref( ) , 1 ) ) ;
2092+ complete_missing_env_reporting ( 1 ) ;
2093+
2094+ // Missing-env reporting is now exhausted.
2095+ assert ! ( !try_begin_missing_env_reporting( configuration. as_ref( ) , 1 ) ) ;
2096+
2097+ // Simulate what handle_configure does: bump generation and reset.
2098+ {
2099+ let mut state = configuration. write ( ) . unwrap ( ) ;
2100+ state. generation = 2 ;
2101+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2102+ }
2103+
2104+ // Missing-env reporting should work again for the new generation.
2105+ assert ! ( try_begin_missing_env_reporting( configuration. as_ref( ) , 2 ) ) ;
2106+
2107+ // Cleanup.
2108+ MISSING_ENVS_REPORTING_STATE . store ( MISSING_ENVS_AVAILABLE , Ordering :: Release ) ;
2109+ }
19012110}
0 commit comments