Skip to content
This repository was archived by the owner on Jun 2, 2021. It is now read-only.

Commit f2fa2f4

Browse files
Derik Evangelista and Brian Butz authored
v3(services): orphan mitigation when delete fails
Co-authored-by: Brian Butz <bbutz@pivotal.io>
1 parent b6759fa commit f2fa2f4

9 files changed

Lines changed: 432 additions & 35 deletions

File tree

app/jobs/v3/delete_service_instance_job.rb

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,18 @@
22

33
module VCAP::CloudController
44
module V3
5+
# Raised by DeleteServiceInstanceJob#send_broker_request in place of the broker
# client's ServiceBrokerBadResponse. DeleteServiceInstanceJob#fail! matches on
# this class to restart the job (orphan mitigation) instead of failing outright.
class DeprovisionBadResponse < StandardError
6+
end
7+
58
class DeleteServiceInstanceJob < ServiceInstanceAsyncJob
69
# Builds the delete job. Both arguments are forwarded unchanged to the
# ServiceInstanceAsyncJob constructor; no extra state is set up here.
def initialize(guid, audit_info)
710
super(guid, audit_info)
811
end
912

1013
# Sends the deprovision request for this job's service instance through the
# given broker client, allowing the broker to answer asynchronously
# (accepts_incomplete).
#
# A ServiceBrokerBadResponse from the client is translated into
# DeprovisionBadResponse (same message) so that fail! can recognise it and
# trigger orphan mitigation.
def send_broker_request(client)
  deprovision_options = { accepts_incomplete: true }
  client.deprovision(service_instance, deprovision_options)
rescue VCAP::Services::ServiceBrokers::V2::Errors::ServiceBrokerBadResponse => bad_response
  raise DeprovisionBadResponse, bad_response.message
end
1318

1419
def operation_succeeded
@@ -30,6 +35,30 @@ def operation_type
3035
# Invoked by ServiceInstanceAsyncJob#perform when the service instance is
# blank (already gone). For a delete job there is nothing left to do, so the
# job is simply finished.
def gone!
3136
finish
3237
end
38+
39+
# Overrides the base-class default (false): when the broker reports a failed
# last operation, perform restarts a delete job instead of failing immediately.
def restart_on_failure?
40+
true
41+
end
42+
43+
# Delegates to the base restart_job (attempt counting / retry limit) and then
# logs, at info level, that orphan mitigation is being triggered for +msg+.
# The log line is only reached when super returns normally; once the retry
# limit is hit the base method calls fail_and_raise!, which presumably raises.
#
# NOTE(review): this override sits above the class's `private` marker, so it is
# public here while the base-class restart_job is private — confirm intended.
def restart_job(msg)
44+
super
45+
logger.info("could not complete the operation: #{msg}. Triggering orphan mitigation")
46+
end
47+
48+
# Failure hook for the delete job.
#
# A DeprovisionBadResponse (bad broker response to the deprovision call) does
# not fail the job: it triggers orphan mitigation, i.e. the job is restarted.
# Every other error is handled by the base-class fail!.
def fail!(err)
  return trigger_orphan_mitigation(err) if err.is_a?(DeprovisionBadResponse)

  super
end
56+
57+
private
58+
59+
# Orphan mitigation for a failed deprovision: restart the job, passing the
# error's message along as the reason.
def trigger_orphan_mitigation(err)
60+
restart_job(err.message)
61+
end
3362
end
3463
end
3564
end

app/jobs/v3/service_instance_async_job.rb

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22

33
module VCAP::CloudController
44
module V3
5+
# Raised by fetch_last_operation when the broker reports the last operation's
# state as 'failed'; the broker-supplied description is passed as the message.
# perform rescues it and either fails the job or restarts it, depending on
# restart_on_failure?.
#
# The broker description may be nil. Exception#message returns the class name
# for a nil message, so a caller-side `err.message || fallback` can never fall
# back. The default is therefore applied here, at construction time.
class LastOperationStateFailed < StandardError
  DEFAULT_MESSAGE = 'no error description returned by the broker'.freeze

  # @param msg [String, nil] failure description returned by the broker
  def initialize(msg = nil)
    super(msg || DEFAULT_MESSAGE)
  end
end
7+
58
class ServiceInstanceAsyncJob < VCAP::CloudController::Jobs::ReoccurringJob
9+
MAX_RETRIES = 3
610
attr_reader :warnings
711

812
def initialize(guid, audit_info)
@@ -12,15 +16,13 @@ def initialize(guid, audit_info)
1216
@warnings = []
1317
@request_attr = {}
1418
@first_time = true
19+
@attempts = 0
1520
end
1621

1722
def perform
1823
gone! && return if service_instance.blank?
19-
last_operation_type = service_instance.last_operation&.type
2024

21-
if service_instance.operation_in_progress? && last_operation_type != operation_type
22-
aborted!(last_operation_type)
23-
end
25+
raise_if_cannot_proceed!
2426

2527
client = VCAP::Services::ServiceClientProvider.provide({ instance: service_instance })
2628

@@ -42,13 +44,13 @@ def perform
4244
record_event(si, @request_attr)
4345
finish
4446
end
47+
rescue LastOperationStateFailed => err
48+
fail_and_raise!(err.message) unless restart_on_failure?
49+
50+
restart_job(err.message || 'no error description returned by the broker')
4551
rescue => err
4652
fail!(err)
4753
end
48-
49-
if service_instance.present? && service_instance.last_operation.state == 'failed'
50-
operation_failed!(service_instance.last_operation.description)
51-
end
5254
end
5355

5456
def handle_timeout
@@ -80,6 +82,10 @@ def display_name
8082
"service_instance.#{operation_type}"
8183
end
8284

85+
# Whether perform should restart the job (rather than fail it) when the broker
# reports a failed last operation. Defaults to false; subclasses may override.
def restart_on_failure?
86+
false
87+
end
88+
8389
private
8490

8591
attr_reader :service_instance_guid
@@ -93,6 +99,21 @@ def execute_request(client)
9399
)
94100
end
95101

102+
# Guard run at the start of perform: if the service instance has an operation
# in progress whose type differs from this job's operation_type, the job is
# aborted via aborted! (passing the conflicting operation type). Otherwise
# this is a no-op.
def raise_if_cannot_proceed!
  current_type = service_instance.last_operation&.type

  return unless service_instance.operation_in_progress?
  return if current_type == operation_type

  aborted!(current_type)
end
109+
110+
# Records another attempt and arranges for the job to start over.
#
# Once the attempt counter reaches MAX_RETRIES the job is failed with +msg+
# through fail_and_raise!. Otherwise @first_time is reset to true so that the
# next run treats the job as fresh.
def restart_job(msg)
  @attempts += 1
  fail_and_raise!(msg) if @attempts >= MAX_RETRIES

  @first_time = true
end
116+
96117
# True only when the instance's last operation has state 'succeeded' and its
# type matches this job's operation_type.
def operation_completed?
  return false unless service_instance.last_operation.state == 'succeeded'

  service_instance.last_operation.type == operation_type
end
@@ -101,6 +122,13 @@ def fetch_last_operation(client)
101122
last_operation_result = client.fetch_service_instance_last_operation(service_instance)
102123
self.polling_interval_seconds = last_operation_result[:retry_after] if last_operation_result[:retry_after]
103124

125+
operation_failed!(last_operation_result.dig(:last_operation)[:description]) if last_operation_result[:http_status_code] == HTTP::Status::BAD_REQUEST
126+
127+
lo = last_operation_result[:last_operation]
128+
if lo[:state] == 'failed'
129+
raise LastOperationStateFailed.new(lo[:description])
130+
end
131+
104132
service_instance.save_and_update_operation(
105133
last_operation: last_operation_result[:last_operation].slice(:state, :description)
106134
)
@@ -139,15 +167,23 @@ def operation_succeeded
139167
nil
140168
end
141169

142-
def fail!(e)
170+
# Persists a 'failed' last operation of this job's operation_type on the
# service instance, using +msg+ as the description. Does nothing when the
# service instance is blank.
def fail_last_operation(msg)
  return if service_instance.blank?

  service_instance.save_with_new_operation(
    {},
    { type: operation_type, state: 'failed', description: msg }
  )
end
179+
180+
# Marks the instance's last operation as failed with +msg+, then hands +msg+
# to operation_failed! (defined outside this view; presumably raises).
def fail_and_raise!(msg)
181+
fail_last_operation(msg)
182+
operation_failed!(msg)
183+
end
150184

185+
# Default failure hook: records a failed last operation described by the
# error's message, then re-raises the same error object to the caller.
def fail!(e)
186+
fail_last_operation(e.message)
151187
raise e
152188
end
153189

@@ -168,6 +204,10 @@ def volume_services_disabled?
168204
# True when the :route_services_enabled value in the Cloud Controller config
# is falsy (false or nil).
def route_services_disabled?
169205
!VCAP::CloudController::Config.config.get(:route_services_enabled)
170206
end
207+
208+
# Returns the Steno logger named 'cc-background', used for this job's log output.
def logger
209+
Steno.logger('cc-background')
210+
end
171211
end
172212
end
173213
end

lib/services/service_brokers/v2/response_parser.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,8 @@ def validate(method:, uri:, code:, response:)
718718
'last_operation' => {
719719
'state' => 'failed',
720720
'description' => description
721-
}
721+
},
722+
'http_status_code' => 400
722723
}
723724
end
724725
end

0 commit comments

Comments
 (0)