From f064e6e7ade16d1ee0466280b44b77fc99bb701f Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Tue, 19 Aug 2025 11:44:14 +0530 Subject: [PATCH 1/2] Changed the retry logic to be idempotency based --- src/databricks/sql/auth/retry.py | 86 ++++++++++++++++---------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py index 4281883d..645c637c 100755 --- a/src/databricks/sql/auth/retry.py +++ b/src/databricks/sql/auth/retry.py @@ -48,6 +48,40 @@ def get(cls, value: str): return cls.OTHER +class CommandIdempotency(Enum): + IDEMPOTENT = "idempotent" + NON_IDEMPOTENT = "non_idempotent" + + +# Mapping of CommandType to CommandIdempotency +# Only EXECUTE_STATEMENT is non-idempotent, all others are idempotent +COMMAND_IDEMPOTENCY_MAP = { + CommandType.EXECUTE_STATEMENT: CommandIdempotency.NON_IDEMPOTENT, + CommandType.CLOSE_SESSION: CommandIdempotency.IDEMPOTENT, + CommandType.CLOSE_OPERATION: CommandIdempotency.IDEMPOTENT, + CommandType.GET_OPERATION_STATUS: CommandIdempotency.IDEMPOTENT, + CommandType.OTHER: CommandIdempotency.IDEMPOTENT, +} + +# HTTP status codes that should never be retried, even for idempotent requests +# These are client error codes that indicate permanent issues +NON_RETRYABLE_STATUS_CODES = { + 400, # Bad Request + 401, # Unauthorized + 403, # Forbidden + 404, # Not Found + 405, # Method Not Allowed + 409, # Conflict + 410, # Gone + 411, # Length Required + 412, # Precondition Failed + 413, # Payload Too Large + 414, # URI Too Long + 415, # Unsupported Media Type + 416, # Range Not Satisfiable +} + + class DatabricksRetryPolicy(Retry): """ Implements our v3 retry policy by extending urllib3's robust default retry behaviour. @@ -358,21 +392,13 @@ def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]: if status_code // 100 <= 3: return False, "2xx/3xx codes are not retried" - if status_code == 400: + # Check for non-retryable status codes that should never be retried + if status_code in NON_RETRYABLE_STATUS_CODES: return ( False, - "Received 400 - BAD_REQUEST. Please check the request parameters.", + f"{self.command_type.value if self.command_type else 'Unknown'} received {status_code} - Client error codes are not retried", ) - if status_code == 401: - return ( - False, - "Received 401 - UNAUTHORIZED. Confirm your authentication credentials.", - ) - - if status_code == 403: - return False, "403 codes are not retried" - # Request failed and server said NotImplemented. This isn't recoverable. Don't retry. if status_code == 501: return False, "Received code 501 from server." @@ -381,50 +407,26 @@ def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]: if not self._is_method_retryable(method): return False, "Only POST requests are retried" - # Request failed with 404 and was a GetOperationStatus. This is not recoverable. Don't retry. - if status_code == 404 and self.command_type == CommandType.GET_OPERATION_STATUS: - return ( - False, - "GetOperationStatus received 404 code from Databricks. Operation was canceled.", - ) - - # Request failed with 404 because CloseSession returns 404 if you repeat the request. - if ( - status_code == 404 - and self.command_type == CommandType.CLOSE_SESSION - and len(self.history) > 0 - ): - raise SessionAlreadyClosedError( - "CloseSession received 404 code from Databricks. Session is already closed." - ) - - # Request failed with 404 because CloseOperation returns 404 if you repeat the request. - if ( - status_code == 404 - and self.command_type == CommandType.CLOSE_OPERATION - and len(self.history) > 0 - ): - raise CursorAlreadyClosedError( - "CloseOperation received 404 code from Databricks. Cursor is already closed." - ) + # Get command idempotency for use in multiple conditions below + command_idempotency = COMMAND_IDEMPOTENCY_MAP.get(self.command_type, CommandIdempotency.IDEMPOTENT) - # Request failed, was an ExecuteStatement and the command may have reached the server + # Request failed, was a non-idempotent command and the command may have reached the server if ( - self.command_type == CommandType.EXECUTE_STATEMENT + command_idempotency == CommandIdempotency.NON_IDEMPOTENT and status_code not in self.status_forcelist and status_code not in self.force_dangerous_codes ): return ( False, - "ExecuteStatement command can only be retried for codes 429 and 503", + f"{self.command_type.value} command can only be retried for codes 429 and 503", ) - # Request failed with a dangerous code, was an ExecuteStatement, but user forced retries for this + # Request failed with a dangerous code, was a non-idempotent command, but user forced retries for this # dangerous code. Note that these lines _are not required_ to make these requests retry. They would # retry automatically. This code is included only so that we can log the exact reason for the retry. # This gives users signal that their _retry_dangerous_codes setting actually did something. if ( - self.command_type == CommandType.EXECUTE_STATEMENT + command_idempotency == CommandIdempotency.NON_IDEMPOTENT and status_code in self.force_dangerous_codes ): return ( From 3d36b9a9abfe33c34d64a679568c4b0557ce3361 Mon Sep 17 00:00:00 2001 From: nishkarsh-db Date: Wed, 20 Aug 2025 13:42:59 +0530 Subject: [PATCH 2/2] Fix type checking and formatting issues in retry.py --- src/databricks/sql/auth/retry.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py index 645c637c..f78537d5 100755 --- a/src/databricks/sql/auth/retry.py +++ b/src/databricks/sql/auth/retry.py @@ -67,7 +67,7 @@ class CommandIdempotency(Enum): # These are client error codes that indicate permanent issues NON_RETRYABLE_STATUS_CODES = { 400, # Bad Request - 401, # Unauthorized + 401, # Unauthorized 403, # Forbidden 404, # Not Found 405, # Method Not Allowed @@ -408,7 +408,9 @@ def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]: return False, "Only POST requests are retried" # Get command idempotency for use in multiple conditions below - command_idempotency = COMMAND_IDEMPOTENCY_MAP.get(self.command_type, CommandIdempotency.IDEMPOTENT) + command_idempotency = COMMAND_IDEMPOTENCY_MAP.get( + self.command_type or CommandType.OTHER, CommandIdempotency.IDEMPOTENT + ) # Request failed, was a non-idempotent command and the command may have reached the server if ( @@ -418,7 +420,7 @@ def should_retry(self, method: str, status_code: int) -> Tuple[bool, str]: ): return ( False, - f"{self.command_type.value} command can only be retried for codes 429 and 503", + f"{self.command_type.value if self.command_type else 'Unknown'} command can only be retried for codes 429 and 503", ) # Request failed with a dangerous code, was a non-idempotent command, but user forced retries for this