diff --git a/.github/workflows/code-quality-checks.yml b/.github/workflows/code-quality-checks.yml index b6db61a3c..22db995c5 100644 --- a/.github/workflows/code-quality-checks.yml +++ b/.github/workflows/code-quality-checks.yml @@ -8,6 +8,16 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + dependency-version: ["default", "min"] + # Optimize matrix - test min/max on subset of Python versions + exclude: + - python-version: "3.12" + dependency-version: "min" + - python-version: "3.13" + dependency-version: "min" + + name: "Unit Tests (Python ${{ matrix.python-version }}, ${{ matrix.dependency-version }} deps)" + steps: #---------------------------------------------- # check-out repo and set-up python @@ -37,7 +47,7 @@ jobs: uses: actions/cache@v4 with: path: .venv - key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ matrix.dependency-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} #---------------------------------------------- # install dependencies if cache does not exist #---------------------------------------------- @@ -50,8 +60,31 @@ jobs: - name: Install library run: poetry install --no-interaction #---------------------------------------------- + # override with custom dependency versions + #---------------------------------------------- + - name: Install Python tools for custom versions + if: matrix.dependency-version != 'default' + run: poetry run pip install toml packaging + + - name: Generate requirements file + if: matrix.dependency-version != 'default' + run: | + poetry run python scripts/dependency_manager.py ${{ matrix.dependency-version }} --output requirements-${{ matrix.dependency-version }}.txt + echo "Generated requirements for ${{ matrix.dependency-version }} versions:" + cat requirements-${{ matrix.dependency-version }}.txt + + - name: Override with custom dependency versions + if: matrix.dependency-version != 'default' + run: poetry run pip install -r requirements-${{ matrix.dependency-version }}.txt + + #---------------------------------------------- # run test suite #---------------------------------------------- + - name: Show installed versions + run: | + echo "=== Dependency Version: ${{ matrix.dependency-version }} ===" + poetry run pip list + - name: Run tests run: poetry run python -m pytest tests/unit run-unit-tests-with-arrow: @@ -59,6 +92,15 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + dependency-version: ["default", "min"] + exclude: + - python-version: "3.12" + dependency-version: "min" + - python-version: "3.13" + dependency-version: "min" + + name: "Unit Tests + PyArrow (Python ${{ matrix.python-version }}, ${{ matrix.dependency-version }} deps)" + steps: #---------------------------------------------- # check-out repo and set-up python @@ -88,7 +130,7 @@ jobs: uses: actions/cache@v4 with: path: .venv-pyarrow - key: venv-pyarrow-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} + key: venv-pyarrow-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ matrix.dependency-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} #---------------------------------------------- # install dependencies if cache does not exist #---------------------------------------------- @@ -101,8 +143,30 @@ jobs: - name: Install library run: poetry install --no-interaction --all-extras #---------------------------------------------- + # override with custom dependency versions + #---------------------------------------------- + - name: Install Python tools for custom versions + if: matrix.dependency-version != 'default' + run: poetry run pip install toml packaging + + - name: Generate requirements file with pyarrow + if: matrix.dependency-version != 'default' + run: | + poetry run python scripts/dependency_manager.py ${{ matrix.dependency-version }} --output requirements-${{ matrix.dependency-version }}-arrow.txt + echo "Generated requirements for ${{ matrix.dependency-version }} versions with PyArrow:" + cat requirements-${{ matrix.dependency-version }}-arrow.txt + + - name: Override with custom dependency versions + if: matrix.dependency-version != 'default' + run: poetry run pip install -r requirements-${{ matrix.dependency-version }}-arrow.txt + #---------------------------------------------- # run test suite #---------------------------------------------- + - name: Show installed versions + run: | + echo "=== Dependency Version: ${{ matrix.dependency-version }} with PyArrow ===" + poetry run pip list + - name: Run tests run: poetry run python -m pytest tests/unit check-linting: diff --git a/scripts/dependency_manager.py b/scripts/dependency_manager.py new file mode 100644 index 000000000..d73d095f2 --- /dev/null +++ b/scripts/dependency_manager.py @@ -0,0 +1,234 @@ +""" +Dependency version management for testing. +Generates requirements files for min and default dependency versions. +For min versions, creates flexible constraints (e.g., >=1.2.5,<1.3.0) to allow +compatible patch updates instead of pinning exact versions. +""" + +import toml +import sys +import argparse +from packaging.specifiers import SpecifierSet +from packaging.requirements import Requirement +from pathlib import Path + +class DependencyManager: + def __init__(self, pyproject_path="pyproject.toml"): + self.pyproject_path = Path(pyproject_path) + self.dependencies = self._load_dependencies() + + # Map of packages that need specific transitive dependency constraints when downgraded + self.transitive_dependencies = { + 'pandas': { + # When pandas is downgraded to 1.x, ensure numpy compatibility + 'numpy': { + 'min_constraint': '>=1.16.5,<2.0.0', # pandas 1.x works with numpy 1.x + 'applies_when': lambda version: version.startswith('1.') + } + } + } + + def _load_dependencies(self): + """Load dependencies from pyproject.toml""" + with open(self.pyproject_path, 'r') as f: + pyproject = toml.load(f) + return pyproject['tool']['poetry']['dependencies'] + + def _parse_constraint(self, name, constraint): + """Parse a dependency constraint into version info""" + if isinstance(constraint, str): + return constraint, False # version_constraint, is_optional + elif isinstance(constraint, list): + # Handle complex constraints like pandas/pyarrow + first_constraint = constraint[0] + version = first_constraint['version'] + is_optional = first_constraint.get('optional', False) + return version, is_optional + elif isinstance(constraint, dict): + if 'version' in constraint: + return constraint['version'], constraint.get('optional', False) + return None, False + + def _extract_versions_from_specifier(self, spec_set_str): + """Extract minimum version from a specifier set""" + try: + # Handle caret (^) and tilde (~) constraints that packaging doesn't support + if spec_set_str.startswith('^'): + # ^1.2.3 means >=1.2.3, <2.0.0 + min_version = spec_set_str[1:] # Remove ^ + return min_version, None + elif spec_set_str.startswith('~'): + # ~1.2.3 means >=1.2.3, <1.3.0 + min_version = spec_set_str[1:] # Remove ~ + return min_version, None + + spec_set = SpecifierSet(spec_set_str) + min_version = None + + for spec in spec_set: + if spec.operator in ('>=', '=='): + min_version = spec.version + break + + return min_version, None + except Exception as e: + print(f"Warning: Could not parse constraint '{spec_set_str}': {e}", file=sys.stderr) + return None, None + + def _create_flexible_minimum_constraint(self, package_name, min_version): + """Create a flexible minimum constraint that allows compatible updates""" + try: + # Split version into parts + version_parts = min_version.split('.') + + if len(version_parts) >= 2: + major = version_parts[0] + minor = version_parts[1] + + # Special handling for packages that commonly have conflicts + # For these packages, use wider constraints to allow more compatibility + if package_name in ['requests', 'urllib3', 'pandas']: + # Use wider constraint: >=min_version,=2.18.1,<3.0.0 + next_major = int(major) + 1 + upper_bound = f"{next_major}.0.0" + return f"{package_name}>={min_version},<{upper_bound}" + else: + # For other packages, use minor version constraint + # e.g., 1.2.5 becomes >=1.2.5,<1.3.0 + next_minor = int(minor) + 1 + upper_bound = f"{major}.{next_minor}.0" + return f"{package_name}>={min_version},<{upper_bound}" + else: + # If version doesn't have minor version, just use exact version + return f"{package_name}=={min_version}" + + except (ValueError, IndexError) as e: + print(f"Warning: Could not create flexible constraint for {package_name}=={min_version}: {e}", file=sys.stderr) + # Fallback to exact version + return f"{package_name}=={min_version}" + + def _get_transitive_dependencies(self, package_name, version, version_type): + """Get transitive dependencies that need specific constraints based on the main package version""" + transitive_reqs = [] + + if package_name in self.transitive_dependencies: + transitive_deps = self.transitive_dependencies[package_name] + + for dep_name, dep_config in transitive_deps.items(): + # Check if this transitive dependency applies for this version + if dep_config['applies_when'](version): + if version_type == "min": + # Use the predefined constraint for minimum versions + constraint = dep_config['min_constraint'] + transitive_reqs.append(f"{dep_name}{constraint}") + # For default version_type, we don't add transitive deps as Poetry handles them + + return transitive_reqs + + def generate_requirements(self, version_type="min", include_optional=False): + """ + Generate requirements for specified version type. + + Args: + version_type: "min" or "default" + include_optional: Whether to include optional dependencies + """ + requirements = [] + transitive_requirements = [] + + for name, constraint in self.dependencies.items(): + if name == 'python': + continue + + version_constraint, is_optional = self._parse_constraint(name, constraint) + if not version_constraint: + continue + + if is_optional and not include_optional: + continue + + if version_type == "default": + # For default, just use the constraint as-is (let poetry resolve) + requirements.append(f"{name}{version_constraint}") + elif version_type == "min": + min_version, _ = self._extract_versions_from_specifier(version_constraint) + if min_version: + # Create flexible constraint that allows patch updates for compatibility + flexible_constraint = self._create_flexible_minimum_constraint(name, min_version) + requirements.append(flexible_constraint) + + # Check if this package needs specific transitive dependencies + transitive_deps = self._get_transitive_dependencies(name, min_version, version_type) + transitive_requirements.extend(transitive_deps) + + # Combine main requirements with transitive requirements + all_requirements = requirements + transitive_requirements + + # Remove duplicates (prefer main requirements over transitive ones) + seen_packages = set() + final_requirements = [] + + # First add main requirements + for req in requirements: + package_name = Requirement(req).name + seen_packages.add(package_name) + final_requirements.append(req) + + # Then add transitive requirements that don't conflict + for req in transitive_requirements: + package_name = Requirement(req).name + if package_name not in seen_packages: + final_requirements.append(req) + + return final_requirements + + + def write_requirements_file(self, filename, version_type="min", include_optional=False): + """Write requirements to a file""" + requirements = self.generate_requirements(version_type, include_optional) + + with open(filename, 'w') as f: + if version_type == "min": + f.write(f"# Minimum compatible dependency versions generated from pyproject.toml\n") + f.write(f"# Uses flexible constraints to resolve compatibility conflicts:\n") + f.write(f"# - Common packages (requests, urllib3, pandas): >=min,=min, Generator[urllib3.BaseHTTPResponse, None, None]: + ) -> Generator[BaseHTTPResponse, None, None]: """ Context manager for making HTTP requests with proper resource cleanup. @@ -233,7 +241,7 @@ def request_context( **kwargs: Additional arguments passed to urllib3 request Yields: - urllib3.BaseHTTPResponse: The HTTP response object + BaseHTTPResponse: The HTTP response object """ logger.debug( "Making %s request to %s", method, urllib.parse.urlparse(url).netloc @@ -270,7 +278,7 @@ def request( url: str, headers: Optional[Dict[str, str]] = None, **kwargs, - ) -> urllib3.BaseHTTPResponse: + ) -> BaseHTTPResponse: """ Make an HTTP request. @@ -281,7 +289,7 @@ def request( **kwargs: Additional arguments passed to urllib3 request Returns: - urllib3.BaseHTTPResponse: The HTTP response object with data and metadata pre-loaded + BaseHTTPResponse: The HTTP response object with data and metadata pre-loaded """ with self.request_context(method, url, headers=headers, **kwargs) as response: # Read the response data to ensure it's available after context exit diff --git a/tests/e2e/common/retry_test_mixins.py b/tests/e2e/common/retry_test_mixins.py index 2798541ad..b2350bd98 100755 --- a/tests/e2e/common/retry_test_mixins.py +++ b/tests/e2e/common/retry_test_mixins.py @@ -2,6 +2,7 @@ import time from typing import Optional, List from unittest.mock import MagicMock, PropertyMock, patch +import io import pytest from urllib3.exceptions import MaxRetryError @@ -91,22 +92,78 @@ def _test_retry_disabled_with_message( assert error_msg_substring in str(cm.exception) +class SimpleHttpResponse: + """A simple HTTP response mock that works with both urllib3 v1.x and v2.x""" + + def __init__(self, status: int, headers: dict, redirect_location: Optional[str] = None): + # Import the correct HTTP message type that urllib3 v1.x expects + try: + from http.client import HTTPMessage + except ImportError: + from httplib import HTTPMessage + + self.status = status + # Create proper HTTPMessage for urllib3 v1.x compatibility + self.headers = HTTPMessage() + for key, value in headers.items(): + self.headers[key] = str(value) + self.msg = self.headers # For urllib3~=1.0.0 compatibility + self.reason = "Mocked Response" + self.version = 11 + self.length = 0 + self.length_remaining = 0 + self._redirect_location = redirect_location + self._body = b"" + self._fp = io.BytesIO(self._body) + self._url = "https://example.com" + + def get_redirect_location(self, *args, **kwargs): + """Return the redirect location or False""" + return False if self._redirect_location is None else self._redirect_location + + def read(self, amt=None): + """Mock read method for file-like behavior""" + return self._body + + def close(self): + """Mock close method""" + pass + + def drain_conn(self): + """Mock drain_conn method for urllib3 v2.x""" + pass + + def isclosed(self): + """Mock isclosed method for urllib3 v1.x""" + return False + + def release_conn(self): + """Mock release_conn method for thrift HTTP client""" + pass + + @property + def data(self): + """Mock data property for urllib3 v2.x""" + return self._body + + @property + def url(self): + """Mock url property""" + return self._url + + @url.setter + def url(self, value): + """Mock url setter""" + self._url = value + + @contextmanager def mocked_server_response( status: int = 200, headers: dict = {}, redirect_location: Optional[str] = None ): - """Context manager for patching urllib3 responses""" - - # When mocking mocking a BaseHTTPResponse for urllib3 the mock must include - # 1. A status code - # 2. A headers dict - # 3. mock.get_redirect_location() return falsy by default - - # `msg` is included for testing when urllib3~=1.0.0 is installed - mock_response = MagicMock(headers=headers, msg=headers, status=status) - mock_response.get_redirect_location.return_value = ( - False if redirect_location is None else redirect_location - ) + """Context manager for patching urllib3 responses with version compatibility""" + + mock_response = SimpleHttpResponse(status, headers, redirect_location) with patch("urllib3.connectionpool.HTTPSConnectionPool._get_conn") as getconn_mock: getconn_mock.return_value.getresponse.return_value = mock_response @@ -127,18 +184,14 @@ def mock_sequential_server_responses(responses: List[dict]): - redirect_location: str """ - mock_responses = [] - - # Each resp should have these members: - - for resp in responses: - _mock = MagicMock( - headers=resp["headers"], msg=resp["headers"], status=resp["status"] - ) - _mock.get_redirect_location.return_value = ( - False if resp["redirect_location"] is None else resp["redirect_location"] + mock_responses = [ + SimpleHttpResponse( + status=resp["status"], + headers=resp["headers"], + redirect_location=resp["redirect_location"] ) - mock_responses.append(_mock) + for resp in responses + ] with patch("urllib3.connectionpool.HTTPSConnectionPool._get_conn") as getconn_mock: getconn_mock.return_value.getresponse.side_effect = mock_responses @@ -475,21 +528,23 @@ def test_retry_abort_close_operation_on_404(self, extra_params, caplog): ], ) @patch("databricks.sql.telemetry.telemetry_client.TelemetryClient._send_telemetry") - def test_retry_max_redirects_raises_too_many_redirects_exception( + def test_3xx_redirect_codes_are_not_retried( self, mock_send_telemetry, extra_params ): """GIVEN the connector is configured with a custom max_redirects - WHEN the DatabricksRetryPolicy is created - THEN the connector raises a MaxRedirectsError if that number is exceeded + WHEN the DatabricksRetryPolicy receives a 302 redirect + THEN the connector does not retry since 3xx codes are not retried per policy """ - max_redirects, expected_call_count = 1, 2 + max_redirects, expected_call_count = 1, 1 - # Code 302 is a redirect + # Code 302 is a redirect, but 3xx codes are not retried per policy + # Note: We don't set redirect_location because that would cause urllib3 v2.x + # to follow redirects internally, bypassing our retry policy test with mocked_server_response( - status=302, redirect_location="/foo.bar" + status=302, redirect_location=None ) as mock_obj: - with pytest.raises(MaxRetryError) as cm: + with pytest.raises(RequestError): # Should get RequestError, not MaxRetryError with self.connection( extra_params={ **extra_params, @@ -498,8 +553,7 @@ def test_retry_max_redirects_raises_too_many_redirects_exception( } ): pass - assert "too many redirects" == str(cm.value.reason) - # Total call count should be 2 (original + 1 retry) + # Total call count should be 1 (original only, no retries for 3xx codes) assert mock_obj.return_value.getresponse.call_count == expected_call_count @pytest.mark.parametrize( @@ -510,21 +564,23 @@ def test_retry_max_redirects_raises_too_many_redirects_exception( ], ) @patch("databricks.sql.telemetry.telemetry_client.TelemetryClient._send_telemetry") - def test_retry_max_redirects_unset_doesnt_redirect_forever( + def test_3xx_codes_not_retried_regardless_of_max_redirects_setting( self, mock_send_telemetry, extra_params ): """GIVEN the connector is configured without a custom max_redirects - WHEN the DatabricksRetryPolicy is used - THEN the connector raises a MaxRedirectsError if that number is exceeded + WHEN the DatabricksRetryPolicy receives a 302 redirect + THEN the connector does not retry since 3xx codes are not retried per policy - This test effectively guarantees that regardless of _retry_max_redirects, - _stop_after_attempts_count is enforced. + This test confirms that 3xx codes (including redirects) are not retried + according to the DatabricksRetryPolicy regardless of redirect settings. """ - # Code 302 is a redirect + # Code 302 is a redirect, but 3xx codes are not retried per policy + # Note: We don't set redirect_location because that would cause urllib3 v2.x + # to follow redirects internally, bypassing our retry policy test with mocked_server_response( - status=302, redirect_location="/foo.bar/" + status=302, redirect_location=None ) as mock_obj: - with pytest.raises(MaxRetryError) as cm: + with pytest.raises(RequestError): # Should get RequestError, not MaxRetryError with self.connection( extra_params={ **extra_params, @@ -533,8 +589,8 @@ def test_retry_max_redirects_unset_doesnt_redirect_forever( ): pass - # Total call count should be 6 (original + _retry_stop_after_attempts_count) - assert mock_obj.return_value.getresponse.call_count == 6 + # Total call count should be 1 (original only, no retries for 3xx codes) + assert mock_obj.return_value.getresponse.call_count == 1 @pytest.mark.parametrize( "extra_params", @@ -543,13 +599,13 @@ def test_retry_max_redirects_unset_doesnt_redirect_forever( {"use_sea": True}, ], ) - def test_retry_max_redirects_is_bounded_by_stop_after_attempts_count( + def test_3xx_codes_stop_request_immediately_no_retry_attempts( self, extra_params ): - # If I add another 503 or 302 here the test will fail with a MaxRetryError + # Since 3xx codes are not retried per policy, we only ever see the first 302 response responses = [ {"status": 302, "headers": {}, "redirect_location": "/foo.bar"}, - {"status": 500, "headers": {}, "redirect_location": None}, + {"status": 500, "headers": {}, "redirect_location": None}, # Never reached ] additional_settings = { @@ -568,7 +624,7 @@ def test_retry_max_redirects_is_bounded_by_stop_after_attempts_count( ): pass - # The error should be the result of the 500, not because of too many requests. + # The error should be the result of the 302, since 3xx codes are not retried assert "too many redirects" not in str(cm.value.message) assert "Error during request to server" in str(cm.value.message) diff --git a/tests/unit/test_sea_http_client.py b/tests/unit/test_sea_http_client.py index 10f10592d..39ecb58a7 100644 --- a/tests/unit/test_sea_http_client.py +++ b/tests/unit/test_sea_http_client.py @@ -96,7 +96,8 @@ def test_make_request_success(self, mock_get_auth_headers, sea_http_client): # Setup mock response mock_response = Mock() mock_response.status = 200 - mock_response.json.return_value = {"result": "success"} + # Mock response.data.decode() to return a valid JSON string + mock_response.data.decode.return_value = '{"result": "success"}' mock_response.__enter__ = Mock(return_value=mock_response) mock_response.__exit__ = Mock(return_value=None)