From 609a3428e9a064ec93c50f489eaa7e7e05129ed7 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Tue, 19 Aug 2025 14:44:18 +0530 Subject: [PATCH] Add documentation for proxy support Signed-off-by: Vikrant Puppala --- README.md | 2 + docs/proxy.md | 232 +++++++++++++++++++++++++++++++ examples/README.md | 1 + examples/proxy_authentication.py | 153 ++++++++++++++++++++ 4 files changed, 388 insertions(+) create mode 100644 docs/proxy.md create mode 100644 examples/proxy_authentication.py diff --git a/README.md b/README.md index a4c5a1307..d57efda1f 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ The Databricks SQL Connector for Python allows you to develop Python application This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs, you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`. +The connector includes built-in support for HTTP/HTTPS proxy servers with multiple authentication methods including basic authentication and Kerberos/Negotiate authentication. See `docs/proxy.md` and `examples/proxy_authentication.py` for details. + You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](help.databricks.com). ## Requirements diff --git a/docs/proxy.md b/docs/proxy.md new file mode 100644 index 000000000..2e0bec292 --- /dev/null +++ b/docs/proxy.md @@ -0,0 +1,232 @@ +# Proxy Support + +The Databricks SQL Connector supports connecting through HTTP and HTTPS proxy servers with various authentication methods. This feature automatically detects system proxy configuration and handles proxy authentication transparently. 
+ +## Quick Start + +The connector automatically uses your system's proxy configuration when available: + +```python +from databricks import sql + +# Basic connection - uses system proxy automatically +with sql.connect( + server_hostname="your-workspace.cloud.databricks.com", + http_path="/sql/1.0/endpoints/your-endpoint-id", + access_token="your-token" +) as connection: + pass # Your queries here... +``` + +For advanced proxy authentication (like Kerberos), specify the authentication method: + +```python +with sql.connect( + server_hostname="your-workspace.cloud.databricks.com", + http_path="/sql/1.0/endpoints/your-endpoint-id", + access_token="your-token", + _proxy_auth_method="negotiate" # Enable Kerberos proxy auth +) as connection: + pass # Your queries here... +``` + +## Proxy Configuration + +### Environment Variables + +The connector follows standard proxy environment variable conventions: + +| Variable | Description | Example | +|----------|-------------|---------| +| `HTTP_PROXY` | Proxy for HTTP requests | `http://proxy.company.com:8080` | +| `HTTPS_PROXY` | Proxy for HTTPS requests | `https://proxy.company.com:8080` | +| `NO_PROXY` | Hosts to bypass proxy | `localhost,127.0.0.1,.company.com` | + +**Note**: The connector also recognizes lowercase versions (`http_proxy`, `https_proxy`, `no_proxy`). + +### Proxy URL Formats + +Basic proxy (no authentication): +```bash +export HTTPS_PROXY="http://proxy.company.com:8080" +``` + +Proxy with basic authentication: +```bash +export HTTPS_PROXY="http://username:password@proxy.company.com:8080" +``` + +## Authentication Methods + +The connector supports multiple proxy authentication methods via the `_proxy_auth_method` parameter: + +### 1. Basic Authentication (`basic` or `None`) + +**Default behavior** when credentials are provided in the proxy URL or when `_proxy_auth_method="basic"` is specified.
+ +```python +# Method 1: Credentials in proxy URL (recommended) +# Set environment: HTTPS_PROXY="http://user:pass@proxy.company.com:8080" +with sql.connect( + server_hostname="your-workspace.com", + http_path="/sql/1.0/endpoints/abc123", + access_token="your-token" + # No _proxy_auth_method needed - detected automatically +) as conn: + pass + +# Method 2: Explicit basic authentication +with sql.connect( + server_hostname="your-workspace.com", + http_path="/sql/1.0/endpoints/abc123", + access_token="your-token", + _proxy_auth_method="basic" # Explicit basic auth +) as conn: + pass +``` + +### 2. Kerberos/Negotiate Authentication (`negotiate`) + +For corporate environments using Kerberos authentication with proxy servers. + +**Prerequisites:** +- Valid Kerberos tickets (run `kinit` first) +- Properly configured Kerberos environment + +```python +with sql.connect( + server_hostname="your-workspace.com", + http_path="/sql/1.0/endpoints/abc123", + access_token="your-token", + _proxy_auth_method="negotiate" # Enable Kerberos proxy auth +) as conn: + pass +``` + +**Kerberos Setup Example:** +```bash +# Obtain Kerberos tickets +kinit your-username@YOUR-DOMAIN.COM + +# Set proxy (no credentials in URL for Kerberos) +export HTTPS_PROXY="http://proxy.company.com:8080" + +# Run your Python script +python your_script.py +``` + +## Proxy Bypass + +The connector respects system proxy bypass rules. Requests to hosts listed in `NO_PROXY` or system bypass lists will connect directly, bypassing the proxy. Because bypass decisions follow Python's `urllib.request.proxy_bypass()` rules, `NO_PROXY` entries are matched as exact host names or domain suffixes; shell-style wildcards such as `*.internal.company.com` or `10.*` are not expanded (a lone `*` bypasses the proxy for every host). + +```bash +# Bypass proxy for local and internal hosts +export NO_PROXY="localhost,127.0.0.1,.internal.company.com" +``` + +## Advanced Configuration + +### Per-Request Proxy Decisions + +The connector automatically makes per-request decisions about proxy usage based on: + +1. **System proxy configuration** - Detected from environment variables +2. **Proxy bypass rules** - Honor `NO_PROXY` and system bypass settings +3.
**Target host** - Check if the specific host should use proxy + +### Connection Pooling + +The connector maintains separate connection pools for direct and proxy connections, allowing efficient handling of mixed proxy/direct traffic. + +### SSL/TLS with Proxy + +HTTPS connections through HTTP proxies use the CONNECT method for SSL tunneling. The connector handles this automatically while preserving all SSL verification settings. + +## Troubleshooting + +### Common Issues + +**Problem**: Connection fails with proxy-related errors +``` +Solution: +1. Verify proxy environment variables are set correctly +2. Check if proxy requires authentication +3. Ensure proxy allows CONNECT method for HTTPS +4. Test proxy connectivity with curl: + curl -x $HTTPS_PROXY https://your-workspace.com +``` + +**Problem**: Kerberos authentication fails +``` +Solution: +1. Verify Kerberos tickets: klist +2. Renew tickets if expired: kinit +3. Check proxy supports negotiate authentication +4. Ensure time synchronization between client and KDC +``` + +**Problem**: Some requests bypass proxy unexpectedly +``` +Solution: +1. Check NO_PROXY environment variable +2. Review system proxy bypass settings +3. Verify the target hostname format +``` + +### Debug Logging + +Enable detailed logging to troubleshoot proxy issues: + +```python +import logging + +# Enable connector debug logging +logging.basicConfig(level=logging.DEBUG) +logging.getLogger("databricks.sql").setLevel(logging.DEBUG) + +# Enable urllib3 logging for HTTP details +logging.getLogger("urllib3").setLevel(logging.DEBUG) +``` + +### Testing Proxy Configuration + +Use the provided example script to test different proxy authentication methods: + +```bash +cd examples/ +python proxy_authentication.py +``` + +This script tests: +- Default proxy behavior +- Basic authentication +- Kerberos/Negotiate authentication + +## Examples + +See `examples/proxy_authentication.py` for a comprehensive demonstration of proxy authentication methods. 
+ +## Implementation Details + +### How Proxy Detection Works + +1. **Environment Variables**: Check `HTTP_PROXY`/`HTTPS_PROXY` environment variables +2. **System Configuration**: Use Python's `urllib.request.getproxies()` to detect system settings +3. **Bypass Rules**: Honor `NO_PROXY` and `urllib.request.proxy_bypass()` rules +4. **Per-Request Logic**: Decide proxy usage for each request based on target host + +### Supported Proxy Types + +- **HTTP Proxies**: For both HTTP and HTTPS traffic (via CONNECT) +- **HTTPS Proxies**: Encrypted proxy connections +- **Authentication**: Basic, Negotiate/Kerberos +- **Bypass Rules**: Full support for NO_PROXY patterns + +### Connection Architecture + +The connector uses a unified HTTP client that maintains: +- **Direct Pool Manager**: For non-proxy connections +- **Proxy Pool Manager**: For proxy connections +- **Per-Request Routing**: Automatic selection based on target host + +This architecture ensures optimal performance and correct proxy handling across all connector operations. diff --git a/examples/README.md b/examples/README.md index 43d248dab..d73c58a6b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -42,3 +42,4 @@ this example the string `ExamplePartnerTag` will be added to the the user agent - **`custom_cred_provider.py`** shows how to pass a custom credential provider to bypass connector authentication. Please install databricks-sdk prior to running this example. - **`v3_retries_query_execute.py`** shows how to enable v3 retries in connector version 2.9.x including how to enable retries for non-default retry cases. - **`parameters.py`** shows how to use parameters in native and inline modes. +- **`proxy_authentication.py`** demonstrates how to connect through proxy servers using different authentication methods including basic authentication and Kerberos/Negotiate authentication. 
#!/usr/bin/env python3
"""
Example: Databricks SQL Connector with Proxy Authentication

This example demonstrates how to connect to Databricks through a proxy server
using different authentication methods:
1. Basic authentication (username/password in proxy URL)
2. Kerberos/Negotiate authentication
3. Default system proxy behavior

Prerequisites:
- Configure your system proxy settings (HTTP_PROXY/HTTPS_PROXY environment variables)
- For Kerberos: Ensure you have valid Kerberos tickets (kinit)
- Set your Databricks credentials in environment variables
  (DATABRICKS_SERVER_HOSTNAME, DATABRICKS_HTTP_PATH, DATABRICKS_TOKEN)

Usage:
    python proxy_authentication.py

The process exits with status 0 when at least one scenario connects and
with status 1 otherwise.
"""

import logging
import os
import sys

try:
    from databricks import sql
except ImportError:
    # Defer the failure to main() so the user sees an actionable message
    # instead of a bare traceback.
    sql = None

# Configure logging to see proxy activity
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

# Uncomment for detailed debugging (shows HTTP requests/responses)
# logging.getLogger("urllib3").setLevel(logging.DEBUG)
# logging.getLogger("urllib3.connectionpool").setLevel(logging.DEBUG)

# Environment variables inspected when reporting the proxy configuration.
PROXY_ENV_VARS = ("HTTP_PROXY", "HTTPS_PROXY", "http_proxy", "https_proxy")

# Environment variables that must all be set before any connection is tried.
REQUIRED_ENV_VARS = (
    "DATABRICKS_SERVER_HOSTNAME",
    "DATABRICKS_HTTP_PATH",
    "DATABRICKS_TOKEN",
)

# One entry per scenario:
# (heading, description lines, label for the test banner, _proxy_auth_method).
# A method of None means the parameter is not passed at all, so the connector
# uses basic auth if credentials are in the proxy URL, otherwise no auth.
AUTH_SCENARIOS = (
    (
        "Test 1: Default Proxy Behavior",
        ("Uses basic authentication if credentials are in proxy URL",),
        "Default Proxy Behavior",
        None,
    ),
    (
        "Test 2: Explicit Basic Authentication",
        ("Explicitly requests basic authentication (same as default)",),
        "Basic Proxy Authentication",
        "basic",
    ),
    (
        "Test 3: Kerberos/Negotiate Authentication",
        (
            "Uses Kerberos tickets for proxy authentication",
            "Note: Requires valid Kerberos tickets (run 'kinit' first)",
        ),
        "Kerberos Proxy Authentication",
        "negotiate",
    ),
)


def redact_proxy_url(url):
    """Return *url* with any embedded credentials replaced by ``***``.

    Everything up to the last ``@`` is treated as credentials, so passwords
    that themselves contain ``@`` are never echoed. The URL scheme, when
    present, is kept so the output still shows which kind of proxy is used.

    Args:
        url: Proxy URL as found in the environment, with or without a scheme.

    Returns:
        The URL unchanged when it contains no ``@``; otherwise
        ``[scheme://]***@host-part``.
    """
    scheme, separator, remainder = url.partition("://")
    if not separator or not scheme.isalnum():
        # No recognisable scheme: treat the whole value as "[userinfo@]host".
        scheme, separator, remainder = "", "", url

    _, at_sign, host_part = remainder.rpartition("@")
    if not at_sign:
        return url
    return f"{scheme}{separator}***@{host_part}"


def check_proxy_environment():
    """Report which proxy environment variables are configured.

    Prints every non-empty variable from ``PROXY_ENV_VARS`` with credentials
    hidden, or a hint when none is set.

    Returns:
        True if at least one proxy variable is set, False otherwise.
    """
    configured_proxies = {
        var: os.environ[var] for var in PROXY_ENV_VARS if os.environ.get(var)
    }

    if not configured_proxies:
        print("⚠ No proxy environment variables found")
        print(" Set HTTP_PROXY and/or HTTPS_PROXY if using a proxy")
        return False

    print("✓ Proxy environment variables found:")
    for var, value in configured_proxies.items():
        # Hide credentials in output for security
        print(f" {var}: {redact_proxy_url(value)}")
    return True


def run_connection_test(connection_params, test_name):
    """Open a connection with the given parameters and run two probe queries.

    Args:
        connection_params: Keyword arguments forwarded to ``sql.connect``.
        test_name: Human-readable label printed in the banner.

    Returns:
        True when the connection and both queries succeed, False on any error.
    """
    print(f"\n--- Testing {test_name} ---")

    try:
        with sql.connect(**connection_params) as connection:
            print("✓ Successfully connected!")

            with connection.cursor() as cursor:
                # Test basic query
                cursor.execute(
                    "SELECT current_user() as user, current_database() as database"
                )
                result = cursor.fetchone()
                print(f"✓ Connected as user: {result.user}")
                print(f"✓ Default database: {result.database}")

                # Test a simple computation
                cursor.execute("SELECT 1 + 1 as result")
                result = cursor.fetchone()
                print(f"✓ Query result: 1 + 1 = {result.result}")

        return True

    except Exception as e:
        # Deliberately broad: a failing scenario is reported and the script
        # moves on to the next authentication method.
        print(f"✗ Connection failed: {e}")
        return False


def main():
    """Run every proxy authentication scenario and print a summary.

    Returns:
        Process exit status: 0 if at least one scenario connected, 1 if the
        connector or required environment variables are missing or every
        scenario failed.
    """
    print("Databricks SQL Connector - Proxy Authentication Examples")
    print("=" * 60)

    if sql is None:
        print("\n✗ The Databricks SQL connector is not installed")
        print(" Install it with: pip install databricks-sql-connector")
        return 1

    # Check proxy configuration
    has_proxy = check_proxy_environment()

    # Get Databricks connection parameters
    env_values = {name: os.environ.get(name) for name in REQUIRED_ENV_VARS}
    missing = [name for name, value in env_values.items() if not value]
    if missing:
        print("\n✗ Missing required environment variables:")
        for name in missing:
            print(f" {name}")
        return 1

    server_hostname = env_values["DATABRICKS_SERVER_HOSTNAME"]
    print(f"\nConnecting to: {server_hostname}")

    # Base connection parameters shared by every scenario
    base_params = {
        "server_hostname": server_hostname,
        "http_path": env_values["DATABRICKS_HTTP_PATH"],
        "access_token": env_values["DATABRICKS_TOKEN"],
    }

    success_count = 0
    total_tests = len(AUTH_SCENARIOS)

    for heading, description_lines, label, auth_method in AUTH_SCENARIOS:
        print("\n" + "=" * 60)
        print(heading)
        for line in description_lines:
            print(line)

        params = dict(base_params)
        if auth_method is not None:
            params["_proxy_auth_method"] = auth_method
        if run_connection_test(params, label):
            success_count += 1

    # Summary
    print(f"\n{'=' * 60}")
    print(f"Summary: {success_count}/{total_tests} tests passed")

    if success_count == total_tests:
        print("✓ All proxy authentication methods working!")
        return 0
    if success_count > 0:
        # A proxy usually supports only some methods, so partial success
        # still counts as a successful run.
        print("⚠ Some proxy authentication methods failed")
        print("This may be normal depending on your proxy configuration")
        return 0

    print("✗ All proxy authentication methods failed")
    if not has_proxy:
        print("Consider checking your proxy configuration")
    return 1


if __name__ == "__main__":
    sys.exit(main())