diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index 26b7d186b..c8713bf08 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -1,3 +1,4 @@ +import itertools from contextlib import contextmanager from collections import OrderedDict import datetime @@ -52,6 +53,7 @@ def __init__(self, method_name): # If running in local mode, just use environment variables for params. self.arguments = os.environ if get_args_from_env else {} self.arraysize = 1000 + self.buffer_size_bytes = 104857600 def connection_params(self, arguments): params = { @@ -84,7 +86,7 @@ def connection(self, extra_params=()): @contextmanager def cursor(self, extra_params=()): with self.connection(extra_params) as conn: - cursor = conn.cursor(arraysize=self.arraysize) + cursor = conn.cursor(arraysize=self.arraysize, buffer_size_bytes=self.buffer_size_bytes) try: yield cursor finally: @@ -104,6 +106,36 @@ def get_some_rows(self, cursor, fetchmany_size): else: return None + @skipUnless(pysql_supports_arrow(), 'needs arrow support') + def test_cloud_fetch(self): + # This test can take several minutes to run + limits = [100000, 300000] + threads = [10, 25] + self.arraysize = 100000 + # This test requires a large table with many rows to properly initiate cloud fetch. + # e2-dogfood host > hive_metastore catalog > main schema has such a table called store_sales. + # If this table is deleted or this test is run on a different host, a different table may need to be used. + base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 " + for num_limit, num_threads, lz4_compression in itertools.product(limits, threads, [True, False]): + with self.subTest(num_limit=num_limit, num_threads=num_threads, lz4_compression=lz4_compression): + cf_result, noop_result = None, None + query = base_query + "LIMIT " + str(num_limit) + with self.cursor({ + "use_cloud_fetch": True, + "max_download_threads": num_threads, + "catalog": "hive_metastore" + }) as cursor: + cursor.execute(query) + cf_result = cursor.fetchall() + with self.cursor({ + "catalog": "hive_metastore" + }) as cursor: + cursor.execute(query) + noop_result = cursor.fetchall() + assert len(cf_result) == len(noop_result) + for i in range(len(cf_result)): + assert cf_result[i] == noop_result[i] + # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core # tests