diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 3968e98a69..84bc4f6d01 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -59,7 +59,9 @@ def _get_validated_location(value: Optional[str]) -> Optional[str]: # -> bpd.options.bigquery.location = "us-central-1" # -> location.setter # -> _get_validated_location - msg = UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility) + msg = bfe.format_message( + UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility) + ) warnings.warn(msg, stacklevel=3, category=bfe.UnknownLocationWarning) return value @@ -294,7 +296,7 @@ def use_regional_endpoints(self, value: bool): ) if value: - msg = ( + msg = bfe.format_message( "Use of regional endpoints is a feature in preview and " "available only in selected regions and projects. " ) @@ -354,7 +356,7 @@ def client_endpoints_override(self) -> dict: @client_endpoints_override.setter def client_endpoints_override(self, value: dict): - msg = ( + msg = bfe.format_message( "This is an advanced configuration option for directly setting endpoints. " "Incorrect use may lead to unexpected behavior or system instability. " "Proceed only if you fully understand its implications." diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py index b958667628..3d52976004 100644 --- a/bigframes/_config/experiment_options.py +++ b/bigframes/_config/experiment_options.py @@ -34,7 +34,7 @@ def semantic_operators(self) -> bool: @semantic_operators.setter def semantic_operators(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "Semantic operators are still under experiments, and are subject " "to change in the future." ) @@ -48,7 +48,7 @@ def blob(self) -> bool: @blob.setter def blob(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "BigFrames Blob is still under experiments. It may not work and " "subject to change in the future." ) @@ -62,7 +62,7 @@ def udf(self) -> bool: @udf.setter def udf(self, value: bool): if value is True: - msg = ( + msg = bfe.format_message( "BigFrames managed function (udf) is still under experiments. " "It may not work and subject to change in the future." ) diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index 9325e3e5a8..9c44255941 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -107,8 +107,8 @@ def from_table( if offsets_col and primary_key: raise ValueError("must set at most one of 'offests', 'primary_key'") if any(i.field_type == "JSON" for i in table.schema if i.name in schema.names): - msg = ( - "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is" + msg = bfe.format_message( + "Interpreting JSON column(s) as the `db_dtypes.dbjson` extension type is " "in preview; this behavior may change in future versions." ) warnings.warn(msg, bfe.PreviewWarning) @@ -232,7 +232,9 @@ def slice( self, start: Optional[int], stop: Optional[int], step: Optional[int] ) -> ArrayValue: if self.node.order_ambiguous and not (self.session._strictly_ordered): - msg = "Window ordering may be ambiguous, this can cause unstable results." + msg = bfe.format_message( + "Window ordering may be ambiguous, this can cause unstable results." + ) warnings.warn(msg, bfe.AmbiguousWindowWarning) return ArrayValue( nodes.SliceNode( @@ -254,7 +256,7 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]: "Generating offsets not supported in partial ordering mode" ) else: - msg = ( + msg = bfe.format_message( "Window ordering may be ambiguous, this can cause unstable results." ) warnings.warn(msg, category=bfe.AmbiguousWindowWarning) @@ -417,7 +419,9 @@ def project_window_op( "Generating offsets not supported in partial ordering mode" ) else: - msg = "Window ordering may be ambiguous, this can cause unstable results." + msg = bfe.format_message( + "Window ordering may be ambiguous, this can cause unstable results." + ) warnings.warn(msg, category=bfe.AmbiguousWindowWarning) output_name = self._gen_namespaced_uid() diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 7ac2b03f28..b4e3ea0f86 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -64,6 +64,7 @@ import bigframes.core.utils as utils import bigframes.core.window_spec as windows import bigframes.dtypes +import bigframes.exceptions as bfe import bigframes.features import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops @@ -630,12 +631,12 @@ def _materialize_local( # Since we cannot acquire the table size without a query_job, # we skip the sampling. if sample_config.enable_downsampling: - warnings.warn( + msg = bfe.format_message( "Sampling is disabled and there is no download size limit when 'allow_large_results' is set to " "False. To prevent downloading excessive data, it is recommended to use the peek() method, or " - "limit the data with methods like .head() or .sample() before proceeding with downloads.", - UserWarning, + "limit the data with methods like .head() or .sample() before proceeding with downloads." ) + warnings.warn(msg, category=UserWarning) fraction = 2 # TODO: Maybe materialize before downsampling @@ -652,7 +653,7 @@ def _materialize_local( " # Setting it to None will download all the data\n" f"{constants.FEEDBACK_LINK}" ) - msg = ( + msg = bfe.format_message( f"The data size ({table_mb:.2f} MB) exceeds the maximum download limit of" f"({max_download_size} MB). It will be downsampled to {max_download_size} " "MB for download.\nPlease refer to the documentation for configuring " diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index edf1e14b3a..93fddf196e 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -165,7 +165,7 @@ def _( ) -> ibis_types.NumericValue: # Will be null if all inputs are null. Pandas defaults to zero sum though. bq_sum = _apply_window_if_present(column.sum(), window) - return bq_sum.fillna(ibis_types.literal(0)) + return bq_sum.fill_null(ibis_types.literal(0)) @compile_unary_agg.register @@ -610,12 +610,7 @@ def _( result = _apply_window_if_present(_is_true(column).all(), window) literal = ibis_types.literal(True) - return cast( - ibis_types.BooleanScalar, - result.fill_null(literal) - if hasattr(result, "fill_null") - else result.fillna(literal), - ) + return cast(ibis_types.BooleanScalar, result.fill_null(literal)) @compile_unary_agg.register @@ -628,12 +623,7 @@ def _( result = _apply_window_if_present(_is_true(column).any(), window) literal = ibis_types.literal(False) - return cast( - ibis_types.BooleanScalar, - result.fill_null(literal) - if hasattr(result, "fill_null") - else result.fillna(literal), - ) + return cast(ibis_types.BooleanScalar, result.fill_null(literal)) @compile_ordered_unary_agg.register diff --git a/bigframes/core/global_session.py b/bigframes/core/global_session.py index 8b32fee5b4..d4d70f5a06 100644 --- a/bigframes/core/global_session.py +++ b/bigframes/core/global_session.py @@ -39,7 +39,7 @@ def _try_close_session(session: bigframes.session.Session): session_id = session.session_id location = session._location project_id = session._project - msg = ( + msg = bfe.format_message( f"Session cleanup failed for session with id: {session_id}, " f"location: {location}, project: {project_id}" ) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 97115a3ed0..c0c4d9ec11 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -407,7 +407,7 @@ def _struct_accessor_check_and_warn( return if not bigframes.dtypes.is_string_like(series.index.dtype): - msg = ( + msg = bfe.format_message( "Are you trying to access struct fields? If so, please use Series.struct.field(...) " "method instead." ) diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 18061dca18..e38c43e73e 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -196,7 +196,7 @@ def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): - warnings.warn(msg, category=bfe.PreviewWarning) + warnings.warn(bfe.format_message(msg), category=bfe.PreviewWarning) return func(*args, **kwargs) return wrapper diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index b5174dbd3e..a48e06d86c 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1581,7 +1581,10 @@ def to_arrow( Returns: pyarrow.Table: A pyarrow Table with all rows and columns of this DataFrame. """ - msg = "to_arrow is in preview. Types and unnamed / duplicate name columns may change in future." + msg = bfe.format_message( + "to_arrow is in preview. Types and unnamed or duplicate name columns may " + "change in future." + ) warnings.warn(msg, category=bfe.PreviewWarning) pa_table, query_job = self._block.to_arrow( @@ -4104,7 +4107,7 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): # to the applied function should be a Series, not a scalar. if utils.get_axis_number(axis) == 1: - msg = "axis=1 scenario is in preview." + msg = bfe.format_message("axis=1 scenario is in preview.") warnings.warn(msg, category=bfe.PreviewWarning) # TODO(jialuo): Deprecate the "bigframes_remote_function" attribute. diff --git a/bigframes/exceptions.py b/bigframes/exceptions.py index 97e2da40a1..8b35d9122b 100644 --- a/bigframes/exceptions.py +++ b/bigframes/exceptions.py @@ -14,6 +14,8 @@ """Public exceptions and warnings used across BigQuery DataFrames.""" +import textwrap + # NOTE: This module should not depend on any others in the package. @@ -87,3 +89,27 @@ class ApiDeprecationWarning(FutureWarning): class BadIndexerKeyWarning(Warning): """The indexer key is not used correctly.""" + + +class ColorFormatter: + WARNING = "\033[93m" + ENDC = "\033[0m" + + +def format_message(message: str, fill: bool = True): + """Formats a warning message with ANSI color codes for the warning color. + + Args: + message: The warning message string. + fill: Whether to wrap the message text using `textwrap.fill`. + Defaults to True. Set to False to prevent wrapping, + especially if the message already contains newlines. + + Returns: + The formatted message string, with ANSI color codes for warning color + if color is supported, otherwise the original message. If `fill` is + True, the message will be wrapped to fit the terminal width. + """ + if fill: + message = textwrap.fill(message) + return ColorFormatter.WARNING + message + ColorFormatter.ENDC diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 20dcf45103..ce0ade26ff 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -489,7 +489,7 @@ def remote_function( if cloud_function_ingress_settings is None: cloud_function_ingress_settings = "all" - msg = ( + msg = bfe.format_message( "The `cloud_function_ingress_settings` are set to 'all' by default, " "which will change to 'internal-only' for enhanced security in future version 2.0 onwards. " "However, you will be able to explicitly pass cloud_function_ingress_settings='all' if you need. " @@ -549,7 +549,7 @@ def wrapper(func): (input_type := input_types[0]) == bf_series.Series or input_type == pandas.Series ): - msg = "input_types=Series is in preview." + msg = bfe.format_message("input_types=Series is in preview.") warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # we will model the row as a json serialized string containing the data @@ -836,7 +836,7 @@ def wrapper(func): (input_type := input_types[0]) == bf_series.Series or input_type == pandas.Series ): - msg = "input_types=Series is in preview." + msg = bfe.format_message("input_types=Series is in preview.") warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # we will model the row as a json serialized string containing diff --git a/bigframes/functions/function.py b/bigframes/functions/function.py index 392a209714..16416eb864 100644 --- a/bigframes/functions/function.py +++ b/bigframes/functions/function.py @@ -231,7 +231,7 @@ def func(*bigframes_args, **bigframes_kwargs): ) function_input_dtypes.append(input_dtype) if has_unknown_dtypes: - msg = ( + msg = bfe.format_message( "The function has one or more missing input data types. BigQuery DataFrames " f"will assume default data type {bigframes.dtypes.DEFAULT_DTYPE} for them." ) diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py index c353e47f3a..a0800c19e6 100644 --- a/bigframes/ml/base.py +++ b/bigframes/ml/base.py @@ -27,6 +27,7 @@ import bigframes_vendored.sklearn.base +import bigframes.exceptions as bfe from bigframes.ml import core import bigframes.ml.utils as utils import bigframes.pandas as bpd @@ -269,7 +270,7 @@ def _predict_and_retry( if df_succ.empty: if max_retries > 0: - msg = "Can't make any progress, stop retrying." + msg = bfe.format_message("Can't make any progress, stop retrying.") warnings.warn(msg, category=RuntimeWarning) break @@ -281,7 +282,7 @@ def _predict_and_retry( break if not df_fail.empty: - msg = ( + msg = bfe.format_message( f"Some predictions failed. Check column {self._status_col} for detailed " "status. You may want to filter the failed rows and retry." ) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 72c49e124b..0117444f16 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -189,9 +189,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), + ) ) warnings.warn(msg) @@ -368,7 +370,7 @@ def predict( df = self._bqml_model.generate_text(X, options) if (df[_ML_GENERATE_TEXT_STATUS] != "").any(): - msg = ( + msg = exceptions.format_message( f"Some predictions failed. Check column {_ML_GENERATE_TEXT_STATUS} for " "detailed status. You may want to filter the failed rows and retry." ) @@ -522,9 +524,11 @@ def _create_bqml_model(self): ) if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), + ) ) warnings.warn(msg) @@ -598,7 +602,7 @@ def predict(self, X: utils.ArrayType) -> bigframes.dataframe.DataFrame: ) if (df[_ML_EMBED_TEXT_STATUS] != "").any(): - msg = ( + msg = exceptions.format_message( f"Some predictions failed. Check column {_ML_EMBED_TEXT_STATUS} for " "detailed status. You may want to filter the failed rows and retry." ) @@ -666,9 +670,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_EMBEDDING_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS), + ) ) warnings.warn(msg) @@ -805,9 +811,11 @@ def _create_bqml_model(self): ) if self.model_name != _MULTIMODAL_EMBEDDING_001_ENDPOINT: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=_MULTIMODAL_EMBEDDING_001_ENDPOINT, + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=_MULTIMODAL_EMBEDDING_001_ENDPOINT, + ) ) warnings.warn(msg) @@ -952,7 +960,7 @@ def __init__( max_iterations: int = 300, ): if model_name in _GEMINI_PREVIEW_ENDPOINTS: - msg = ( + msg = exceptions.format_message( f'Model {model_name} is subject to the "Pre-GA Offerings Terms" in ' "the General Service Terms section of the Service Specific Terms" "(https://cloud.google.com/terms/service-terms#1). Pre-GA products and " @@ -976,9 +984,11 @@ def _create_bqml_model(self): ) if self.model_name not in _GEMINI_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_GEMINI_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_GEMINI_ENDPOINTS), + ) ) warnings.warn(msg) @@ -1343,9 +1353,11 @@ def _create_bqml_model(self): ) if self.model_name not in _CLAUDE_3_ENDPOINTS: - msg = _MODEL_NOT_SUPPORTED_WARNING.format( - model_name=self.model_name, - known_models=", ".join(_CLAUDE_3_ENDPOINTS), + msg = exceptions.format_message( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_CLAUDE_3_ENDPOINTS), + ) ) warnings.warn(msg) options = { diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py index 6ee6840656..cc711cbe3b 100644 --- a/bigframes/ml/remote.py +++ b/bigframes/ml/remote.py @@ -21,6 +21,7 @@ from bigframes.core import global_session, log_adapter import bigframes.dataframe +import bigframes.exceptions as bfe from bigframes.ml import base, core, globals, utils import bigframes.session @@ -119,7 +120,7 @@ def predict( # unlike LLM models, the general remote model status is null for successful runs. if (df[_REMOTE_MODEL_STATUS].notna()).any(): - msg = ( + msg = bfe.format_message( f"Some predictions failed. Check column {_REMOTE_MODEL_STATUS} for " "detailed status. You may want to filter the failed rows and retry." ) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 9c68a2c5ca..a5f53b9f64 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -20,6 +20,7 @@ import pandas as pd import bigframes.dtypes as dtypes +import bigframes.exceptions as bfe DEFAULT_SAMPLING_N = 1000 DEFAULT_SAMPLING_STATE = 0 @@ -70,10 +71,12 @@ def _compute_sample_data(self, data): if self._sampling_warning_msg is not None: total_n = data.shape[0] if sampling_n < total_n: - msg = self._sampling_warning_msg.format( - sampling_n=sampling_n, total_n=total_n + msg = bfe.format_message( + self._sampling_warning_msg.format( + sampling_n=sampling_n, total_n=total_n + ) ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) sampling_random_state = self.kwargs.pop( "sampling_random_state", DEFAULT_SAMPLING_STATE diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py index 3b7a77e5b7..686db50a43 100644 --- a/bigframes/operations/semantics.py +++ b/bigframes/operations/semantics.py @@ -141,11 +141,11 @@ def agg( column = columns[0] if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) user_instruction = self._format_instruction(instruction, columns) @@ -372,11 +372,11 @@ def filter(self, instruction: str, model, ground_with_google_search: bool = Fals raise ValueError(f"Column {column} not found.") if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) self._confirm_operation(len(self._df)) @@ -471,11 +471,11 @@ def map( raise ValueError(f"Column {column} not found.") if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) self._confirm_operation(len(self._df)) @@ -573,11 +573,11 @@ def join( columns = self._parse_columns(instruction) if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) work_estimate = len(self._df) * len(other) self._confirm_operation(work_estimate) @@ -816,11 +816,11 @@ def top_k( ) if ground_with_google_search: - msg = ( + msg = exceptions.format_message( "Enables Grounding with Google Search may impact billing cost. See pricing " "details: https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" ) - warnings.warn(msg) + warnings.warn(msg, category=UserWarning) work_estimate = int(len(self._df) * (len(self._df) - 1) / 2) self._confirm_operation(work_estimate) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 13e49fca42..3f081e2177 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -152,7 +152,9 @@ def __init__( if context.location is None: self._location = "US" - msg = f"No explicit location is set, so using location {self._location} for the session." + msg = bfe.format_message( + f"No explicit location is set, so using location {self._location} for the session." + ) # User's code # -> get_global_session() # -> connect() @@ -344,25 +346,25 @@ def _project(self): @property def bytes_processed_sum(self): """The sum of all bytes processed by bigquery jobs using this session.""" - warnings.warn( + msg = bfe.format_message( "Queries executed with `allow_large_results=False` within the session will not " "have their bytes processed counted in this sum. If you need precise " "bytes processed information, query the `INFORMATION_SCHEMA` tables " "to get relevant metrics.", - UserWarning, ) + warnings.warn(msg, UserWarning) return self._metrics.bytes_processed @property def slot_millis_sum(self): """The sum of all slot time used by bigquery jobs in this session.""" - warnings.warn( + msg = bfe.format_message( "Queries executed with `allow_large_results=False` within the session will not " "have their slot milliseconds counted in this sum. If you need precise slot " "milliseconds information, query the `INFORMATION_SCHEMA` tables " "to get relevant metrics.", - UserWarning, ) + warnings.warn(msg, UserWarning) return self._metrics.slot_millis @property @@ -612,7 +614,9 @@ def read_gbq_table_streaming( bigframes.streaming.dataframe.StreamingDataFrame: A StreamingDataFrame representing results of the table. """ - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) import bigframes.streaming.dataframe as streaming_dataframe diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index ed68762ee8..9fa97cb6e1 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -59,7 +59,7 @@ def get_table_metadata( # Cache hit could be unexpected. See internal issue 329545805. # Raise a warning with more information about how to avoid the # problems with the cache. - msg = ( + msg = bfe.format_message( f"Reading cached table from {snapshot_timestamp} to avoid " "incompatibilies with previous reads of this table. To read " "the latest version, set `use_cache=False` or close the " @@ -104,7 +104,7 @@ def validate_table( # Only true tables support time travel elif table.table_type != "TABLE": if table.table_type == "MATERIALIZED_VIEW": - msg = ( + msg = bfe.format_message( "Materialized views do not support FOR SYSTEM_TIME AS OF queries. " "Attempting query without time travel. Be aware that as materialized views " "are updated periodically, modifications to the underlying data in the view may " @@ -142,7 +142,7 @@ def validate_table( snapshot_sql, job_config=bigquery.QueryJobConfig(dry_run=True) ) if time_travel_not_found: - msg = ( + msg = bfe.format_message( "NotFound error when reading table with time travel." " Attempting query without time travel. Warning: Without" " time travel, modifications to the underlying table may" @@ -269,7 +269,7 @@ def get_index_cols( # resource utilization because of the default sequential index. See # internal issue 335727141. if _is_table_clustered_or_partitioned(table) and not primary_keys: - msg = ( + msg = bfe.format_message( f"Table '{str(table.reference)}' is clustered and/or " "partitioned, but BigQuery DataFrames was not able to find a " "suitable index. To avoid this warning, set at least one of: " diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index fd8f387c3d..5b707ad478 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -32,6 +32,7 @@ import pydata_google_auth import bigframes.constants +import bigframes.exceptions as bfe import bigframes.version _ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" @@ -102,12 +103,13 @@ def __init__( and location.lower() not in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS ): - warnings.warn( + msg = bfe.format_message( bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( location=location ), - category=FutureWarning, + fill=False, ) + warnings.warn(msg, category=FutureWarning) self._location = location self._use_regional_endpoints = use_regional_endpoints diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 22d1c1dcea..0644b0e6d9 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -48,6 +48,7 @@ import bigframes.core.schema import bigframes.core.tree_properties as tree_properties import bigframes.dtypes +import bigframes.exceptions as bfe import bigframes.features import bigframes.session._io.bigquery as bq_io import bigframes.session.metrics @@ -271,13 +272,13 @@ def iterator_supplier(): size_bytes = None if size_bytes is not None and size_bytes >= MAX_SMALL_RESULT_BYTES: - warnings.warn( + msg = bfe.format_message( "The query result size has exceeded 10 GB. In BigFrames 2.0 and " "later, you might need to manually set `allow_large_results=True` in " "the IO method or adjust the BigFrames option: " - "`bigframes.options.bigquery.allow_large_results=True`.", - FutureWarning, + "`bigframes.options.bigquery.allow_large_results=True`." ) + warnings.warn(msg, FutureWarning) # Runs strict validations to ensure internal type predictions and ibis are completely in sync # Do not execute these validations outside of testing suite. if "PYTEST_CURRENT_TEST" in os.environ: @@ -383,7 +384,7 @@ def peek( """ plan = self.replace_cached_subtrees(array_value.node) if not tree_properties.can_fast_peek(plan): - msg = "Peeking this value cannot be done efficiently." + msg = bfe.format_message("Peeking this value cannot be done efficiently.") warnings.warn(msg) if use_explicit_destination is None: use_explicit_destination = bigframes.options.bigquery.allow_large_results diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 2180a66207..4acefd6283 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -372,7 +372,9 @@ def _to_bigtable( For example, the job can be cancelled or its error status can be examined. """ - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # get default client if not passed @@ -484,7 +486,9 @@ def _to_pubsub( For example, the job can be cancelled or its error status can be examined. """ - msg = "The bigframes.streaming module is a preview feature, and subject to change." + msg = bfe.format_message( + "The bigframes.streaming module is a preview feature, and subject to change." + ) warnings.warn(msg, stacklevel=1, category=bfe.PreviewWarning) # get default client if not passed diff --git a/tests/system/large/test_dataframe_io.py b/tests/system/large/test_dataframe_io.py index c055babce6..76a7001fe3 100644 --- a/tests/system/large/test_dataframe_io.py +++ b/tests/system/large/test_dataframe_io.py @@ -46,9 +46,7 @@ def test_to_pandas_batches_override_global_option( ) assert len(w) == 2 assert issubclass(w[0].category, FutureWarning) - assert str(w[0].message).startswith( - "The query result size has exceeded 10 GB." - ) + assert "The query result size has exceeded 10 GB." in str(w[0].message) def test_to_pandas_raise_when_large_result_not_allowed(session): diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 0b4a7afe2b..7801f5dada 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -163,11 +163,7 @@ def test_bq_lep_endpoints(bigquery_location): location=bigquery_location, use_regional_endpoints=True ) assert len(record) == 1 - assert typing.cast(Warning, record[0].message).args[ - 0 - ] == bigframes.constants.LEP_DEPRECATION_WARNING_MESSAGE.format( - location=bigquery_location - ) + assert bigquery_location in typing.cast(Warning, record[0].message).args[0] # Verify that location and endpoints are correctly set for the BigQuery API # client diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py index c12d0e03f5..075a57f23d 100644 --- a/tests/system/small/functions/test_remote_function.py +++ b/tests/system/small/functions/test_remote_function.py @@ -929,7 +929,7 @@ def test_read_gbq_function_requires_explicit_types( ) with pytest.warns( bigframes.exceptions.UnknownDataTypeWarning, - match="missing input data types.*assume default data type", + match=r"missing input data types[\s\S]*assume default data type", ): bff.read_gbq_function( str(only_return_type_specified.reference), diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index 31f43ffee5..98a74d4e4c 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -164,14 +164,19 @@ def set_location_property(): options.location = invalid_location for op in [set_location_in_constructor, set_location_property]: - with pytest.warns( - bigframes.exceptions.UnknownLocationWarning, - match=re.escape( - f"The location '{invalid_location}' is set to an unknown value. Did you mean '{possibility}'?" - ), - ): + with warnings.catch_warnings(record=True) as w: op() + assert issubclass( + w[0].category, bigframes.exceptions.UnknownLocationWarning + ) + assert ( + f"The location '{invalid_location}' is set to an unknown value. " + in str(w[0].message) + ) + # The message might contain newlines added by textwrap.fill. + assert possibility in str(w[0].message).replace("\n", "") + def test_client_endpoints_override_set_shows_warning(): options = bigquery_options.BigQueryOptions()