# content/test/gpu/bad_machine_finder/bigquery.py

# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Code for interacting with BigQuery."""

from typing import Generator

# pylint: disable=import-error
from google.cloud import bigquery
from google.cloud import bigquery_storage
import pandas
# pylint: enable=import-error


class Querier:
  """Runs BigQuery queries billed to a fixed project and streams the rows."""

  def __init__(self, billing_project: str):
    """Stores the project to bill queries to.

    Args:
      billing_project: The name of the cloud project that clients created by
          this object are associated with.
    """
    self._billing_project = billing_project

  def GetSeriesForQuery(self,
                        query: str) -> Generator[pandas.Series, None, None]:
    """Generates results for |query|.

    A new BigQuery client and a new BigQuery Storage read client are created
    for each call. Both are closed once the generator is exhausted, closed
    early, or fails, so repeated calls do not accumulate open connections.

    Args:
      query: The BigQuery query to run.

    Yields:
      A pandas.Series object for each row returned by the query. Columns can be
      accessed directly as attributes.
    """
    client = bigquery.Client(project=self._billing_project)
    try:
      job = client.query(query)
      row_iterator = job.result()
      # Using a Dataframe iterator instead of directly using |row_iterator|
      # allows us to use the BigQuery Storage API, which results in ~10x faster
      # query result retrieval at the cost of a few more dependencies.
      storage_client = bigquery_storage.BigQueryReadClient()
      try:
        dataframe_iterator = row_iterator.to_dataframe_iterable(storage_client)
        for df in dataframe_iterator:
          for _, row in df.iterrows():
            yield row
      finally:
        # The read client's transport is not shared with any other client, so
        # it is safe to release it here.
        storage_client.transport.close()
    finally:
      client.close()