1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
content / test / gpu / bad_machine_finder / bigquery.py [blame]
# Copyright 2024 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Code for interacting with BigQuery."""
from typing import Generator
# pylint: disable=import-error
from google.cloud import bigquery
from google.cloud import bigquery_storage
import pandas
# pylint: enable=import-error
class Querier:
  """Runs BigQuery queries and streams the results back one row at a time."""

  def __init__(self, billing_project: str):
    """Stores the project that queries will be run under.

    Args:
      billing_project: The project passed to bigquery.Client as |project| when
          a query is run.
    """
    self._billing_project = billing_project

  def GetSeriesForQuery(self,
                        query: str) -> Generator[pandas.Series, None, None]:
    """Generates results for |query|.

    The BigQuery clients created for the query are released once the generator
    is exhausted, closed early by the caller, or exits due to an exception.

    Args:
      query: The BigQuery query to run.

    Yields:
      A pandas.Series object for each row returned by the query. Columns can be
      accessed directly as attributes.
    """
    client = bigquery.Client(project=self._billing_project)
    try:
      job = client.query(query)
      row_iterator = job.result()
      # Using a Dataframe iterator instead of directly using |row_iterator|
      # allows us to use the BigQuery Storage API, which results in ~10x faster
      # query result retrieval at the cost of a few more dependencies.
      #
      # The read client is used as a context manager so that its transport is
      # closed even if the caller stops iterating before all rows are consumed.
      with bigquery_storage.BigQueryReadClient() as read_client:
        dataframe_iterator = row_iterator.to_dataframe_iterable(read_client)
        for df in dataframe_iterator:
          for _, row in df.iterrows():
            yield row
    finally:
      # Release the underlying transport objects held by the query client.
      client.close()