1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196

infra / config / lib / builder_health_indicators.star [blame]

# Copyright 2023 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Library for defining builder health indicator thresholds.

See chromium/src -- //docs/infra/builder_health_indicators.md for more info.
"""

load("@stdlib//internal/graph.star", "graph")
load("@stdlib//internal/luci/common.star", "keys")
load("//project.star", "settings")
load("./builder_exemptions.star", "exempted_from_contact_builders")
load("./nodes.star", "nodes")
load("./structs.star", "structs")

# Bucket-scoped graph node type for per-builder health specs. Nodes of this
# type are added by register_health_spec and read back by
# _generate_health_specs.
_HEALTH_SPEC = nodes.create_bucket_scoped_node_type("health_spec")

# Default problem specs, keyed by problem name. modified_default() merges user
# modifications into this dict, and _generate_health_specs emits it under the
# "_default_specs" key of the generated JSON.
#
# See https://source.chromium.org/chromium/infra/infra/+/main:go/src/infra/cr_builder_health/src_config.go
# for all configurable thresholds.
_default_specs = {
    "Unhealthy": struct(
        score = 5,
        # 7 days for projects whose name starts with "chromium", 14 otherwise.
        period_days = 7 if settings.project.startswith("chromium") else 14,
        # If any of these thresholds are exceeded, the builder will be deemed
        # unhealthy.
        # Setting a value of None will ignore that threshold.
        infra_fail_rate = struct(
            average = 0.05,
        ),
        fail_rate = struct(
            average = 0.2,
        ),
        build_time = struct(
            p50_mins = None,
        ),
        pending_time = struct(
            # 20 for projects whose name starts with "chromium", 60 otherwise.
            p50_mins = 20 if settings.project.startswith("chromium") else 60,
        ),
    ),
    "Low Value": struct(
        score = 1,
        period_days = 90,
        # If any of these thresholds are met, the builder will be deemed
        # low-value and will be considered for deletion.
        # Setting a value of None will ignore that threshold.
        fail_rate = struct(
            average = 0.99,
        ),
    ),
}

# Base struct that unhealthy_thresholds() evolves. Every threshold value here
# is None.
_blank_unhealthy_thresholds = struct(
    infra_fail_rate = struct(
        average = None,
    ),
    fail_rate = struct(
        average = None,
    ),
    build_time = struct(
        p50_mins = None,
    ),
    pending_time = struct(
        p50_mins = None,
    ),
)

# Base struct that low_value_thresholds() evolves. Its only threshold,
# fail_rate.average, is None.
blank_low_value_thresholds = struct(
    fail_rate = struct(
        average = None,
    ),
)

# Exposed to users as health_spec.DEFAULT. Each entry repeats the score and
# period_days of the matching _default_specs entry, but carries a
# `_default = "_default"` marker in place of explicit thresholds.
# NOTE(review): presumably the marker tells the consumer of the generated JSON
# to apply the default thresholds -- confirm against the consumer.
DEFAULT = {
    "Unhealthy": struct(
        score = 5,
        period_days = 7 if settings.project.startswith("chromium") else 14,
        _default = "_default",
    ),
    "Low Value": struct(
        score = 1,
        period_days = 90,
        _default = "_default",
    ),
}

# Users define the specs as {problem_name -> problem_spec} for aesthetic
# reasons, so all user-exposed functions expect a dictionary.
# We then convert that into a list of [problem_specs] so that each object
# encapsulates its own name, for ease of processing.
def unhealthy_thresholds(
        fail_rate = struct(),
        infra_fail_rate = struct(),
        build_time = struct(),
        pending_time = struct()):
    """Builds a thresholds struct for the unhealthy problem spec.

    Each argument replaces the field of the same name in
    _blank_unhealthy_thresholds. Fails if any argument is None; pass an empty
    struct() to leave a threshold group unset.

    Args:
      fail_rate: struct of fail-rate thresholds, e.g. struct(average = 0.2).
      infra_fail_rate: struct of infra-fail-rate thresholds.
      build_time: struct of build-time thresholds, e.g. struct(p50_mins = 60).
      pending_time: struct of pending-time thresholds.

    Returns:
      A struct with the four fields above set to the given values.
    """
    overrides = dict(
        fail_rate = fail_rate,
        infra_fail_rate = infra_fail_rate,
        build_time = build_time,
        pending_time = pending_time,
    )

    # Reject None early so callers get a threshold-specific error message.
    fail_if_any_none_val(overrides)

    return structs.evolve(_blank_unhealthy_thresholds, **overrides)

def low_value_thresholds(
        fail_rate = struct()):
    """Builds a thresholds struct for the low-value problem spec.

    Fails if fail_rate is None; pass an empty struct() to leave it unset.

    Args:
      fail_rate: struct of fail-rate thresholds, e.g. struct(average = 0.99).
        Replaces the fail_rate field of blank_low_value_thresholds.

    Returns:
      A struct whose fail_rate field is the given value.
    """
    overrides = dict(fail_rate = fail_rate)

    # Reject None early so callers get a threshold-specific error message.
    fail_if_any_none_val(overrides)

    return structs.evolve(blank_low_value_thresholds, **overrides)

def fail_if_any_none_val(vals):
    """Fails if any value in the given dict is None.

    Args:
      vals: dict mapping threshold name to its value. The name is used in the
        failure message.
    """
    for threshold_name in vals:
        if vals[threshold_name] != None:
            continue
        fail(threshold_name + " threshold was None. Thresholds can't be None. Use an empty struct() instead")

def modified_default(modifications):
    """Returns the default specs with the given modifications merged in.

    Args:
      modifications: dict mapping problem name to a struct of overrides for
        that problem's spec.

    Returns:
      A new dict of problem specs; _default_specs itself is not modified.
    """
    merged_specs = _merge_mods(_default_specs, modifications)
    return merged_specs

def _merge_mods(base, modifications):
    """Merges per-problem modifications into a copy of `base`.

    Args:
      base: dict mapping problem name to its spec struct. Not mutated.
      modifications: dict mapping problem name to a struct of overrides.
        Fails if any override converts to empty proto properties.

    Returns:
      A new dict. For names already in `base`, the spec is evolved with the
      override's properties; for new names, the override is stored as-is.
    """
    merged = dict(base)

    for problem_name, override in modifications.items():
        override_props = structs.to_proto_properties(override)
        if not override_props:
            fail("Modifications for health spec \"{}\" were empty.".format(problem_name))

        if problem_name in merged:
            merged[problem_name] = structs.evolve(merged[problem_name], **override_props)
        else:
            merged[problem_name] = override

    return merged

def _exempted_from_contact(bucket, builder):
    """Returns whether `builder` is listed for `bucket` in exempted_from_contact_builders."""
    exempted_in_bucket = exempted_from_contact_builders.get(bucket, [])
    return builder in exempted_in_bucket

def register_health_spec(bucket, name, specs, contact_team_email):
    """Validates the contact email and registers a builder's health spec.

    Fails if the builder has no contact_team_email and is not exempted, or if
    it has one while still being listed as exempted. When `specs` is falsy,
    nothing is registered.

    Args:
      bucket: name of the bucket the builder belongs to.
      name: name of the builder.
      specs: dict mapping problem name to its spec struct, or a falsy value.
      contact_team_email: contact email for the builder, or a falsy value.
    """
    is_exempted = _exempted_from_contact(bucket, name)
    if contact_team_email:
        if is_exempted:
            fail("Need to remove builder " + bucket + "/" + name + " from exempted_from_contact_builders")
    elif not is_exempted:
        fail("Builder " + name + " must have a contact_team_email. All new builders must specify a team email for contact in case the builder stops being healthy or providing value.")

    if not specs:
        return

    node_props = structs.to_proto_properties(struct(
        problem_specs = _convert_specs(specs),
        contact_team_email = contact_team_email,
    ))
    spec_key = _HEALTH_SPEC.add(
        bucket,
        name,
        props = node_props,
        idempotent = True,
    )

    # Link the node to the project so _generate_health_specs can find it.
    graph.add_edge(keys.project(), spec_key)

def _convert_specs(specs):
    """Converts a {problem_name -> problem_spec} dict into a list of specs.

    Users define specs as a dict for aesthetic reasons, so all user-exposed
    functions expect a dictionary. The list form makes each entry carry its
    own name, for ease of processing.

    Args:
      specs: dict mapping problem name to a spec struct that has `score` and
        `period_days` attributes.

    Returns:
      A list of structs with fields name, score, period_days and thresholds,
      where thresholds holds the spec's remaining properties.
    """
    problem_specs = []
    for problem_name, problem in specs.items():
        thresholds = structs.to_proto_properties(problem)

        # score and period_days are top-level fields of the converted spec,
        # so they are removed from the thresholds.
        for hoisted_field in ("score", "period_days"):
            thresholds.pop(hoisted_field)

        problem_specs.append(struct(
            name = problem_name,
            score = problem.score,
            period_days = problem.period_days,
            thresholds = thresholds,
        ))

    return problem_specs

def _generate_health_specs(ctx):
    """Writes health-specs/health-specs.json from the registered spec nodes.

    The output has two keys: "_default_specs" holds the converted
    _default_specs, and "specs" maps bucket -> builder -> node props.

    Args:
      ctx: generator context; the JSON is stored in ctx.output.
    """
    specs_by_bucket = {}

    for spec_node in graph.children(keys.project(), _HEALTH_SPEC.kind):
        node_key = spec_node.key
        bucket_specs = specs_by_bucket.setdefault(node_key.container.id, {})
        bucket_specs[node_key.id] = spec_node.props

    payload = {
        "_default_specs": _convert_specs(_default_specs),
        "specs": specs_by_bucket,
    }
    encoded = json.encode(payload)

    ctx.output["health-specs/health-specs.json"] = json.indent(encoded, indent = "  ")

# Bundles DEFAULT and the spec-building helpers defined above into a single
# struct.
health_spec = struct(
    DEFAULT = DEFAULT,
    unhealthy_thresholds = unhealthy_thresholds,
    low_value_thresholds = low_value_thresholds,
    modified_default = modified_default,
)

# Register the generator that writes health-specs/health-specs.json.
lucicfg.generator(_generate_health_specs)