Coverage for /builds/BuildGrid/buildgrid/buildgrid/server/metrics_names.py: 100.00%

91 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-05 15:37 +0000

1# Copyright (C) 2020 Bloomberg LP 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# <http://www.apache.org/licenses/LICENSE-2.0> 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15# 

16# CAS metrics 

17# 

18 

19#: Number of exceptions thrown from CAS servicer functions 

20CAS_EXCEPTION_COUNT_METRIC_NAME = "cas-exception" 

21 

22#: Number of bytes uploaded to a CAS instance 

23CAS_UPLOADED_BYTES_METRIC_NAME = "cas-uploaded-bytes" 

24 

25#: Number of bytes downloaded from a CAS instance 

26CAS_DOWNLOADED_BYTES_METRIC_NAME = "cas-downloaded-bytes" 

27 

28#: Number of blobs requested in ``FindMissingBlobs()`` calls 

29CAS_FIND_MISSING_BLOBS_NUM_REQUESTED_METRIC_NAME = "find-missing-blobs-num-requested" 

30 

31#: Size of blobs requested in ``FindMissingBlobs()`` calls 

32CAS_FIND_MISSING_BLOBS_SIZE_BYTES_REQUESTED_METRIC_NAME = "find-missing-blobs-size-bytes-requested" 

33 

34#: Number of blobs reported to be missing in ``FindMissingBlobs()`` calls 

35CAS_FIND_MISSING_BLOBS_NUM_MISSING_METRIC_NAME = "find-missing-blobs-num-missing" 

36 

37#: Percentage of blobs reported to be missing in ``FindMissingBlobs()`` calls 

38CAS_FIND_MISSING_BLOBS_PERCENT_MISSING_METRIC_NAME = "find-missing-blobs-percent-missing" 

39 

40#: Size of blobs reported to be missing in ``FindMissingBlobs()`` calls 

41CAS_FIND_MISSING_BLOBS_SIZE_BYTES_MISSING_METRIC_NAME = "find-missing-blobs-size-bytes-missing" 

42 

43#: Time that ``FindMissingBlobs()`` operations took to complete 

44CAS_FIND_MISSING_BLOBS_TIME_METRIC_NAME = "find-missing-blobs" 

45 

46#: Time that ``BatchUpdateBlobs()`` operations took to complete 

47CAS_BATCH_UPDATE_BLOBS_TIME_METRIC_NAME = "batch-update-blobs" 

48 

49#: Size of blobs written with ``BatchUpdateBlobs()`` calls 

50CAS_BATCH_UPDATE_BLOBS_SIZE_BYTES = "batch-update-blobs-size-bytes" 

51 

52#: Time that ``BatchReadBlobs()`` operations took to complete 

53CAS_BATCH_READ_BLOBS_TIME_METRIC_NAME = "batch-read-blobs" 

54 

55#: Size of blobs read with ``BatchReadBlobs()`` calls 

56CAS_BATCH_READ_BLOBS_SIZE_BYTES = "batch-read-blobs-size-bytes" 

57 

58#: Time that ``GetTree()`` operations took to complete 

59CAS_GET_TREE_TIME_METRIC_NAME = "get-tree" 

60 

61#: Time that ``ByteStream.Read()`` operations took to complete 

62CAS_BYTESTREAM_READ_TIME_METRIC_NAME = "bytestream-read" 

63 

64#: Size of blobs read with ``ByteStream.Read()`` 

65CAS_BYTESTREAM_READ_SIZE_BYTES = "bytestream-read-size-bytes" 

66 

67#: Time that ``ByteStream.Write()`` operations took to complete 

68CAS_BYTESTREAM_WRITE_TIME_METRIC_NAME = "bytestream-write" 

69 

70#: Size of blobs written with ``ByteStream.Write()`` 

71CAS_BYTESTREAM_WRITE_SIZE_BYTES = "bytestream-write-size-bytes" 

72 

73# CAS cache wrapper metrics 

74 

75#: Count of cache misses in BatchReadBlobs requests to the 

76#: !with-cache-storage. This only counts the blobs which were

77#: in the fallback storage; blobs that were entirely missing

78#: don't count as cache misses, since this metric is intended

79#: to measure how many things that *could* have been cached

80#: were actually not.

81CAS_CACHE_BULK_READ_MISS_COUNT_NAME = "cas-withcache-bulk-read-misses" 

82 

83#: Count of cache hits in BatchReadBlobs requests to the !with-cache-storage 

84CAS_CACHE_BULK_READ_HIT_COUNT_NAME = "cas-withcache-bulk-read-hits" 

85 

86#: Percentage of cache hits in a given BatchReadBlobs request in the 

87#: !with-cache-storage. This is expressed as a percentage of total blobs

88#: requested, including blobs which were missing entirely.

89CAS_CACHE_BULK_READ_HIT_PERCENTAGE_NAME = "cas-withcache-bulk-read-hit-percent" 

90 

91#: Count of cache misses in ByteStream Read requests to the 

92#: !with-cache-storage. This only counts the blobs which were

93#: in the fallback storage; blobs that were entirely missing

94#: don't count as cache misses, since this metric is intended

95#: to measure how many things that *could* have been cached

96#: were actually not.

97CAS_CACHE_GET_BLOB_MISS_COUNT_NAME = "cas-withcache-get-blob-misses" 

98 

99#: Count of cache hits in ByteStream Read requests to the !with-cache-storage 

100CAS_CACHE_GET_BLOB_HIT_COUNT_NAME = "cas-withcache-get-blob-hits" 

101 

102# Indexed CAS metrics 

103 

104#: Time taken to bulk select a number of digests from the index 

105CAS_INDEX_BULK_SELECT_DIGEST_TIME_METRIC_NAME = "cas.index.bulk-select-digest-time" 

106 

107#: Time taken to update a blob timestamp in the index 

108CAS_INDEX_BLOB_TIMESTAMP_UPDATE_TIME_METRIC_NAME = "cas.index.blob-timestamp-update-time" 

109 

110#: Time taken to run a bulk timestamp update in the index 

111CAS_INDEX_BULK_TIMESTAMP_UPDATE_TIME_METRIC_NAME = "cas.index.bulk-timestamp-update-time" 

112 

113#: Time taken to return from `get_blob()`. This includes the time taken to 

114#: check and update the index, along with the time to fetch the blob from the

115#: underlying storage, and update the index if `fallback_on_get` is enabled.

116CAS_INDEX_GET_BLOB_TIME_METRIC_NAME = "cas.index.get-blob-time" 

117 

118#: Time taken to store a list of digests in the index 

119CAS_INDEX_SAVE_DIGESTS_TIME_METRIC_NAME = "cas.index.save-digests-time" 

120 

121#: Time taken to get the total size of the CAS the index is for 

122CAS_INDEX_SIZE_CALCULATION_TIME_METRIC_NAME = "cas.index.total-size-calculation-time" 

123 

124# 

125# ActionCache metrics 

126# 

127 

128#: Time that ``GetActionResult()`` operations took to complete 

129AC_GET_ACTION_RESULT_TIME_METRIC_NAME = "get-action-result" 

130 

131#: Time that ``UpdateActionResult()`` operations took to complete 

132AC_UPDATE_ACTION_RESULT_TIME_METRIC_NAME = "update-action-result" 

133 

134#: Number of cache hits from the ActionCache 

135AC_CACHE_HITS_METRIC_NAME = "action-cache-hits" 

136 

137#: Number of cache misses from the ActionCache 

138AC_CACHE_MISSES_METRIC_NAME = "action-cache-misses" 

139 

140#: Number of cache hits which became misses due to missing blobs in CAS 

141AC_UNUSABLE_CACHE_HITS_METRIC_NAME = "action-cache-hits-with-missing-blobs" 

142 

143# 

144# S3 metrics 

145# 

146 

147#: Time taken to check errors from a bulk_delete 

148S3_DELETE_ERROR_CHECK_METRIC_NAME = "s3-deletion-error-check-timer" 

149 

150 

151# 

152# Cleanup metrics 

153# 

154 

155#: Number of blobs deleted per second in a cleanup batch 

156CLEANUP_BLOBS_DELETION_RATE_METRIC_NAME = "cleanup.blobs-deleted-per-second" 

157 

158#: Number of bytes deleted per second in a cleanup batch 

159CLEANUP_BYTES_DELETION_RATE_METRIC_NAME = "cleanup.bytes-deleted-per-second" 

160 

161#: Total time taken to clean enough blobs to get the CAS size down to the low watermark 

162CLEANUP_RUNTIME_METRIC_NAME = "cleanup.runtime-timer" 

163 

164#: Time taken to bulk delete a set of blobs from the index 

165CLEANUP_INDEX_BULK_DELETE_METRIC_NAME = "cleanup.index.bulk-delete-timer" 

166 

167#: Time taken to mark a set of blobs as deleted in the index 

168CLEANUP_INDEX_MARK_DELETED_METRIC_NAME = "cleanup.index.mark-as-deleted-timer" 

169 

170#: Number of blobs that were already marked for deletion in the index when marking as deleted 

171CLEANUP_INDEX_PREMARKED_BLOBS_METRIC_NAME = "cleanup.index.premarked-blobs-count" 

172 

173#: Time taken to bulk delete a set of blobs from the storage backend 

174CLEANUP_STORAGE_BULK_DELETE_METRIC_NAME = "cleanup.storage.bulk-delete-timer" 

175 

176#: Number of blobs that failed to be deleted from the storage backend in a given bulk delete request 

177CLEANUP_STORAGE_DELETION_FAILURES_METRIC_NAME = "cleanup.storage.deletion-failures-count" 

178 

179 

180# 

181# ExecutedActionMetadata metrics 

182# 

183 

184#: Time spent queued before being assigned to a worker 

185QUEUED_TIME_METRIC_NAME = "action-queued-time" 

186 

187#: Time spent in the worker (fetching inputs + executing + uploading outputs) 

188WORKER_HANDLING_TIME_METRIC_NAME = "worker-handling-time" 

189 

190#: Time spent fetching inputs before execution 

191INPUTS_FETCHING_TIME_METRIC_NAME = "inputs-fetching-time" 

192 

193#: Time spent waiting for executions to complete 

194EXECUTION_TIME_METRIC_NAME = "execution-time" 

195 

196#: Time spent uploading outputs after execution

197OUTPUTS_UPLOADING_TIME_METRIC_NAME = "outputs-uploading-time" 

198 

199#: Total time spent servicing an execution request (time queued + fetching inputs +

200#: executing + uploading outputs)

201TOTAL_HANDLING_TIME_METRIC_NAME = "total-handling-time" 

202 

203 

204# 

205# Execution service metrics 

206# 

207 

208#: Number of bots connected 

209BOT_COUNT_METRIC_NAME = "bots-count" 

210 

211#: Number of clients connected 

212CLIENT_COUNT_METRIC_NAME = "clients-count" 

213 

214#: Number of leases present in the scheduler 

215LEASE_COUNT_METRIC_NAME = "lease-count" 

216 

217#: Counter metric indicating lease state transitions

218LEASE_CHANGES_COUNTER_METRIC_NAME = "lease-state-transitions-counter" 

219 

220#: Number of active jobs in the scheduler 

221JOB_COUNT_METRIC_NAME = "job-count" 

222 

223#: Counter metric indicating job stage transitions 

224JOB_CHANGES_COUNTER_METRIC_NAME = "job-stage-transitions-counter" 

225 

226#: Average time that a job spends waiting to be executed 

227AVERAGE_QUEUE_TIME_METRIC_NAME = "average-queue-time" 

228 

229#: Number of ``Execute()`` requests received: 

230EXECUTE_REQUEST_COUNT_METRIC_NAME = "execute-call-count" 

231 

232#: Time spent servicing ``Execute()`` requests: 

233EXECUTE_SERVICER_TIME_METRIC_NAME = "execute-servicing-time" 

234 

235#: Number of ``WaitExecution()`` requests received: 

236WAIT_EXECUTION_REQUEST_COUNT_METRIC_NAME = "wait-execution-call-count" 

237 

238#: Time spent servicing ``WaitExecution()`` requests: 

239WAIT_EXECUTION_SERVICER_TIME_METRIC_NAME = "wait-execution-servicing-time" 

240 

241# 

242# LogStream service metrics 

243# 

244 

245#: Time spent creating a LogStream 

246LOGSTREAM_CREATE_LOG_STREAM_TIME_METRIC_NAME = "logstream.create-logstream-time" 

247 

248#: Number of bytes in a committed logstream 

249LOGSTREAM_WRITE_UPLOADED_BYTES_COUNT = "logstream.write.uploaded-bytes-count" 

250 

251# 

252# Authentication Metrics 

253# 

254 

255#: Number of invalid JWTs received:

256INVALID_JWT_COUNT_METRIC_NAME = "authentication.jwt.invalid-jwt-count" 

257 

258#: Duration of JWK fetch request: 

259JWK_FETCH_TIME_METRIC_NAME = "authentication.jwk.fetch-request-time" 

260 

261#: Duration of JWT decoding: 

262JWT_DECODE_TIME_METRIC_NAME = "authentication.jwt.decode-jwt-time" 

263 

264#: Duration of JWT validation (can include fetching JWK): 

265JWT_VALIDATION_TIME_METRIC_NAME = "authentication.jwt.validate-jwt-time" 

266 

267# 

268# Bots service metrics 

269# 

270 

271#: Time spent servicing ``CreateBotSession()`` requests 

272BOTS_CREATE_BOT_SESSION_TIME_METRIC_NAME = "bots.create-bot-session-time" 

273 

274#: Time spent servicing ``UpdateBotSession()`` requests 

275BOTS_UPDATE_BOT_SESSION_TIME_METRIC_NAME = "bots.update-bot-session-time" 

276 

277#: Time spent selecting an Action from the data store to create a lease for 

278BOTS_ASSIGN_JOB_LEASES_TIME_METRIC_NAME = "bots.assign-job-leases-time" 

279 

280 

281# 

282# Scheduler metrics 

283# 

284 

285#: Time taken to queue an Action 

286SCHEDULER_QUEUE_ACTION_TIME_METRIC_NAME = "scheduler.queue-action-time" 

287 

288#: Time taken to update a job's Lease 

289SCHEDULER_UPDATE_LEASE_TIME_METRIC_NAME = "scheduler.update-lease-time" 

290 

291#: Time taken to cancel an Operation 

292SCHEDULER_CANCEL_OPERATION_TIME_METRIC_NAME = "scheduler.cancel-operation-time" 

293 

294 

295# 

296# Data Store (scheduler's backend) metrics 

297# 

298# Some of these seem like duplicates of the request-level timers 

299# at a glance, but measuring at the data store level allows us to 

300# see how much overhead our own code is adding to the calls. 

301# 

302 

303#: Time taken to create a Job 

304DATA_STORE_CREATE_JOB_TIME_METRIC_NAME = "datastore.all.create-job-time" 

305 

306#: Time taken to enqueue a Job 

307DATA_STORE_QUEUE_JOB_TIME_METRIC_NAME = "datastore.all.queue-job-time" 

308 

309#: Time taken to update a Job 

310DATA_STORE_UPDATE_JOB_TIME_METRIC_NAME = "datastore.all.update-job-time" 

311 

312#: Time taken to create a Lease 

313DATA_STORE_CREATE_LEASE_TIME_METRIC_NAME = "datastore.all.create-lease-time" 

314 

315#: Time taken to update a Lease 

316DATA_STORE_UPDATE_LEASE_TIME_METRIC_NAME = "datastore.all.update-lease-time" 

317 

318#: Time taken to create an Operation 

319DATA_STORE_CREATE_OPERATION_TIME_METRIC_NAME = "datastore.all.create-operation-time" 

320 

321#: Time taken to update an Operation 

322DATA_STORE_UPDATE_OPERATION_TIME_METRIC_NAME = "datastore.all.update-operation-time" 

323 

324#: Time taken to get a list of Operations 

325DATA_STORE_LIST_OPERATIONS_TIME_METRIC_NAME = "datastore.all.list-operations-time" 

326 

327#: Time taken to get a Job by Action Digest 

328DATA_STORE_GET_JOB_BY_DIGEST_TIME_METRIC_NAME = "datastore.all.get-job-by-digest-time" 

329 

330#: Time taken to get a Job by name 

331DATA_STORE_GET_JOB_BY_NAME_TIME_METRIC_NAME = "datastore.all.get-job-by-name-time" 

332 

333#: Time taken to get a Job by Operation name 

334DATA_STORE_GET_JOB_BY_OPERATION_TIME_METRIC_NAME = "datastore.all.get-job-by-operation-time" 

335 

336#: Time taken to handle checking for a job update. When using 

337#: a database backend other than PostgreSQL, this will measure

338#: how long it takes to check all watched jobs for updates once.

339#: For PostgreSQL and the in-memory scheduler, this measures how

340#: long it takes to handle a job update notification.

341DATA_STORE_CHECK_FOR_UPDATE_TIME_METRIC_NAME = "datastore.all.check-for-update-time" 

342 

343# SQL-specific metrics 

344 

345#: Time taken to store the ExecuteResponse 

346DATA_STORE_STORE_RESPONSE_TIME_METRIC_NAME = "datastore.sql.store-response-time" 

347 

348#: Number of rows deleted from the jobs table during each pruning 

349DATA_STORE_PRUNER_NUM_ROWS_DELETED_METRIC_NAME = "datastore.sql.pruner-num-rows-deleted" 

350 

351#: Time taken per scheduler pruning invocation 

352DATA_STORE_PRUNER_DELETE_TIME_METRIC_NAME = "datastore.sql.pruner-delete-time" 

353 

354 

355# 

356# Operations service metrics 

357# 

358 

359#: Time taken to completely handle a ListOperations request 

360OPERATIONS_LIST_OPERATIONS_TIME_METRIC_NAME = "operations.list-operations-time" 

361 

362#: Time taken to completely handle a GetOperation request 

363OPERATIONS_GET_OPERATION_TIME_METRIC_NAME = "operations.get-operation-time" 

364 

365#: Time taken to completely handle a CancelOperation request 

366OPERATIONS_CANCEL_OPERATION_TIME_METRIC_NAME = "operations.cancel-operation-time" 

367 

368#: Time taken to completely handle a DeleteOperation request. BuildGrid 

369#: doesn't actually support DeleteOperation, but this metric will at

370#: least provide insight into whether people are attempting to call it.

371OPERATIONS_DELETE_OPERATION_TIME_METRIC_NAME = "operations.delete-operation-time"