Coverage for /builds/BuildGrid/buildgrid/buildgrid/server/metrics_names.py: 100.00%

90 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-22 21:04 +0000

1# Copyright (C) 2020 Bloomberg LP 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# <http://www.apache.org/licenses/LICENSE-2.0> 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15# 

16# CAS metrics 

17# 

18 

19#: Number of exceptions thrown from CAS servicer functions 

20CAS_EXCEPTION_COUNT_METRIC_NAME = 'cas-exception' 

21 

22#: Number of bytes uploaded to a CAS instance 

23CAS_UPLOADED_BYTES_METRIC_NAME = 'cas-uploaded-bytes' 

24 

25#: Number of bytes downloaded from a CAS instance 

26CAS_DOWNLOADED_BYTES_METRIC_NAME = 'cas-downloaded-bytes' 

27 

28#: Number of blobs requested in ``FindMissingBlobs()`` calls 

29CAS_FIND_MISSING_BLOBS_NUM_REQUESTED_METRIC_NAME = 'find-missing-blobs-num-requested' 

30 

31#: Size of blobs requested in ``FindMissingBlobs()`` calls 

32CAS_FIND_MISSING_BLOBS_SIZE_BYTES_REQUESTED_METRIC_NAME = 'find-missing-blobs-size-bytes-requested' 

33 

34#: Number of blobs reported to be missing in ``FindMissingBlobs()`` calls 

35CAS_FIND_MISSING_BLOBS_NUM_MISSING_METRIC_NAME = 'find-missing-blobs-num-missing' 

36 

37#: Percentage of blobs reported to be missing in ``FindMissingBlobs()`` calls 

38CAS_FIND_MISSING_BLOBS_PERCENT_MISSING_METRIC_NAME = 'find-missing-blobs-percent-missing' 

39 

40#: Size of blobs reported to be missing in ``FindMissingBlobs()`` calls 

41CAS_FIND_MISSING_BLOBS_SIZE_BYTES_MISSING_METRIC_NAME = 'find-missing-blobs-size-bytes-missing' 

42 

43#: Time that ``FindMissingBlobs()`` operations took to complete 

44CAS_FIND_MISSING_BLOBS_TIME_METRIC_NAME = 'find-missing-blobs' 

45 

46#: Time that ``BatchUpdateBlobs()`` operations took to complete 

47CAS_BATCH_UPDATE_BLOBS_TIME_METRIC_NAME = 'batch-update-blobs' 

48 

49#: Size of blobs written with ``BatchUpdateBlobs()`` calls 

50CAS_BATCH_UPDATE_BLOBS_SIZE_BYTES = 'batch-update-blobs-size-bytes' 

51 

52#: Time that ``BatchReadBlobs()`` operations took to complete 

53CAS_BATCH_READ_BLOBS_TIME_METRIC_NAME = 'batch-read-blobs' 

54 

55#: Size of blobs read with ``BatchReadBlobs()`` calls 

56CAS_BATCH_READ_BLOBS_SIZE_BYTES = 'batch-read-blobs-size-bytes' 

57 

58#: Time that ``GetTree()`` operations took to complete 

59CAS_GET_TREE_TIME_METRIC_NAME = 'get-tree' 

60 

61#: Time that ``ByteStream.Read()`` operations took to complete 

62CAS_BYTESTREAM_READ_TIME_METRIC_NAME = 'bytestream-read' 

63 

64#: Size of blobs read with ``ByteStream.Read()`` 

65CAS_BYTESTREAM_READ_SIZE_BYTES = 'bytestream-read-size-bytes' 

66 

67#: Time that ``ByteStream.Write()`` operations took to complete 

68CAS_BYTESTREAM_WRITE_TIME_METRIC_NAME = 'bytestream-write' 

69 

70#: Size of blobs written with ``ByteStream.Write()`` 

71CAS_BYTESTREAM_WRITE_SIZE_BYTES = 'bytestream-write-size-bytes' 

72 

73# CAS cache wrapper metrics 

74 

75#: Count of cache misses in BatchReadBlobs requests to the 

76# !with-cache-storage. This only counts the blobs which were 

77# in the fallback storage; blobs that were entirely missing 

78# don't count as cache misses, since this metric is intended 

79# to measure how many things that *could* have been cached 

80# were actually not. 

81CAS_CACHE_BULK_READ_MISS_COUNT_NAME = 'cas-withcache-bulk-read-misses' 

82 

83#: Count of cache hits in BatchReadBlobs requests to the !with-cache-storage 

84CAS_CACHE_BULK_READ_HIT_COUNT_NAME = 'cas-withcache-bulk-read-hits' 

85 

86#: Percentage of cache hits in a given BatchReadBlobs request in the 

87# !with-cache-storage. This is as a percentage of total blobs requested, 

88# including blobs which were missing entirely. 

89CAS_CACHE_BULK_READ_HIT_PERCENTAGE_NAME = 'cas-withcache-bulk-read-hit-percent' 

90 

91#: Count of cache misses in ByteStream Read requests to the 

92# !with-cache-storage. This only counts the blobs which were 

93# in the fallback storage; blobs that were entirely missing 

94# don't count as cache misses, since this metric is intended 

95# to measure how many things that *could* have been cached 

96# were actually not. 

97CAS_CACHE_GET_BLOB_MISS_COUNT_NAME = 'cas-withcache-get-blob-misses' 

98 

99#: Count of cache hits in ByteStream Read requests to the !with-cache-storage 

100CAS_CACHE_GET_BLOB_HIT_COUNT_NAME = 'cas-withcache-get-blob-hits' 

101 

102# Indexed CAS metrics 

103 

104#: Time taken to bulk select a number of digests from the index 

105CAS_INDEX_BULK_SELECT_DIGEST_TIME_METRIC_NAME = 'cas.index.bulk-select-digest-time' 

106 

107#: Time taken to update a blob timestamp in the index 

108CAS_INDEX_BLOB_TIMESTAMP_UPDATE_TIME_METRIC_NAME = 'cas.index.blob-timestamp-update-time' 

109 

110#: Time taken to run a bulk timestamp update in the index 

111CAS_INDEX_BULK_TIMESTAMP_UPDATE_TIME_METRIC_NAME = 'cas.index.bulk-timestamp-update-time' 

112 

113#: Time taken to return from `get_blob()`. This includes the time taken to 

114# check and update the index, along with the time to fetch the blob from the 

115# underlying storage, and update the index if `fallback_on_get` is enabled. 

116CAS_INDEX_GET_BLOB_TIME_METRIC_NAME = 'cas.index.get-blob-time' 

117 

118#: Time taken to store a list of digests in the index 

119CAS_INDEX_SAVE_DIGESTS_TIME_METRIC_NAME = 'cas.index.save-digests-time' 

120 

121#: Time taken to get the total size of the CAS the index is for 

122CAS_INDEX_SIZE_CALCULATION_TIME_METRIC_NAME = 'cas.index.total-size-calculation-time' 

123 

124# 

125# ActionCache metrics 

126# 

127 

128#: Time that ``GetActionResult()`` operations took to complete 

129AC_GET_ACTION_RESULT_TIME_METRIC_NAME = 'get-action-result' 

130 

131#: Time that ``UpdateActionResult()`` operations took to complete 

132AC_UPDATE_ACTION_RESULT_TIME_METRIC_NAME = 'update-action-result' 

133 

134#: Number of cache hits from the ActionCache 

135AC_CACHE_HITS_METRIC_NAME = 'action-cache-hits' 

136 

137#: Number of cache misses from the ActionCache 

138AC_CACHE_MISSES_METRIC_NAME = 'action-cache-misses' 

139 

140 

141# 

142# S3 metrics 

143# 

144 

145#: Time taken to check errors from a bulk_delete 

146S3_DELETE_ERROR_CHECK_METRIC_NAME = "s3-deletion-error-check-timer" 

147 

148 

149# 

150# Cleanup metrics 

151# 

152 

153#: Number of blobs deleted per second in a cleanup batch 

154CLEANUP_BLOBS_DELETION_RATE_METRIC_NAME = "cleanup.blobs-deleted-per-second" 

155 

156#: Number of bytes deleted per second in a cleanup batch 

157CLEANUP_BYTES_DELETION_RATE_METRIC_NAME = "cleanup.bytes-deleted-per-second" 

158 

159#: Total time taken to clean enough blobs to get the CAS size down to the low watermark 

160CLEANUP_RUNTIME_METRIC_NAME = "cleanup.runtime-timer" 

161 

162#: Time taken to bulk delete a set of blobs from the index 

163CLEANUP_INDEX_BULK_DELETE_METRIC_NAME = "cleanup.index.bulk-delete-timer" 

164 

165#: Time taken to mark a set of blobs as deleted in the index 

166CLEANUP_INDEX_MARK_DELETED_METRIC_NAME = "cleanup.index.mark-as-deleted-timer" 

167 

168#: Number of blobs that were already marked for deletion in the index when marking as deleted 

169CLEANUP_INDEX_PREMARKED_BLOBS_METRIC_NAME = "cleanup.index.premarked-blobs-count" 

170 

171#: Time taken to bulk delete a set of blobs from the storage backend 

172CLEANUP_STORAGE_BULK_DELETE_METRIC_NAME = "cleanup.storage.bulk-delete-timer" 

173 

174#: Number of blobs that failed to be deleted from the storage backend in a given bulk delete request 

175CLEANUP_STORAGE_DELETION_FAILURES_METRIC_NAME = "cleanup.storage.deletion-failures-count" 

176 

177 

178# 

179# ExecutedActionMetadata metrics 

180# 

181 

182#: Time spent queued before being assigned to a worker 

183QUEUED_TIME_METRIC_NAME = 'action-queued-time' 

184 

185#: Time spent in the worker (fetching inputs + executing + uploading outputs) 

186WORKER_HANDLING_TIME_METRIC_NAME = 'worker-handling-time' 

187 

188#: Time spent fetching inputs before execution 

189INPUTS_FETCHING_TIME_METRIC_NAME = 'inputs-fetching-time' 

190 

191#: Time spent waiting for executions to complete 

192EXECUTION_TIME_METRIC_NAME = 'execution-time' 

193 

194#: Time spent uploading outputs after execution 

195OUTPUTS_UPLOADING_TIME_METRIC_NAME = 'outputs-uploading-time' 

196 

197#: Total time spent servicing an execution request (time queued + fetching inputs + 

198# executing + uploading outputs) 

199TOTAL_HANDLING_TIME_METRIC_NAME = 'total-handling-time' 

200 

201 

202# 

203# Execution service metrics 

204# 

205 

206#: Number of bots connected 

207BOT_COUNT_METRIC_NAME = 'bots-count' 

208 

209#: Number of clients connected 

210CLIENT_COUNT_METRIC_NAME = 'clients-count' 

211 

212#: Number of leases present in the scheduler 

213LEASE_COUNT_METRIC_NAME = 'lease-count' 

214 

215#: Counter metric indicating lease state transitions 

216LEASE_CHANGES_COUNTER_METRIC_NAME = 'lease-state-transitions-counter' 

217 

218#: Number of active jobs in the scheduler 

219JOB_COUNT_METRIC_NAME = 'job-count' 

220 

221#: Counter metric indicating job stage transitions 

222JOB_CHANGES_COUNTER_METRIC_NAME = 'job-stage-transitions-counter' 

223 

224#: Average time that a job spends waiting to be executed 

225AVERAGE_QUEUE_TIME_METRIC_NAME = 'average-queue-time' 

226 

227#: Number of ``Execute()`` requests received: 

228EXECUTE_REQUEST_COUNT_METRIC_NAME = 'execute-call-count' 

229 

230#: Time spent servicing ``Execute()`` requests: 

231EXECUTE_SERVICER_TIME_METRIC_NAME = 'execute-servicing-time' 

232 

233#: Number of ``WaitExecution()`` requests received: 

234WAIT_EXECUTION_REQUEST_COUNT_METRIC_NAME = 'wait-execution-call-count' 

235 

236#: Time spent servicing ``WaitExecution()`` requests: 

237WAIT_EXECUTION_SERVICER_TIME_METRIC_NAME = 'wait-execution-servicing-time' 

238 

239# 

240# LogStream service metrics 

241# 

242 

243#: Time spent creating a LogStream 

244LOGSTREAM_CREATE_LOG_STREAM_TIME_METRIC_NAME = 'logstream.create-logstream-time' 

245 

246#: Number of bytes in a committed logstream 

247LOGSTREAM_WRITE_UPLOADED_BYTES_COUNT = 'logstream.write.uploaded-bytes-count' 

248 

249# 

250# Authentication Metrics 

251# 

252 

253#: Number of invalid JWTs received: 

254INVALID_JWT_COUNT_METRIC_NAME = 'authentication.jwt.invalid-jwt-count' 

255 

256#: Duration of JWK fetch request: 

257JWK_FETCH_TIME_METRIC_NAME = 'authentication.jwk.fetch-request-time' 

258 

259#: Duration of JWT decoding: 

260JWT_DECODE_TIME_METRIC_NAME = 'authentication.jwt.decode-jwt-time' 

261 

262#: Duration of JWT validation (can include fetching JWK): 

263JWT_VALIDATION_TIME_METRIC_NAME = 'authentication.jwt.validate-jwt-time' 

264 

265# 

266# Bots service metrics 

267# 

268 

269#: Time spent servicing ``CreateBotSession()`` requests 

270BOTS_CREATE_BOT_SESSION_TIME_METRIC_NAME = 'bots.create-bot-session-time' 

271 

272#: Time spent servicing ``UpdateBotSession()`` requests 

273BOTS_UPDATE_BOT_SESSION_TIME_METRIC_NAME = 'bots.update-bot-session-time' 

274 

275#: Time spent selecting an Action from the data store to create a lease for 

276BOTS_ASSIGN_JOB_LEASES_TIME_METRIC_NAME = 'bots.assign-job-leases-time' 

277 

278 

279# 

280# Scheduler metrics 

281# 

282 

283#: Time taken to queue an Action 

284SCHEDULER_QUEUE_ACTION_TIME_METRIC_NAME = 'scheduler.queue-action-time' 

285 

286#: Time taken to update a job's Lease 

287SCHEDULER_UPDATE_LEASE_TIME_METRIC_NAME = 'scheduler.update-lease-time' 

288 

289#: Time taken to cancel an Operation 

290SCHEDULER_CANCEL_OPERATION_TIME_METRIC_NAME = 'scheduler.cancel-operation-time' 

291 

292 

293# 

294# Data Store (scheduler's backend) metrics 

295# 

296# Some of these seem like duplicates of the request-level timers 

297# at a glance, but measuring at the data store level allows us to 

298# see how much overhead our own code is adding to the calls. 

299# 

300 

301#: Time taken to create a Job 

302DATA_STORE_CREATE_JOB_TIME_METRIC_NAME = 'datastore.all.create-job-time' 

303 

304#: Time taken to enqueue a Job 

305DATA_STORE_QUEUE_JOB_TIME_METRIC_NAME = 'datastore.all.queue-job-time' 

306 

307#: Time taken to update a Job 

308DATA_STORE_UPDATE_JOB_TIME_METRIC_NAME = 'datastore.all.update-job-time' 

309 

310#: Time taken to create a Lease 

311DATA_STORE_CREATE_LEASE_TIME_METRIC_NAME = 'datastore.all.create-lease-time' 

312 

313#: Time taken to update a Lease 

314DATA_STORE_UPDATE_LEASE_TIME_METRIC_NAME = 'datastore.all.update-lease-time' 

315 

316#: Time taken to create an Operation 

317DATA_STORE_CREATE_OPERATION_TIME_METRIC_NAME = 'datastore.all.create-operation-time' 

318 

319#: Time taken to update an Operation 

320DATA_STORE_UPDATE_OPERATION_TIME_METRIC_NAME = 'datastore.all.update-operation-time' 

321 

322#: Time taken to get a list of Operations 

323DATA_STORE_LIST_OPERATIONS_TIME_METRIC_NAME = 'datastore.all.list-operations-time' 

324 

325#: Time taken to get a Job by Action Digest 

326DATA_STORE_GET_JOB_BY_DIGEST_TIME_METRIC_NAME = 'datastore.all.get-job-by-digest-time' 

327 

328#: Time taken to get a Job by name 

329DATA_STORE_GET_JOB_BY_NAME_TIME_METRIC_NAME = 'datastore.all.get-job-by-name-time' 

330 

331#: Time taken to get a Job by Operation name 

332DATA_STORE_GET_JOB_BY_OPERATION_TIME_METRIC_NAME = 'datastore.all.get-job-by-operation-time' 

333 

334#: Time taken to handle checking for a job update. When using 

335# a database backend other than PostgreSQL, this will measure 

336# how long it takes to check all watched jobs for updates once. 

337# For PostgreSQL and the in-memory scheduler, this measures how 

338# long it takes to handle a job update notification. 

339DATA_STORE_CHECK_FOR_UPDATE_TIME_METRIC_NAME = 'datastore.all.check-for-update-time' 

340 

341# SQL-specific metrics 

342 

343#: Time taken to store the ExecuteResponse 

344DATA_STORE_STORE_RESPONSE_TIME_METRIC_NAME = 'datastore.sql.store-response-time' 

345 

346#: Number of rows deleted from the jobs table during each pruning 

347DATA_STORE_PRUNER_NUM_ROWS_DELETED_METRIC_NAME = "datastore.sql.pruner-num-rows-deleted" 

348 

349#: Time taken per scheduler pruning invocation 

350DATA_STORE_PRUNER_DELETE_TIME_METRIC_NAME = 'datastore.sql.pruner-delete-time' 

351 

352 

353# 

354# Operations service metrics 

355# 

356 

357#: Time taken to completely handle a ListOperations request 

358OPERATIONS_LIST_OPERATIONS_TIME_METRIC_NAME = 'operations.list-operations-time' 

359 

360#: Time taken to completely handle a GetOperation request 

361OPERATIONS_GET_OPERATION_TIME_METRIC_NAME = 'operations.get-operation-time' 

362 

363#: Time taken to completely handle a CancelOperation request 

364OPERATIONS_CANCEL_OPERATION_TIME_METRIC_NAME = 'operations.cancel-operation-time' 

365 

366#: Time taken to completely handle a DeleteOperation request. BuildGrid 

367# doesn't actually support DeleteOperation, but this metric will at 

368# least provide insight into whether people are attempting to call it. 

369OPERATIONS_DELETE_OPERATION_TIME_METRIC_NAME = 'operations.delete-operation-time'