summary.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. // Copyright 2014 The Prometheus Authors
  2. // Licensed under the Apache License, Version 2.0 (the "License");
  3. // you may not use this file except in compliance with the License.
  4. // You may obtain a copy of the License at
  5. //
  6. // http://www.apache.org/licenses/LICENSE-2.0
  7. //
  8. // Unless required by applicable law or agreed to in writing, software
  9. // distributed under the License is distributed on an "AS IS" BASIS,
  10. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package prometheus
  14. import (
  15. "fmt"
  16. "math"
  17. "sort"
  18. "sync"
  19. "time"
  20. "github.com/beorn7/perks/quantile"
  21. "github.com/golang/protobuf/proto"
  22. dto "github.com/prometheus/client_model/go"
  23. )
// quantileLabel is used for the label that defines the quantile in a
// summary. Because of that, it is rejected as a variable or constant label
// name when a summary is created (see newSummary).
const quantileLabel = "quantile"
// A Summary captures individual observations from an event or sample stream and
// summarizes them in a manner similar to traditional summary statistics: 1. sum
// of observations, 2. observation count, 3. rank estimations.
//
// A typical use-case is the observation of request latencies. By default, a
// Summary provides the median, the 90th and the 99th percentile of the latency
// as rank estimations.
//
// Note that the rank estimations cannot be aggregated in a meaningful way with
// the Prometheus query language (i.e. you cannot average or add them). If you
// need aggregatable quantiles (e.g. you want the 99th percentile latency of all
// queries served across all instances of a service), consider the Histogram
// metric type. See the Prometheus documentation for more details.
//
// To create Summary instances, use NewSummary.
type Summary interface {
	Metric
	Collector

	// Observe adds a single observation to the summary.
	Observe(float64)
}
// DefObjectives are the default Summary quantile values.
//
// Deprecated: DefObjectives will not be used as the default objectives in
// v0.10 of the library. The default Summary will have no quantiles then.
var (
	// DefObjectives maps each default quantile rank to its allowed absolute
	// error. It is used when SummaryOpts.Objectives is nil.
	DefObjectives = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}

	// errQuantileLabelNotAllowed is the panic value used by newSummary when
	// quantileLabel appears among the variable or constant label names.
	errQuantileLabelNotAllowed = fmt.Errorf(
		"%q is not allowed as label name in summaries", quantileLabel,
	)
)
// Default values for SummaryOpts. Each one is applied by newSummary when the
// corresponding SummaryOpts field is left at its zero value.
const (
	// DefMaxAge is the default duration for which observations stay
	// relevant.
	DefMaxAge time.Duration = 10 * time.Minute
	// DefAgeBuckets is the default number of buckets used to calculate the
	// age of observations.
	DefAgeBuckets = 5
	// DefBufCap is the standard buffer size for collecting Summary observations.
	DefBufCap = 500
)
// SummaryOpts bundles the options for creating a Summary metric. It is
// mandatory to set Name and Help to a non-empty string. All other fields are
// optional and can safely be left at their zero value.
type SummaryOpts struct {
	// Namespace, Subsystem, and Name are components of the fully-qualified
	// name of the Summary (created by joining these components with
	// "_"). Only Name is mandatory, the others merely help structuring the
	// name. Note that the fully-qualified name of the Summary must be a
	// valid Prometheus metric name.
	Namespace string
	Subsystem string
	Name      string

	// Help provides information about this Summary. Mandatory!
	//
	// Metrics with the same fully-qualified name must have the same Help
	// string.
	Help string

	// ConstLabels are used to attach fixed labels to this
	// Summary. Summaries with the same fully-qualified name must have the
	// same label names in their ConstLabels.
	//
	// Note that in most cases, labels have a value that varies during the
	// lifetime of a process. Those labels are usually managed with a
	// SummaryVec. ConstLabels serve only special purposes. One is for the
	// special case where the value of a label does not change during the
	// lifetime of a process, e.g. if the revision of the running binary is
	// put into a label. Another, more advanced purpose is if more than one
	// Collector needs to collect Summaries with the same fully-qualified
	// name. In that case, those Summaries must differ in the values of
	// their ConstLabels. See the Collector examples.
	//
	// If the value of a label never changes (not even between binaries),
	// that label most likely should not be a label at all (but part of the
	// metric name).
	ConstLabels Labels

	// Objectives defines the quantile rank estimates with their respective
	// absolute error. If Objectives[q] = e, then the value reported for q
	// will be the φ-quantile value for some φ between q-e and q+e. The
	// default value is DefObjectives. It is used if Objectives is left at
	// its zero value (i.e. nil). To create a Summary without Objectives,
	// set it to an empty map (i.e. map[float64]float64{}).
	//
	// Deprecated: Note that the current value of DefObjectives is
	// deprecated. It will be replaced by an empty map in v0.10 of the
	// library. Please explicitly set Objectives to the desired value.
	Objectives map[float64]float64

	// MaxAge defines the duration for which an observation stays relevant
	// for the summary. Must be positive. The default value is DefMaxAge.
	MaxAge time.Duration

	// AgeBuckets is the number of buckets used to exclude observations that
	// are older than MaxAge from the summary. A higher number has a
	// resource penalty, so only increase it if the higher resolution is
	// really required. For very high observation rates, you might want to
	// reduce the number of age buckets. With only one age bucket, you will
	// effectively see a complete reset of the summary each time MaxAge has
	// passed. The default value is DefAgeBuckets.
	AgeBuckets uint32

	// BufCap defines the default sample stream buffer size. The default
	// value of DefBufCap should suffice for most uses. If there is a need
	// to increase the value, a multiple of 500 is recommended (because that
	// is the internal buffer size of the underlying package
	// "github.com/beorn7/perks/quantile").
	BufCap uint32
}
// Unfortunately, the original sliding-window decay algorithm turned out to be
// flawed: the Merge method of perks/quantile does not work as advertised, and
// it might be unfixable, as the underlying algorithm is apparently not capable
// of merging summaries in the first place. To avoid using Merge, we are
// currently adding observations to _each_ age bucket, i.e. the effort to add a
// sample is essentially multiplied by the number of age buckets. When rotating
// age buckets, we empty the previous head stream. On scrape time, we simply
// take the quantiles from the head stream (no merging required). Result: More
// effort on observation time, less effort on scrape time, which is exactly the
// opposite of what we try to accomplish, but at least the results are correct.
//
// The quite elegant previous contraption to merge the age buckets efficiently
// on scrape time (see the code up to commit
// 6b9530d72ea715f0ba612c0120e6e09fbf1d49d0) can't be used anymore.

  147. // NewSummary creates a new Summary based on the provided SummaryOpts.
  148. func NewSummary(opts SummaryOpts) Summary {
  149. return newSummary(
  150. NewDesc(
  151. BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
  152. opts.Help,
  153. nil,
  154. opts.ConstLabels,
  155. ),
  156. opts,
  157. )
  158. }
  159. func newSummary(desc *Desc, opts SummaryOpts, labelValues ...string) Summary {
  160. if len(desc.variableLabels) != len(labelValues) {
  161. panic(errInconsistentCardinality)
  162. }
  163. for _, n := range desc.variableLabels {
  164. if n == quantileLabel {
  165. panic(errQuantileLabelNotAllowed)
  166. }
  167. }
  168. for _, lp := range desc.constLabelPairs {
  169. if lp.GetName() == quantileLabel {
  170. panic(errQuantileLabelNotAllowed)
  171. }
  172. }
  173. if opts.Objectives == nil {
  174. opts.Objectives = DefObjectives
  175. }
  176. if opts.MaxAge < 0 {
  177. panic(fmt.Errorf("illegal max age MaxAge=%v", opts.MaxAge))
  178. }
  179. if opts.MaxAge == 0 {
  180. opts.MaxAge = DefMaxAge
  181. }
  182. if opts.AgeBuckets == 0 {
  183. opts.AgeBuckets = DefAgeBuckets
  184. }
  185. if opts.BufCap == 0 {
  186. opts.BufCap = DefBufCap
  187. }
  188. s := &summary{
  189. desc: desc,
  190. objectives: opts.Objectives,
  191. sortedObjectives: make([]float64, 0, len(opts.Objectives)),
  192. labelPairs: makeLabelPairs(desc, labelValues),
  193. hotBuf: make([]float64, 0, opts.BufCap),
  194. coldBuf: make([]float64, 0, opts.BufCap),
  195. streamDuration: opts.MaxAge / time.Duration(opts.AgeBuckets),
  196. }
  197. s.headStreamExpTime = time.Now().Add(s.streamDuration)
  198. s.hotBufExpTime = s.headStreamExpTime
  199. for i := uint32(0); i < opts.AgeBuckets; i++ {
  200. s.streams = append(s.streams, s.newStream())
  201. }
  202. s.headStream = s.streams[0]
  203. for qu := range s.objectives {
  204. s.sortedObjectives = append(s.sortedObjectives, qu)
  205. }
  206. sort.Float64s(s.sortedObjectives)
  207. s.init(s) // Init self-collection.
  208. return s
  209. }
// summary is the implementation of Summary created by newSummary.
// Observations are first appended to hotBuf and later moved, via coldBuf,
// into every quantile stream.
type summary struct {
	selfCollector

	bufMtx sync.Mutex // Protects hotBuf and hotBufExpTime.
	mtx    sync.Mutex // Protects every other moving part.
	// Lock bufMtx before mtx if both are needed.

	desc *Desc

	// objectives maps quantile ranks to their allowed error;
	// sortedObjectives holds the same ranks in ascending order.
	objectives       map[float64]float64
	sortedObjectives []float64

	labelPairs []*dto.LabelPair

	// sum and cnt are updated in flushColdBuf, i.e. they only cover
	// observations that have already been flushed.
	sum float64
	cnt uint64

	// hotBuf receives new observations in Observe; swapBufs exchanges it
	// with the (empty) coldBuf, which flushColdBuf then drains.
	hotBuf, coldBuf []float64

	// streams holds one quantile stream per age bucket. headStream is
	// streams[headStreamIdx] and is the one queried in Write.
	streams                          []*quantile.Stream
	streamDuration                   time.Duration
	headStream                       *quantile.Stream
	headStreamIdx                    int
	headStreamExpTime, hotBufExpTime time.Time
}
// Desc returns the descriptor this summary was created with.
func (s *summary) Desc() *Desc {
	return s.desc
}
// Observe adds a single observation to the summary. The value is only
// appended to hotBuf here; it is added to the quantile streams, the count and
// the sum when the buffer is flushed.
func (s *summary) Observe(v float64) {
	s.bufMtx.Lock()
	defer s.bufMtx.Unlock()

	now := time.Now()
	// Flush first if the hot buffer has passed its expiration time, so that
	// v lands in a fresh buffer.
	if now.After(s.hotBufExpTime) {
		s.asyncFlush(now)
	}
	s.hotBuf = append(s.hotBuf, v)
	// Flush as soon as the buffer is full, so the append above never has to
	// grow it.
	if len(s.hotBuf) == cap(s.hotBuf) {
		s.asyncFlush(now)
	}
}
  243. func (s *summary) Write(out *dto.Metric) error {
  244. sum := &dto.Summary{}
  245. qs := make([]*dto.Quantile, 0, len(s.objectives))
  246. s.bufMtx.Lock()
  247. s.mtx.Lock()
  248. // Swap bufs even if hotBuf is empty to set new hotBufExpTime.
  249. s.swapBufs(time.Now())
  250. s.bufMtx.Unlock()
  251. s.flushColdBuf()
  252. sum.SampleCount = proto.Uint64(s.cnt)
  253. sum.SampleSum = proto.Float64(s.sum)
  254. for _, rank := range s.sortedObjectives {
  255. var q float64
  256. if s.headStream.Count() == 0 {
  257. q = math.NaN()
  258. } else {
  259. q = s.headStream.Query(rank)
  260. }
  261. qs = append(qs, &dto.Quantile{
  262. Quantile: proto.Float64(rank),
  263. Value: proto.Float64(q),
  264. })
  265. }
  266. s.mtx.Unlock()
  267. if len(qs) > 0 {
  268. sort.Sort(quantSort(qs))
  269. }
  270. sum.Quantile = qs
  271. out.Summary = sum
  272. out.Label = s.labelPairs
  273. return nil
  274. }
// newStream returns a new quantile stream targeted at the objectives of this
// summary.
func (s *summary) newStream() *quantile.Stream {
	return quantile.NewTargeted(s.objectives)
}
// asyncFlush swaps the buffers and then flushes the cold buffer in a separate
// goroutine. It needs bufMtx locked.
//
// mtx is acquired here but released by the spawned goroutine, after
// flushColdBuf has returned.
func (s *summary) asyncFlush(now time.Time) {
	s.mtx.Lock()
	s.swapBufs(now)

	// Unblock the original goroutine that was responsible for the mutation
	// that triggered the compaction. But hold onto the global non-buffer
	// state mutex until the operation finishes.
	go func() {
		s.flushColdBuf()
		s.mtx.Unlock()
	}()
}
  290. // rotateStreams needs mtx AND bufMtx locked.
  291. func (s *summary) maybeRotateStreams() {
  292. for !s.hotBufExpTime.Equal(s.headStreamExpTime) {
  293. s.headStream.Reset()
  294. s.headStreamIdx++
  295. if s.headStreamIdx >= len(s.streams) {
  296. s.headStreamIdx = 0
  297. }
  298. s.headStream = s.streams[s.headStreamIdx]
  299. s.headStreamExpTime = s.headStreamExpTime.Add(s.streamDuration)
  300. }
  301. }
  302. // flushColdBuf needs mtx locked.
  303. func (s *summary) flushColdBuf() {
  304. for _, v := range s.coldBuf {
  305. for _, stream := range s.streams {
  306. stream.Insert(v)
  307. }
  308. s.cnt++
  309. s.sum += v
  310. }
  311. s.coldBuf = s.coldBuf[0:0]
  312. s.maybeRotateStreams()
  313. }
// swapBufs exchanges hotBuf and coldBuf and moves hotBufExpTime forward in
// steps of streamDuration until it is no longer before now.
//
// It needs mtx AND bufMtx locked, and coldBuf must be empty; it panics
// otherwise.
func (s *summary) swapBufs(now time.Time) {
	if len(s.coldBuf) != 0 {
		panic("coldBuf is not empty")
	}
	s.hotBuf, s.coldBuf = s.coldBuf, s.hotBuf
	// hotBuf is now empty and gets new expiration set.
	for now.After(s.hotBufExpTime) {
		s.hotBufExpTime = s.hotBufExpTime.Add(s.streamDuration)
	}
}
  325. type quantSort []*dto.Quantile
  326. func (s quantSort) Len() int {
  327. return len(s)
  328. }
  329. func (s quantSort) Swap(i, j int) {
  330. s[i], s[j] = s[j], s[i]
  331. }
  332. func (s quantSort) Less(i, j int) bool {
  333. return s[i].GetQuantile() < s[j].GetQuantile()
  334. }
// SummaryVec is a Collector that bundles a set of Summaries that all share the
// same Desc, but have different values for their variable labels. This is used
// if you want to count the same thing partitioned by various dimensions
// (e.g. HTTP request latencies, partitioned by status code and method). Create
// instances with NewSummaryVec.
type SummaryVec struct {
	// MetricVec is embedded; the methods declared on SummaryVec wrap the
	// MetricVec methods of the same name to return an Observer.
	*MetricVec
}
  343. // NewSummaryVec creates a new SummaryVec based on the provided SummaryOpts and
  344. // partitioned by the given label names. At least one label name must be
  345. // provided.
  346. func NewSummaryVec(opts SummaryOpts, labelNames []string) *SummaryVec {
  347. desc := NewDesc(
  348. BuildFQName(opts.Namespace, opts.Subsystem, opts.Name),
  349. opts.Help,
  350. labelNames,
  351. opts.ConstLabels,
  352. )
  353. return &SummaryVec{
  354. MetricVec: newMetricVec(desc, func(lvs ...string) Metric {
  355. return newSummary(desc, opts, lvs...)
  356. }),
  357. }
  358. }
  359. // GetMetricWithLabelValues replaces the method of the same name in MetricVec.
  360. // The difference is that this method returns an Observer and not a Metric so
  361. // that no type conversion to an Observer is required.
  362. func (m *SummaryVec) GetMetricWithLabelValues(lvs ...string) (Observer, error) {
  363. metric, err := m.MetricVec.GetMetricWithLabelValues(lvs...)
  364. if metric != nil {
  365. return metric.(Observer), err
  366. }
  367. return nil, err
  368. }
  369. // GetMetricWith replaces the method of the same name in MetricVec. The
  370. // difference is that this method returns an Observer and not a Metric so that
  371. // no type conversion to an Observer is required.
  372. func (m *SummaryVec) GetMetricWith(labels Labels) (Observer, error) {
  373. metric, err := m.MetricVec.GetMetricWith(labels)
  374. if metric != nil {
  375. return metric.(Observer), err
  376. }
  377. return nil, err
  378. }
  379. // WithLabelValues works as GetMetricWithLabelValues, but panics where
  380. // GetMetricWithLabelValues would have returned an error. By not returning an
  381. // error, WithLabelValues allows shortcuts like
  382. // myVec.WithLabelValues("404", "GET").Observe(42.21)
  383. func (m *SummaryVec) WithLabelValues(lvs ...string) Observer {
  384. return m.MetricVec.WithLabelValues(lvs...).(Observer)
  385. }
  386. // With works as GetMetricWith, but panics where GetMetricWithLabels would have
  387. // returned an error. By not returning an error, With allows shortcuts like
  388. // myVec.With(Labels{"code": "404", "method": "GET"}).Observe(42.21)
  389. func (m *SummaryVec) With(labels Labels) Observer {
  390. return m.MetricVec.With(labels).(Observer)
  391. }
// constSummary is the Metric returned by NewConstSummary. It carries fixed
// values for the count, the sum and the quantiles and has no method to
// observe further values.
type constSummary struct {
	desc       *Desc
	count      uint64
	sum        float64
	quantiles  map[float64]float64 // Maps quantile rank to quantile value.
	labelPairs []*dto.LabelPair
}
// Desc returns the descriptor this constSummary was created with.
func (s *constSummary) Desc() *Desc {
	return s.desc
}
  402. func (s *constSummary) Write(out *dto.Metric) error {
  403. sum := &dto.Summary{}
  404. qs := make([]*dto.Quantile, 0, len(s.quantiles))
  405. sum.SampleCount = proto.Uint64(s.count)
  406. sum.SampleSum = proto.Float64(s.sum)
  407. for rank, q := range s.quantiles {
  408. qs = append(qs, &dto.Quantile{
  409. Quantile: proto.Float64(rank),
  410. Value: proto.Float64(q),
  411. })
  412. }
  413. if len(qs) > 0 {
  414. sort.Sort(quantSort(qs))
  415. }
  416. sum.Quantile = qs
  417. out.Summary = sum
  418. out.Label = s.labelPairs
  419. return nil
  420. }
  421. // NewConstSummary returns a metric representing a Prometheus summary with fixed
  422. // values for the count, sum, and quantiles. As those parameters cannot be
  423. // changed, the returned value does not implement the Summary interface (but
  424. // only the Metric interface). Users of this package will not have much use for
  425. // it in regular operations. However, when implementing custom Collectors, it is
  426. // useful as a throw-away metric that is generated on the fly to send it to
  427. // Prometheus in the Collect method.
  428. //
  429. // quantiles maps ranks to quantile values. For example, a median latency of
  430. // 0.23s and a 99th percentile latency of 0.56s would be expressed as:
  431. // map[float64]float64{0.5: 0.23, 0.99: 0.56}
  432. //
  433. // NewConstSummary returns an error if the length of labelValues is not
  434. // consistent with the variable labels in Desc.
  435. func NewConstSummary(
  436. desc *Desc,
  437. count uint64,
  438. sum float64,
  439. quantiles map[float64]float64,
  440. labelValues ...string,
  441. ) (Metric, error) {
  442. if len(desc.variableLabels) != len(labelValues) {
  443. return nil, errInconsistentCardinality
  444. }
  445. return &constSummary{
  446. desc: desc,
  447. count: count,
  448. sum: sum,
  449. quantiles: quantiles,
  450. labelPairs: makeLabelPairs(desc, labelValues),
  451. }, nil
  452. }
  453. // MustNewConstSummary is a version of NewConstSummary that panics where
  454. // NewConstMetric would have returned an error.
  455. func MustNewConstSummary(
  456. desc *Desc,
  457. count uint64,
  458. sum float64,
  459. quantiles map[float64]float64,
  460. labelValues ...string,
  461. ) Metric {
  462. m, err := NewConstSummary(desc, count, sum, quantiles, labelValues...)
  463. if err != nil {
  464. panic(err)
  465. }
  466. return m
  467. }