GRPC Core  9.0.0
xds_client_stats.h
Go to the documentation of this file.
1 /*
2  *
3  * Copyright 2018 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18 
19 #ifndef GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_XDS_XDS_CLIENT_STATS_H
20 #define GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_XDS_XDS_CLIENT_STATS_H
21 
23 
25 
28 #include "src/core/lib/gprpp/map.h"
33 
34 namespace grpc_core {
35 
36 class XdsLocalityName : public RefCounted<XdsLocalityName> {
37  public:
38  struct Less {
40  const RefCountedPtr<XdsLocalityName>& rhs) const {
41  int cmp_result = strcmp(lhs->region_.get(), rhs->region_.get());
42  if (cmp_result != 0) return cmp_result < 0;
43  cmp_result = strcmp(lhs->zone_.get(), rhs->zone_.get());
44  if (cmp_result != 0) return cmp_result < 0;
45  return strcmp(lhs->sub_zone_.get(), rhs->sub_zone_.get()) < 0;
46  }
47  };
48 
52  : region_(std::move(region)),
53  zone_(std::move(zone)),
54  sub_zone_(std::move(subzone)) {}
55 
56  bool operator==(const XdsLocalityName& other) const {
57  return strcmp(region_.get(), other.region_.get()) == 0 &&
58  strcmp(zone_.get(), other.zone_.get()) == 0 &&
59  strcmp(sub_zone_.get(), other.sub_zone_.get()) == 0;
60  }
61 
62  const char* region() const { return region_.get(); }
63  const char* zone() const { return zone_.get(); }
64  const char* sub_zone() const { return sub_zone_.get(); }
65 
66  const char* AsHumanReadableString() {
67  if (human_readable_string_ == nullptr) {
68  char* tmp;
69  gpr_asprintf(&tmp, "{region=\"%s\", zone=\"%s\", sub_zone=\"%s\"}",
70  region_.get(), zone_.get(), sub_zone_.get());
71  human_readable_string_.reset(tmp);
72  }
73  return human_readable_string_.get();
74  }
75 
76  private:
80  grpc_core::UniquePtr<char> human_readable_string_;
81 };
82 
83 // The stats classes (i.e., XdsClientStats, LocalityStats, and LoadMetric) can
84 // be snapshotted (and reset) to populate the load report. The snapshots
85 // are contained in the respective Snapshot structs. The Snapshot structs have
86 // no synchronization. The stats classes use several different synchronization
87 // methods. 1. Most of the counters are Atomic<>s for performance. 2. Some of
88 // the Map<>s are protected by Mutex if we are not guaranteed that the accesses
89 // to them are synchronized by the callers. 3. The Map<>s to which the accesses
90 // are already synchronized by the callers do not have additional
91 // synchronization here. Note that the Map<>s we mentioned in 2 and 3 refer to
92 // the map's tree structure rather than the content in each tree node.
94  public:
95  class LocalityStats : public RefCounted<LocalityStats> {
96  public:
97  class LoadMetric {
98  public:
99  struct Snapshot {
100  bool IsAllZero() const;
101 
104  };
105 
106  // Returns a snapshot of this instance and resets all the accumulative
107  // counters.
109 
110  private:
111  uint64_t num_requests_finished_with_metric_{0};
112  double total_metric_value_{0};
113  };
114 
116  std::map<grpc_core::UniquePtr<char>, LoadMetric, StringLess>;
118  std::map<grpc_core::UniquePtr<char>, LoadMetric::Snapshot, StringLess>;
119 
120  struct Snapshot {
121  // TODO(juanlishen): Change this to const method when const_iterator is
122  // added to Map<>.
123  bool IsAllZero();
124 
130  };
131 
132  // Returns a snapshot of this instance and resets all the accumulative
133  // counters.
135 
136  // Each XdsLb::PickerWrapper holds a ref to the respective LocalityStats.
137  // If the refcount is 0, there won't be new calls recorded to the
138  // LocalityStats, so the LocalityStats can be safely deleted when all the
139  // in-progress calls have finished.
140  // Must only be called from the control plane combiner.
141  void RefByPicker() { picker_refcount_.FetchAdd(1, MemoryOrder::ACQ_REL); }
142  // Might be called from the control plane combiner or the data plane
143  // combiner.
144  // TODO(juanlishen): Once https://github.com/grpc/grpc/pull/19390 is merged,
145  // this method will also only be invoked in the control plane combiner.
146  // We may then be able to simplify the LocalityStats' lifetime by making it
147  // RefCounted<> and populating the protobuf in its dtor.
148  void UnrefByPicker() { picker_refcount_.FetchSub(1, MemoryOrder::ACQ_REL); }
149  // Must only be called from the control plane combiner.
150  // The only place where the picker_refcount_ can be increased is
151  // RefByPicker(), which also can only be called from the control plane
152  // combiner. Also, if the picker_refcount_ is 0, total_requests_in_progress_
153  // can't be increased from 0. So it's safe to delete the LocalityStats right
154  // after this method returns true.
155  bool IsSafeToDelete() {
156  return picker_refcount_.FetchAdd(0, MemoryOrder::ACQ_REL) == 0 &&
157  total_requests_in_progress_.FetchAdd(0, MemoryOrder::ACQ_REL) == 0;
158  }
159 
160  void AddCallStarted();
161  void AddCallFinished(bool fail = false);
162 
163  private:
164  Atomic<uint64_t> total_successful_requests_{0};
165  Atomic<uint64_t> total_requests_in_progress_{0};
166  // Requests that were issued (not dropped) but failed.
167  Atomic<uint64_t> total_error_requests_{0};
168  Atomic<uint64_t> total_issued_requests_{0};
169  // Protects load_metric_stats_. A mutex is necessary because the length of
170  // load_metric_stats_ can be accessed by both the callback intercepting the
171  // call's recv_trailing_metadata (not from any combiner) and the load
172  // reporting thread (from the control plane combiner).
173  Mutex load_metric_stats_mu_;
174  LoadMetricMap load_metric_stats_;
175  // Can be accessed from either the control plane combiner or the data plane
176  // combiner.
177  Atomic<uint8_t> picker_refcount_{0};
178  };
179 
180  // TODO(juanlishen): The value type of Map<> must be movable in the current
181  // implementation. To avoid making LocalityStats movable, we wrap it in
182  // RefCountedPtr<>. We should remove this wrapper if the value type of Map<>
183  // doesn't have to be movable.
185  std::map<RefCountedPtr<XdsLocalityName>, RefCountedPtr<LocalityStats>,
188  std::map<RefCountedPtr<XdsLocalityName>, LocalityStats::Snapshot,
191  std::map<grpc_core::UniquePtr<char>, uint64_t, StringLess>;
193 
194  struct Snapshot {
195  // TODO(juanlishen): Change this to const method when const_iterator is
196  // added to Map<>.
197  bool IsAllZero();
198 
202  // The actual load report interval.
204  };
205 
206  // Returns a snapshot of this instance and resets all the accumulative
207  // counters.
209 
212  const RefCountedPtr<XdsLocalityName>& locality_name);
213  void PruneLocalityStats();
214  void AddCallDropped(const grpc_core::UniquePtr<char>& category);
215 
216  private:
217  // The stats for each locality.
218  LocalityStatsMap upstream_locality_stats_;
219  Atomic<uint64_t> total_dropped_requests_{0};
220  // Protects dropped_requests_. A mutex is necessary because the length of
221  // dropped_requests_ can be accessed by both the picker (from data plane
222  // combiner) and the load reporting thread (from the control plane combiner).
223  Mutex dropped_requests_mu_;
224  DroppedRequestsMap dropped_requests_;
225  // The timestamp of the last report. For the LB-policy-wide first report,
226  // last_report_time_ is the time we scheduled the first reporting timer.
227  grpc_millis last_report_time_ = -1;
228 };
229 
230 } // namespace grpc_core
231 
232 #endif /* GRPC_CORE_EXT_FILTERS_CLIENT_CHANNEL_XDS_XDS_CLIENT_STATS_H */
T FetchSub(Arg arg, MemoryOrder order=MemoryOrder::SEQ_CST)
Definition: atomic.h:77
T FetchAdd(Arg arg, MemoryOrder order=MemoryOrder::SEQ_CST)
Definition: atomic.h:71
Definition: sync.h:40
Definition: ref_counted.h:248
Definition: ref_counted_ptr.h:35
T * get() const
Definition: ref_counted_ptr.h:144
Snapshot GetSnapshotAndReset()
Definition: xds_client_stats.cc:51
Definition: xds_client_stats.h:95
std::map< grpc_core::UniquePtr< char >, LoadMetric, StringLess > LoadMetricMap
Definition: xds_client_stats.h:116
void AddCallFinished(bool fail=false)
Definition: xds_client_stats.cc:105
Snapshot GetSnapshotAndReset()
Definition: xds_client_stats.cc:79
void UnrefByPicker()
Definition: xds_client_stats.h:148
void RefByPicker()
Definition: xds_client_stats.h:141
void AddCallStarted()
Definition: xds_client_stats.cc:100
bool IsSafeToDelete()
Definition: xds_client_stats.h:155
std::map< grpc_core::UniquePtr< char >, LoadMetric::Snapshot, StringLess > LoadMetricSnapshotMap
Definition: xds_client_stats.h:118
Definition: xds_client_stats.h:93
DroppedRequestsMap DroppedRequestsSnapshotMap
Definition: xds_client_stats.h:192
RefCountedPtr< LocalityStats > FindLocalityStats(const RefCountedPtr< XdsLocalityName > &locality_name)
Definition: xds_client_stats.cc:159
void MaybeInitLastReportTime()
Definition: xds_client_stats.cc:155
std::map< RefCountedPtr< XdsLocalityName >, RefCountedPtr< LocalityStats >, XdsLocalityName::Less > LocalityStatsMap
Definition: xds_client_stats.h:186
std::map< grpc_core::UniquePtr< char >, uint64_t, StringLess > DroppedRequestsMap
Definition: xds_client_stats.h:191
void PruneLocalityStats()
Definition: xds_client_stats.cc:170
std::map< RefCountedPtr< XdsLocalityName >, LocalityStats::Snapshot, XdsLocalityName::Less > LocalityStatsSnapshotMap
Definition: xds_client_stats.h:189
Snapshot GetSnapshotAndReset()
Definition: xds_client_stats.cc:130
void AddCallDropped(const grpc_core::UniquePtr< char > &category)
Definition: xds_client_stats.cc:181
Definition: xds_client_stats.h:36
const char * zone() const
Definition: xds_client_stats.h:63
const char * AsHumanReadableString()
Definition: xds_client_stats.h:66
XdsLocalityName(grpc_core::UniquePtr< char > region, grpc_core::UniquePtr< char > zone, grpc_core::UniquePtr< char > subzone)
Definition: xds_client_stats.h:49
const char * sub_zone() const
Definition: xds_client_stats.h:64
const char * region() const
Definition: xds_client_stats.h:62
bool operator==(const XdsLocalityName &other) const
Definition: xds_client_stats.h:56
int64_t grpc_millis
Definition: exec_ctx.h:35
Round Robin Policy.
Definition: backend_metric.cc:24
std::unique_ptr< T, DefaultDeleteChar > UniquePtr
Definition: memory.h:45
GPRAPI int gpr_asprintf(char **strp, const char *format,...) GPR_PRINT_FORMAT_CHECK(2
printf to a newly-allocated string.
Definition: map.h:33
double total_metric_value
Definition: xds_client_stats.h:103
bool IsAllZero() const
Definition: xds_client_stats.cc:42
uint64_t num_requests_finished_with_metric
Definition: xds_client_stats.h:102
Definition: xds_client_stats.h:120
bool IsAllZero()
Definition: xds_client_stats.cc:62
uint64_t total_issued_requests
Definition: xds_client_stats.h:128
LoadMetricSnapshotMap load_metric_stats
Definition: xds_client_stats.h:129
uint64_t total_requests_in_progress
Definition: xds_client_stats.h:126
uint64_t total_error_requests
Definition: xds_client_stats.h:127
uint64_t total_successful_requests
Definition: xds_client_stats.h:125
Definition: xds_client_stats.h:194
bool IsAllZero()
Definition: xds_client_stats.cc:116
grpc_millis load_report_interval
Definition: xds_client_stats.h:203
LocalityStatsSnapshotMap upstream_locality_stats
Definition: xds_client_stats.h:199
uint64_t total_dropped_requests
Definition: xds_client_stats.h:200
DroppedRequestsSnapshotMap dropped_requests
Definition: xds_client_stats.h:201
Definition: xds_client_stats.h:38
bool operator()(const RefCountedPtr< XdsLocalityName > &lhs, const RefCountedPtr< XdsLocalityName > &rhs) const
Definition: xds_client_stats.h:39