1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
17 #ifndef CEPH_MESSENGER_H
18 #define CEPH_MESSENGER_H
28 #include "Dispatcher.h"
30 #include "common/Throttle.h"
31 #include "include/Context.h"
32 #include "include/types.h"
33 #include "include/ceph_features.h"
34 #include "auth/Crypto.h"
35 #include "common/item_history.h"
36 #include "auth/AuthRegistry.h"
37 #include "compressor_registry.h"
38 #include "include/ceph_assert.h"
44 #define SOCKET_PRIORITY_MIN_DELAY 6
51 #ifdef UNIT_TESTS_BUILT
55 std::condition_variable cond_var
;
57 enum ACTION
: uint32_t {
64 START_CLIENT_BANNER_EXCHANGE
= 1,
65 START_SERVER_BANNER_EXCHANGE
,
66 BANNER_EXCHANGE_BANNER_CONNECTING
,
68 HANDLE_PEER_BANNER_BANNER_CONNECTING
,
70 HANDLE_PEER_BANNER_PAYLOAD_HELLO_CONNECTING
,
71 HANDLE_PEER_BANNER_PAYLOAD
,
73 HANDLE_AUTH_REQUEST_ACCEPTING_SIGN
,
80 READ_MESSAGE_COMPLETE
,
82 SEND_COMPRESSION_REQUEST
,
83 HANDLE_COMPRESSION_REQUEST
86 virtual ~Interceptor() {}
87 virtual ACTION
intercept(Connection
*conn
, uint32_t step
) = 0;
94 std::deque
<Dispatcher
*> dispatchers
;
95 std::deque
<Dispatcher
*> fast_dispatchers
;
96 ZTracer::Endpoint trace_endpoint
;
99 void set_endpoint_addr(const entity_addr_t
& a
,
100 const entity_name_t
&name
);
103 /// the "name" of the local daemon. eg client.99
104 entity_name_t my_name
;
107 safe_item_history
<entity_addrvec_t
> my_addrs
;
109 int default_send_priority
;
110 /// set to true once the Messenger has started, and set to false on shutdown
116 AuthClient
*auth_client
= 0;
117 AuthServer
*auth_server
= 0;
119 #ifdef UNIT_TESTS_BUILT
120 Interceptor
*interceptor
= nullptr;
124 * The CephContext this Messenger uses. Many other components initialize themselves
130 using Policy
= ceph::net::Policy
<Throttle
>;
133 // allow unauthenticated connections. This is needed for
134 // compatibility with pre-nautilus OSDs, which do not authenticate
135 // the heartbeat sessions.
136 bool require_authorizer
= true;
139 // for authentication
140 AuthRegistry auth_registry
;
144 * Messenger constructor. Call this from your implementation.
145 * Messenger users should construct full implementations directly,
146 * or use the create() function.
148 Messenger(CephContext
*cct_
, entity_name_t w
);
149 virtual ~Messenger() {}
152 * create a new messenger
154 * Create a new messenger instance, with whatever implementation is
155 * available or specified via the configuration in cct.
158 * @param type name of messenger type
159 * @param name entity name to register
160 * @param lname logical name of the messenger in this process (e.g., "client")
161 * @param nonce nonce value to uniquely identify this instance on the current host
163 static Messenger
*create(CephContext
*cct
,
164 const std::string
&type
,
169 static uint64_t get_random_nonce();
170 static uint64_t get_pid_nonce();
173 * create a new messenger
175 * Create a new messenger instance.
176 * Same as the above, but a slightly simpler interface for clients:
177 * - Generate a random nonce
178 * - get the messenger type from cct
179 * - use the client entity_type
182 * @param lname logical name of the messenger in this process (e.g., "client")
184 static Messenger
*create_client_messenger(CephContext
*cct
, std::string lname
);
187 * @defgroup Accessors
190 int get_mytype() const { return my_name
.type(); }
193 * Retrieve the Messenger's name
195 * @return A const reference to the name this Messenger
196 * currently believes to be its own.
198 const entity_name_t
& get_myname() { return my_name
; }
201 * Retrieve the Messenger's address.
203 * @return A const reference to the address this Messenger
204 * currently believes to be its own.
206 const entity_addrvec_t
& get_myaddrs() {
211 * get legacy addr for myself, suitable for protocol v1
213 * Note that myaddrs might be a proper addrvec with v1 in it, or it might be an
214 * ANY addr (if i am a pure client).
216 entity_addr_t
get_myaddr_legacy() {
217 return my_addrs
->as_legacy_addr();
222 * set messenger's instance
224 uint32_t get_magic() { return magic
; }
225 void set_magic(int _magic
) { magic
= _magic
; }
227 void set_auth_client(AuthClient
*ac
) {
230 void set_auth_server(AuthServer
*as
) {
235 CompressorRegistry comp_registry
;
239 * set messenger's address
241 virtual void set_myaddrs(const entity_addrvec_t
& a
) {
243 set_endpoint_addr(a
.front(), my_name
);
247 * @return the zipkin trace endpoint
249 const ZTracer::Endpoint
* get_trace_endpoint() const {
250 return &trace_endpoint
;
254 * set the name of the local entity. The name is reported to others and
255 * can be changed while the system is running, but doing so at incorrect
256 * times may have bad results.
258 * @param m The name to set.
260 void set_myname(const entity_name_t
& m
) { my_name
= m
; }
263 * set the unknown address components for this Messenger.
264 * This is useful if the Messenger doesn't know its full address just by
265 * binding, but another Messenger on the same interface has already learned
266 * its full address. This function does not fill in known address elements,
267 * cause a rebind, or do anything of that sort.
269 * @param addrs The addresses to use as a template.
271 virtual bool set_addr_unknowns(const entity_addrvec_t
&addrs
) = 0;
273 * set the address for this Messenger. This is useful if the Messenger
274 * binds to a specific address but advertises a different address on the
277 * @param addr The address to use.
279 virtual void set_addrs(const entity_addrvec_t
&addr
) = 0;
280 /// Get the default send priority.
281 int get_default_send_priority() { return default_send_priority
; }
283 * Get the number of Messages which the Messenger has received
284 * but not yet dispatched.
286 virtual int get_dispatch_queue_len() = 0;
289 * Get age of oldest undelivered message
290 * (0 if the queue is empty)
292 virtual double get_dispatch_queue_max_age(utime_t now
) = 0;
299 * @defgroup Configuration
303 * set the cluster protocol in use by this daemon.
304 * This is an init-time function and cannot be called after calling
307 * @param p The cluster protocol to use. Defined externally.
309 virtual void set_cluster_protocol(int p
) = 0;
311 * set a policy which is applied to all peers who do not have a type-specific
313 * This is an init-time function and cannot be called after calling
316 * @param p The Policy to apply.
318 virtual void set_default_policy(Policy p
) = 0;
320 * set a policy which is applied to all peers of the given type.
321 * This is an init-time function and cannot be called after calling
324 * @param type The peer type this policy applies to.
325 * @param p The policy to apply.
327 virtual void set_policy(int type
, Policy p
) = 0;
329 * set the Policy associated with a type of peer.
331 * This can be called either on initial setup, or after connections
332 * are already established. However, the policies for existing
333 * connections will not be affected; the new policy will only apply
334 * to future connections.
336 * @param t The peer type to get the default policy for.
337 * @return The Policy for peer type @p t (returned by value).
339 virtual Policy
get_policy(int t
) = 0;
341 * Get the default Policy
343 * @return The default Policy (returned by value).
345 virtual Policy
get_default_policy() = 0;
347 * set Throttlers applied to all Messages from the given type of peer
349 * This is an init-time function and cannot be called after calling
352 * @param type The peer type the Throttlers will apply to.
353 * @param bytes The Throttle for the number of bytes carried by the message
354 * @param msgs The Throttle for the number of messages for this @p type
355 * @note The Messenger does not take ownership of the Throttle pointers, but
356 * you must not destroy them before you destroy the Messenger.
358 virtual void set_policy_throttlers(int type
, Throttle
*bytes
, Throttle
*msgs
=NULL
) = 0;
360 * set the default send priority
362 * This is an init-time function and must be called *before* calling
365 * @param p The default send priority to use.
367 void set_default_send_priority(int p
) {
368 ceph_assert(!started
);
369 default_send_priority
= p
;
372 * set the priority(SO_PRIORITY) for all packets to be sent on this socket.
374 * Linux uses this value to order the networking queues: packets with a higher
375 * priority may be processed first depending on the selected device queueing
378 * @param prio The priority. Setting a priority outside the range 0 to 6
379 * requires the CAP_NET_ADMIN capability.
381 void set_socket_priority(int prio
) {
382 socket_priority
= prio
;
385 * Get the socket priority
387 * @return the socket priority
389 int get_socket_priority() {
390 return socket_priority
;
393 * Add a new Dispatcher to the front of the list. If you add
394 * a Dispatcher which is already included, it will get a duplicate
395 * entry. This will reduce efficiency but not break anything.
397 * @param d The Dispatcher to insert into the list.
399 void add_dispatcher_head(Dispatcher
*d
) {
400 bool first
= dispatchers
.empty();
401 dispatchers
.push_front(d
);
402 if (d
->ms_can_fast_dispatch_any())
403 fast_dispatchers
.push_front(d
);
408 * Add a new Dispatcher to the end of the list. If you add
409 * a Dispatcher which is already included, it will get a duplicate
410 * entry. This will reduce efficiency but not break anything.
412 * @param d The Dispatcher to insert into the list.
414 void add_dispatcher_tail(Dispatcher
*d
) {
415 bool first
= dispatchers
.empty();
416 dispatchers
.push_back(d
);
417 if (d
->ms_can_fast_dispatch_any())
418 fast_dispatchers
.push_back(d
);
423 * Bind the Messenger to a specific address. If bind_addr
424 * is not completely filled in the system will use the
425 * valid portions and cycle through the unset ones (eg, the port)
426 * in an unspecified order.
428 * @param bind_addr The address to bind to.
429 * @return 0 on success, or -1 on error, or -errno if
430 * we can be more specific about the failure.
432 virtual int bind(const entity_addr_t
& bind_addr
) = 0;
434 virtual int bindv(const entity_addrvec_t
& addrs
);
437 * This function performs a full restart of the Messenger component,
438 * whatever that means. Other entities who connect to this
439 * Messenger post-rebind() should perceive it as a new entity which
440 * they have not previously contacted, and it MUST bind to a
441 * different address than it did previously.
443 * @param avoid_ports Additional ports to avoid binding to.
445 virtual int rebind(const std::set
<int>& avoid_ports
) { return -EOPNOTSUPP
; }
447 * Bind the 'client' Messenger to a specific address. Messenger will bind
448 * the address before connect to others when option ms_bind_before_connect
450 * @param bind_addr The address to bind to.
451 * @return 0 on success, or -1 on error, or -errno if
452 * we can be more specific about the failure.
454 virtual int client_bind(const entity_addr_t
& bind_addr
) = 0;
457 * reset the 'client' Messenger. Mark all the existing Connections down
458 * and update 'nonce'.
460 virtual int client_reset() = 0;
463 virtual bool should_use_msgr2() {
468 * @} // Configuration
472 * @defgroup Startup/Shutdown
476 * Perform any resource allocation, thread startup, etc
477 * that is required before attempting to connect to other
478 * Messengers or transmit messages.
479 * Once this function completes, started shall be set to true.
481 * @return 0 on success; -errno on failure.
483 virtual int start() { started
= true; return 0; }
487 * Block until the Messenger has finished shutting down (according
488 * to the shutdown() function).
489 * It is valid to call this after calling shutdown(), but it must
490 * be called before deleting the Messenger.
492 virtual void wait() = 0;
494 * Initiate a shutdown of the Messenger.
496 * @return 0 on success, -errno otherwise.
498 virtual int shutdown() { started
= false; return 0; }
500 * @} // Startup/Shutdown
504 * @defgroup Messaging
508 * Queue the given Message for the given entity.
509 * Success in this function does not guarantee Message delivery, only
510 * success in queueing the Message. Other guarantees may be provided based
511 * on the Connection policy associated with the dest.
513 * @param m The Message to send. The Messenger consumes a single reference
514 * when you pass it in.
515 * @param dest The entity to send the Message to.
517 * DEPRECATED: please do not use this interface for any new code;
518 * use the Connection* variant.
520 * @return 0 on success, or -errno on failure.
525 const entity_addrvec_t
& addr
) = 0;
527 Message
*m
, const entity_addrvec_t
& addrs
) {
528 return send_to(m
, CEPH_ENTITY_TYPE_MON
, addrs
);
531 Message
*m
, const entity_addrvec_t
& addrs
) {
532 return send_to(m
, CEPH_ENTITY_TYPE_MDS
, addrs
);
539 * @defgroup Connection Management
543 * Get the Connection object associated with a given entity. If a
544 * Connection does not exist, create one and establish a logical connection.
545 * The caller owns a reference when this returns. Call ->put() when you're
548 * @param dest The entity to get a connection for.
550 virtual ConnectionRef
connect_to(
551 int type
, const entity_addrvec_t
& dest
,
552 bool anon
=false, bool not_local_dest
=false) = 0;
553 ConnectionRef
connect_to_mon(const entity_addrvec_t
& dest
,
554 bool anon
=false, bool not_local_dest
=false) {
555 return connect_to(CEPH_ENTITY_TYPE_MON
, dest
, anon
, not_local_dest
);
557 ConnectionRef
connect_to_mds(const entity_addrvec_t
& dest
,
558 bool anon
=false, bool not_local_dest
=false) {
559 return connect_to(CEPH_ENTITY_TYPE_MDS
, dest
, anon
, not_local_dest
);
561 ConnectionRef
connect_to_osd(const entity_addrvec_t
& dest
,
562 bool anon
=false, bool not_local_dest
=false) {
563 return connect_to(CEPH_ENTITY_TYPE_OSD
, dest
, anon
, not_local_dest
);
565 ConnectionRef
connect_to_mgr(const entity_addrvec_t
& dest
,
566 bool anon
=false, bool not_local_dest
=false) {
567 return connect_to(CEPH_ENTITY_TYPE_MGR
, dest
, anon
, not_local_dest
);
571 * Get the Connection object associated with ourselves.
573 virtual ConnectionRef
get_loopback_connection() = 0;
575 * Mark down a Connection to a remote.
577 * This will cause us to discard our outgoing queue for them, and if
578 * reset detection is enabled in the policy and the endpoint tries
579 * to reconnect they will discard their queue when we inform them of
582 * If there is no Connection to the given dest, it is a no-op.
584 * This generates a RESET notification to the Dispatcher.
586 * DEPRECATED: please do not use this interface for any new code;
587 * use the Connection* variant.
589 * @param a The address to mark down.
591 virtual void mark_down(const entity_addr_t
& a
) = 0;
592 virtual void mark_down_addrs(const entity_addrvec_t
& a
) {
593 mark_down(a
.legacy_addr());
596 * Mark all the existing Connections down. This is equivalent
597 * to iterating over all Connections and calling mark_down()
600 * This will generate a RESET event for each closed connection.
602 virtual void mark_down_all() = 0;
604 * @} // Connection Management
608 * @defgroup Subclass Interfacing
612 * A courtesy function for Messenger implementations which
613 * will be called when we receive our first Dispatcher.
615 virtual void ready() { }
617 * @} // Subclass Interfacing
620 #ifdef CEPH_USE_SIGPIPE_BLOCKER
622 * We need to disable SIGPIPE on all platforms, and if they
623 * don't give us a better mechanism (read: are on Solaris) that
624 * means blocking the signal whenever we do a send or sendmsg...
625 * That means any implementations must invoke MSGR_SIGPIPE_STOPPER in-scope
626 * whenever doing so. On most systems that's blank, but on systems where
627 * it's needed we construct an RAII object to plug and un-plug the SIGPIPE.
628 * See http://www.microhowto.info/howto/ignore_sigpipe_without_affecting_other_threads_in_a_process.html
630 struct sigpipe_stopper
{
632 sigset_t existing_mask
;
635 sigemptyset(&pipe_mask
);
636 sigaddset(&pipe_mask
, SIGPIPE
);
638 sigemptyset(&signals
);
639 sigpending(&signals
);
640 if (sigismember(&signals
, SIGPIPE
)) {
644 int r
= pthread_sigmask(SIG_BLOCK
, &pipe_mask
, &existing_mask
);
650 struct timespec nowait
{0};
651 int r
= sigtimedwait(&pipe_mask
, 0, &nowait
);
652 ceph_assert(r
== EAGAIN
|| r
== 0);
653 r
= pthread_sigmask(SIG_SETMASK
, &existing_mask
, 0);
// NOTE(review): as written, "Messenger::sigpipe_stopper stopper();" parses as
// the declaration of a function named stopper that returns a sigpipe_stopper,
// so no sigpipe_stopper object is constructed where this macro is used.
// Dropping the "()" (or writing "{}") would create the object; before doing
// that, confirm that the struct's check of the sigtimedwait() result matches
// that call's documented return values.
658 # define MSGR_SIGPIPE_STOPPER Messenger::sigpipe_stopper stopper();
660 # define MSGR_SIGPIPE_STOPPER
663 * @defgroup Dispatcher Interfacing
667 * Determine whether a message can be fast-dispatched. We will
668 * query each Dispatcher in sequence to determine if they are
669 * capable of handling a particular message via "fast dispatch".
671 * @param m The Message we are testing.
673 bool ms_can_fast_dispatch(const ceph::cref_t
<Message
>& m
) {
674 for (const auto &dispatcher
: fast_dispatchers
) {
675 if (dispatcher
->ms_can_fast_dispatch2(m
))
682 * Deliver a single Message via "fast dispatch".
684 * @param m The Message we are fast dispatching.
685 * If none of our Dispatchers can handle it, ceph_abort().
687 void ms_fast_dispatch(const ceph::ref_t
<Message
> &m
) {
688 m
->set_dispatch_stamp(ceph_clock_now());
689 for (const auto &dispatcher
: fast_dispatchers
) {
690 if (dispatcher
->ms_can_fast_dispatch2(m
)) {
691 dispatcher
->ms_fast_dispatch2(m
);
697 void ms_fast_dispatch(Message
*m
) {
698 return ms_fast_dispatch(ceph::ref_t
<Message
>(m
, false)); /* consume ref */
703 void ms_fast_preprocess(const ceph::ref_t
<Message
> &m
) {
704 for (const auto &dispatcher
: fast_dispatchers
) {
705 dispatcher
->ms_fast_preprocess2(m
);
709 * Deliver a single Message. Send it to each Dispatcher
710 * in sequence until one of them handles it.
711 * If none of our Dispatchers can handle it, ceph_abort().
713 * @param m The Message to deliver.
715 void ms_deliver_dispatch(const ceph::ref_t
<Message
> &m
) {
716 m
->set_dispatch_stamp(ceph_clock_now());
717 for (const auto &dispatcher
: dispatchers
) {
718 if (dispatcher
->ms_dispatch2(m
))
721 lsubdout(cct
, ms
, 0) << "ms_deliver_dispatch: unhandled message " << m
<< " " << *m
<< " from "
722 << m
->get_source_inst() << dendl
;
723 ceph_assert(!cct
->_conf
->ms_die_on_unhandled_msg
);
725 void ms_deliver_dispatch(Message
*m
) {
726 return ms_deliver_dispatch(ceph::ref_t
<Message
>(m
, false)); /* consume ref */
729 * Notify each Dispatcher of a new Connection. Call
730 * this function whenever a new Connection is initiated or
733 * @param con Pointer to the new Connection.
735 void ms_deliver_handle_connect(Connection
*con
) {
736 for (const auto& dispatcher
: dispatchers
) {
737 dispatcher
->ms_handle_connect(con
);
742 * Notify each fast Dispatcher of a new Connection. Call
743 * this function whenever a new Connection is initiated or
746 * @param con Pointer to the new Connection.
748 void ms_deliver_handle_fast_connect(Connection
*con
) {
749 for (const auto& dispatcher
: fast_dispatchers
) {
750 dispatcher
->ms_handle_fast_connect(con
);
755 * Notify each Dispatcher of a new incoming Connection. Call
756 * this function whenever a new Connection is accepted.
758 * @param con Pointer to the new Connection.
760 void ms_deliver_handle_accept(Connection
*con
) {
761 for (const auto& dispatcher
: dispatchers
) {
762 dispatcher
->ms_handle_accept(con
);
767 * Notify each fast Dispatcher of a new incoming Connection. Call
768 * this function whenever a new Connection is accepted.
770 * @param con Pointer to the new Connection.
772 void ms_deliver_handle_fast_accept(Connection
*con
) {
773 for (const auto& dispatcher
: fast_dispatchers
) {
774 dispatcher
->ms_handle_fast_accept(con
);
779 * Notify each Dispatcher of a Connection which may have lost
780 * Messages. Call this function whenever you detect that a lossy Connection
781 * has been disconnected.
783 * @param con Pointer to the broken Connection.
785 void ms_deliver_handle_reset(Connection
*con
) {
786 for (const auto& dispatcher
: dispatchers
) {
787 if (dispatcher
->ms_handle_reset(con
))
792 * Notify each Dispatcher of a Connection which has been "forgotten" about
793 * by the remote end, implying that messages have probably been lost.
794 * Call this function whenever you detect a reset.
796 * @param con Pointer to the broken Connection.
798 void ms_deliver_handle_remote_reset(Connection
*con
) {
799 for (const auto& dispatcher
: dispatchers
) {
800 dispatcher
->ms_handle_remote_reset(con
);
805 * Notify each Dispatcher of a Connection for which reconnection
806 * attempts are being refused. Call this function whenever you
807 * detect that a lossy Connection has been disconnected and it's
808 * impossible to reconnect.
810 * @param con Pointer to the broken Connection.
812 void ms_deliver_handle_refused(Connection
*con
) {
813 for (const auto& dispatcher
: dispatchers
) {
814 if (dispatcher
->ms_handle_refused(con
))
819 void set_require_authorizer(bool b
) {
820 require_authorizer
= b
;
824 * @} // Dispatcher Interfacing