// ============================================================================ // // = LIBRARY // ULib - c++ library // // = FILENAME // socket.cpp // // = AUTHOR // Stefano Casazza // // ============================================================================ #include #include #include #include #include #include #ifdef _MSWINDOWS_ # include # include #else # include # ifndef __clang__ U_DUMP_KERNEL_VERSION(LINUX_VERSION_CODE) # endif #endif #ifdef USE_LIBSSL # include #endif int USocket::iBackLog = SOMAXCONN; int USocket::server_flags; int USocket::accept4_flags; // If flags is 0, then accept4() is the same as accept() bool USocket::tcp_reuseport; bool USocket::tcp_autocorking; SocketAddress* USocket::cLocal; #include "socket_address.cpp" USocket::USocket(bool bSocketIsIPv6) { U_TRACE_REGISTER_OBJECT(0, USocket, "%b", bSocketIsIPv6) flags = O_RDWR; iState = CLOSE; iSockDesc = -1; iLocalPort = iRemotePort = 0; #ifdef ENABLE_IPV6 U_socket_IPv6(this) = bSocketIsIPv6; #else U_socket_IPv6(this) = false; #endif U_socket_Type(this) = SK_STREAM; U_socket_LocalSet(this) = false; #ifdef _MSWINDOWS_ fh = -1; #endif } USocket::~USocket() { U_TRACE_UNREGISTER_OBJECT(0, USocket) if (isOpen()) _closesocket(); } __pure unsigned int USocket::localPortNumber() { U_TRACE(0, "USocket::localPortNumber()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isLocalSet()) U_RETURN(iLocalPort); } __pure UIPAddress& USocket::localIPAddress() { U_TRACE(0, "USocket::localIPAddress()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isLocalSet()) return cLocalAddress; } void USocket::_socket(int iSocketType, int domain, int protocol) { U_TRACE(1, "USocket::_socket(%d,%d,%d)", iSocketType, domain, protocol) U_INTERNAL_ASSERT(isClosed()) if (domain == 0) { domain = (U_socket_Type(this) == SK_UNIX ? AF_UNIX : U_socket_IPv6(this) ? AF_INET6 : AF_INET); } else { if (domain == AF_UNIX) U_socket_Type(this) = SK_UNIX; // AF_UNIX == 1 } if (iSocketType == 0) { iSocketType = (U_socket_Type(this) == SK_DGRAM ? SOCK_DGRAM : SOCK_STREAM); } else if (iSocketType == SOCK_RAW) { U_socket_Type(this) = SK_RAW; } else if (iSocketType == SOCK_DGRAM) { U_socket_Type(this) = SK_DGRAM; } U_INTERNAL_DUMP("U_socket_Type = %d", U_socket_Type(this)) #ifdef _MSWINDOWS_ fh = U_SYSCALL(socket, "%d,%d,%d", domain, iSocketType, protocol); iSockDesc = _open_osfhandle((long)fh, O_RDWR | O_BINARY); #else iSockDesc = U_SYSCALL(socket, "%d,%d,%d", domain, iSocketType, protocol); #endif if (isOpen()) { flags = O_RDWR; iRemotePort = 0; U_socket_LocalSet(this) = false; } } /** * The shutdown() tells the receiver the server is done sending data. No * more data is going to be send. More importantly, it doesn't close the * socket. At the socket layer, this sends a TCP/IP FIN packet to the receiver */ bool USocket::shutdown(int how) { U_TRACE(1, "USocket::shutdown(%d)", how) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) if (U_SYSCALL(shutdown, "%d,%d", getFd(), how) == 0) { /** * SO_KEEPALIVE makes the kernel more aggressive about continually verifying the connection even when you're not doing anything, * but does not change or enhance the way the information is delivered to you. You'll find out when you try to actually do something * (for example "write"), and you'll find out right away since the kernel is now just reporting the status of a previously set flag, * rather than having to wait a few seconds (or much longer in some cases) for network activity to fail. The exact same code logic you * had for handling the "other side went away unexpectedly" condition will still be used; what changes is the timing (not the method) * * Ref1: FIN_WAIT2 [https://kb.iu.edu/d/ajmi] * Ref2: tcp_fin_timeout [https://www.frozentux.net/ipsysctl-tutorial/chunkyhtml/tcpvariables.html#AEN370] * Ref3: tcp_retries2 [https://www.frozentux.net/ipsysctl-tutorial/chunkyhtml/tcpvariables.html#AEN444] * Ref4: tcp_max_orphans [https://www.frozentux.net/ipsysctl-tutorial/chunkyhtml/tcpvariables.html#AEN388] */ // (void) setSockOpt(SOL_SOCKET, SO_KEEPALIVE, (const int[]){ 1 }, sizeof(int)); U_RETURN(true); } U_RETURN(false); } bool USocket::connectServer(const UIPAddress& cAddr, unsigned int iServPort) { U_TRACE(1, "USocket::connectServer(%p,%d)", &cAddr, iServPort) U_CHECK_MEMORY if (isOpen() == false) _socket(); if ((iRemotePort = iServPort, cRemoteAddress = cAddr, connect())) U_RETURN(true); U_RETURN(false); } bool USocket::checkErrno() { U_TRACE(0, "USocket::checkErrno()") U_INTERNAL_DUMP("errno = %d", errno) if (errno == EAGAIN) { iState |= TIMEOUT; U_RETURN(true); } iState = (errno == ECONNRESET ? EPOLLERROR : BROKEN); closesocket(); U_INTERNAL_DUMP("state = %d", iState) U_RETURN(false); } bool USocket::checkTime(long time_limit, long& timeout) { U_TRACE(1, "USocket::checkTime(%ld,%ld)", time_limit, timeout) U_INTERNAL_ASSERT_RANGE(1,time_limit,8L*60L) // 8 minuts U_gettimeofday; // NB: optimization if it is enough a time resolution of one second... if (timeout == 0) timeout = u_now->tv_sec + time_limit; if (u_now->tv_sec > timeout) { iState = BROKEN | TIMEOUT; closesocket(); U_RETURN(false); } U_RETURN(true); } void USocket::setLocal() { U_TRACE(1, "USocket::setLocal()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) SocketAddress tmp; socklen_t slDummy = tmp.sizeOf(); if (U_SYSCALL(getsockname, "%d,%p,%p", getFd(), (sockaddr*)tmp, &slDummy) == 0) { tmp.getPortNumber(iLocalPort); tmp.getIPAddress(cLocalAddress); U_socket_LocalSet(this) = true; } } void USocket::setLocal(const UIPAddress& addr) { U_TRACE(0, "USocket::setLocal(%p)", &addr) cLocalAddress = addr; if (cLocal) cLocal->setIPAddress(cLocalAddress); U_socket_LocalSet(this) = true; } /** * The method is called with a local IP address and port number to bind the socket to. * A default port number of zero is a wildcard and lets the OS choose the port number */ bool USocket::bind() { U_TRACE(1, "USocket::bind()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) U_INTERNAL_ASSERT_POINTER(cLocal) int result, counter = 0; loop: result = U_SYSCALL(bind, "%d,%p,%d", getFd(), (sockaddr*)&(cLocal->addr.psaGeneric), cLocal->sizeOf()); if (result == -1 && errno == EADDRINUSE && ++counter <= 3) { UTimeVal(1L).nanosleep(); goto loop; } if (result == 0) U_RETURN(true); if (errno == EADDRINUSE) U_WARNING("Probably another instance of userver is running on the same port: %u", iLocalPort); U_RETURN(false); } void USocket::setTcpFastOpen() { U_TRACE(0, "USocket::setTcpFastOpen()") #if !defined(U_SERVER_CAPTIVE_PORTAL) && !defined(_MSWINDOWS_) // && LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0) # ifndef TCP_FASTOPEN # define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */ # endif (void) setSockOpt(SOL_TCP, TCP_FASTOPEN, (const int[]){ 5 }, sizeof(int)); #endif } void USocket::setReusePort() { U_TRACE(0, "USocket::setReusePort()") /** * As with TCP, SO_REUSEPORT allows multiple UDP sockets to be bound to the same port. This * facility could, for example, be useful in a DNS server operating over UDP. With SO_REUSEPORT, * each thread could use recv() on its own socket to accept datagrams arriving on the port. * The traditional approach is that all threads would compete to perform recv() calls on a * single shared socket. This can lead to unbalanced loads across the threads. By contrast, * SO_REUSEPORT distributes datagrams evenly across all of the receiving threads */ #if !defined(U_SERVER_CAPTIVE_PORTAL) && !defined(_MSWINDOWS_) // && LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0) # ifndef SO_REUSEPORT # define SO_REUSEPORT 15 # endif tcp_reuseport = (U_socket_Type(this) != SK_UNIX && setSockOpt(SOL_SOCKET, SO_REUSEPORT, (const int[]){ 1 }, sizeof(int)) && U_socket_Type(this) != SK_DGRAM); U_INTERNAL_DUMP("tcp_reuseport = %b", tcp_reuseport) #endif } void USocket::setReuseAddress() { U_TRACE(0, "USocket::setReuseAddress()") /** * SO_REUSEADDR allows your server to bind to an address which is in a TIME_WAIT state. * It does not allow more than one server to bind to the same address. It was mentioned * that use of this flag can create a security risk because another server can bind to a * the same port, by binding to a specific address as opposed to INADDR_ANY. */ (void) setSockOpt(SOL_SOCKET, SO_REUSEADDR, (const int[]){ 1 }, sizeof(int)); } void USocket::setAddress(void* address) { U_TRACE(0, "USocket::setAddress(%p)", address) U_INTERNAL_ASSERT_POINTER(cLocal) cLocalAddress.setAddress(address, (bool)U_socket_IPv6(this)); cLocal->setIPAddress(cLocalAddress); U_socket_LocalSet(this) = true; } bool USocket::setHostName(const UString& pcNewHostName) { U_TRACE(0, "USocket::setHostName(%.*S)", U_STRING_TO_TRACE(pcNewHostName)) U_INTERNAL_ASSERT_POINTER(cLocal) if (cLocalAddress.setHostName(pcNewHostName, U_socket_IPv6(this))) { cLocal->setIPAddress(cLocalAddress); U_socket_LocalSet(this) = true; U_RETURN(true); } U_RETURN(false); } // We try to bind the USocket to the specified port number and any local IP Address using the bind() method bool USocket::setServer(unsigned int port, UString* localAddress) { U_TRACE(0, "USocket::setServer(%u,%p)", port, localAddress) U_CHECK_MEMORY if (isOpen() == false) _socket(); #ifndef _MSWINDOWS_ setReusePort(); setReuseAddress(); if (isIPC()) { /** * A Unix domain "server" is created as a Unix domain socket that is bound * to a pathname and that has a backlog queue to listen for connection requests */ U_INTERNAL_ASSERT_POINTER(UUnixSocket::path) (void) UFile::_unlink(UUnixSocket::path); if (U_SYSCALL(bind, "%d,%p,%d", iSockDesc, &(UUnixSocket::addr.psaGeneric), UUnixSocket::len) == 0 && U_SYSCALL(listen, "%d,%d", iSockDesc, iBackLog) == 0) { iLocalPort = iRemotePort = port; U_socket_LocalSet(this) = true; U_RETURN(true); } U_RETURN(false); } #endif U_INTERNAL_DUMP("cLocal = %p", cLocal) U_INTERNAL_ASSERT_EQUALS(cLocal, 0) cLocal = new SocketAddress; cLocal->setPortNumber(port); if (localAddress == 0) { cLocal->setIPAddressWildCard(U_socket_IPv6(this)); } else if (setHostName(*localAddress) == false) { U_RETURN(false); } /** * The normal TCP termination sequence looks like this (simplified). We have two peers: A and B * * 1. A calls close() * A sends FIN to B * A goes into FIN_WAIT_1 state * 2. B receives FIN * B sends ACK to A * B goes into CLOSE_WAIT state * 3. A receives ACK * A goes into FIN_WAIT_2 state * 4. B calls close() * B sends FIN to A * B goes into LAST_ACK state * 5. A receives FIN * A sends ACK to B * A goes into TIME_WAIT state * 6. B receives ACK * B goes to CLOSED state - i.e. is removed from the socket tables * * So the peer that initiates the termination - i.e. calls close() first - will end up in the TIME_WAIT state. * It can be a problem with lots of sockets in TIME_WAIT state on a server as it could eventually prevent new * connections from being accepted. Setting SO_LINGER with timeout 0 prior to calling close() will cause the * normal termination sequence not to be initiated. Instead, the peer setting this option and calling close() * will send a RST (connection reset) which indicates an error condition and this is how it will be perceived * at the other end. You will typically see errors like "Connection reset by peer". * * When linger is off the TCP stack doesn't wait for pending data to be sent before closing the connection. Data * could be lost due to this but by setting linger to off you're accepting this and asking that the connection be * reset straight away rather than closed gracefully. This causes an RST to be sent rather than the usual FIN * * SO_KEEPALIVE makes the kernel more aggressive about continually verifying the connection even when you're not doing anything, * but does not change or enhance the way the information is delivered to you. You'll find out when you try to actually do something * (for example "write"), and you'll find out right away since the kernel is now just reporting the status of a previously set flag, * rather than having to wait a few seconds (or much longer in some cases) for network activity to fail. The exact same code logic you * had for handling the "other side went away unexpectedly" condition will still be used; what changes is the timing (not the method). * * Virtually every "practical" sockets program in some way provides non-blocking access to the sockets during the data phase (maybe with * select()/poll(), or maybe with fcntl()/O_NONBLOCK/EINPROGRESS/EWOULDBLOCK, or if your kernel supports it maybe with MSG_DONTWAIT). * Assuming this is already done for other reasons, it's trivial (sometimes requiring no code at all) to in addition find out right away * about a connection dropping. But if the data phase does not already somehow provide non-blocking access to the sockets, you won't find * out about the connection dropping until the next time you try to do something. * * A TCP socket connection without some sort of non-blocking behaviour during the data phase is notoriously fragile, as if the wrong packet * encounters a network problem it's very easy for the program to then "hang" indefinitely, and there's not a whole lot you can do about it */ U_INTERNAL_DUMP("tcp_reuseport = %b", tcp_reuseport) if (tcp_reuseport || (bind() && U_SYSCALL(listen, "%d,%d", iSockDesc, iBackLog) == 0)) { U_RETURN(true); } iState = -errno; U_RETURN(false); } void USocket::reusePort() { U_TRACE(1, "USocket::reusePort()") U_CHECK_MEMORY #if !defined(U_SERVER_CAPTIVE_PORTAL) && !defined(_MSWINDOWS_) U_INTERNAL_DUMP("tcp_reuseport = %b", tcp_reuseport) if (tcp_reuseport) { U_ASSERT_EQUALS(isUDP(), false) U_ASSERT_EQUALS(isIPC(), false) int old = iSockDesc, domain = (U_socket_Type(this) == SK_UNIX ? AF_UNIX : U_socket_IPv6(this) ? AF_INET6 : AF_INET), iSocketType = (U_socket_Type(this) == SK_DGRAM ? SOCK_DGRAM : SOCK_STREAM); // coverity[+alloc] iSockDesc = U_SYSCALL(socket, "%d,%d,%d", domain, iSocketType, 0); if (isClosed() || (setReuseAddress(), setReusePort(), cLocal->setIPAddressWildCard(U_socket_IPv6(this)), bind()) == false || U_SYSCALL(listen, "%d,%d", iSockDesc, iBackLog) != 0) { U_ERROR("SO_REUSEPORT failed", 0); } (void) U_SYSCALL(close, "%d", old); } #endif setFlags(server_flags); if (isOpen() && isUDP() == false) { setTcpLingerOff(); } } void USocket::setRemote() { U_TRACE(1, "USocket::setRemote()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) SocketAddress cRemote; socklen_t slDummy = cRemote.sizeOf(); if (U_SYSCALL(getpeername, "%d,%p,%p", getFd(), (sockaddr*)cRemote, &slDummy) == 0) { cRemote.getPortNumber(iRemotePort); cRemote.getIPAddress(cRemoteAddress); } } bool USocket::connect() { U_TRACE(1, "USocket::connect()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) int result; SocketAddress cServer; cServer.setPortNumber(iRemotePort); cServer.setIPAddress(cRemoteAddress); #if !defined(U_SERVER_CAPTIVE_PORTAL) && !defined(_MSWINDOWS_) setTcpQuickAck(); setTcpFastOpen(); #endif loop: result = U_SYSCALL(connect, "%d,%p,%d", getFd(), (sockaddr*)cServer, cServer.sizeOf()); if (result == 0) { setLocal(); iState = CONNECT; U_RETURN(true); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } if (errno == EISCONN) { _closesocket(); _socket(); goto loop; } U_RETURN(false); } int USocket::recvFrom(void* pBuffer, uint32_t iBufLength, uint32_t uiFlags, UIPAddress& cSourceIP, unsigned int& iSourcePortNumber) { U_TRACE(1, "USocket::recvFrom(%p,%u,%u,%p,%p)", pBuffer, iBufLength, uiFlags, &cSourceIP, &iSourcePortNumber) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) int iBytesRead; SocketAddress cSource; socklen_t slDummy = cSource.sizeOf(); loop: iBytesRead = U_SYSCALL(recvfrom, "%d,%p,%u,%u,%p,%p", getFd(), CAST(pBuffer), iBufLength, uiFlags, (sockaddr*)cSource, &slDummy); if (iBytesRead > 0) { U_INTERNAL_DUMP("BytesRead(%d) = %#.*S", iBytesRead, iBytesRead, CAST(pBuffer)) cSource.getIPAddress(cSourceIP); cSource.getPortNumber(iSourcePortNumber); U_RETURN(iBytesRead); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } U_RETURN(-1); } int USocket::sendTo(void* pPayload, uint32_t iPayloadLength, uint32_t uiFlags, UIPAddress& cDestinationIP, unsigned int iDestinationPortNumber) { U_TRACE(1, "USocket::sendTo(%p,%u,%u,%p,%d)", pPayload, iPayloadLength, uiFlags, &cDestinationIP, iDestinationPortNumber) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) int iBytesWrite; SocketAddress cDestination; cDestination.setIPAddress(cDestinationIP); cDestination.setPortNumber(iDestinationPortNumber); loop: iBytesWrite = U_SYSCALL(sendto, "%d,%p,%u,%u,%p,%d", getFd(), CAST(pPayload), iPayloadLength, uiFlags, (sockaddr*)cDestination, cDestination.sizeOf()); if (iBytesWrite > 0) { U_INTERNAL_DUMP("BytesWrite(%d) = %#.*S", iBytesWrite, iBytesWrite, CAST(pPayload)) U_RETURN(iBytesWrite); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } U_RETURN(-1); } /** * Stick a TCP cork in the socket. It's not clear that this will help performance, but it might. * * TCP_CORK: If set, don't send out partial frames. All queued partial frames are sent when the option is cleared again. This is useful * for prepending headers before calling sendfile(), or for throughput optimization. As currently implemented, there is a 200 * millisecond ceiling on the time for which output is corked by TCP_CORK. If this ceiling is reached, then queued data is * automatically transmitted. * * This is a no-op if we don't think this platform has corks. */ void USocket::setTcpCork(USocket* sk, uint32_t value) { U_TRACE(1, "USocket::setTcpCork(%p,%u)", sk, value) U_INTERNAL_ASSERT_POINTER(sk) #if defined(TCP_CORK) && !defined(_MSWINDOWS_) if (tcp_autocorking == false) { # ifdef USE_LIBSSL if (sk->isSSL(true)) return; # endif (void) sk->setSockOpt(SOL_TCP, TCP_CORK, (const void*)&value, sizeof(uint32_t)); } #endif } /** * Enables/disables the @c SO_TIMEOUT pseudo option. * @c SO_TIMEOUT is not one of the options defined for Berkeley sockets, but * was actually introduced as part of the Java API. For client sockets * it has the same meaning as the @c SO_RCVTIMEO option, which specifies * the maximum number of milliseconds that a blocking @c read() call will * wait for data to arrive on the socket. * * @param timeoutMS the specified timeout value, in milliseconds. A value of zero indicates no timeout, i.e. an infinite wait. */ bool USocket::setTimeoutRCV(uint32_t timeoutMS) { U_TRACE(1, "USocket::setTimeoutRCV(%u)", timeoutMS) U_INTERNAL_ASSERT(timeoutMS >= 200) // suspicious... #if !defined(SO_RCVTIMEO) U_RETURN(false); #endif // SO_RCVTIMEO is poorly documented in Winsock API, but it appears // to be measured as an int value in milliseconds, whereas BSD-style // sockets use a timeval #if defined(_MSWINDOWS_) bool result = setSockOpt(SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeoutMS, sizeof(uint32_t)); #else // Convert the timeout value (in milliseconds) into a timeval struct struct timeval timer; timer.tv_sec = timeoutMS / 1000; timer.tv_usec = (timeoutMS % 1000) * 1000; bool result = setSockOpt(SOL_SOCKET, SO_RCVTIMEO, &timer, sizeof(timer)); #endif U_RETURN(result); } bool USocket::setTimeoutSND(uint32_t timeoutMS) { U_TRACE(1, "USocket::setTimeoutSND(%u)", timeoutMS) U_INTERNAL_ASSERT(timeoutMS >= 200) // suspicious... #ifndef SO_SNDTIMEO U_RETURN(false); #endif #if defined(_MSWINDOWS_) bool result = setSockOpt(SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeoutMS, sizeof(uint32_t)); #else // Convert the timeout value (in milliseconds) into a timeval struct struct timeval timer; timer.tv_sec = timeoutMS / 1000; timer.tv_usec = (timeoutMS % 1000) * 1000; bool result = setSockOpt(SOL_SOCKET, SO_SNDTIMEO, &timer, sizeof(timer)); #endif U_RETURN(result); } int USocket::recvBinary16Bits() { U_TRACE(0, "USocket::recvBinary16Bits()") uint16_t uiNetOrder; uint32_t iBytesLeft = sizeof(uint16_t); char* pcEndReadBuffer = ((char*)&uiNetOrder) + iBytesLeft; do { iBytesLeft -= recv((void*)(pcEndReadBuffer - iBytesLeft), iBytesLeft); } while (iBytesLeft); int result = ntohs(uiNetOrder); U_RETURN(result); } uint32_t USocket::recvBinary32Bits() { U_TRACE(0, "USocket::recvBinary32Bits()") uint32_t uiNetOrder, iBytesLeft = sizeof(uint32_t); char* pcEndReadBuffer = ((char*)&uiNetOrder) + iBytesLeft; do { iBytesLeft -= recv((void*)(pcEndReadBuffer - iBytesLeft), iBytesLeft); } while (iBytesLeft); int result = ntohl(uiNetOrder); U_RETURN(result); } bool USocket::sendBinary16Bits(uint16_t iData) { U_TRACE(0, "USocket::sendBinary16Bits(%u)", iData) uint16_t uiNetOrder = htons(iData); bool result = (send((const char*)&uiNetOrder, sizeof(uint16_t)) == sizeof(uint16_t)); U_RETURN(result); } bool USocket::sendBinary32Bits(uint32_t lData) { U_TRACE(0, "USocket::sendBinary32Bits(%u)", lData) uint32_t uiNetOrder = htonl(lData); bool result = (send((const char*)&uiNetOrder, sizeof(uint32_t)) == sizeof(uint32_t)); U_RETURN(result); } void USocket::setBlocking() { U_TRACE(1, "USocket::setBlocking()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) U_INTERNAL_ASSERT_EQUALS(flags & O_NONBLOCK, O_NONBLOCK) flags &= ~O_NONBLOCK; (void) U_SYSCALL(fcntl, "%d,%d,%d", getFd(), F_SETFL, flags); U_INTERNAL_DUMP("O_NONBLOCK = %B, flags = %B", O_NONBLOCK, flags) } void USocket::setNonBlocking() { U_TRACE(1, "USocket::setNonBlocking()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) U_INTERNAL_ASSERT_DIFFERS(flags & O_NONBLOCK, O_NONBLOCK) flags |= O_NONBLOCK; (void) U_SYSCALL(fcntl, "%d,%d,%d", getFd(), F_SETFL, flags); U_INTERNAL_DUMP("O_NONBLOCK = %B, flags = %B", O_NONBLOCK, flags) } #ifdef closesocket #undef closesocket #endif void USocket::_closesocket() { U_TRACE(1, "USocket::_closesocket()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) #ifdef _MSWINDOWS_ (void) U_SYSCALL(closesocket, "%d", fh); fh = -1; #elif defined(DEBUG) if (U_SYSCALL( close, "%d", iSockDesc)) U_ERROR_SYSCALL("closesocket"); #else (void) U_SYSCALL(close, "%d", iSockDesc); #endif iSockDesc = -1; iRemotePort = 0; } void USocket::close() { U_TRACE(0, "USocket::close()") closesocket(); iState = CLOSE; } void USocket::closesocket() { U_TRACE(1, "USocket::closesocket()") U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) U_INTERNAL_DUMP("U_ClientImage_parallelization = %u", U_ClientImage_parallelization) if (U_ClientImage_parallelization == 1) // 1 => child of parallelization { iSockDesc = -1; return; } #ifdef USE_LIBSSL if (isSSL(true)) ((USSLSocket*)this)->closesocket(); #endif U_INTERNAL_DUMP("isBroken() = %b", isBroken()) U_INTERNAL_DUMP("isTimeout() = %b", isTimeout()) U_INTERNAL_DUMP("isEpollErr() = %b", isEpollErr()) if ((iState & (BROKEN | EPOLLERROR)) != 0) { # if defined(HAVE_EPOLL_WAIT) && !defined(USE_LIBEVENT) if (isEpollErr()) goto epoll_del; # endif /** * To obtain a clean closure sockets, one would call shutdown() with SHUT_WR * on the socket, call recv() until obtaining a return value of 0 indicating * that the peer has also performed an orderly shutdown, and finally call * close() on the socket. * * The shutdown() tells the receiver the server is done sending data. No * more data is going to be send. More importantly, it doesn't close the * socket. At the socket layer, this sends a TCP/IP FIN packet to the receiver */ if (shutdown(SHUT_WR)) { uint32_t count = 0; char _buf[8 * 1024]; /** * At this point, the socket layer has to wait until the receiver has * acknowledged the FIN packet by receiving a ACK packet. This is done by * using the recv() command in a loop until 0 or less value is returned. * Once recv() returns 0 (or less), 1/2 of the socket is closed */ if (isBlocking()) (void) UFile::setBlocking(iSockDesc, flags, false); do { if (++count > 5) break; errno = 0; if (count == 2 && USocket::isTimeout() == false) { (void) UFile::setBlocking(iSockDesc, flags, true); } } while ((U_SYSCALL(recv, "%d,%p,%u,%d", getFd(), _buf, sizeof(_buf), 0) > 0) || (errno == EAGAIN && UNotifier::waitForRead(iSockDesc, 500) > 0)); } } // NB: to avoid epoll_wait() fire events on file descriptor already closed... #if defined(HAVE_EPOLL_WAIT) && !defined(USE_LIBEVENT) if (UNotifier::isHandler(iSockDesc)) { epoll_del: (void) U_SYSCALL(epoll_ctl, "%d,%d,%d,%p", UNotifier::epollfd, EPOLL_CTL_DEL, iSockDesc, (struct epoll_event*)1); UNotifier::handlerDelete(iSockDesc, EPOLLIN | EPOLLRDHUP); } #endif // Now we know that our FIN is ACK-ed, then you can close the second half of the socket by calling closesocket() _closesocket(); } bool USocket::acceptClient(USocket* pcNewConnection) { U_TRACE(1, "USocket::acceptClient(%p)", pcNewConnection) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) U_INTERNAL_ASSERT_POINTER(pcNewConnection) SocketAddress cRemote; socklen_t slDummy = cRemote.sizeOf(); #ifdef HAVE_ACCEPT4 pcNewConnection->iSockDesc = U_SYSCALL(accept4, "%d,%p,%p,%d", iSockDesc, (sockaddr*)cRemote, &slDummy, accept4_flags); // if (pcNewConnection->iSockDesc != -1 || errno != ENOSYS) goto next; #elif defined(_MSWINDOWS_) pcNewConnection->fh = U_SYSCALL(accept, "%d,%p,%p", fh, (sockaddr*)cRemote, &slDummy); pcNewConnection->iSockDesc = _open_osfhandle((long)(pcNewConnection->fh), O_RDWR | O_BINARY); #else pcNewConnection->iSockDesc = U_SYSCALL(accept, "%d,%p,%p", iSockDesc, (sockaddr*)cRemote, &slDummy); #endif //next: // ------------------------------------------------------------------------------------------------------- // On success, this system call return a NONNEGATIVE INTEGER that is a descriptor for the accepted socket. // On error, -1 is returned, and errno is set appropriately // ------------------------------------------------------------------------------------------------------- if (pcNewConnection->iSockDesc != -1) { pcNewConnection->iState = CONNECT; cRemote.getPortNumber(pcNewConnection->iRemotePort); cRemote.getIPAddress( pcNewConnection->cRemoteAddress); U_INTERNAL_DUMP("pcNewConnection->iSockDesc = %d pcNewConnection->flags = %d %B", pcNewConnection->iSockDesc, pcNewConnection->flags, pcNewConnection->flags) U_INTERNAL_ASSERT_EQUALS(U_socket_IPv6(pcNewConnection), (cRemoteAddress.getAddressFamily() == AF_INET6)) # ifdef HAVE_ACCEPT4 U_INTERNAL_ASSERT_EQUALS(((accept4_flags & SOCK_CLOEXEC) != 0),((pcNewConnection->flags & O_CLOEXEC) != 0)) U_INTERNAL_ASSERT_EQUALS(((accept4_flags & SOCK_NONBLOCK) != 0),((pcNewConnection->flags & O_NONBLOCK) != 0)) # else if (accept4_flags) (void) U_SYSCALL(fcntl, "%d,%d,%d", pcNewConnection->iSockDesc, F_SETFL, pcNewConnection->flags); # endif /* #ifdef DEBUG struct linger x = { 0, -1 }; // { int l_onoff; int l_linger; } uint32_t tmp0 = sizeof(struct linger), value = U_NOT_FOUND, tmp = sizeof(uint32_t); (void) pcNewConnection->getSockOpt(SOL_SOCKET, SO_LINGER, (void*)&x, tmp0); U_INTERNAL_DUMP("SO_LINGER = { %d %d }", x.l_onoff, x.l_linger) U_DUMP("getBufferRCV() = %u getBufferSND() = %u", pcNewConnection->getBufferRCV(), pcNewConnection->getBufferSND()) # ifdef TCP_CORK (void) pcNewConnection->getSockOpt(SOL_TCP, TCP_CORK, (void*)&value, tmp); U_INTERNAL_DUMP("TCP_CORK = %d", value) # endif # ifdef TCP_DEFER_ACCEPT (void) pcNewConnection->getSockOpt(SOL_TCP, TCP_DEFER_ACCEPT, (void*)&value, tmp); U_INTERNAL_DUMP("TCP_DEFER_ACCEPT = %d", value) # endif # ifdef TCP_QUICKACK (void) pcNewConnection->getSockOpt(SOL_TCP, TCP_QUICKACK, (void*)&value, tmp); U_INTERNAL_DUMP("TCP_QUICKACK = %d", value) # endif # ifdef TCP_NODELAY (void) pcNewConnection->getSockOpt(SOL_TCP, TCP_NODELAY, (void*)&value, tmp); U_INTERNAL_DUMP("TCP_NODELAY = %d", value) # endif # ifdef TCP_FASTOPEN (void) pcNewConnection->getSockOpt(SOL_TCP, TCP_FASTOPEN, (void*)&value, tmp); U_INTERNAL_DUMP("TCP_FASTOPEN = %d", value) # endif # ifdef SO_KEEPALIVE (void) pcNewConnection->getSockOpt(SOL_SOCKET, SO_KEEPALIVE, (void*)&value, tmp); U_INTERNAL_DUMP("SO_KEEPALIVE = %d", value) # endif # ifdef TCP_CONGESTION char buffer[32]; uint32_t tmp1 = sizeof(buffer); (void) pcNewConnection->getSockOpt(IPPROTO_TCP, TCP_CONGESTION, (void*)buffer, tmp1); U_INTERNAL_DUMP("TCP_CONGESTION = %S", buffer) # endif #endif */ # ifdef USE_LIBSSL if (isSSL(true) && ((USSLSocket*)this)->acceptSSL((USSLSocket*)pcNewConnection) == false) { U_RETURN(false); } # endif U_RETURN(true); } U_INTERNAL_ASSERT_EQUALS(pcNewConnection->iSockDesc, -1) // NB: we never restart accept(), in general the socket server is NOT blocking... if (errno == EINTR) (void) UInterrupt::checkForEventSignalPending(); pcNewConnection->iState = -errno; U_RETURN(false); } void USocket::setMsgError() { U_TRACE(0, "USocket::setMsgError()") #ifdef USE_LIBSSL if (isSSL(true)) { U_INTERNAL_DUMP("ret = %d", ((USSLSocket*)this)->ret) if (((USSLSocket*)this)->ret != SSL_ERROR_NONE) { ((USSLSocket*)this)->setStatus(false); return; } } #endif U_INTERNAL_DUMP("iState = %d", iState) U_INTERNAL_ASSERT_EQUALS(u_buffer_len, 0) if (isSysError()) { u_errno = errno = -iState; (void) u__snprintf(u_buffer, U_BUFFER_SIZE, "%#R", 0); // NB: the last argument (0) is necessary... } } // VIRTUAL METHOD bool USocket::connectServer(const UString& server, unsigned int iServPort, int timeoutMS) { U_TRACE(1, "USocket::connectServer(%.*S,%u,%d)", U_STRING_TO_TRACE(server), iServPort, timeoutMS) U_CHECK_MEMORY U_INTERNAL_ASSERT(server.isNullTerminated()) // This method is called to connect the socket to a server TCP socket that is specified // by the provided IP Address and port number. We call the connect() method to perform the connection. if (isOpen() == false) _socket(); if (cRemoteAddress.setHostName(server, U_socket_IPv6(this))) { int result; SocketAddress cServer; bool bflag = (timeoutMS && ((flags & O_NONBLOCK) != O_NONBLOCK)); if (bflag) setNonBlocking(); // setting socket to nonblocking if (timeoutMS) (void) setTimeoutRCV(timeoutMS); cServer.setIPAddress(cRemoteAddress); cServer.setPortNumber((iRemotePort = iServPort)); loop: result = U_SYSCALL(connect, "%d,%p,%d", getFd(), (sockaddr*)cServer, cServer.sizeOf()); if (result == 0) { ok: setLocal(); iState = CONNECT; if (bflag) setBlocking(); // restore file status flags U_RETURN(true); } if (result == -1) { if (errno == EINPROGRESS) { result = UNotifier::waitForWrite(iSockDesc, timeoutMS); if (result == 1) { uint32_t error = U_NOT_FOUND, tmp = sizeof(uint32_t); (void) getSockOpt(SOL_SOCKET, SO_ERROR, (void*)&error, tmp); if (error == 0) goto ok; iState = -(u_errno = errno = error); } else if (result == 0) { // timeout _closesocket(); iState = TIMEOUT; u_errno = errno = ETIMEDOUT; } U_RETURN(false); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } if (errno == EISCONN) { _closesocket(); _socket(); goto loop; } } } U_RETURN(false); } int USocket::send(const char* pData, uint32_t iDataLen) { U_TRACE(1, "USocket::send(%p,%u)", pData, iDataLen) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) int iBytesWrite; loop: iBytesWrite = U_SYSCALL(send, "%d,%p,%u,%u", getFd(), CAST(pData), iDataLen, 0); if (iBytesWrite >= 0) { # ifdef DEBUG if (iBytesWrite > 0) U_INTERNAL_DUMP("BytesWrite(%d) = %#.*S", iBytesWrite, iBytesWrite, CAST(pData)) # endif U_RETURN(iBytesWrite); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } U_RETURN(-1); } int USocket::recv(void* pBuffer, uint32_t iBufLength) { U_TRACE(0, "USocket::recv(%p,%u)", pBuffer, iBufLength) U_CHECK_MEMORY U_INTERNAL_ASSERT(isOpen()) int iBytesRead; loop: iBytesRead = U_SYSCALL(recv, "%d,%p,%u,%d", getFd(), CAST(pBuffer), iBufLength, 0); if (iBytesRead >= 0) { # ifdef DEBUG if (iBytesRead > 0) U_INTERNAL_DUMP("BytesRead(%d) = %#.*S", iBytesRead, iBytesRead, CAST(pBuffer)) # endif U_RETURN(iBytesRead); } if (errno == EINTR && UInterrupt::checkForEventSignalPending()) { goto loop; } U_RETURN(-1); } // DEBUG #if defined(U_STDCPP_ENABLE) && defined(DEBUG) const char* USocket::dump(bool reset) const { *UObjectIO::os << "flags " << flags << '\n' << "iState " << iState << '\n' << "iSockDesc " << iSockDesc << '\n' << "iLocalPort " << iLocalPort << '\n' << "iRemotePort " << iRemotePort << '\n' << "cLocalAddress (UIPAddress " << (void*)&cLocalAddress << ")\n" << "cRemoteAddress (UIPAddress " << (void*)&cRemoteAddress << ')'; if (reset) { UObjectIO::output(); return UObjectIO::buffer_output; } return 0; } #endif