Sysprog17

60 %
40 %
Information about Sysprog17
Technology

Published on May 10, 2009

Author: linuxawy

Source: slideshare.net

Description

The 17th session in EGLUG's system programming course. I only attended the session; the slides were not written by me.

C/C++ Linux System Programming Session 17 User-space System Programming – session 7

Session 17

User-space System Programming

– session 7

Outline Device File I/O ops Networking Concepts Socket Concepts and Ops Sockets for IPC

Device File I/O ops

Networking Concepts

Socket Concepts and Ops

Sockets for IPC

DEVICES Major and minor numbers int mknod(const char *pathname, mode_t mode, dev_t dev); UDEV FS Events and rules

Major and minor numbers

int mknod(const char *pathname, mode_t mode, dev_t dev);

UDEV

FS

Events and rules

I/O Multiplexing int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); void FD_CLR(int fd, fd_set *set); int FD_ISSET(int fd, fd_set *set); void FD_SET(int fd, fd_set *set); void FD_ZERO(fd_set *set); int pselect(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); int poll(struct pollfd *fds, nfds_t nfds, int timeout); int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t *sigmask); POLLIN/POLLOUT/POLLPRI/POLLERR

int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);

void FD_CLR(int fd, fd_set *set);

int FD_ISSET(int fd, fd_set *set);

void FD_SET(int fd, fd_set *set);

void FD_ZERO(fd_set *set);

int pselect(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask);

int poll(struct pollfd *fds, nfds_t nfds, int timeout);

int ppoll(struct pollfd *fds, nfds_t nfds, const struct timespec *timeout, const sigset_t *sigmask);

POLLIN/POLLOUT/POLLPRI/POLLERR

Epoll Decouple interest set registration from poll +: O(1) on the wait +: Edge trigger - : system call for adding onto the set int epoll_create(int size); //desc, need close int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event); int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout); typedef union epoll_data { void *ptr; int fd; uint32_t u32; uint64_t u64; } epoll_data_t; struct epoll_event { uint32_t events; /* Epoll events */ epoll_data_t data; /* User data variable */ };

Decouple interest set registration from poll

+: O(1) on the wait

+: Edge trigger

- : system call for adding onto the set

int epoll_create(int size); //desc, need close

int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);

int epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout);

IOCTL Device / special file control int ioctl(int d, int request, ...); Request is specific to device being controlled, and may have a payload (ioctl_list)

Device / special file control

int ioctl(int d, int request, ...);

Request is specific to device being controlled, and may have a payload (ioctl_list)

Filesystem events int inotify_init(void); // desc, need close int inotify_add_watch(int fd, const char *pathname, uint32_t mask); // watch desc int inotify_rm_watch(int fd, uint32_t wd); FIONREAD ioctl fcntl: F_NOTIFY struct inotify_event { int wd; /* watch descriptor */ uint32_t mask; /* mask of events */ uint32_t cookie; /* unique cookie */ uint32_t len; /* size of 'name' field */ char name[]; /* null-terminated name */ };

int inotify_init(void); // desc, need close

int inotify_add_watch(int fd, const char *pathname, uint32_t mask); // watch desc

int inotify_rm_watch(int fd, uint32_t wd);

FIONREAD ioctl

fcntl: F_NOTIFY

int inotifyd_main(int argc UNUSED_PARAM, char **argv) { unsigned mask = IN_ALL_EVENTS; // assume we want all events struct pollfd pfd; char **watched = ++argv; // watched name list const char *args[] = { *argv, NULL, NULL, NULL, NULL }; // open inotify pfd.fd = inotify_init(); if (pfd.fd < 0) bb_perror_msg_and_die("no kernel support"); // setup watched while (*++argv) { char *path = *argv; char *masks = strchr(path, ':'); int wd; // watch descriptor // if mask is specified -> if (masks) { *masks = '\0'; // split path and mask // convert mask names to mask bitset mask = 0; while (*++masks) { int i = strchr(mask_names, *masks) - mask_names; if (i >= 0) { mask |= (1 << i); } } } // add watch wd = inotify_add_watch(pfd.fd, path, mask); if (wd < 0) { bb_perror_msg_and_die("add watch (%s) failed", path); } } static const char mask_names[] ALIGN1 = "a" // 0x00000001 File was accessed "c" // 0x00000002 File was modified "e" // 0x00000004 Metadata changed "w" // 0x00000008 Writable file was closed "0" // 0x00000010 Unwritable file closed "r" // 0x00000020 File was opened "m" // 0x00000040 File was moved from X "y" // 0x00000080 File was moved to Y "n" // 0x00000100 Subfile was created "d" // 0x00000200 Subfile was deleted "D" // 0x00000400 Self was deleted "M" // 0x00000800 Self was moved ; pfd.events = POLLIN; while (!signalled && poll(&pfd, 1, -1) > 0) { ssize_t len; void *buf; struct inotify_event *ie; // read out all pending events xioctl(pfd.fd, FIONREAD, &len); #define eventbuf bb_common_bufsiz1 ie = buf = (len <= sizeof(eventbuf)) ? eventbuf : xmalloc(len); len = full_read(pfd.fd, buf, len); // process events. N.B. 
events may vary in length while (len > 0) { int i; char events[12]; char *s = events; unsigned m = ie->mask; for (i = 0; i < 12; ++i, m >>= 1) { if (m & 1) { *s++ = mask_names[i]; } } *s = '\0'; args[1] = events; args[2] = watched[ie->wd]; args[3] = ie->len ? ie->name : NULL; xspawn((char **)args); // next event i = sizeof(struct inotify_event) + ie->len; len -= i; ie = (void*)((char*)ie + i); } if (eventbuf != buf) free(buf); } return EXIT_SUCCESS; }

Asynchronous I/O Only on O_DIRECT struct aiocb { int aio_fildes; /* file descriptor */ int aio_lio_opcode; /* operation to perform */ int aio_reqprio; /* request priority offset */ volatile void *aio_buf; /* pointer to buffer */ size_t aio_nbytes; /* length of operation */ struct sigevent aio_sigevent; /* signal number and value */ /* internal, private members follow... */ }; int aio_read (struct aiocb *aiocbp); int aio_write (struct aiocb *aiocbp); int aio_error (const struct aiocb *aiocbp); int aio_return (struct aiocb *aiocbp); int aio_cancel (int fd, struct aiocb *aiocbp); int aio_fsync (int op, struct aiocb *aiocbp); int aio_suspend (const struct aiocb * const cblist[], int n, const struct timespec *timeout);

Only on O_DIRECT

Network Architecture Application – telnet/ftp/http...etc Presentation -- intended for e.g. encryption Session -- e.g. iSCSI Transport – PORTS Network – IP, ATM Link -- Physical – Ethernet, wifi... OSI Packets and Data Encapsulation Protocols can be stacked on top of that e.g. CIM over HTTP ------------------------------------------------------------- | Eth | IP | TCP | App | DDDDAAAATTTTAAAA | -------------------------------------------------------------

OSI

Packets and Data Encapsulation

Protocols can be stacked on top of that

e.g. CIM over HTTP

Focus Link is handled by HW and drivers Network: IP, handled by kernel, affects addressing and byte ordering Transport layer TCP – Reliable, sequenced, Connection-oriented UDP – Unreliable, unsequenced, connectionless Handled by kernel which provides us an interface Application is what you are writing

Link is handled by HW and drivers

Network: IP, handled by kernel, affects addressing and byte ordering

Transport layer

TCP – Reliable, sequenced, Connection-oriented

UDP – Unreliable, unsequenced, connectionless

Handled by kernel which provides us an interface

Application is what you are writing

Network Layer Concerns Byte ordering Network byte order vs Host byte order Addressing IPv4: 4 octets xx.xx.xx.xx (32 bits) IPv6: 8 groups of 4 hex digits (16 bits each) separated by : (128 bits) IPv4 compatibility Scopes Subnets Unicasting/Broadcasting (v4) /Multicasting (v6) /Anycasting (v6) Ports Loopback

Byte ordering

Network byte order vs Host byte order

Addressing

IPV4: 4 octets xx.xx.xx.xx (32 bits)

IPv6: 8 groups of 4 hex digits (16 bits each) separated by : (128 bits)

IPv4 compatibility

Scopes

Subnets

Unicasting/Broadcasting (v4) /Multicasting (v6) /Anycasting (v6)

Ports

Loopback

Network Byte Order uint32_t htonl(uint32_t hostlong); uint16_t htons(uint16_t hostshort); uint32_t ntohl(uint32_t netlong); uint16_t ntohs(uint16_t netshort); What about everything else? Agreement: the higher level protocol Abstraction layers for cross-platform calls (e.g. RPC, RMI): (un)marshalling

uint32_t htonl(uint32_t hostlong);

uint16_t htons(uint16_t hostshort);

uint32_t ntohl(uint32_t netlong);

uint16_t ntohs(uint16_t netshort);

What about everything else?

Agreement: the higher level protocol

Abstraction layers for cross-platform calls (e.g. RPC, RMI): (un)marshalling

IP Address Casting struct sockaddr { sa_family_t sa_family; char sa_data[14]; } struct sockaddr_in { sa_family_t sin_family; /* AF_INET */ uint16_t sin_port; /* port */ struct in_addr sin_addr; }; struct in_addr { uint32_t s_addr; }; struct sockaddr_in6 { uint16_t sin6_family; /* AF_INET6 */ uint16_t sin6_port; /* port */ uint32_t sin6_flowinfo; struct in6_addr sin6_addr; uint32_t sin6_scope_id; }; struct in6_addr { unsigned char s6_addr[16]; }; IPV4 IPV6

Name Service what hosts (sometimes, what service) DNS/BIND, NIS/YP, LDAP DNS: domain name (fully qualified) The Resolver named /etc/hosts Order: /etc/host.conf

what hosts (sometimes, what service)

DNS/BIND, NIS/YP, LDAP

DNS: domain name (fully qualified)

The Resolver

named

/etc/hosts

Order: /etc/host.conf

Name / Address Info address ==> name Name ==> address(es) String ==> Address Address ==> String My host Info int getnameinfo(const struct sockaddr *sa, socklen_t salen, char *host, size_t hostlen, char *serv, size_t servlen, int flags); int getaddrinfo(const char *node, const char *service, const struct addrinfo *hints, struct addrinfo **res); void freeaddrinfo(struct addrinfo *res); const char *gai_strerror(int errcode); struct addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; size_t ai_addrlen; struct sockaddr *ai_addr; char *ai_canonname; struct addrinfo *ai_next; }; int inet_pton(int af, const char *src, void *dst); const char *inet_ntop(int af, const void *src, char *dst, socklen_t cnt); NI_NOFQDN NI_NUMERICHOST NI_NAMEREQD NI_NUMERICSERV NI_DGRAM int gethostname(char *name, size_t len);

address ==> name

Name ==> address(es)

String ==> Address

Address ==> String

My host Info

Legacy Name/Address Info struct hostent *gethostbyname(const char *name); struct hostent *gethostbyaddr(const void *addr, socklen_t len, int type); void herror(const char *s); const char *hstrerror(int err); Require a deep copy GNU extensions: re-entrancy (_r), POSIX extension: gethostent(void) IPV4 only: inet_ntoa/aton and family struct hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; }

struct hostent *gethostbyname(const char *name);

struct hostent *gethostbyaddr(const void *addr,

socklen_t len, int type);

void herror(const char *s);

const char *hstrerror(int err);

Require a deep copy

GNU extensions: re-entrancy (_r), POSIX extension: gethostent(void)

IPV4 only: inet_ntoa/aton and family

Sockets Model Virtual hookup (like the phone) A special “descriptor” (hooks VFS to transport layer) Creation int socket(int domain, int type, int protocol); Domains: PF_{INET, INET6, UNIX, NETLINK ....} Types: SOCK_{STREAM, DGRAM, RAW, ...} Protocols and getprotoent() Address / Socket binding int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); INADDR_ANY, in6addr_any (IN6ADDR_ANY_INIT)

Model

Virtual hookup (like the phone)

A special “descriptor” (hooks VFS to transport layer)

Creation

int socket(int domain, int type, int protocol);

Domains: PF_{INET, INET6, UNIX, NETLINK ....}

Types: SOCK_{STREAM, DGRAM, RAW, ...}

Protocols and getprotoent()

Address / Socket binding

int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen);

INADDR_ANY, in6addr_any (IN6ADDR_ANY_INIT)

Reliable Sockets Connect to server address int connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen); Listening to incoming connections int listen(int sockfd, int backlog); Accepting a new connection int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); Gets a new “child” socket descriptor Stevens et al

Connect to server address

int connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen);

Listening to incoming connections

int listen(int sockfd, int backlog);

Accepting a new connection

int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen);

Gets a new “child” socket descriptor

Socket States Stevens et al

Socket Options int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen); int setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen); Some important options: SO_KEEPALIVE SO_RCVBUF / SO_SNDBUF SO_LINGER SO_REUSEADDR

int getsockopt(int s, int level, int optname, void *optval, socklen_t *optlen);

int setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen);

Some important options:

SO_KEEPALIVE

SO_RCVBUF / SO_SNDBUF

SO_LINGER

SO_REUSEADDR

Unreliable Communication ssize_t sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen); ssize_t recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen); To add reliability: Connection (You can still connect, no handshake) Sequence Replies + timeouts + retransmission

ssize_t sendto(int s, const void *buf, size_t len, int flags, const struct sockaddr *to, socklen_t tolen);

ssize_t recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, socklen_t *fromlen);

To add reliability:

Connection (You can still connect, no handshake)

Sequence

Replies + timeouts + retransmission

I/O Like File I/O: read/write/readv/writev/poll/select/ fcntl-SIGIO... ssize_t send(int s, const void *buf, size_t len, int flags); ssize_t recv(int s, void *buf, size_t len, int flags); Flags only matter on connections MSG_{CONFIRM, DONTROUTE, DONTWAIT, EOR, MORE, NOSIGNAL, OOB, WAITALL, PEEK}

Like File I/O:

read/write/readv/writev/poll/select/ fcntl-SIGIO...

ssize_t send(int s, const void *buf, size_t len, int flags);

ssize_t recv(int s, void *buf, size_t len, int flags);

Flags only matter on connections

MSG_{CONFIRM, DONTROUTE, DONTWAIT, EOR, MORE, NOSIGNAL, OOB, WAITALL, PEEK}

Message-Based Transfers ssize_t recvmsg(int s, struct msghdr *msg, int flags); ssize_t sendmsg(int s, const struct msghdr *msg, int flags); Raw sockets Ancillary data struct msghdr { void *msg_name; socklen_t msg_namelen; struct iovec *msg_iov; size_t msg_iovlen; void *msg_control; socklen_t msg_controllen; int msg_flags; }; struct cmsghdr { socklen_t cmsg_len; int cmsg_level; int cmsg_type; /* unsigned char cmsg_data[]; */ }; struct cmsghdr *CMSG_FIRSTHDR(struct msghdr *msgh); struct cmsghdr *CMSG_NXTHDR(struct msghdr *msgh, struct cmsghdr *cmsg); size_t CMSG_ALIGN(size_t length); size_t CMSG_SPACE(size_t length); size_t CMSG_LEN(size_t length); unsigned char *CMSG_DATA(struct cmsghdr *cmsg);

ssize_t recvmsg(int s, struct msghdr *msg, int flags);

ssize_t sendmsg(int s, const struct msghdr *msg, int flags);

Raw sockets

Ancillary data

Design Decisions UDP, TCP, Raw On connection server Iterative vs Concurrent Thread vs Process Pre vs Post

UDP, TCP, Raw

On connection server

Iterative vs Concurrent

Thread vs Process

Pre vs Post

Some examples TCP sshd Raw ping UDP snmp

TCP sshd

Raw ping

UDP snmp

UNIX Domain Sockets IPC Ancillary data: SOL_SOCKET level SCM_RIGHTS int socketpair(int d, int type, int protocol, int sv[2]); udevmonitor example Ioctls: FIONREAD, TIOCOUTQ struct sockaddr_un { sa_family_t sun_family; char sun_path[UNIX_PATH_MAX]; };

IPC

Ancillary data:

SOL_SOCKET level

SCM_RIGHTS

int socketpair(int d, int type, int protocol, int sv[2]);

udevmonitor example

Ioctls: FIONREAD, TIOCOUTQ

Add a comment

Related presentations

Presentación que realice en el Evento Nacional de Gobierno Abierto, realizado los ...

In this presentation we will describe our experience developing with a highly dyna...

Presentation to the LITA Forum 7th November 2014 Albuquerque, NM

Un recorrido por los cambios que nos generará el wearabletech en el futuro

Um paralelo entre as novidades & mercado em Wearable Computing e Tecnologias Assis...

Microsoft finally joins the smartwatch and fitness tracker game by introducing the...