the OCI runtime will proxy the socket into the container to receive ready notification.
podman原理:
通过python测试NOTIFY_SOCKET. (service文件可以放到 /usr/lib/systemd/system/ 下测试)
https://stackoverflow.com/questions/63540832/how-to-run-a-service-running-in-a-container-in-systemd-including-systemd-notify
podman自己实现了一套NOTIFY_SOCKET机制,没有复用runc的能力。
conmon通过socket读取 /var/lib/containers/storage/overlay-containers/c2184b9d7b043500a54374fe2a19ede77a1d9e06e51fc45df74e2675f2a745ba/userdata/notify/notify.sock,同时该socket文件会mount到容器里的/run/notify/notify.sock(容器内进程会写这个socket,因为这个socket会设置为NOTIFY_SOCKET)
当conmon读取到READY=1后,会向/run/systemd/notify写入READY=1。
podman会把MAINPID设置为conmon进程的PID
// Unless sd-notify handling is set to "ignore", report conmon's PID to systemd
// as MAINPID. In "conmon" mode the READY payload is appended to the same
// message, so readiness is signalled together with the PID.
if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
	payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
	if c.config.SdNotifyMode == define.SdNotifyModeConmon {
		payload += "\n" + daemon.SdNotifyReady
	}

	sent, err := daemon.SdNotify(false, payload)
	switch {
	case err != nil:
		logrus.Errorf("Notifying systemd of Conmon PID: %s", err.Error())
	case sent:
		logrus.Debugf("Notify sent successfully")
	}
}
runc原理:
runc和podman是两套机制。
runc作为 OCI runtime,会启动一个goroutine代理NOTIFY_SOCKET的处理。
notify_socket.go
// notifySocket holds the state needed to proxy systemd readiness
// notifications from a container to the host.
type notifySocket struct {
	// socket listens on socketPath outside the container; processes inside
	// the container send "READY=1" to it.
	socket *net.UnixConn

	// host is the socket path taken from the NOTIFY_SOCKET environment
	// variable (a path outside the container). Once "READY=" has been read
	// from socket, "READY=" and the PID are sent to this address.
	host string

	// socketPath is the socket file backing socket. Outside the container it
	// is /$rootfs/notify/notify.sock; it is bind-mounted into the container
	// at /run/notify/notify.sock.
	socketPath string
}
func (n *notifySocket) run(pid1 int) error {if n.socket == nil {return nil}notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}client, err := net.DialUnix("unixgram", nil, ¬ifySocketHostAddr)if err != nil {return err}ticker := time.NewTicker(time.Millisecond * 100)defer ticker.Stop()// 获取容器内程序发送的 "READY=1"fileChan := make(chan []byte)go func() {for {buf := make([]byte, 4096)r, err := n.socket.Read(buf)if err != nil {return}got := buf[0:r]// systemd-ready sends a single datagram with the state string as payload,// so we don't need to worry about partial messages.for _, line := range bytes.Split(got, []byte{'\n'}) {if bytes.HasPrefix(got, []byte("READY=")) {fileChan <- linereturn}}}}()// 向NOTIFY_SOCKET对应的socket发送 "READY=1\nMAINPID=$pid"for {select {case <-ticker.C:_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))if err != nil {return nil}case b := <-fileChan:var out bytes.Buffer_, err = out.Write(b)if err != nil {return err}_, err = out.Write([]byte{'\n'})if err != nil {return err}_, err = client.Write(out.Bytes())if err != nil {return err}// now we can inform systemd to use pid1 as the pid to monitor// 使用容器的1号进程作为systemd的MAINPIDnewPid := "MAINPID=" + strconv.Itoa(pid1)_, err := client.Write([]byte(newPid + "\n"))if err != nil {return err}return nil}}}
