Ads 468x60px

##EasyReadMore##

12 12月, 2017

watch dog 解釋, sample

前言

如果未熟悉 watchdog 前, 千萬不要在 Production Server 上啟用它
 
  因為你將會意外地 reboot 它多次 !!
 
  Linux 2.6 的 watchdog 系統由以下兩樣東西組成:
 
  watchdog daemon
  softdog kernel module
 

softdog

hardware watchdog 比 softdog 可靠, 因為 softdog 是由內核模塊 softdog.ko 通過 timer 機制實現的
當 Kernel 完全死亡時, softdog 自然也跟著失效 ........

載入 kernel module:

mknod /dev/watchdog c 10 130
  chmod 600 /dev/watchdog
  modprobe softdog                             // 本身自帶, 不用額外安裝

P.S.
  載入後, 並不代表已經啟用 !!
  lsmod | grep softdog
  softdog                 2108  0
  # 最後一欄 (使用計數): 0 = 已停止 (stop), 1 = 已啟動 (start)
 
  要使用 softdog, 必須要叫醒一次

Enable:

echo a > /dev/watchdog
  之後如果在預設的 60 秒內沒有再叫它一次, 系統就會被 reboot !! (soft_margin=60)
  [823573.854213] SoftDog: Unexpected close, not stopping watchdog!         <-- 叫醒       
 

30 秒 & ONLY_TESTING :
  modprobe softdog soft_margin=30 soft_noboot=1
  [823603.936044] SoftDog: Triggered - Reboot ignored.                        <-- ONLY_TESTING
 

Disable:

echo V > /dev/watchdog

查看是否真的 stop 了:
  lsmod | grep softdog
  softdog                13510  0
 
watchdog daemon:

watchdog 是一個 daemon, 它負責定時對 /dev/watchdog 寫入東西
 

安裝

apt-get install watchdog

簡單的 watchdog daemon:

你必須在 timeout 前去 clear (餵狗), 不然系統就會重啟
ps:
  重啟是去 trigger restart pin 腳,所以不會看到關機的 log 訊息

watchdog.c

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "watchdog.h"

/*
 * Parse a strictly positive decimal number of seconds from `text`.
 *
 * Returns 0 and stores the value in *out on success; returns -1 (leaving
 * *out untouched) when the text is empty, has trailing garbage, is zero or
 * negative, or does not fit in an int.
 */
static int parse_seconds(const char *text, int *out)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(text, &end, 10);
	if (end == text || *end != '\0' || errno == ERANGE)
		return -1;
	if (value <= 0 || value > INT_MAX)
		return -1;

	*out = (int)value;
	return 0;
}

/*
 * Minimal watchdog feeder.
 *
 * Usage: prog <WDT TIMEOUT> <CLEAR TIME>
 *   WDT TIMEOUT - watchdog timeout in seconds requested from the driver
 *   CLEAR TIME  - interval in seconds between keep-alive pings
 *
 * Opens /dev/watchdog, requests the timeout, prints the timeout the driver
 * reports back, then pings the watchdog forever. The process never returns
 * once the ping loop has started.
 *
 * Returns 0 when only the usage text was printed, EXIT_FAILURE on invalid
 * arguments or when the device cannot be opened.
 */
int main(int argc, char **argv)
{
	int fd, timeout = 5, cleartime = 10;

	/*
	 * Validate the command line BEFORE touching the device: opening
	 * /dev/watchdog starts the timer, and closing it again without the
	 * magic character leaves it running, so a bad invocation must not
	 * get as far as open().
	 */
	if (argc <= 2) {
		printf("Usage: %s [WDT TIMEOUT] [CLEAR TIME]\n", argv[0]);
		return 0;
	}
	if (parse_seconds(argv[1], &timeout) != 0 ||
	    parse_seconds(argv[2], &cleartime) != 0) {
		fprintf(stderr, "%s: timeout and clear time must be positive integers\n",
			argv[0]);
		return EXIT_FAILURE;
	}

	/* open() reports failure with -1, which is non-zero: test the sign. */
	fd = open("/dev/watchdog", O_RDWR);
	if (fd < 0) {
		printf("open failed...\n");
		return EXIT_FAILURE;
	}

	/*
	 * A driver may refuse or adjust the requested timeout; report the
	 * failure but keep feeding, because the watchdog is already armed.
	 */
	if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) < 0)
		perror("WDIOC_SETTIMEOUT");
	if (ioctl(fd, WDIOC_GETTIMEOUT, &timeout) < 0)
		perror("WDIOC_GETTIMEOUT");
	printf("Timeout: %d\n", timeout);
	printf("Clear Time: %d\n", cleartime);

	if (cleartime >= timeout)
		fprintf(stderr,
			"warning: clear time (%d) is not shorter than the timeout (%d); "
			"the watchdog will fire\n", cleartime, timeout);

	/*
	 * Feed the watchdog forever. There is deliberately no exit path:
	 * leaving the loop would close the device without disarming it.
	 */
	for (;;) {
		if (ioctl(fd, WDIOC_KEEPALIVE, 0) < 0)
			perror("WDIOC_KEEPALIVE");
		sleep((unsigned int)cleartime);
	}
}

watchdog.h

/*
 *	Generic watchdog defines. Derived from..
 *
 * Berkshire PC Watchdog Defines
 * by Ken Hollis <khollis@bitgate.com>
 *
 */

#ifndef _LINUX_WATCHDOG_H
#define _LINUX_WATCHDOG_H

#include <linux/ioctl.h>
#include <linux/types.h>

#define	WATCHDOG_IOCTL_BASE	'W'

/*
 * Description of a watchdog card/driver. This is the payload type named by
 * the WDIOC_GETSUPPORT request code.
 * NOTE(review): `options` presumably carries a mask of WDIOF_* bits -
 * confirm against the kernel watchdog API documentation.
 */
struct watchdog_info {
	__u32 options;		/* Options the card/driver supports */
	__u32 firmware_version;	/* Firmware version of the card */
	__u8  identity[32];	/* Identity of the board */
};

/*
 * ioctl request codes for the watchdog device, all built on
 * WATCHDOG_IOCTL_BASE with sequence numbers 0..10. Every request carries an
 * int payload except WDIOC_GETSUPPORT, which carries a struct watchdog_info.
 *
 * NOTE(review): WDIOC_SETOPTIONS and WDIOC_KEEPALIVE are encoded with _IOR
 * even though their names do not suggest a read. These values are part of
 * the user/kernel ABI, so presumably this is intentional - do not "fix" the
 * direction macro without checking the kernel's own header.
 */
#define	WDIOC_GETSUPPORT	_IOR(WATCHDOG_IOCTL_BASE, 0, struct watchdog_info)
#define	WDIOC_GETSTATUS		_IOR(WATCHDOG_IOCTL_BASE, 1, int)
#define	WDIOC_GETBOOTSTATUS	_IOR(WATCHDOG_IOCTL_BASE, 2, int)
#define	WDIOC_GETTEMP		_IOR(WATCHDOG_IOCTL_BASE, 3, int)
#define	WDIOC_SETOPTIONS	_IOR(WATCHDOG_IOCTL_BASE, 4, int)
#define	WDIOC_KEEPALIVE		_IOR(WATCHDOG_IOCTL_BASE, 5, int)
#define	WDIOC_SETTIMEOUT        _IOWR(WATCHDOG_IOCTL_BASE, 6, int)
#define	WDIOC_GETTIMEOUT        _IOR(WATCHDOG_IOCTL_BASE, 7, int)
#define	WDIOC_SETPRETIMEOUT	_IOWR(WATCHDOG_IOCTL_BASE, 8, int)
#define	WDIOC_GETPRETIMEOUT	_IOR(WATCHDOG_IOCTL_BASE, 9, int)
#define	WDIOC_GETTIMELEFT	_IOR(WATCHDOG_IOCTL_BASE, 10, int)

/* Sentinel values for an unrecognised flag / status. */
#define	WDIOF_UNKNOWN		-1	/* Unknown flag error */
#define	WDIOS_UNKNOWN		-1	/* Unknown status error */

/*
 * WDIOF_* bit flags; each value is a distinct bit so they can be OR-ed.
 * NOTE(review): presumably these are reported through
 * watchdog_info.options and the status ioctls - confirm against the kernel
 * watchdog API documentation.
 */
#define	WDIOF_OVERHEAT		0x0001	/* Reset due to CPU overheat */
#define	WDIOF_FANFAULT		0x0002	/* Fan failed */
#define	WDIOF_EXTERN1		0x0004	/* External relay 1 */
#define	WDIOF_EXTERN2		0x0008	/* External relay 2 */
#define	WDIOF_POWERUNDER	0x0010	/* Power bad/power fault */
#define	WDIOF_CARDRESET		0x0020	/* Card previously reset the CPU */
#define	WDIOF_POWEROVER		0x0040	/* Power over voltage */
#define	WDIOF_SETTIMEOUT	0x0080  /* Set timeout (in seconds) */
#define	WDIOF_MAGICCLOSE	0x0100	/* Supports magic close char */
#define	WDIOF_PRETIMEOUT	0x0200  /* Pretimeout (in seconds), get/set */
#define	WDIOF_KEEPALIVEPING	0x8000	/* Keep alive ping reply */

/*
 * WDIOS_* bit flags.
 * NOTE(review): presumably the argument bits for WDIOC_SETOPTIONS - confirm.
 */
#define	WDIOS_DISABLECARD	0x0001	/* Turn off the watchdog timer */
#define	WDIOS_ENABLECARD	0x0002	/* Turn on the watchdog timer */
#define	WDIOS_TEMPPANIC		0x0004	/* Kernel panic on temperature trip */


#endif  /* ifndef _LINUX_WATCHDOG_H */

Refer:

    watchdog | 夢想家 - https://goo.gl/KpRUiX

0 意見:

張貼留言

 
Blogger Templates