From 37a822b68776e7ffa1b06a15c7ee44172dcdf139 Mon Sep 17 00:00:00 2001 From: Mark Nellemann Date: Mon, 14 Jun 2021 12:34:30 +0200 Subject: [PATCH 1/3] Update README's and oshi dependency. --- gradle.properties | 2 +- plugins/README.md | 4 ++-- plugins/os-aix/README.md | 31 +------------------------------ plugins/os-base/README.md | 36 +++++++++++++++++++----------------- plugins/os-ibmi/README.md | 5 +---- plugins/os-linux/README.md | 37 ++++--------------------------------- 6 files changed, 28 insertions(+), 87 deletions(-) diff --git a/gradle.properties b/gradle.properties index 5794fb7..2f0aa76 100644 --- a/gradle.properties +++ b/gradle.properties @@ -3,4 +3,4 @@ pf4jVersion=3.6.0 slf4jVersion=1.7.30 camelVersion=3.10.0 picocliVersion=4.6.1 -oshiVersion=5.7.3 \ No newline at end of file +oshiVersion=5.7.5 \ No newline at end of file diff --git a/plugins/README.md b/plugins/README.md index b4b1d9e..2a3341f 100644 --- a/plugins/README.md +++ b/plugins/README.md @@ -1,3 +1,3 @@ -# Plugins +# System Monitor Plugins -Collections of base plugins. +Collection of sysmon plugins. \ No newline at end of file diff --git a/plugins/os-aix/README.md b/plugins/os-aix/README.md index ab367b7..7fe26a5 100644 --- a/plugins/os-aix/README.md +++ b/plugins/os-aix/README.md @@ -1,6 +1,6 @@ # AIX Plugin -## Processor Extension +## LPAR Processor Extension The processor extension works for both AIX and Linux on the Power ppc64/ppc64le architecture. @@ -10,35 +10,6 @@ Metrics reported are: - **type** - Processor type, Shared or Dedicated - **lcpu** - Number of logical CPU's available for this partition - **ent** - Processor entitlements available for this partition -- **user** - Indicates the percentage of the entitled processing capacity used while executing at the user level (application). -- **sys** - Indicates the percentage of the entitled processing capacity used while executing at the system level (kernel). 
-- **idle** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and did not have any outstanding disk I/O request. -- **wait** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and had outstanding disk I/O request(s). - **physc** - Indicates the number of physical processors consumed. - **entc** - Indicates the percentage of the entitled capacity consumed. - **lbusy** - Indicates the percentage of logical processor(s) utilization that occurred while executing at the user and system level. - - -## Memory Extension - -Metrics reported are: - -- **total** - Total amount of memory (in KB). -- **used** - real memory consumption (in KB). -- **free** - free memory for use (in KB). -- **pin** - pinned memory consumption (in KB). -- **virtual** - virtual memory consumption (in KB). -- **available** - available memory (if freeing up virtual) (in KB). -- **paged** - paging space consumption (in KB). - -*Pinning a memory region prohibits the pager from stealing pages from the pages backing the pinned memory region.* - -## Disk Extension - -Only reports first device found. Improvements on the TODO. - -Metrics reported are: - -- **device** - Name of device. -- **reads** - The total number of KB read. -- **writes** - The total number of KB written. diff --git a/plugins/os-base/README.md b/plugins/os-base/README.md index 6a0bd86..966eab0 100644 --- a/plugins/os-base/README.md +++ b/plugins/os-base/README.md @@ -1,36 +1,38 @@ # Base Plugin +The base plugin uses the [oshi](https://github.com/oshi/oshi) library to get its metrics. + ## Processor Extension Reports the following metrics seen: -- **user** - CPU time spend on user processes. - **system** -CPU time spend on system processes. -- **iowait** - CPU time spend on waiting (for i/o). -- **idle** - CPU time spend on idle (doing nothing). -- **busy** - CPU time not spend on idle (working). 
+- **user** - CPU time spent on user processes. +- **nice** - CPU time spent on user processes running at lower priority. +- **iowait** - CPU time spent waiting (for i/o). +- **steal** - CPU time stolen by hypervisor and given to other virtual systems. +- **irq** - CPU time spent by kernel on interrupt requests. +- **softirq** - CPU time spent by kernel on soft interrupt requests. +- **idle** - CPU time spent idling (doing nothing). +- **busy** - CPU time spent working. ## Memory Extension -Reports the following metrics, from the *free* command: +Reports the following metrics (in bytes): -- **total** - The total amount of (installed) memory (in KB). -- **used** - Used memory (calculated as total - free - buffers - cache) (in KB). -- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB). -- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB). -- **buffers** - Sum of buffers and cache (in KB). -- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB). +- **available** - Estimation of how much memory is available for starting new applications, without swapping. +- **total** - The total amount of (installed) memory. - **usage** - Percentage of memory used out of the total amount of memory. +- **paged** - ... +- **virtual** - ... ## Disk Extension - -Only reports first device found. Improvements on the TODO. - Metrics reported are: -- **device** - Name of device. -- **reads** - The total number of KB read. -- **writes** - The total number of KB written. +- **reads** - The total number of bytes read. +- **writes** - The total number of bytes written. +- **iotime** - Time spent on IO in milliseconds. +- **queue** - Length of IO queue. 
\ No newline at end of file diff --git a/plugins/os-ibmi/README.md b/plugins/os-ibmi/README.md index 7931bd5..b13f57d 100644 --- a/plugins/os-ibmi/README.md +++ b/plugins/os-ibmi/README.md @@ -1,7 +1,4 @@ # IBM i Plugin -## Processor Extension +TODO. Nothing here yet. -## Memory Extension - -## Disk Extension diff --git a/plugins/os-linux/README.md b/plugins/os-linux/README.md index 6a0bd86..7a43677 100644 --- a/plugins/os-linux/README.md +++ b/plugins/os-linux/README.md @@ -1,36 +1,7 @@ -# Base Plugin +# Linux Plugins -## Processor Extension +## Components -Reports the following metrics seen: +### Network Sockets -- **user** - CPU time spend on user processes. -- **system** -CPU time spend on system processes. -- **iowait** - CPU time spend on waiting (for i/o). -- **idle** - CPU time spend on idle (doing nothing). -- **busy** - CPU time not spend on idle (working). - - -## Memory Extension - -Reports the following metrics, from the *free* command: - -- **total** - The total amount of (installed) memory (in KB). -- **used** - Used memory (calculated as total - free - buffers - cache) (in KB). -- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB). -- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB). -- **buffers** - Sum of buffers and cache (in KB). -- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB). -- **usage** - Percentage of memory used out of the total amount of memory. - - -## Disk Extension - - -Only reports first device found. Improvements on the TODO. - -Metrics reported are: - -- **device** - Name of device. -- **reads** - The total number of KB read. -- **writes** - The total number of KB written. +Collects statistics from */proc/net/sockstat*. 
\ No newline at end of file From 3a89ab42f97ea71f5b1799e47059303c501cc1f8 Mon Sep 17 00:00:00 2001 From: Mark Nellemann Date: Mon, 14 Jun 2021 17:57:18 +0200 Subject: [PATCH 2/3] Ensure hostname is provided, if it can't be properly detected. --- README.md | 22 +++++++++++++++++++ .../main/java/sysmon/client/Application.java | 4 ++-- plugins/README.md | 2 +- .../java/sysmon/shared/MetricExtension.java | 1 - .../main/java/sysmon/shared/PluginHelper.java | 2 +- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 783b480..0227942 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,28 @@ Java based system monitoring solution with support for plugins. - Example Grafana [dashboard](https://bitbucket.org/mnellemann/sysmon/downloads/sysmon-example-dashboard.png) showing metrics from a host running *sysmon*. + + +## Known problems + +### Correct timezone and clock + +- Ensure you have **correct timezone and date/time** and NTPd (or similar) running to keep it accurate! + +### Naming collision + +You can't have hosts with the same name, as these cannot be distinguished when metrics are +written to InfluxDB (which uses the hostname as key). + +### Renaming hosts + +If you rename a host, the metrics in InfluxDB will still be available by the old hostname, and new metrics will be written with the new hostname. 
There is no easy way to migrate the old data, but you can delete it easily: + +```text +USE sysmon; +DELETE WHERE hostname = 'unknown'; +``` + ## Components ### Client diff --git a/client/src/main/java/sysmon/client/Application.java b/client/src/main/java/sysmon/client/Application.java index 67a6d3a..d134d6e 100644 --- a/client/src/main/java/sysmon/client/Application.java +++ b/client/src/main/java/sysmon/client/Application.java @@ -41,8 +41,8 @@ public class Application implements Callable { try { hostname = InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { - log.warn(e.getMessage()); - hostname = "unknown"; + System.err.println("Could not detect hostname. Use the '-n' or '--hostname' option to specify it."); + return -1; } } diff --git a/plugins/README.md b/plugins/README.md index 2a3341f..75d3ab9 100644 --- a/plugins/README.md +++ b/plugins/README.md @@ -1,3 +1,3 @@ # System Monitor Plugins -Collection of sysmon plugins. \ No newline at end of file +Collection of standard sysmon plugins. 
\ No newline at end of file diff --git a/shared/src/main/java/sysmon/shared/MetricExtension.java b/shared/src/main/java/sysmon/shared/MetricExtension.java index 4130a21..8e7f37f 100644 --- a/shared/src/main/java/sysmon/shared/MetricExtension.java +++ b/shared/src/main/java/sysmon/shared/MetricExtension.java @@ -2,7 +2,6 @@ package sysmon.shared; import org.pf4j.ExtensionPoint; -import java.io.IOException; public interface MetricExtension extends ExtensionPoint { diff --git a/shared/src/main/java/sysmon/shared/PluginHelper.java b/shared/src/main/java/sysmon/shared/PluginHelper.java index a1dc1d8..6e2ba1d 100644 --- a/shared/src/main/java/sysmon/shared/PluginHelper.java +++ b/shared/src/main/java/sysmon/shared/PluginHelper.java @@ -45,7 +45,7 @@ public class PluginHelper { } } catch (IOException | InterruptedException e) { - e.printStackTrace(); + log.warn("executeCommand() - exception: " + e.getMessage()); } return inputStream; From 126f256c6cd4513fd4e5608dbc703e0f94e07abb Mon Sep 17 00:00:00 2001 From: Mark Nellemann Date: Wed, 16 Jun 2021 10:20:17 +0200 Subject: [PATCH 3/3] Make threads configurable. Switch from jetty to netty-http. 
--- gradle.properties | 2 +- server/build.gradle | 5 +++-- server/src/main/java/sysmon/server/Application.java | 13 ++++++++++--- .../main/java/sysmon/server/ServerRouteBuilder.java | 12 +++++------- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/gradle.properties b/gradle.properties index 2f0aa76..9ad9b18 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,4 +1,4 @@ -version=0.0.4 +version=0.0.5 pf4jVersion=3.6.0 slf4jVersion=1.7.30 camelVersion=3.10.0 diff --git a/server/build.gradle b/server/build.gradle index 2d2c92a..4513ec5 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -18,10 +18,11 @@ dependencies { implementation group: 'org.apache.camel', name: 'camel-core', version: camelVersion implementation group: 'org.apache.camel', name: 'camel-main', version: camelVersion implementation group: 'org.apache.camel', name: 'camel-rest', version: camelVersion - implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion - implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion + //implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion + //implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion implementation group: 'org.apache.camel', name: 'camel-jackson', version: camelVersion implementation group: 'org.apache.camel', name: 'camel-influxdb', version: camelVersion + implementation group: 'org.apache.camel', name: 'camel-netty-http', version: camelVersion } def projectName = "sysmon-server" diff --git a/server/src/main/java/sysmon/server/Application.java b/server/src/main/java/sysmon/server/Application.java index 1ff0273..dd22e72 100644 --- a/server/src/main/java/sysmon/server/Application.java +++ b/server/src/main/java/sysmon/server/Application.java @@ -7,7 +7,6 @@ import picocli.CommandLine; import java.io.IOException; import java.net.URL; -import java.util.Properties; import java.util.concurrent.Callable; 
@CommandLine.Command(name = "sysmon-server", mixinStandardHelpOptions = true) @@ -22,12 +21,17 @@ public class Application implements Callable { @CommandLine.Option(names = { "-p", "--influxdb-pass" }, description = "InfluxDB Password (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "") private String influxPass; + //@CommandLine.Option(names = { "-d", "--influxdb-db" }, description = "InfluxDB Database (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "") + //private String influxName = "sysmon"; + @CommandLine.Option(names = { "-H", "--server-host" }, description = "Server listening address (default: ${DEFAULT-VALUE}).", paramLabel = "") private String listenHost = "0.0.0.0"; @CommandLine.Option(names = { "-P", "--server-port" }, description = "Server listening port (default: ${DEFAULT-VALUE}).", paramLabel = "") private Integer listenPort = 9925; + @CommandLine.Option(names = { "-t", "--threads" }, description = "Threads for processing inbound metrics(default: ${DEFAULT-VALUE}).", paramLabel = "") + private Integer threads = 5; public static void main(String... 
args) { int exitCode = new CommandLine(new Application()).execute(args); @@ -38,17 +42,20 @@ public class Application implements Callable { @Override public Integer call() throws IOException { + /* Properties properties = new Properties(); properties.put("http.host", listenHost); properties.put("http.port", listenPort); - +*/ InfluxDB influxConnectionBean = InfluxDBFactory.connect(influxUrl.toString(), influxUser, influxPass); Main main = new Main(); main.bind("myInfluxConnection", influxConnectionBean); main.bind("http.host", listenHost); main.bind("http.port", listenPort); - main.bind("properties", properties); + //main.bind("properties", properties); + main.bind("threads", threads); + //main.bind("influxdb_name", influxName); main.configure().addRoutesBuilder(ServerRouteBuilder.class); // now keep the application running until the JVM is terminated (ctrl + c or sigterm) diff --git a/server/src/main/java/sysmon/server/ServerRouteBuilder.java b/server/src/main/java/sysmon/server/ServerRouteBuilder.java index 9442696..5bfc315 100644 --- a/server/src/main/java/sysmon/server/ServerRouteBuilder.java +++ b/server/src/main/java/sysmon/server/ServerRouteBuilder.java @@ -13,17 +13,19 @@ public class ServerRouteBuilder extends RouteBuilder { Registry registry = getContext().getRegistry(); - restConfiguration().component("jetty") + restConfiguration().component("netty-http") .bindingMode(RestBindingMode.auto) .host(registry.lookupByNameAndType("http.host", String.class)) .port(registry.lookupByNameAndType("http.port", Integer.class)); + /* rest() .get("/") .produces("text/html") .route() .to("log:stdout") .endRest(); + */ rest() .post("/metrics") @@ -36,15 +38,11 @@ public class ServerRouteBuilder extends RouteBuilder { .to("seda:inbound") .endRest(); - - //from("seda:inbound").log("Got metric from: ${header.component}").to("mock:sink"); - - // TODO: Make 'concurrentConsumers' configurable - from("seda:inbound?concurrentConsumers=5") + 
fromF("seda:inbound?concurrentConsumers=%s", registry.lookupByNameAndType("threads", Integer.class)) .log(">>> metric: ${header.hostname} - ${body}") .doTry() .process(new MetricResultToPointProcessor()) - .to("influxdb://ref.myInfluxConnection?databaseName=sysmon&retentionPolicy=autogen") + .toF("influxdb://ref.myInfluxConnection?databaseName=%s&retentionPolicy=autogen", "sysmon") .doCatch(Exception.class) .log("Error storing metric to InfluxDB: ${exception}") .end();