commit
e68802f065
22
README.md
22
README.md
|
@ -4,6 +4,28 @@ Java based system monitoring solution with support for plugins.
|
||||||
|
|
||||||
- Example Grafana [dashboard](https://bitbucket.org/mnellemann/sysmon/downloads/sysmon-example-dashboard.png) showing metrics from a host running *sysmon*.
|
- Example Grafana [dashboard](https://bitbucket.org/mnellemann/sysmon/downloads/sysmon-example-dashboard.png) showing metrics from a host running *sysmon*.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Known problems
|
||||||
|
|
||||||
|
### Correct timezone and clock
|
||||||
|
|
||||||
|
- Ensure you have **correct timezone and date/time** and NTPd (or similar) running to keep it accurate!
|
||||||
|
|
||||||
|
### Naming collision
|
||||||
|
|
||||||
|
You can't have hosts with the same name, as these cannot be distinguished when metrics are
|
||||||
|
written to InfluxDB (which uses the hostname as key).
|
||||||
|
|
||||||
|
### Renaming hosts
|
||||||
|
|
||||||
|
If you rename a host, the metrics in InfluxDB will still be available by the old hostname, and new metrics will be written with the new hostname. There is no easy way to migrate the old data, but you can delete it easily:
|
||||||
|
|
||||||
|
```text
|
||||||
|
USE sysmon;
|
||||||
|
DELETE WHERE hostname = 'unknown';
|
||||||
|
```
|
||||||
|
|
||||||
## Components
|
## Components
|
||||||
|
|
||||||
### Client
|
### Client
|
||||||
|
|
|
@ -41,8 +41,8 @@ public class Application implements Callable<Integer> {
|
||||||
try {
|
try {
|
||||||
hostname = InetAddress.getLocalHost().getHostName();
|
hostname = InetAddress.getLocalHost().getHostName();
|
||||||
} catch (UnknownHostException e) {
|
} catch (UnknownHostException e) {
|
||||||
log.warn(e.getMessage());
|
System.err.println("Could not detect hostname. Use the '-n' or '--hostname' option to specify it.");
|
||||||
hostname = "unknown";
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
version=0.0.4
|
version=0.0.5
|
||||||
pf4jVersion=3.6.0
|
pf4jVersion=3.6.0
|
||||||
slf4jVersion=1.7.30
|
slf4jVersion=1.7.30
|
||||||
camelVersion=3.10.0
|
camelVersion=3.10.0
|
||||||
picocliVersion=4.6.1
|
picocliVersion=4.6.1
|
||||||
oshiVersion=5.7.3
|
oshiVersion=5.7.5
|
|
@ -1,3 +1,3 @@
|
||||||
# Plugins
|
# System Monitor Plugins
|
||||||
|
|
||||||
Collections of base plugins.
|
Collection of standard sysmon plugins.
|
|
@ -1,6 +1,6 @@
|
||||||
# AIX Plugin
|
# AIX Plugin
|
||||||
|
|
||||||
## Processor Extension
|
## LPAR Processor Extension
|
||||||
|
|
||||||
The processor extension works for both AIX and Linux on the Power ppc64/ppc64le architecture.
|
The processor extension works for both AIX and Linux on the Power ppc64/ppc64le architecture.
|
||||||
|
|
||||||
|
@ -10,35 +10,6 @@ Metrics reported are:
|
||||||
- **type** - Processor type, Shared or Dedicated
|
- **type** - Processor type, Shared or Dedicated
|
||||||
- **lcpu** - Number of logical CPU's available for this partition
|
- **lcpu** - Number of logical CPUs available for this partition
|
||||||
- **ent** - Processor entitlements available for this partition
|
- **ent** - Processor entitlements available for this partition
|
||||||
- **user** - Indicates the percentage of the entitled processing capacity used while executing at the user level (application).
|
|
||||||
- **sys** - Indicates the percentage of the entitled processing capacity used while executing at the system level (kernel).
|
|
||||||
- **idle** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and did not have any outstanding disk I/O request.
|
|
||||||
- **wait** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and had outstanding disk I/O request(s).
|
|
||||||
- **physc** - Indicates the number of physical processors consumed.
|
- **physc** - Indicates the number of physical processors consumed.
|
||||||
- **entc** - Indicates the percentage of the entitled capacity consumed.
|
- **entc** - Indicates the percentage of the entitled capacity consumed.
|
||||||
- **lbusy** - Indicates the percentage of logical processor(s) utilization that occurred while executing at the user and system level.
|
- **lbusy** - Indicates the percentage of logical processor(s) utilization that occurred while executing at the user and system level.
|
||||||
|
|
||||||
|
|
||||||
## Memory Extension
|
|
||||||
|
|
||||||
Metrics reported are:
|
|
||||||
|
|
||||||
- **total** - Total amount of memory (in KB).
|
|
||||||
- **used** - real memory consumption (in KB).
|
|
||||||
- **free** - free memory for use (in KB).
|
|
||||||
- **pin** - pinned memory consumption (in KB).
|
|
||||||
- **virtual** - virtual memory consumption (in KB).
|
|
||||||
- **available** - available memory (if freeing up virtual) (in KB).
|
|
||||||
- **paged** - paging space consumption (in KB).
|
|
||||||
|
|
||||||
*Pinning a memory region prohibits the pager from stealing pages from the pages backing the pinned memory region.*
|
|
||||||
|
|
||||||
## Disk Extension
|
|
||||||
|
|
||||||
Only reports first device found. Improvements on the TODO.
|
|
||||||
|
|
||||||
Metrics reported are:
|
|
||||||
|
|
||||||
- **device** - Name of device.
|
|
||||||
- **reads** - The total number of KB read.
|
|
||||||
- **writes** - The total number of KB written.
|
|
||||||
|
|
|
@ -1,36 +1,38 @@
|
||||||
# Base Plugin
|
# Base Plugin
|
||||||
|
|
||||||
|
The base plugin uses the [oshi](https://github.com/oshi/oshi) library to get its metrics.
|
||||||
|
|
||||||
## Processor Extension
|
## Processor Extension
|
||||||
|
|
||||||
Reports the following metrics seen:
|
Reports the following metrics:
|
||||||
|
|
||||||
- **user** - CPU time spend on user processes.
|
|
||||||
- **system** -CPU time spend on system processes.
|
- **system** - CPU time spent on system processes.
|
||||||
- **iowait** - CPU time spend on waiting (for i/o).
|
- **user** - CPU time spent on user processes.
|
||||||
- **idle** - CPU time spend on idle (doing nothing).
|
- **nice** - CPU time spent on user processes running at lower priority.
|
||||||
- **busy** - CPU time not spend on idle (working).
|
- **iowait** - CPU time spent waiting (for i/o).
|
||||||
|
- **steal** - CPU time stolen by hypervisor and given to other virtual systems.
|
||||||
|
- **irq** - CPU time spent by kernel on interrupt requests.
|
||||||
|
- **softirq** - CPU time spent by kernel on soft interrupt requests.
|
||||||
|
- **idle** - CPU time spent idling (doing nothing).
|
||||||
|
- **busy** - CPU time spent working.
|
||||||
|
|
||||||
|
|
||||||
## Memory Extension
|
## Memory Extension
|
||||||
|
|
||||||
Reports the following metrics, from the *free* command:
|
Reports the following metrics (in bytes):
|
||||||
|
|
||||||
- **total** - The total amount of (installed) memory (in KB).
|
- **available** - Estimation of how much memory is available for starting new applications, without swapping.
|
||||||
- **used** - Used memory (calculated as total - free - buffers - cache) (in KB).
|
- **total** - The total amount of (installed) memory.
|
||||||
- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB).
|
|
||||||
- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB).
|
|
||||||
- **buffers** - Sum of buffers and cache (in KB).
|
|
||||||
- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB).
|
|
||||||
- **usage** - Percentage of memory used out of the total amount of memory.
|
- **usage** - Percentage of memory used out of the total amount of memory.
|
||||||
|
- **paged** - ...
|
||||||
|
- **virtual** - ...
|
||||||
|
|
||||||
|
|
||||||
## Disk Extension
|
## Disk Extension
|
||||||
|
|
||||||
|
|
||||||
Only reports first device found. Improvements on the TODO.
|
|
||||||
|
|
||||||
Metrics reported are:
|
Metrics reported are:
|
||||||
|
|
||||||
- **device** - Name of device.
|
- **reads** - The total number of bytes read.
|
||||||
- **reads** - The total number of KB read.
|
- **writes** - The total number of bytes written.
|
||||||
- **writes** - The total number of KB written.
|
- **iotime** - Time spent on IO in milliseconds.
|
||||||
|
- **queue** - Length of IO queue.
|
|
@ -1,7 +1,4 @@
|
||||||
# IBM i Plugin
|
# IBM i Plugin
|
||||||
|
|
||||||
## Processor Extension
|
TODO. Nothing here yet.
|
||||||
|
|
||||||
## Memory Extension
|
|
||||||
|
|
||||||
## Disk Extension
|
|
||||||
|
|
|
@ -1,36 +1,7 @@
|
||||||
# Base Plugin
|
# Linux Plugins
|
||||||
|
|
||||||
## Processor Extension
|
## Components
|
||||||
|
|
||||||
Reports the following metrics seen:
|
### Network Sockets
|
||||||
|
|
||||||
- **user** - CPU time spend on user processes.
|
Collects statistics from */proc/net/sockstat*.
|
||||||
- **system** -CPU time spend on system processes.
|
|
||||||
- **iowait** - CPU time spend on waiting (for i/o).
|
|
||||||
- **idle** - CPU time spend on idle (doing nothing).
|
|
||||||
- **busy** - CPU time not spend on idle (working).
|
|
||||||
|
|
||||||
|
|
||||||
## Memory Extension
|
|
||||||
|
|
||||||
Reports the following metrics, from the *free* command:
|
|
||||||
|
|
||||||
- **total** - The total amount of (installed) memory (in KB).
|
|
||||||
- **used** - Used memory (calculated as total - free - buffers - cache) (in KB).
|
|
||||||
- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB).
|
|
||||||
- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB).
|
|
||||||
- **buffers** - Sum of buffers and cache (in KB).
|
|
||||||
- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB).
|
|
||||||
- **usage** - Percentage of memory used out of the total amount of memory.
|
|
||||||
|
|
||||||
|
|
||||||
## Disk Extension
|
|
||||||
|
|
||||||
|
|
||||||
Only reports first device found. Improvements on the TODO.
|
|
||||||
|
|
||||||
Metrics reported are:
|
|
||||||
|
|
||||||
- **device** - Name of device.
|
|
||||||
- **reads** - The total number of KB read.
|
|
||||||
- **writes** - The total number of KB written.
|
|
|
@ -18,10 +18,11 @@ dependencies {
|
||||||
implementation group: 'org.apache.camel', name: 'camel-core', version: camelVersion
|
implementation group: 'org.apache.camel', name: 'camel-core', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-main', version: camelVersion
|
implementation group: 'org.apache.camel', name: 'camel-main', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-rest', version: camelVersion
|
implementation group: 'org.apache.camel', name: 'camel-rest', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion
|
//implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion
|
//implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-jackson', version: camelVersion
|
implementation group: 'org.apache.camel', name: 'camel-jackson', version: camelVersion
|
||||||
implementation group: 'org.apache.camel', name: 'camel-influxdb', version: camelVersion
|
implementation group: 'org.apache.camel', name: 'camel-influxdb', version: camelVersion
|
||||||
|
implementation group: 'org.apache.camel', name: 'camel-netty-http', version: camelVersion
|
||||||
}
|
}
|
||||||
|
|
||||||
def projectName = "sysmon-server"
|
def projectName = "sysmon-server"
|
||||||
|
|
|
@ -7,7 +7,6 @@ import picocli.CommandLine;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.Properties;
|
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
@CommandLine.Command(name = "sysmon-server", mixinStandardHelpOptions = true)
|
@CommandLine.Command(name = "sysmon-server", mixinStandardHelpOptions = true)
|
||||||
|
@ -22,12 +21,17 @@ public class Application implements Callable<Integer> {
|
||||||
@CommandLine.Option(names = { "-p", "--influxdb-pass" }, description = "InfluxDB Password (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "<pass>")
|
@CommandLine.Option(names = { "-p", "--influxdb-pass" }, description = "InfluxDB Password (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "<pass>")
|
||||||
private String influxPass;
|
private String influxPass;
|
||||||
|
|
||||||
|
//@CommandLine.Option(names = { "-d", "--influxdb-db" }, description = "InfluxDB Database (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "<name>")
|
||||||
|
//private String influxName = "sysmon";
|
||||||
|
|
||||||
@CommandLine.Option(names = { "-H", "--server-host" }, description = "Server listening address (default: ${DEFAULT-VALUE}).", paramLabel = "<addr>")
|
@CommandLine.Option(names = { "-H", "--server-host" }, description = "Server listening address (default: ${DEFAULT-VALUE}).", paramLabel = "<addr>")
|
||||||
private String listenHost = "0.0.0.0";
|
private String listenHost = "0.0.0.0";
|
||||||
|
|
||||||
@CommandLine.Option(names = { "-P", "--server-port" }, description = "Server listening port (default: ${DEFAULT-VALUE}).", paramLabel = "<port>")
|
@CommandLine.Option(names = { "-P", "--server-port" }, description = "Server listening port (default: ${DEFAULT-VALUE}).", paramLabel = "<port>")
|
||||||
private Integer listenPort = 9925;
|
private Integer listenPort = 9925;
|
||||||
|
|
||||||
|
@CommandLine.Option(names = { "-t", "--threads" }, description = "Threads for processing inbound metrics(default: ${DEFAULT-VALUE}).", paramLabel = "<num>")
|
||||||
|
private Integer threads = 5;
|
||||||
|
|
||||||
public static void main(String... args) {
|
public static void main(String... args) {
|
||||||
int exitCode = new CommandLine(new Application()).execute(args);
|
int exitCode = new CommandLine(new Application()).execute(args);
|
||||||
|
@ -38,17 +42,20 @@ public class Application implements Callable<Integer> {
|
||||||
@Override
|
@Override
|
||||||
public Integer call() throws IOException {
|
public Integer call() throws IOException {
|
||||||
|
|
||||||
|
/*
|
||||||
Properties properties = new Properties();
|
Properties properties = new Properties();
|
||||||
properties.put("http.host", listenHost);
|
properties.put("http.host", listenHost);
|
||||||
properties.put("http.port", listenPort);
|
properties.put("http.port", listenPort);
|
||||||
|
*/
|
||||||
InfluxDB influxConnectionBean = InfluxDBFactory.connect(influxUrl.toString(), influxUser, influxPass);
|
InfluxDB influxConnectionBean = InfluxDBFactory.connect(influxUrl.toString(), influxUser, influxPass);
|
||||||
|
|
||||||
Main main = new Main();
|
Main main = new Main();
|
||||||
main.bind("myInfluxConnection", influxConnectionBean);
|
main.bind("myInfluxConnection", influxConnectionBean);
|
||||||
main.bind("http.host", listenHost);
|
main.bind("http.host", listenHost);
|
||||||
main.bind("http.port", listenPort);
|
main.bind("http.port", listenPort);
|
||||||
main.bind("properties", properties);
|
//main.bind("properties", properties);
|
||||||
|
main.bind("threads", threads);
|
||||||
|
//main.bind("influxdb_name", influxName);
|
||||||
main.configure().addRoutesBuilder(ServerRouteBuilder.class);
|
main.configure().addRoutesBuilder(ServerRouteBuilder.class);
|
||||||
|
|
||||||
// now keep the application running until the JVM is terminated (ctrl + c or sigterm)
|
// now keep the application running until the JVM is terminated (ctrl + c or sigterm)
|
||||||
|
|
|
@ -13,17 +13,19 @@ public class ServerRouteBuilder extends RouteBuilder {
|
||||||
|
|
||||||
Registry registry = getContext().getRegistry();
|
Registry registry = getContext().getRegistry();
|
||||||
|
|
||||||
restConfiguration().component("jetty")
|
restConfiguration().component("netty-http")
|
||||||
.bindingMode(RestBindingMode.auto)
|
.bindingMode(RestBindingMode.auto)
|
||||||
.host(registry.lookupByNameAndType("http.host", String.class))
|
.host(registry.lookupByNameAndType("http.host", String.class))
|
||||||
.port(registry.lookupByNameAndType("http.port", Integer.class));
|
.port(registry.lookupByNameAndType("http.port", Integer.class));
|
||||||
|
|
||||||
|
/*
|
||||||
rest()
|
rest()
|
||||||
.get("/")
|
.get("/")
|
||||||
.produces("text/html")
|
.produces("text/html")
|
||||||
.route()
|
.route()
|
||||||
.to("log:stdout")
|
.to("log:stdout")
|
||||||
.endRest();
|
.endRest();
|
||||||
|
*/
|
||||||
|
|
||||||
rest()
|
rest()
|
||||||
.post("/metrics")
|
.post("/metrics")
|
||||||
|
@ -36,15 +38,11 @@ public class ServerRouteBuilder extends RouteBuilder {
|
||||||
.to("seda:inbound")
|
.to("seda:inbound")
|
||||||
.endRest();
|
.endRest();
|
||||||
|
|
||||||
|
fromF("seda:inbound?concurrentConsumers=%s", registry.lookupByNameAndType("threads", Integer.class))
|
||||||
//from("seda:inbound").log("Got metric from: ${header.component}").to("mock:sink");
|
|
||||||
|
|
||||||
// TODO: Make 'concurrentConsumers' configurable
|
|
||||||
from("seda:inbound?concurrentConsumers=5")
|
|
||||||
.log(">>> metric: ${header.hostname} - ${body}")
|
.log(">>> metric: ${header.hostname} - ${body}")
|
||||||
.doTry()
|
.doTry()
|
||||||
.process(new MetricResultToPointProcessor())
|
.process(new MetricResultToPointProcessor())
|
||||||
.to("influxdb://ref.myInfluxConnection?databaseName=sysmon&retentionPolicy=autogen")
|
.toF("influxdb://ref.myInfluxConnection?databaseName=%s&retentionPolicy=autogen", "sysmon")
|
||||||
.doCatch(Exception.class)
|
.doCatch(Exception.class)
|
||||||
.log("Error storing metric to InfluxDB: ${exception}")
|
.log("Error storing metric to InfluxDB: ${exception}")
|
||||||
.end();
|
.end();
|
||||||
|
|
|
@ -2,7 +2,6 @@ package sysmon.shared;
|
||||||
|
|
||||||
import org.pf4j.ExtensionPoint;
|
import org.pf4j.ExtensionPoint;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
public interface MetricExtension extends ExtensionPoint {
|
public interface MetricExtension extends ExtensionPoint {
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,7 @@ public class PluginHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (IOException | InterruptedException e) {
|
} catch (IOException | InterruptedException e) {
|
||||||
e.printStackTrace();
|
log.warn("executeCommand() - exception: " + e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
return inputStream;
|
return inputStream;
|
||||||
|
|
Loading…
Reference in a new issue