commit
e68802f065
22
README.md
22
README.md
|
@ -4,6 +4,28 @@ Java based system monitoring solution with support for plugins.
|
|||
|
||||
- Example Grafana [dashboard](https://bitbucket.org/mnellemann/sysmon/downloads/sysmon-example-dashboard.png) showing metrics from a host running *sysmon*.
|
||||
|
||||
|
||||
|
||||
## Known problems
|
||||
|
||||
### Correct timezone and clock
|
||||
|
||||
- Ensure the **correct timezone and date/time** are set, and that NTPd (or similar) is running to keep the clock accurate!
|
||||
|
||||
### Naming collision
|
||||
|
||||
You can't have hosts with the same name, as these cannot be distinguished when metrics are
|
||||
written to InfluxDB (which uses the hostname as key).
|
||||
|
||||
### Renaming hosts
|
||||
|
||||
If you rename a host, the metrics in InfluxDB will still be available by the old hostname, and new metrics will be written with the new hostname. There is no easy way to migrate the old data, but you can delete it easily:
|
||||
|
||||
```text
|
||||
USE sysmon;
|
||||
DELETE WHERE hostname = 'unknown';
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### Client
|
||||
|
|
|
@ -41,8 +41,8 @@ public class Application implements Callable<Integer> {
|
|||
try {
|
||||
hostname = InetAddress.getLocalHost().getHostName();
|
||||
} catch (UnknownHostException e) {
|
||||
log.warn(e.getMessage());
|
||||
hostname = "unknown";
|
||||
System.err.println("Could not detect hostname. Use the '-n' or '--hostname' option to specify it.");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
version=0.0.4
|
||||
version=0.0.5
|
||||
pf4jVersion=3.6.0
|
||||
slf4jVersion=1.7.30
|
||||
camelVersion=3.10.0
|
||||
picocliVersion=4.6.1
|
||||
oshiVersion=5.7.3
|
||||
oshiVersion=5.7.5
|
|
@ -1,3 +1,3 @@
|
|||
# Plugins
|
||||
# System Monitor Plugins
|
||||
|
||||
Collections of base plugins.
|
||||
Collection of standard sysmon plugins.
|
|
@ -1,6 +1,6 @@
|
|||
# AIX Plugin
|
||||
|
||||
## Processor Extension
|
||||
## LPAR Processor Extension
|
||||
|
||||
The processor extension works for both AIX and Linux on the Power ppc64/ppc64le architecture.
|
||||
|
||||
|
@ -10,35 +10,6 @@ Metrics reported are:
|
|||
- **type** - Processor type, Shared or Dedicated
|
||||
- **lcpu** - Number of logical CPUs available for this partition
|
||||
- **ent** - Processor entitlements available for this partition
|
||||
- **user** - Indicates the percentage of the entitled processing capacity used while executing at the user level (application).
|
||||
- **sys** - Indicates the percentage of the entitled processing capacity used while executing at the system level (kernel).
|
||||
- **idle** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and did not have any outstanding disk I/O request.
|
||||
- **wait** - Indicates the percentage of the entitled processing capacity unused while the partition was idle and had outstanding disk I/O request(s).
|
||||
- **physc** - Indicates the number of physical processors consumed.
|
||||
- **entc** - Indicates the percentage of the entitled capacity consumed.
|
||||
- **lbusy** - Indicates the percentage of logical processor(s) utilization that occurred while executing at the user and system level.
|
||||
|
||||
|
||||
## Memory Extension
|
||||
|
||||
Metrics reported are:
|
||||
|
||||
- **total** - Total amount of memory (in KB).
|
||||
- **used** - real memory consumption (in KB).
|
||||
- **free** - free memory for use (in KB).
|
||||
- **pin** - pinned memory consumption (in KB).
|
||||
- **virtual** - virtual memory consumption (in KB).
|
||||
- **available** - available memory (if freeing up virtual) (in KB).
|
||||
- **paged** - paging space consumption (in KB).
|
||||
|
||||
*Pinning a memory region prohibits the pager from stealing pages from the pages backing the pinned memory region.*
|
||||
|
||||
## Disk Extension
|
||||
|
||||
Only reports the first device found. Improvements are on the TODO list.
|
||||
|
||||
Metrics reported are:
|
||||
|
||||
- **device** - Name of device.
|
||||
- **reads** - The total number of KB read.
|
||||
- **writes** - The total number of KB written.
|
||||
|
|
|
@ -1,36 +1,38 @@
|
|||
# Base Plugin
|
||||
|
||||
The base plugin uses the [oshi](https://github.com/oshi/oshi) library to get its metrics.
|
||||
|
||||
## Processor Extension
|
||||
|
||||
Reports the following metrics seen:
|
||||
|
||||
- **user** - CPU time spent on user processes.
|
||||
- **system** - CPU time spent on system processes.
|
||||
- **iowait** - CPU time spent waiting (for i/o).
|
||||
- **idle** - CPU time spent idle (doing nothing).
|
||||
- **busy** - CPU time not spent idle (working).
|
||||
- **user** - CPU time spent on user processes.
|
||||
- **nice** - CPU time spent on user processes running at lower priority.
|
||||
- **iowait** - CPU time spent waiting (for i/o).
|
||||
- **steal** - CPU time stolen by hypervisor and given to other virtual systems.
|
||||
- **irq** - CPU time spent by the kernel on interrupt requests.
|
||||
- **softirq** - CPU time spent by the kernel on soft interrupt requests.
|
||||
- **idle** - CPU time spent idling (doing nothing).
|
||||
- **busy** - CPU time spent working.
|
||||
|
||||
|
||||
## Memory Extension
|
||||
|
||||
Reports the following metrics, from the *free* command:
|
||||
Reports the following metrics (in bytes):
|
||||
|
||||
- **total** - The total amount of (installed) memory (in KB).
|
||||
- **used** - Used memory (calculated as total - free - buffers - cache) (in KB).
|
||||
- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB).
|
||||
- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB).
|
||||
- **buffers** - Sum of buffers and cache (in KB).
|
||||
- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB).
|
||||
- **available** - Estimation of how much memory is available for starting new applications, without swapping.
|
||||
- **total** - The total amount of (installed) memory.
|
||||
- **usage** - Percentage of memory used out of the total amount of memory.
|
||||
- **paged** - ...
|
||||
- **virtual** - ...
|
||||
|
||||
|
||||
## Disk Extension
|
||||
|
||||
|
||||
Only reports the first device found. Improvements are on the TODO list.
|
||||
|
||||
Metrics reported are:
|
||||
|
||||
- **device** - Name of device.
|
||||
- **reads** - The total number of KB read.
|
||||
- **writes** - The total number of KB written.
|
||||
- **reads** - The total number of bytes read.
|
||||
- **writes** - The total number of bytes written.
|
||||
- **iotime** - Time spent on IO in milliseconds.
|
||||
- **queue** - Length of the IO queue.
|
|
@ -1,7 +1,4 @@
|
|||
# IBM i Plugin
|
||||
|
||||
## Processor Extension
|
||||
TODO. Nothing here yet.
|
||||
|
||||
## Memory Extension
|
||||
|
||||
## Disk Extension
|
||||
|
|
|
@ -1,36 +1,7 @@
|
|||
# Base Plugin
|
||||
# Linux Plugins
|
||||
|
||||
## Processor Extension
|
||||
## Components
|
||||
|
||||
Reports the following metrics seen:
|
||||
### Network Sockets
|
||||
|
||||
- **user** - CPU time spent on user processes.
|
||||
- **system** - CPU time spent on system processes.
|
||||
- **iowait** - CPU time spent waiting (for i/o).
|
||||
- **idle** - CPU time spent idle (doing nothing).
|
||||
- **busy** - CPU time not spent idle (working).
|
||||
|
||||
|
||||
## Memory Extension
|
||||
|
||||
Reports the following metrics, from the *free* command:
|
||||
|
||||
- **total** - The total amount of (installed) memory (in KB).
|
||||
- **used** - Used memory (calculated as total - free - buffers - cache) (in KB).
|
||||
- **free** - Unused memory (MemFree and SwapFree in /proc/meminfo) (in KB).
|
||||
- **shared** - Memory used (mostly) by tmpfs (Shmem in /proc/meminfo) (in KB).
|
||||
- **buffers** - Sum of buffers and cache (in KB).
|
||||
- **available** - Estimation of how much memory is available for starting new applications, without swapping (in KB).
|
||||
- **usage** - Percentage of memory used out of the total amount of memory.
|
||||
|
||||
|
||||
## Disk Extension
|
||||
|
||||
|
||||
Only reports the first device found. Improvements are on the TODO list.
|
||||
|
||||
Metrics reported are:
|
||||
|
||||
- **device** - Name of device.
|
||||
- **reads** - The total number of KB read.
|
||||
- **writes** - The total number of KB written.
|
||||
Collects statistics from */proc/net/sockstat*.
|
|
@ -18,10 +18,11 @@ dependencies {
|
|||
implementation group: 'org.apache.camel', name: 'camel-core', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-main', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-rest', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion
|
||||
//implementation group: 'org.apache.camel', name: 'camel-jetty', version: camelVersion
|
||||
//implementation group: 'org.apache.camel', name: 'camel-stream', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-jackson', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-influxdb', version: camelVersion
|
||||
implementation group: 'org.apache.camel', name: 'camel-netty-http', version: camelVersion
|
||||
}
|
||||
|
||||
def projectName = "sysmon-server"
|
||||
|
|
|
@ -7,7 +7,6 @@ import picocli.CommandLine;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
@CommandLine.Command(name = "sysmon-server", mixinStandardHelpOptions = true)
|
||||
|
@ -22,12 +21,17 @@ public class Application implements Callable<Integer> {
|
|||
@CommandLine.Option(names = { "-p", "--influxdb-pass" }, description = "InfluxDB Password (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "<pass>")
|
||||
private String influxPass;
|
||||
|
||||
//@CommandLine.Option(names = { "-d", "--influxdb-db" }, description = "InfluxDB Database (default: ${DEFAULT-VALUE}).", defaultValue = "", paramLabel = "<name>")
|
||||
//private String influxName = "sysmon";
|
||||
|
||||
@CommandLine.Option(names = { "-H", "--server-host" }, description = "Server listening address (default: ${DEFAULT-VALUE}).", paramLabel = "<addr>")
|
||||
private String listenHost = "0.0.0.0";
|
||||
|
||||
@CommandLine.Option(names = { "-P", "--server-port" }, description = "Server listening port (default: ${DEFAULT-VALUE}).", paramLabel = "<port>")
|
||||
private Integer listenPort = 9925;
|
||||
|
||||
@CommandLine.Option(names = { "-t", "--threads" }, description = "Threads for processing inbound metrics(default: ${DEFAULT-VALUE}).", paramLabel = "<num>")
|
||||
private Integer threads = 5;
|
||||
|
||||
public static void main(String... args) {
|
||||
int exitCode = new CommandLine(new Application()).execute(args);
|
||||
|
@ -38,17 +42,20 @@ public class Application implements Callable<Integer> {
|
|||
@Override
|
||||
public Integer call() throws IOException {
|
||||
|
||||
/*
|
||||
Properties properties = new Properties();
|
||||
properties.put("http.host", listenHost);
|
||||
properties.put("http.port", listenPort);
|
||||
|
||||
*/
|
||||
InfluxDB influxConnectionBean = InfluxDBFactory.connect(influxUrl.toString(), influxUser, influxPass);
|
||||
|
||||
Main main = new Main();
|
||||
main.bind("myInfluxConnection", influxConnectionBean);
|
||||
main.bind("http.host", listenHost);
|
||||
main.bind("http.port", listenPort);
|
||||
main.bind("properties", properties);
|
||||
//main.bind("properties", properties);
|
||||
main.bind("threads", threads);
|
||||
//main.bind("influxdb_name", influxName);
|
||||
main.configure().addRoutesBuilder(ServerRouteBuilder.class);
|
||||
|
||||
// now keep the application running until the JVM is terminated (ctrl + c or sigterm)
|
||||
|
|
|
@ -13,17 +13,19 @@ public class ServerRouteBuilder extends RouteBuilder {
|
|||
|
||||
Registry registry = getContext().getRegistry();
|
||||
|
||||
restConfiguration().component("jetty")
|
||||
restConfiguration().component("netty-http")
|
||||
.bindingMode(RestBindingMode.auto)
|
||||
.host(registry.lookupByNameAndType("http.host", String.class))
|
||||
.port(registry.lookupByNameAndType("http.port", Integer.class));
|
||||
|
||||
/*
|
||||
rest()
|
||||
.get("/")
|
||||
.produces("text/html")
|
||||
.route()
|
||||
.to("log:stdout")
|
||||
.endRest();
|
||||
*/
|
||||
|
||||
rest()
|
||||
.post("/metrics")
|
||||
|
@ -36,15 +38,11 @@ public class ServerRouteBuilder extends RouteBuilder {
|
|||
.to("seda:inbound")
|
||||
.endRest();
|
||||
|
||||
|
||||
//from("seda:inbound").log("Got metric from: ${header.component}").to("mock:sink");
|
||||
|
||||
// TODO: Make 'concurrentConsumers' configurable
|
||||
from("seda:inbound?concurrentConsumers=5")
|
||||
fromF("seda:inbound?concurrentConsumers=%s", registry.lookupByNameAndType("threads", Integer.class))
|
||||
.log(">>> metric: ${header.hostname} - ${body}")
|
||||
.doTry()
|
||||
.process(new MetricResultToPointProcessor())
|
||||
.to("influxdb://ref.myInfluxConnection?databaseName=sysmon&retentionPolicy=autogen")
|
||||
.toF("influxdb://ref.myInfluxConnection?databaseName=%s&retentionPolicy=autogen", "sysmon")
|
||||
.doCatch(Exception.class)
|
||||
.log("Error storing metric to InfluxDB: ${exception}")
|
||||
.end();
|
||||
|
|
|
@ -2,7 +2,6 @@ package sysmon.shared;
|
|||
|
||||
import org.pf4j.ExtensionPoint;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface MetricExtension extends ExtensionPoint {
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ public class PluginHelper {
|
|||
}
|
||||
|
||||
} catch (IOException | InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
log.warn("executeCommand() - exception: " + e.getMessage());
|
||||
}
|
||||
|
||||
return inputStream;
|
||||
|
|
Loading…
Reference in a new issue