More robust re-discovery and retry logic.

This commit is contained in:
Mark Nellemann 2020-08-21 08:51:47 +02:00
parent 360a9a61ac
commit e3717529b5
10 changed files with 59 additions and 45 deletions

View file

@ -6,7 +6,7 @@ Small utility to fetch metrics from one or more HMC's and push those to an Influ
## Usage Instructions
- Ensure you have correct date/time and NTP running to keep it accurate!
- Install jar, deb or rpm file from [downloads](https://bitbucket.org/mnellemann/hmci/downloads/) or compile from source.
- Install *.deb* or *.rpm* file from [downloads](https://bitbucket.org/mnellemann/hmci/downloads/) or compile from source
- Copy the *doc/hmci.groovy.tpl* configuration template into */etc/hmci.groovy* and edit the configuration to suit your environment
- Configure Grafana to communicate with your InfluxDB and import dashboards from *doc/* into Grafana (The dashboards are slightly modified versions of the dashboard provided by the nmon2influxdb tool)
- Run the *bin/hmci* program in a shell, as a @reboot cron task or set up a proper service :)
@ -41,4 +41,4 @@ Start the Grafana container, linking it to the InfluxDB container
docker run --name grafana --link influxdb:influxdb --rm -d -p 3000:3000 grafana/grafana:7.1.3
Configure a new InfluxDB datasource on *http://influxdb:8086* named *hmci* to connect to the InfluxDB container. The database must be created beforehand, this can be done by running the hmci tool first. Grafana dashboards can be imported from the **doc/** folder.
Configure a new InfluxDB datasource on *http://influxdb:8086* named *hmci* to connect to the InfluxDB container. The database must be created beforehand, this can be done by running the hmci tool first. Grafana dashboards can be imported from the *doc/* folder.

View file

@ -43,7 +43,7 @@
}
]
},
"description": "HMC metrics imported from nmon2influxdb",
"description": "HMC Logical Partitions",
"editable": true,
"gnetId": 1510,
"graphTooltip": 0,
@ -1502,4 +1502,4 @@
"title": "Power Systems - HMC Logical Partitions",
"uid": "Xl7oHESGz",
"version": 3
}
}

View file

@ -43,7 +43,7 @@
}
]
},
"description": "nmon2influxdb HMC system view",
"description": "HMC Managed Systems",
"editable": true,
"gnetId": 1465,
"graphTooltip": 0,
@ -1145,4 +1145,4 @@
"title": "Power Systems - HMC Managed Systems",
"uid": "ClJhHPIGz",
"version": 4
}
}

View file

@ -2,7 +2,7 @@
Copy this file to /etc/hmci.groovy and change it to suit your environment.
*/
// Query HMC's for data - in seconds
// How often to query HMC's for data - in seconds
hmci.refresh = 30
// Rescan HMC's for new systems and partitions - every x refresh
@ -16,7 +16,7 @@ influx {
database = "hmci"
}
// One or more HMC to query for data and metrics
// One or more HMC's to query for data and metrics
hmc {
// HMC on our primary site

View file

@ -1,2 +1,2 @@
group = biz.nellemann.hmci
version = 1.0.4
version = 1.0.5

View file

@ -75,10 +75,9 @@ class App implements Runnable {
hmcClients.each { hmcId, hmcClient ->
hmcClient.logoff()
hmcClient.login()
try {
hmcClient.login()
hmcClient.getManagedSystems().each { systemId, system ->
// Add to list of known systems
@ -93,7 +92,7 @@ class App implements Runnable {
}
} catch(Exception e) {
log.error("discover() - " + hmcId + " error: " + e.message)
hmcClients.remove(hmcId)
//hmcClients.remove(hmcId)
}
}

View file

@ -68,7 +68,11 @@ class HmcClient {
*
* @throws IOException
*/
void login() throws IOException {
void login(Boolean force = false) throws IOException {
if(authToken && !force) {
return
}
String payload = """\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@ -92,6 +96,7 @@ class HmcClient {
// Get response body and parse
String responseBody = response.body.string()
response.body().close()
def xml = new XmlSlurper().parseText(responseBody)
authToken = xml.toString()
@ -220,7 +225,7 @@ class HmcClient {
feed?.entry?.each { entry ->
String link = entry.link["@href"]
if(entry.category["@term"] == "ManagedSystem") {
jsonBody = getReponseBody(new URL(link))
jsonBody = getResponseBody(new URL(link))
}
}
@ -249,7 +254,7 @@ class HmcClient {
feed?.entry?.each { entry ->
String link = entry.link["@href"]
if(entry.category["@term"] == "LogicalPartition") {
jsonBody = getReponseBody(new URL(link))
jsonBody = getResponseBody(new URL(link))
}
}
@ -263,10 +268,11 @@ class HmcClient {
* @param url
* @return
*/
protected String getReponseBody(URL url) {
//log.debug("getBody() - " + url.toString())
protected String getResponseBody(URL url) {
Response response = getResponse(url)
return response.body.string()
String body = response.body().string()
response.body().close()
return body
}
@ -277,7 +283,7 @@ class HmcClient {
* @param url
* @return
*/
private Response getResponse(URL url) {
private Response getResponse(URL url, Integer retry = 0) {
Request request = new Request.Builder()
.url(url)
@ -288,11 +294,20 @@ class HmcClient {
Response response = client.newCall(request).execute();
if (!response.isSuccessful()) {
response.body().close()
if(response.code == 401) {
login()
} else {
throw new IOException("Unexpected code " + response)
login(true)
return getResponse(url, retry++)
}
if(retry < 2) {
log.warn("getResponse() - Retrying due to unexpected response: " + response.code)
return getResponse(url, retry++)
}
log.error("getResponse() - Unexpected response: " + response.code)
throw new IOException("getResponse() - Unexpected response: " + response.code)
};
return response

View file

@ -50,8 +50,8 @@ class LogicalPartition extends MetaSystem {
log.debug("getMemoryMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [
logicalMem: metrics.systemUtil.utilSamples.first().lparsUtil.first().memory.logicalMem.first(),
backedPhysicalMem: metrics.systemUtil.utilSamples.first().lparsUtil.first().memory.backedPhysicalMem.first(),
logicalMem: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.memory?.logicalMem?.first(),
backedPhysicalMem: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.memory?.backedPhysicalMem?.first(),
]
map.put("fields", fieldsMap)
log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString())
@ -74,17 +74,17 @@ class LogicalPartition extends MetaSystem {
log.debug("getProcessorMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [
utilizedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedProcUnits.first(),
utilizedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedProcUnits?.first(),
//maxVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxVirtualProcessors.first(),
//currentVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.currentVirtualProcessors.first(),
//donatedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.donatedProcUnits.first(),
//entitledProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.entitledProcUnits.first(),
//idleProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.idleProcUnits.first(),
//maxProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxProcUnits.first(),
utilizedCappedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedCappedProcUnits.first(),
utilizedUncappedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedUncappedProcUnits.first(),
timePerInstructionExecution: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.timeSpentWaitingForDispatch.first(),
timeSpentWaitingForDispatch: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.timePerInstructionExecution.first(),
utilizedCappedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedCappedProcUnits?.first(),
utilizedUncappedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedUncappedProcUnits?.first(),
timePerInstructionExecution: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.timeSpentWaitingForDispatch?.first(),
timeSpentWaitingForDispatch: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.timePerInstructionExecution?.first(),
]
map.put("fields", fieldsMap)
log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString())
@ -98,7 +98,7 @@ class LogicalPartition extends MetaSystem {
List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().lparsUtil.first().network?.virtualEthernetAdapters?.each {
metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.network?.virtualEthernetAdapters?.each {
HashMap<String, String> tagsMap = [
system: system.name,
@ -132,7 +132,7 @@ class LogicalPartition extends MetaSystem {
List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().lparsUtil.first().storage?.virtualFiberChannelAdapters?.each {
metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.storage?.virtualFiberChannelAdapters?.each {
HashMap<String, String> tagsMap = [
system: system.name,

View file

@ -55,10 +55,10 @@ class ManagedSystem extends MetaSystem {
log.debug("getMemoryMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [
totalMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.totalMem.first(),
availableMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.availableMem.first(),
configurableMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.configurableMem.first(),
assignedMemToLpars: metrics.systemUtil.utilSamples.first().serverUtil.memory.assignedMemToLpars.first(),
totalMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.totalMem?.first(),
availableMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.availableMem?.first(),
configurableMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.configurableMem?.first(),
assignedMemToLpars: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.assignedMemToLpars?.first(),
]
map.put("fields", fieldsMap)
log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString())
@ -80,10 +80,10 @@ class ManagedSystem extends MetaSystem {
log.debug("getProcessorMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [
availableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.totalProcUnits.first(),
utilizedProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.utilizedProcUnits.first(),
availableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.availableProcUnits.first(),
configurableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.configurableProcUnits.first(),
availableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.totalProcUnits?.first(),
utilizedProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.utilizedProcUnits?.first(),
availableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.availableProcUnits?.first(),
configurableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.configurableProcUnits?.first(),
]
map.put("fields", fieldsMap)
log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString())
@ -97,7 +97,7 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().serverUtil.sharedProcessorPool.each {
metrics.systemUtil?.utilSamples?.first()?.serverUtil?.sharedProcessorPool?.each {
HashMap<String, String> tagsMap = [
system: name,
@ -125,7 +125,7 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().viosUtil.each {vios ->
metrics.systemUtil?.utilSamples?.first()?.viosUtil?.each {vios ->
vios.network.sharedAdapters.each {
@ -158,8 +158,8 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().viosUtil.each { vios ->
vios.storage.fiberChannelAdapters.each {
metrics.systemUtil?.utilSamples?.first()?.viosUtil?.each { vios ->
vios.storage?.fiberChannelAdapters?.each {
HashMap<String, String> tagsMap = [
system: name,

View file

@ -61,7 +61,7 @@ class HmcClientTest extends Specification {
mockServer.enqueue(new MockResponse().setBody(testJson));
when:
String jsonString = hmc.getReponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/ManagedSystem_e09834d1-c930-3883-bdad-405d8e26e166_20200807T122600+0200_20200807T122600+0200_30.json") as String))
String jsonString = hmc.getResponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/ManagedSystem_e09834d1-c930-3883-bdad-405d8e26e166_20200807T122600+0200_20200807T122600+0200_30.json") as String))
then:
jsonString.contains('"uuid": "e09834d1-c930-3883-bdad-405d8e26e166"')
@ -75,7 +75,7 @@ class HmcClientTest extends Specification {
mockServer.enqueue(new MockResponse().setBody(testJson));
when:
String jsonString = hmc.getReponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/LogicalPartition_2DE05DB6-8AD5-448F-8327-0F488D287E82_20200807T123730+0200_20200807T123730+0200_30.json") as String))
String jsonString = hmc.getResponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/LogicalPartition_2DE05DB6-8AD5-448F-8327-0F488D287E82_20200807T123730+0200_20200807T123730+0200_30.json") as String))
then:
jsonString.contains('"uuid": "b597e4da-2aab-3f52-8616-341d62153559"')