More robust re-discovery and retry logic.

This commit is contained in:
Mark Nellemann 2020-08-21 08:51:47 +02:00
parent 360a9a61ac
commit e3717529b5
10 changed files with 59 additions and 45 deletions

View file

@ -6,7 +6,7 @@ Small utility to fetch metrics from one or more HMC's and push those to an Influ
## Usage Instructions ## Usage Instructions
- Ensure you have correct date/time and NTP running to keep it accurate! - Ensure you have correct date/time and NTP running to keep it accurate!
- Install jar, deb or rpm file from [downloads](https://bitbucket.org/mnellemann/hmci/downloads/) or compile from source. - Install *.deb* or *.rpm* file from [downloads](https://bitbucket.org/mnellemann/hmci/downloads/) or compile from source
- Copy the *doc/hmci.groovy.tpl* configuration template into */etc/hmci.groovy* and edit the configuration to suit your environment - Copy the *doc/hmci.groovy.tpl* configuration template into */etc/hmci.groovy* and edit the configuration to suit your environment
- Configure Grafana to communicate with your InfluxDB and import dashboards from *doc/* into Grafana (The dashboards are slightly modified versions of the dashboard provided by the nmon2influxdb tool) - Configure Grafana to communicate with your InfluxDB and import dashboards from *doc/* into Grafana (The dashboards are slightly modified versions of the dashboard provided by the nmon2influxdb tool)
- Run the *bin/hmci* program in a shell, as a @reboot cron task or setup a proper service :) - Run the *bin/hmci* program in a shell, as a @reboot cron task or setup a proper service :)
@ -41,4 +41,4 @@ Start the Grafana container, linking it to the InfluxDB container
docker run --name grafana --link influxdb:influxdb --rm -d -p 3000:3000 grafana/grafana:7.1.3 docker run --name grafana --link influxdb:influxdb --rm -d -p 3000:3000 grafana/grafana:7.1.3
Configure a new InfluxDB datasource on *http://influxdb:8086* named *hmci* to connect to the InfluxDB container. The database must be created beforehand; this can be done by running the hmci tool first. Grafana dashboards can be imported from the **doc/** folder. Configure a new InfluxDB datasource on *http://influxdb:8086* named *hmci* to connect to the InfluxDB container. The database must be created beforehand; this can be done by running the hmci tool first. Grafana dashboards can be imported from the *doc/* folder.

View file

@ -43,7 +43,7 @@
} }
] ]
}, },
"description": "HMC metrics imported from nmon2influxdb", "description": "HMC Logical Partitions",
"editable": true, "editable": true,
"gnetId": 1510, "gnetId": 1510,
"graphTooltip": 0, "graphTooltip": 0,

View file

@ -43,7 +43,7 @@
} }
] ]
}, },
"description": "nmon2influxdb HMC system view", "description": "HMC Managed Systems",
"editable": true, "editable": true,
"gnetId": 1465, "gnetId": 1465,
"graphTooltip": 0, "graphTooltip": 0,

View file

@ -2,7 +2,7 @@
Copy this file to /etc/hmci.groovy and change it to suit your environment. Copy this file to /etc/hmci.groovy and change it to suit your environment.
*/ */
// Query HMC's for data - in seconds // How often to query HMC's for data - in seconds
hmci.refresh = 30 hmci.refresh = 30
// Rescan HMC's for new systems and partitions - every x refreshes // Rescan HMC's for new systems and partitions - every x refreshes
@ -16,7 +16,7 @@ influx {
database = "hmci" database = "hmci"
} }
// One or more HMC to query for data and metrics // One or more HMC's to query for data and metrics
hmc { hmc {
// HMC on our primary site // HMC on our primary site

View file

@ -1,2 +1,2 @@
group = biz.nellemann.hmci group = biz.nellemann.hmci
version = 1.0.4 version = 1.0.5

View file

@ -75,10 +75,9 @@ class App implements Runnable {
hmcClients.each { hmcId, hmcClient -> hmcClients.each { hmcId, hmcClient ->
hmcClient.logoff()
hmcClient.login()
try { try {
hmcClient.login()
hmcClient.getManagedSystems().each { systemId, system -> hmcClient.getManagedSystems().each { systemId, system ->
// Add to list of known systems // Add to list of known systems
@ -93,7 +92,7 @@ class App implements Runnable {
} }
} catch(Exception e) { } catch(Exception e) {
log.error("discover() - " + hmcId + " error: " + e.message) log.error("discover() - " + hmcId + " error: " + e.message)
hmcClients.remove(hmcId) //hmcClients.remove(hmcId)
} }
} }

View file

@ -68,7 +68,11 @@ class HmcClient {
* *
* @throws IOException * @throws IOException
*/ */
void login() throws IOException { void login(Boolean force = false) throws IOException {
if(authToken && !force) {
return
}
String payload = """\ String payload = """\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
@ -92,6 +96,7 @@ class HmcClient {
// Get response body and parse // Get response body and parse
String responseBody = response.body.string() String responseBody = response.body.string()
response.body().close()
def xml = new XmlSlurper().parseText(responseBody) def xml = new XmlSlurper().parseText(responseBody)
authToken = xml.toString() authToken = xml.toString()
@ -220,7 +225,7 @@ class HmcClient {
feed?.entry?.each { entry -> feed?.entry?.each { entry ->
String link = entry.link["@href"] String link = entry.link["@href"]
if(entry.category["@term"] == "ManagedSystem") { if(entry.category["@term"] == "ManagedSystem") {
jsonBody = getReponseBody(new URL(link)) jsonBody = getResponseBody(new URL(link))
} }
} }
@ -249,7 +254,7 @@ class HmcClient {
feed?.entry?.each { entry -> feed?.entry?.each { entry ->
String link = entry.link["@href"] String link = entry.link["@href"]
if(entry.category["@term"] == "LogicalPartition") { if(entry.category["@term"] == "LogicalPartition") {
jsonBody = getReponseBody(new URL(link)) jsonBody = getResponseBody(new URL(link))
} }
} }
@ -263,10 +268,11 @@ class HmcClient {
* @param url * @param url
* @return * @return
*/ */
protected String getReponseBody(URL url) { protected String getResponseBody(URL url) {
//log.debug("getBody() - " + url.toString())
Response response = getResponse(url) Response response = getResponse(url)
return response.body.string() String body = response.body().string()
response.body().close()
return body
} }
@ -277,7 +283,7 @@ class HmcClient {
* @param url * @param url
* @return * @return
*/ */
private Response getResponse(URL url) { private Response getResponse(URL url, Integer retry = 0) {
Request request = new Request.Builder() Request request = new Request.Builder()
.url(url) .url(url)
@ -288,11 +294,20 @@ class HmcClient {
Response response = client.newCall(request).execute(); Response response = client.newCall(request).execute();
if (!response.isSuccessful()) { if (!response.isSuccessful()) {
response.body().close()
if(response.code == 401) { if(response.code == 401) {
login() login(true)
} else { return getResponse(url, retry++)
throw new IOException("Unexpected code " + response)
} }
if(retry < 2) {
log.warn("getResponse() - Retrying due to unexpected response: " + response.code)
return getResponse(url, retry++)
}
log.error("getResponse() - Unexpected response: " + response.code)
throw new IOException("getResponse() - Unexpected response: " + response.code)
}; };
return response return response

View file

@ -50,8 +50,8 @@ class LogicalPartition extends MetaSystem {
log.debug("getMemoryMetrics() - tags: " + tagsMap.toString()) log.debug("getMemoryMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [ HashMap<String, BigDecimal> fieldsMap = [
logicalMem: metrics.systemUtil.utilSamples.first().lparsUtil.first().memory.logicalMem.first(), logicalMem: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.memory?.logicalMem?.first(),
backedPhysicalMem: metrics.systemUtil.utilSamples.first().lparsUtil.first().memory.backedPhysicalMem.first(), backedPhysicalMem: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.memory?.backedPhysicalMem?.first(),
] ]
map.put("fields", fieldsMap) map.put("fields", fieldsMap)
log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString()) log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString())
@ -74,17 +74,17 @@ class LogicalPartition extends MetaSystem {
log.debug("getProcessorMetrics() - tags: " + tagsMap.toString()) log.debug("getProcessorMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [ HashMap<String, BigDecimal> fieldsMap = [
utilizedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedProcUnits.first(), utilizedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedProcUnits?.first(),
//maxVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxVirtualProcessors.first(), //maxVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxVirtualProcessors.first(),
//currentVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.currentVirtualProcessors.first(), //currentVirtualProcessors: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.currentVirtualProcessors.first(),
//donatedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.donatedProcUnits.first(), //donatedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.donatedProcUnits.first(),
//entitledProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.entitledProcUnits.first(), //entitledProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.entitledProcUnits.first(),
//idleProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.idleProcUnits.first(), //idleProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.idleProcUnits.first(),
//maxProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxProcUnits.first(), //maxProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.maxProcUnits.first(),
utilizedCappedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedCappedProcUnits.first(), utilizedCappedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedCappedProcUnits?.first(),
utilizedUncappedProcUnits: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.utilizedUncappedProcUnits.first(), utilizedUncappedProcUnits: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.utilizedUncappedProcUnits?.first(),
timePerInstructionExecution: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.timeSpentWaitingForDispatch.first(), timePerInstructionExecution: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.timeSpentWaitingForDispatch?.first(),
timeSpentWaitingForDispatch: metrics.systemUtil.utilSamples.first().lparsUtil.first().processor.timePerInstructionExecution.first(), timeSpentWaitingForDispatch: metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.processor?.timePerInstructionExecution?.first(),
] ]
map.put("fields", fieldsMap) map.put("fields", fieldsMap)
log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString()) log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString())
@ -98,7 +98,7 @@ class LogicalPartition extends MetaSystem {
List<Map> list = new ArrayList<>() List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>() Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().lparsUtil.first().network?.virtualEthernetAdapters?.each { metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.network?.virtualEthernetAdapters?.each {
HashMap<String, String> tagsMap = [ HashMap<String, String> tagsMap = [
system: system.name, system: system.name,
@ -132,7 +132,7 @@ class LogicalPartition extends MetaSystem {
List<Map> list = new ArrayList<>() List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>() Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().lparsUtil.first().storage?.virtualFiberChannelAdapters?.each { metrics.systemUtil?.utilSamples?.first()?.lparsUtil?.first()?.storage?.virtualFiberChannelAdapters?.each {
HashMap<String, String> tagsMap = [ HashMap<String, String> tagsMap = [
system: system.name, system: system.name,

View file

@ -55,10 +55,10 @@ class ManagedSystem extends MetaSystem {
log.debug("getMemoryMetrics() - tags: " + tagsMap.toString()) log.debug("getMemoryMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [ HashMap<String, BigDecimal> fieldsMap = [
totalMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.totalMem.first(), totalMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.totalMem?.first(),
availableMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.availableMem.first(), availableMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.availableMem?.first(),
configurableMem: metrics.systemUtil.utilSamples.first().serverUtil.memory.configurableMem.first(), configurableMem: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.configurableMem?.first(),
assignedMemToLpars: metrics.systemUtil.utilSamples.first().serverUtil.memory.assignedMemToLpars.first(), assignedMemToLpars: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.memory?.assignedMemToLpars?.first(),
] ]
map.put("fields", fieldsMap) map.put("fields", fieldsMap)
log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString()) log.debug("getMemoryMetrics() - fields: " + fieldsMap.toString())
@ -80,10 +80,10 @@ class ManagedSystem extends MetaSystem {
log.debug("getProcessorMetrics() - tags: " + tagsMap.toString()) log.debug("getProcessorMetrics() - tags: " + tagsMap.toString())
HashMap<String, BigDecimal> fieldsMap = [ HashMap<String, BigDecimal> fieldsMap = [
availableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.totalProcUnits.first(), availableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.totalProcUnits?.first(),
utilizedProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.utilizedProcUnits.first(), utilizedProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.utilizedProcUnits?.first(),
availableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.availableProcUnits.first(), availableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.availableProcUnits?.first(),
configurableProcUnits: metrics.systemUtil.utilSamples.first().serverUtil.processor.configurableProcUnits.first(), configurableProcUnits: metrics.systemUtil?.utilSamples?.first()?.serverUtil?.processor?.configurableProcUnits?.first(),
] ]
map.put("fields", fieldsMap) map.put("fields", fieldsMap)
log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString()) log.debug("getProcessorMetrics() - fields: " + fieldsMap.toString())
@ -97,7 +97,7 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>() List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>() Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().serverUtil.sharedProcessorPool.each { metrics.systemUtil?.utilSamples?.first()?.serverUtil?.sharedProcessorPool?.each {
HashMap<String, String> tagsMap = [ HashMap<String, String> tagsMap = [
system: name, system: name,
@ -125,7 +125,7 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>() List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>() Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().viosUtil.each {vios -> metrics.systemUtil?.utilSamples?.first()?.viosUtil?.each {vios ->
vios.network.sharedAdapters.each { vios.network.sharedAdapters.each {
@ -158,8 +158,8 @@ class ManagedSystem extends MetaSystem {
List<Map> list = new ArrayList<>() List<Map> list = new ArrayList<>()
Map<String, Map> map = new HashMap<String, Map>() Map<String, Map> map = new HashMap<String, Map>()
metrics.systemUtil.utilSamples.first().viosUtil.each { vios -> metrics.systemUtil?.utilSamples?.first()?.viosUtil?.each { vios ->
vios.storage.fiberChannelAdapters.each { vios.storage?.fiberChannelAdapters?.each {
HashMap<String, String> tagsMap = [ HashMap<String, String> tagsMap = [
system: name, system: name,

View file

@ -61,7 +61,7 @@ class HmcClientTest extends Specification {
mockServer.enqueue(new MockResponse().setBody(testJson)); mockServer.enqueue(new MockResponse().setBody(testJson));
when: when:
String jsonString = hmc.getReponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/ManagedSystem_e09834d1-c930-3883-bdad-405d8e26e166_20200807T122600+0200_20200807T122600+0200_30.json") as String)) String jsonString = hmc.getResponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/ManagedSystem_e09834d1-c930-3883-bdad-405d8e26e166_20200807T122600+0200_20200807T122600+0200_30.json") as String))
then: then:
jsonString.contains('"uuid": "e09834d1-c930-3883-bdad-405d8e26e166"') jsonString.contains('"uuid": "e09834d1-c930-3883-bdad-405d8e26e166"')
@ -75,7 +75,7 @@ class HmcClientTest extends Specification {
mockServer.enqueue(new MockResponse().setBody(testJson)); mockServer.enqueue(new MockResponse().setBody(testJson));
when: when:
String jsonString = hmc.getReponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/LogicalPartition_2DE05DB6-8AD5-448F-8327-0F488D287E82_20200807T123730+0200_20200807T123730+0200_30.json") as String)) String jsonString = hmc.getResponseBody(new URL(mockServer.url("/rest/api/pcm/ProcessedMetrics/LogicalPartition_2DE05DB6-8AD5-448F-8327-0F488D287E82_20200807T123730+0200_20200807T123730+0200_30.json") as String))
then: then:
jsonString.contains('"uuid": "b597e4da-2aab-3f52-8616-341d62153559"') jsonString.contains('"uuid": "b597e4da-2aab-3f52-8616-341d62153559"')