先明确我们要监控的一些指标, 以下指标都基于 Hadoop 2.8.5

NameNode

DataNode

修改 /etc/telegraf/telegraf.conf 文件, 增加如下内容

################
# NAMENODE     #
################
# Reads NameNode metrics from the Jolokia agent bound to the NameNode.
[[inputs.jolokia2_agent]]
# Jolokia endpoint of the NameNode. This must be a plain URL; do not wrap it
# in angle brackets.
urls = ["http://txz-data0:7777/jolokia"]
# Prefix for the measurement names written to InfluxDB.
name_prefix = "hadoop.hdfs.namenode."

[[inputs.jolokia2_agent.metric]]
# Measurement name written to InfluxDB; name_prefix, when set, is prepended.
name = "FSNamesystem"
# MBean to query.
mbean = "Hadoop:name=FSNamesystem,service=NameNode"
# MBean attributes to read.
paths = [
    "CapacityTotal",
    "CapacityRemaining",
    "CapacityUsedNonDFS",
    "NumLiveDataNodes",
    "NumDeadDataNodes",
    "BlockCapacity",
    "CorruptBlocks",
    "MissingBlocks",
    "CapacityUsed",
    "UnderReplicatedBlocks",
    "BlocksTotal",
    "VolumeFailuresTotal",
    "FilesTotal",
    "NumDecommissioningDataNodes",
]

[[inputs.jolokia2_agent.metric]]
name = "jvm_runtime"
mbean = "java.lang:type=Runtime"
paths = ["Uptime"]

[[inputs.jolokia2_agent.metric]]
name = "jvm_memory"
mbean = "java.lang:type=Memory"
paths = ["HeapMemoryUsage", "NonHeapMemoryUsage", "ObjectPendingFinalizationCount"]

[[inputs.jolokia2_agent.metric]]
name = "RPCContext"
# The MBean name embeds the RPC port number (9820 here).
# NOTE(review): Hadoop 2.x NameNodes default to RPC port 8020 - confirm that
# 9820 is the port this cluster actually uses, otherwise nothing will match.
mbean = "Hadoop:service=NameNode,name=RpcActivityForPort9820"
paths = ["RpcProcessingTimeAvgTime", "RpcQueueTimeAvgTime", "ReceivedBytes", "SentBytes"]

# [[inputs.jolokia2_agent.metric]]
# name  = "OperatingSystem"
# mbean = "java.lang:type=OperatingSystem"
# paths = ["ProcessCpuLoad","SystemLoadAverage","SystemCpuLoad"]

# [[inputs.jolokia2_agent.metric]]
# name     = "jvm_garbage_collector"
# mbean    = "java.lang:name=*,type=GarbageCollector"
# paths    = ["CollectionTime", "CollectionCount"]
# tag_keys = ["name"]

# [[inputs.jolokia2_agent.metric]]
# name       = "jvm_memory_pool"
# mbean      = "java.lang:name=*,type=MemoryPool"
# paths      = ["Usage", "PeakUsage", "CollectionUsage"]
# tag_keys   = ["name"]
# tag_prefix = "pool_"

################
# DATANODE     #
################
# Reads DataNode metrics from a second Jolokia endpoint (port 7778).
[[inputs.jolokia2_agent]]
# Jolokia endpoint of the DataNode. This must be a plain URL; do not wrap it
# in angle brackets.
urls = ["http://txz-data0:7778/jolokia"]
# Prefix for the measurement names written to InfluxDB.
name_prefix = "hadoop.hdfs.datanode."

[[inputs.jolokia2_agent.metric]]
name = "FSDatasetState"
mbean = "Hadoop:name=FSDatasetState,service=DataNode"
paths = ["Capacity", "DfsUsed", "Remaining"]

[[inputs.jolokia2_agent.metric]]
name = "jvm_memory"
mbean = "java.lang:type=Memory"
paths = ["HeapMemoryUsage", "NonHeapMemoryUsage", "ObjectPendingFinalizationCount"]

[[inputs.jolokia2_agent.metric]]
name = "RPCContext"
# The MBean name embeds the RPC port number (50020 here); adjust it if the
# DataNode uses a different port.
mbean = "Hadoop:service=DataNode,name=RpcActivityForPort50020"
paths = ["RpcProcessingTimeAvgTime", "RpcQueueTimeAvgTime"]

[[inputs.jolokia2_agent.metric]]
name = "Activity"
# MBean name pattern: the "*" presumably stands for the host-specific part of
# the name, and the trailing 50010 is a port number - verify against the
# MBeans the DataNode actually exposes.
mbean = "Hadoop:service=DataNode,name=DataNodeActivity-*-50010"
paths = ["ReadBlockOpAvgTime", "WriteBlockOpAvgTime"]

# [[inputs.jolokia2_agent.metric]]
# name  = "OperatingSystem"
# mbean = "java.lang:type=OperatingSystem"
# paths = ["ProcessCpuLoad","SystemLoadAverage","SystemCpuLoad"]

# [[inputs.jolokia2_agent.metric]]
# name  = "jvm_runtime"
# mbean = "java.lang:type=Runtime"
# paths = ["Uptime"]

# [[inputs.jolokia2_agent.metric]]
# name     = "jvm_garbage_collector"
# mbean    = "java.lang:name=*,type=GarbageCollector"
# paths    = ["CollectionTime", "CollectionCount"]
# tag_keys = ["name"]

# [[inputs.jolokia2_agent.metric]]
# name       = "jvm_memory_pool"
# mbean      = "java.lang:name=*,type=MemoryPool"
# paths      = ["Usage", "PeakUsage", "CollectionUsage"]
# tag_keys   = ["name"]
# tag_prefix = "pool_"