[hadoop@datanode1 job]$ vim flume-telnet-logger.conf
# Name the components on this agent
a1.sources = r1        # r1: the source of agent a1
a1.sinks = k1          # k1: the sink (output destination) of a1
a1.channels = c1       # c1: the channel (buffer) of a1

# Describe/configure the source
a1.sources.r1.type = netcat        # the source type of a1 is netcat
a1.sources.r1.bind = localhost     # the host a1 listens on
a1.sources.r1.port = 44444         # the port a1 listens on

# Describe the sink
a1.sinks.k1.type = logger          # the sink of a1 is the logger type

# Use a channel which buffers events in memory
a1.channels.c1.type = memory                 # the channel of a1 is the in-memory type
a1.channels.c1.capacity = 1000               # total capacity of the channel: 1000 events
a1.channels.c1.transactionCapacity = 100     # transaction capacity of the channel: 100 events

# Bind the source and sink to the channel
a1.sources.r1.channels = c1        # connect r1 to c1
a1.sinks.k1.channel = c1           # connect k1 to c1
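To try this agent, one way (a minimal sketch, assuming Flume is installed under /opt/module/flume and the file above is saved as job/flume-telnet-logger.conf) is to start it in the foreground and then send some text to the listening port from another terminal:

[hadoop@datanode1 flume]$ bin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-telnet-logger.conf -Dflume.root.logger=INFO,console
[hadoop@datanode1 ~]$ telnet localhost 44444
hello flume

Each line typed into the telnet session should show up as a logged event in the agent's console output.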
[hadoop@datanode1 group1]$ vim flume-file-flume.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1 k2
a1.channels = c1 c2

# Replicate the data flow to every channel
a1.sources.r1.selector.type = replicating

# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/module/datas/logs.log
a1.sources.r1.shell = /bin/bash -c

# Describe the sink
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = datanode2
a1.sinks.k1.port = 4141
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = datanode3
a1.sinks.k2.port = 4142

# Describe the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
a1.channels.c2.type = memory
a1.channels.c2.capacity = 1000
a1.channels.c2.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1 c2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c2
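The exec source tails /opt/module/datas/logs.log, so that file has to exist (and receive appends) for anything to flow. A quick way to set that up, assuming the path used in the command above:

[hadoop@datanode1 ~]$ mkdir -p /opt/module/datas
[hadoop@datanode1 ~]$ touch /opt/module/datas/logs.log
[hadoop@datanode1 ~]$ echo "test line" >> /opt/module/datas/logs.log   # append a test event later, once the agents are running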
Configuration file on datanode2
[hadoop@datanode2 group1]$ vim flume-flume-hdfs.conf
# Name the components on this agent
a2.sources = r1
a2.sinks = k1
a2.channels = c1

# Describe/configure the source
a2.sources.r1.type = avro
a2.sources.r1.bind = datanode2
a2.sources.r1.port = 4141

# Describe the sink
a2.sinks.k1.type = hdfs
a2.sinks.k1.hdfs.path = hdfs://datanode1:9000/flume2/%Y%m%d/%H
# Prefix for uploaded files
a2.sinks.k1.hdfs.filePrefix = flume2-
# Whether to roll folders based on time
a2.sinks.k1.hdfs.round = true
# How many time units before a new folder is created
a2.sinks.k1.hdfs.roundValue = 1
# Redefine the time unit
a2.sinks.k1.hdfs.roundUnit = hour
# Whether to use the local timestamp
a2.sinks.k1.hdfs.useLocalTimeStamp = true
# How many events to accumulate before flushing to HDFS
a2.sinks.k1.hdfs.batchSize = 100
# File type; compression is supported
a2.sinks.k1.hdfs.fileType = DataStream
# How often to roll to a new file (seconds)
a2.sinks.k1.hdfs.rollInterval = 600
# Roll the file at roughly 128 MB
a2.sinks.k1.hdfs.rollSize = 134217700
# Rolling is independent of the number of events
a2.sinks.k1.hdfs.rollCount = 0
# Minimum block replication
a2.sinks.k1.hdfs.minBlockReplicas = 1

# Describe the channel
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a2.sources.r1.channels = c1
a2.sinks.k1.channel = c1
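Once events have been flowing for a while, the rolled files should appear under the date/hour directories of the sink path. A quick check from any node with an HDFS client (assuming the NameNode address hdfs://datanode1:9000 used above):

[hadoop@datanode2 ~]$ hdfs dfs -ls -R /flume2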
Configuration file on datanode3
[hadoop@datanode3 group1]$ vim flume-flume-dir.conf
# Name the components on this agent
a3.sources = r1
a3.sinks = k1
a3.channels = c2

# Describe/configure the source
a3.sources.r1.type = avro
a3.sources.r1.bind = datanode3
a3.sources.r1.port = 4142

# Describe the sink
a3.sinks.k1.type = file_roll
a3.sinks.k1.sink.directory = /opt/module/datas/flume3

# Describe the channel
a3.channels.c2.type = memory
a3.channels.c2.capacity = 1000
a3.channels.c2.transactionCapacity = 100

# Bind the source and sink to the channel
a3.sources.r1.channels = c2
a3.sinks.k1.channel = c2

Start the agents
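The file_roll sink writes into /opt/module/datas/flume3, which should exist before the agent starts, and the downstream agents (a3, a2) are normally started before the upstream a1. A hedged sketch of the start-up commands, assuming Flume lives under /opt/module/flume on each node and the three files above sit in job/group1/:

[hadoop@datanode3 ~]$ mkdir -p /opt/module/datas/flume3
[hadoop@datanode3 flume]$ bin/flume-ng agent --conf conf/ --name a3 --conf-file job/group1/flume-flume-dir.conf
[hadoop@datanode2 flume]$ bin/flume-ng agent --conf conf/ --name a2 --conf-file job/group1/flume-flume-hdfs.conf
[hadoop@datanode1 flume]$ bin/flume-ng agent --conf conf/ --name a1 --conf-file job/group1/flume-file-flume.conf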
[hadoop@datanode1 flume]$ vim job/group1/flume-netcat-flume.conf
# Name the components on this agent
a1.sources = r1
a1.channels = c1
a1.sinkgroups = g1
a1.sinks = k1 k2

# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = datanode1
a1.sources.r1.port = 44444

a1.sinkgroups.g1.processor.type = load_balance
a1.sinkgroups.g1.processor.backoff = true
a1.sinkgroups.g1.processor.selector = round_robin
a1.sinkgroups.g1.processor.selector.maxTimeOut = 10000

# Describe the sink
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = datanode2
a1.sinks.k1.port = 4141
a1.sinks.k2.type = avro
a1.sinks.k2.hostname = datanode3
a1.sinks.k2.port = 4142

# Describe the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinkgroups.g1.sinks = k1 k2
a1.sinks.k1.channel = c1
a1.sinks.k2.channel = c1
datanode2
# Name the components on this agent
a2.sources = r1
a2.sinks = k1
a2.channels = c1

# Describe/configure the source
a2.sources.r1.type = avro
a2.sources.r1.bind = datanode2
a2.sources.r1.port = 4141

# Describe the sink
a2.sinks.k1.type = logger

# Describe the channel
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a2.sources.r1.channels = c1
a2.sinks.k1.channel = c1
datanode3
[hadoop@datanode3 flume]$ vim job/group1/flume-flume2.conf
# Name the components on this agent
a3.sources = r1
a3.sinks = k1
a3.channels = c2

# Describe/configure the source
a3.sources.r1.type = avro
a3.sources.r1.bind = datanode3
a3.sources.r1.port = 4142

# Describe the sink
a3.sinks.k1.type = logger

# Describe the channel
a3.channels.c2.type = memory
a3.channels.c2.capacity = 1000
a3.channels.c2.transactionCapacity = 100

# Bind the source and sink to the channel
a3.sources.r1.channels = c2
a3.sinks.k1.channel = c2
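With the two logger agents and the sink-group agent configured, the downstream agents are started first and the netcat source is then exercised to watch the load balancing in action. A minimal sketch, assuming the same Flume home as above; the file name for the datanode2 agent is an assumption, since it is not shown in the listing:

[hadoop@datanode2 flume]$ bin/flume-ng agent --conf conf/ --name a2 --conf-file job/group1/flume-flume1.conf -Dflume.root.logger=INFO,console   # file name assumed
[hadoop@datanode3 flume]$ bin/flume-ng agent --conf conf/ --name a3 --conf-file job/group1/flume-flume2.conf -Dflume.root.logger=INFO,console
[hadoop@datanode1 flume]$ bin/flume-ng agent --conf conf/ --name a1 --conf-file job/group1/flume-netcat-flume.conf
[hadoop@datanode1 ~]$ telnet datanode1 44444

Lines typed into the telnet session should alternate between the consoles on datanode2 and datanode3, since the sink group uses the round_robin selector.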
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /opt/module/datas/logs.log
a1.sources.r1.shell = /bin/bash -c

# Describe the sink
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = datanode1
a1.sinks.k1.port = 4141

# Describe the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
[hadoop@datanode1 flume]$ sudo vim /etc/httpd/conf.d/ganglia.conf
# Ganglia monitoring system php web frontend
Alias /ganglia /usr/share/ganglia
<Location /ganglia>
  Order deny,allow
  Deny from all
  Allow from all
  # Allow from 127.0.0.1
  # Allow from ::1
  # Allow from .example.com
</Location>
Edit the configuration file gmetad.conf
data_source "datanode1" 192.168.1.101
Edit the configuration file gmond.conf
cluster {
  name = "datanode1"            # your own host name
  owner = "unspecified"
  latlong = "unspecified"
  url = "unspecified"
}
udp_send_channel {
  #bind_hostname = yes # Highly recommended, soon to be default.
                       # This option tells gmond to use a source address
                       # that resolves to the machine's hostname. Without
                       # this, the metrics may appear to come from any
                       # interface and the DNS names associated with
                       # those IPs will be used to create the RRDs.
  #mcast_join = 239.2.11.71     # commented out
  host = 192.168.1.101          # your own host IP
  port = 8649                   # port number
  ttl = 1
}
Edit the configuration file config
[hadoop@datanode1 flume]$ sudo vim /etc/selinux/config
# This file controls the state of SELinux on the system.
# SELINUX= can take one of these three values:
#     enforcing - SELinux security policy is enforced.
#     permissive - SELinux prints warnings instead of enforcing.
#     disabled - No SELinux policy is loaded.
SELINUX=disabled
# SELINUXTYPE= can take one of these two values:
#     targeted - Targeted processes are protected,
#     mls - Multi Level Security protection.
SELINUXTYPE=targeted
Note: disabling SELinux via this file only takes effect after a reboot. If you do not want to reboot right now, you can make it take effect temporarily:
[hadoop@datanode1 flume]$ sudo setenforce 0
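To confirm the temporary change took effect, getenforce should now report Permissive (a quick check, not part of the original steps):

[hadoop@datanode1 flume]$ getenforce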
Start Ganglia
[hadoop@datanode1 flume]$ sudo service httpd start
Starting httpd:
[hadoop@datanode1 flume]$ sudo service gmetad start
[hadoop@datanode1 flume]$ sudo service gmond start
[hadoop@datanode1 flume]$
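With Ganglia running, a Flume agent can report its metrics to gmond by passing the monitoring properties on the command line. A minimal sketch reusing the netcat/logger job from the beginning (the config file name and Flume home are assumptions carried over from earlier):

[hadoop@datanode1 flume]$ bin/flume-ng agent --conf conf/ --name a1 --conf-file job/flume-telnet-logger.conf -Dflume.root.logger=INFO,console -Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts=192.168.1.101:8649

The channel and sink metrics should then show up in the Ganglia web UI at http://192.168.1.101/ganglia once some events have been sent through the agent.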