OpenResty + Lua + Kafka log collection

Background

Collect user behavior logs from a high-traffic service.

Requirements:

  • Send different logs to different Kafka topics according to business type
  • Near-real-time data delivery
  • High performance

Option 1

Write a custom receiver service and have it forward logs to Kafka.

Option 2

Use an nginx + Lua solution.

Option 2 was chosen: nginx offers high performance, requires little code to write and maintain, and fits the business scenario.

Implementation steps

  1. When a production request reaches nginx, use Lua to assemble a JSON-format log entry and send it asynchronously to the Kafka cluster (see the example payload below)
  2. Split logs into different topics according to the business they belong to
  3. Downstream (e.g. recommendation) services consume the topics in real time
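  In the nginx configuration below, the Lua handler uses the "type" field of the posted JSON object as the Kafka topic and attaches the client IP before forwarding. A sketch of the kind of request body a front-end page might POST (every field except "type" is an invented example):

  {
    "type": "topic_test_log",
    "uid": "u_10001",
    "event": "click",
    "page": "/index.html",
    "ts": 1620000000
  }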

Installation steps

  • Install OpenResty

  Install the OpenResty build dependencies:

apt-get install libreadline-dev libncurses5-dev libpcre3-dev libssl-dev perl make build-essential
# or
yum install readline-devel pcre-devel openssl-devel gcc
  • Build OpenResty
# 1. Download and extract OpenResty:
cd /opt/ns/software/
wget https://openresty.org/download/openresty-1.19.3.2.tar.gz
tar -xzf openresty-1.19.3.2.tar.gz -C /opt/ns/servers

# 2. Configure and install:
# Install under /opt/ns/openresty (the default prefix is /usr/local/openresty).
cd /opt/ns/servers/openresty-1.19.3.2
./configure --prefix=/opt/ns/openresty --with-luajit --without-http_redis2_module --with-http_iconv_module
make && make install
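
A quick sanity check that the build landed under the chosen prefix; the installed binary reports its version and configure arguments:

/opt/ns/openresty/nginx/sbin/nginx -V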
  • Install lua-resty-kafka
# Download lua-resty-kafka:
wget https://github.com/doujiang24/lua-resty-kafka/archive/master.zip
unzip master.zip -d /opt/ns/software

# Copy lua-resty-kafka into the OpenResty tree
mkdir /opt/ns/openresty/lualib/kafka
cp -rf /opt/ns/software/lua-resty-kafka-master/lib/resty /opt/ns/openresty/lualib/kafka/
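
To check that the library is loadable from that directory before touching nginx.conf, the resty command-line utility shipped with OpenResty can require the module directly; a quick check, assuming the paths used above:

/opt/ns/openresty/bin/resty -I /opt/ns/openresty/lualib/kafka -e 'local p = require "resty.kafka.producer"; print("lua-resty-kafka ok")'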
  • Install Kafka (omitted here)
  • Configure nginx

   vi /opt/ns/openresty/nginx/conf/nginx.conf

http {
    # Shared dict (10 MB), used as a cross-worker counter for round-robin partition assignment
    lua_shared_dict shared_data 10m;

    # Lua library search path
    lua_package_path "/opt/ns/openresty/lualib/kafka/?.lua;;";
    server {
        listen 80;
        server_name kafka.test.com;
        # Log collection is POST-only; reject plain GET requests
        if ($request_method ~* GET) {
            return 403;
        }

        location / {
            root /var/local/www/wwwroot/kafka/;
            index index.html;

            # CORS headers so pages on other origins can post logs from the browser
            add_header 'Access-Control-Allow-Origin' $http_origin;
            add_header 'Access-Control-Allow-Credentials' 'true';
            add_header 'Access-Control-Allow-Methods' 'GET, POST, OPTIONS';
            add_header 'Access-Control-Allow-Headers' 'DNT,web-token,app-token,Authorization,Accept,Origin,Keep-Alive,User-Agent,X-Mx-ReqToken,X-Data-Type,X-Auth-Token,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range';
            add_header 'Access-Control-Expose-Headers' 'Content-Length,Content-Range';
            if ($request_method = 'OPTIONS') {
                add_header 'Access-Control-Max-Age' 1728000;
                add_header 'Content-Type' 'application/json; charset=utf-8';
                add_header 'Content-Length' 0;
                return 204;
            }
      
            rewrite_by_lua '
                -- Load cjson and the lua-resty-kafka modules
                local cjson = require "cjson"
                local client = require "resty.kafka.client"
                local producer = require "resty.kafka.producer"

                -- Kafka broker addresses
                local broker_list = {
                    { host = "127.0.0.1", port = 9092 }
                }

                -- Custom partitioner: the message key passed to send() is already a partition number
                local function partitioner(key, num, correlation_id)
                    return tonumber(key)
                end

                local CONNECT_PARAMS = { producer_type = "async", socket_timeout = 30000, flush_time = 10000, request_timeout = 20000, batch_num = 10000, partitioner = partitioner }

                -- Message variables
                local message = {}
                local topic = "topic_test_log"
                local request_method = ngx.var.request_method
                local args = ngx.req.get_uri_args()
                local body = nil

                -- Key of the round-robin counter in the shared dict
                local publicKey = "PUBLIC_KEY"

                -- Number of partitions of the Kafka topic
                local PARTITION_NUM = 5

                -- Shared-memory counter used to round-robin messages across partitions
                local shared_data = ngx.shared.shared_data
                local publicVal = shared_data:get(publicKey)
                if not publicVal then
                    publicVal = 1
                    shared_data:set(publicKey, publicVal)
                end
                -- The counter modulo PARTITION_NUM picks the target partition, balancing the load
                local partitions = "" .. (tonumber(publicVal) % PARTITION_NUM)
                -- Increment the counter for the next request
                shared_data:incr(publicKey, 1)

                -- Read the JSON request body
                ngx.req.read_body()
                body = ngx.req.get_body_data()
                local obj = cjson.decode(body)

                -- Attach the client IP and route by the "type" field of the log
                obj.ip = ngx.var.http_x_forwarded_for
                topic = obj.type
                message = cjson.encode(obj)

                -- Create the async Kafka producer and send
                local bp = producer:new(broker_list, CONNECT_PARAMS)

                local ok, err = bp:send(topic, partitions, message)

                if not ok then
                    ngx.log(ngx.ERR, "kafka send err:", err)
                    return
                end

                ngx.say("time : ", ngx.var.time_local)
                ngx.say("send success, ok:", ok)
            ';




        }
    }
}
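
The inline handler above assumes every request carries a valid JSON body with a "type" field; an empty or malformed body makes cjson.decode throw and the request fails with a 500. Below is a hardened sketch of the same body handling, not the original code: it uses cjson.safe instead of cjson, drops the shared-dict round-robin in favor of lua-resty-kafka's default partitioner, and could live in its own file (e.g. a hypothetical /opt/ns/openresty/nginx/conf/kafka_log.lua referenced with rewrite_by_lua_file, which also avoids quote-escaping issues inside nginx.conf):

-- kafka_log.lua: hardened sketch of the rewrite-phase handler
-- (same broker address and producer settings as in the config above)
local cjson = require "cjson.safe"            -- cjson.safe returns nil on bad JSON instead of raising
local producer = require "resty.kafka.producer"

local broker_list = {
    { host = "127.0.0.1", port = 9092 }
}
local CONNECT_PARAMS = { producer_type = "async", socket_timeout = 30000,
                         flush_time = 10000, request_timeout = 20000, batch_num = 10000 }

ngx.req.read_body()
local body = ngx.req.get_body_data()
if not body then
    ngx.log(ngx.ERR, "empty request body")
    return ngx.exit(ngx.HTTP_BAD_REQUEST)
end

local obj = cjson.decode(body)
if type(obj) ~= "table" or not obj.type then
    ngx.log(ngx.ERR, "invalid JSON log: ", body)
    return ngx.exit(ngx.HTTP_BAD_REQUEST)
end

obj.ip = ngx.var.http_x_forwarded_for or ngx.var.remote_addr

local bp = producer:new(broker_list, CONNECT_PARAMS)
local ok, err = bp:send(obj.type, nil, cjson.encode(obj))   -- nil key: let the default partitioner pick a partition
if not ok then
    ngx.log(ngx.ERR, "kafka send err: ", err)
    return ngx.exit(ngx.HTTP_INTERNAL_SERVER_ERROR)
end

ngx.say("send success")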
  • Run

  Start nginx:   /opt/ns/openresty/nginx/sbin/nginx

  Start a Kafka console consumer:

  kafka-console-consumer.sh --bootstrap-server 192.168.0.128:9092 --topic topic_test_log
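
  Because the Lua code spreads messages over PARTITION_NUM = 5 partitions with an explicit partition key, the topic needs at least 5 partitions. If automatic topic creation is disabled it can be created up front; a sketch, assuming Kafka 2.2+ and a single-broker cluster:

  kafka-topics.sh --bootstrap-server 192.168.0.128:9092 --create --topic topic_test_log --partitions 5 --replication-factor 1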

  Prepare a test HTML page (titled kafkaTest) that POSTs a JSON log entry to the nginx endpoint.
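
  Without the page, the endpoint can also be exercised directly; a sketch, assuming kafka.test.com resolves to the nginx host (e.g. via an /etc/hosts entry):

  curl -X POST http://kafka.test.com/ -H 'Content-Type: application/json' -d '{"type":"topic_test_log","event":"click"}'

  The console consumer started above should then print the forwarded message.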
    







