使用 awk 将某列 key 相同的多行合并为一行,并统计一对多的情况

示例输入文件:

[x@x test]$ cat test.txt
a1|1
a1|2
a1|3
b1|1
b1|4
b2|4
b3|5

执行(注意:脚本会先按第一列对输入文件进行原地排序,输入文件本身会被改写):

./test.sh test.txt

输出文件:

[x@x test]$ cat test.txt_output
a1|1,2,3
b1|1,4
b2|4
b3|5

脚本:

[x@x test]$ cat ./test.sh
#!/usr/bin/env bash
#
# Merge the rows of a "key|value" file that share the same key into one row.
#
# Usage: ./test.sh FILE
#
# FILE is sorted in place by its first "|"-separated column, and every run of
# rows with the same key is then written to FILE_output as "key|v1,v2,...".
# Only the first two columns of each row are used. Rows whose key is empty
# (including blank lines) are ignored, and an empty FILE yields an empty
# FILE_output.
#
# Exit status: 0 on success, 2 when FILE is missing from the command line,
# non-zero when FILE cannot be read, sorted or merged.

# Failures are handled explicitly below, so the status of main is the status
# of the script.
set -u

#######################################
# Collapse consecutive rows with an identical key into a single row.
# Arguments: none
# Inputs:    "key|value" rows on stdin, already grouped (sorted) by key
# Outputs:   one "key|v1,v2,..." row per key on stdout
#######################################
merge_by_key() {
  awk -F'|' '
    {
      # Appending "" forces a string value, so keys that merely look numeric
      # (for example "01" and "1") are never treated as the same key.
      key = $1 ""
      if (key == "") next

      if (seen && key == group_key) {
        # Same key as the previous row: append the value after a comma.
        values = values "," $2
      } else {
        # A new key starts: flush the finished group, then open a new one.
        if (seen) print group_key "|" values
        group_key = key
        values = $2
        seen = 1
      }
    }
    END {
      # Flush the last group; print nothing when no row had a key.
      if (seen) print group_key "|" values
    }
  '
}

#######################################
# Sort the input file in place and write the merged rows next to it.
# Arguments: $1 - path of the "key|value" file (extra arguments are ignored)
# Outputs:   writes "${1}_output"; diagnostics go to stderr
# Returns:   0 on success, 2 on usage error, non-zero on any other failure
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s FILE\n' "${0##*/}" >&2
    return 2
  fi

  local input=$1
  local output="${input}_output"

  if [[ ! -f "$input" || ! -r "$input" ]]; then
    printf '%s: cannot read input file: %s\n' "${0##*/}" "$input" >&2
    return 1
  fi

  # Group equal keys together. The input file itself is rewritten in sorted
  # order; stop here on failure rather than merging unsorted data.
  sort -t'|' -k1,1 -o "$input" -- "$input" || return

  merge_by_key < "$input" > "$output"
}

main "$@"

统计一对多的情况:

[x@x test]$ cat test.txt_output |awk -F ',' '{print NF}' |sort|uniq -c
      2 1
      1 2
      1 3

调整一下输出格式:

[x@x test]$ cat test.txt_output |awk -F ',' '{print NF}' |sort|uniq -c |awk '{print "1 key has "$2" values : " $1}'
1 key has 1 values : 2
1 key has 2 values : 1
1 key has 3 values : 1

你可能感兴趣的:(awk)