统计gitlab中每人的代码提交量

直接上代码

  1. gitlabUtil.py
# coding:utf-8
import requests
import os
import json
# GitLab connection settings. Each value can be overridden through an
# environment variable; the literals are kept only as backward-compatible
# fallbacks.
# NOTE(review): an access token should not live in source control. Prefer
# setting GITLAB_PRIVATE_TOKEN and revoking the hard-coded value.
private_token = os.environ.get('GITLAB_PRIVATE_TOKEN', '1KpjiCRyt2ywJq3tdfdfjD')
private_host = os.environ.get('GITLAB_HOST', 'http://10.200.3.3')
def get100Projects(page):
    """Fetch one page (at most 100 entries) of the GitLab project list.

    Args:
        page: Page number to request; the caller in this file starts at 1.

    Returns:
        A list of dicts, one per project, with the keys ``id``, ``name``,
        ``url`` (HTTP clone URL), ``group`` (first segment of
        ``path_with_namespace``) and ``activitytime`` (the part of
        ``last_activity_at`` before the ``T``).

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
    """
    response = requests.get(
        private_host + '/api/v3/projects',
        # Let requests build and escape the query string.
        params={'private_token': private_token, 'per_page': 100, 'page': page},
        # Without a timeout an unresponsive server blocks the script forever.
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of iterating over an error payload.
    response.raise_for_status()

    projects = []
    for item in response.json():
        projects.append({
            'id': item['id'],
            'name': item['name'],
            'url': item['http_url_to_repo'],
            'group': str(item['path_with_namespace']).split('/')[0],
            'activitytime': str(item['last_activity_at']).split('T')[0],
        })
    return projects
def getAllProjects():
    """Collect every project by requesting pages until a short page arrives.

    Pages are fetched through ``get100Projects`` starting at page 1. A page
    holding fewer than 100 entries is treated as the last one.

    Returns:
        A list with the project dicts of all pages, in page order.
    """
    collected = []
    page_no = 1
    while True:
        batch = get100Projects(page_no)
        collected.extend(batch)
        page_no += 1
        if len(batch) < 100:
            return collected
def _git_author_stats(project, log_filter=''):
    """Clone *project*, compute per-name line statistics, delete the clone.

    Args:
        project: Dict providing ``url`` (clone URL) and ``name``.
        log_filter: Extra, already shell-safe ``git log`` options inserted
            into the per-name statistics call (for example a date range).

    Returns:
        The raw shell output: one ``name*added*removed*net@`` record per
        committer name found in the history. Counters that never received a
        value are printed as empty strings by awk.
    """
    try:
        from shlex import quote  # Python 3
    except ImportError:
        from pipes import quote  # Python 2

    repo_url = quote(project.get('url'))
    # Clone into an explicitly named directory: by default git derives the
    # directory from the URL, which need not be equal to the project name.
    clone_dir = quote(project.get('name'))

    # NOTE(review): names are collected from the committer field (%cN) but
    # the statistics are filtered with --author, which is a pattern match, so
    # a name contained in another name also collects that person's commits.
    stats_cmd = ''.join([
        "git log --format='%cN' | sort -u | while read name; do ",
        # printf instead of `echo -en`: echo options are not portable across
        # /bin/sh implementations and may be printed literally.
        "printf '%s*' \"$name\"; ",
        "git log --author=\"$name\" ", log_filter,
        " --pretty=tformat: --numstat | ",
        "awk '{ add += $1; subs += $2; loc += $1 - $2 } ",
        "END { printf \"%s*%s*%s@\", add, subs, loc }' -; done",
    ])

    # The statistics run in a subshell so the working directory is untouched,
    # and the clone is removed whenever the clone step itself succeeded.
    command = (
        'git clone -- ' + repo_url + ' ' + clone_dir
        + ' && { ( cd -- ' + clone_dir + ' && ' + stats_cmd + ' ); '
        + 'rm -rf -- ' + clone_dir + '; }'
    )
    pipe = os.popen(command)
    try:
        return pipe.read()
    finally:
        pipe.close()


def coutProject(project):
    """Return per-name line statistics over the project's whole history.

    Args:
        project: Dict providing ``url`` and ``name``.

    Returns:
        Raw ``name*added*removed*net@`` records as a single string.
    """
    return _git_author_stats(project)


def coutProjectBydate(project,date):
    """Return per-name line statistics restricted to one calendar month.

    Args:
        project: Dict providing ``url`` and ``name``.
        date: Month to report on, formatted ``YYYY-MM``.

    Returns:
        Raw ``name*added*removed*net@`` records as a single string.

    Raises:
        ValueError: if *date* is not a valid ``YYYY-MM`` value.
    """
    import calendar

    year_text, month_text = date.split('-')
    year, month = int(year_text), int(month_text)
    # Not every month has a 31st; ask the calendar for the real last day.
    last_day = calendar.monthrange(year, month)[1]
    # Explicit times make the range cover the whole first and last day.
    log_filter = "--since='%04d-%02d-01 00:00:00' --until='%04d-%02d-%02d 23:59:59'" % (
        year, month, year, month, last_day)
    return _git_author_stats(project, log_filter)
def converStr(out):
    """Parse raw ``name*added*removed*total@`` records into dicts.

    Newlines are dropped, the text is split on ``@`` and every non-empty
    chunk is split on ``*``. Empty counters are read as 0.

    Args:
        out: The string produced by ``coutProject`` / ``coutProjectBydate``.

    Returns:
        A list of dicts with the keys ``name`` (str) and ``added``,
        ``removed``, ``total`` (int). Chunks with missing or non-numeric
        fields are reported on stdout and skipped.
    """
    records = []
    for chunk in out.replace('\n', '').split('@'):
        if chunk == '':
            continue
        fields = chunk.split('*')
        try:
            record = {
                'name': fields[0],
                'added': int(fields[1] or 0),
                'removed': int(fields[2] or 0),
                'total': int(fields[3] or 0),
            }
        except (IndexError, ValueError) as e:
            # Best effort: one malformed record must not abort the report.
            print('convertStr err:::'+str(e))
            continue
        records.append(record)
    return records
def mcount(total,rl):
    """Accumulate one project's records into the running totals.

    Args:
        total: Dict mapping a name to its summed record; updated in place.
        rl: List of record dicts as returned by ``converStr``.

    Returns:
        None. The result is the mutated *total*.
    """
    for record in rl:
        name = record.get('name')
        running = total.get(name)
        if running:
            for field in ('added', 'removed', 'total'):
                running[field] += int(record.get(field))
        else:
            # Store a copy: keeping the caller's dict would let later
            # additions silently modify the list that was passed in.
            total[name] = dict(record)
def report(total):
    """Print one tab-separated line per contributor with non-zero numbers.

    Args:
        total: Iterable of ``(name, stats)`` pairs, e.g. the sorted items of
            the totals dict. ``stats`` holds ``added``, ``removed`` and
            ``total``.

    Each line is the name, a tab, then the ``key:value`` pairs of ``stats``
    separated by tabs. Entries whose three counters are all zero are skipped.
    """
    for name, stats in total:
        if int(stats.get('added')) == 0 and int(stats.get('removed')) == 0 and int(stats.get('total')) == 0:
            continue
        # No ``encoding`` argument: UTF-8 is already the Python 2 default and
        # Python 3 rejects the keyword.
        # The '-en ' prefix is removed because a shell whose echo does not
        # understand the -en option prints it in front of the name.
        name_text = json.dumps(name, ensure_ascii=False).replace('-en ', '').replace('\"', '')
        stats_text = json.dumps(stats, ensure_ascii=False)
        # Flatten the JSON object into plain key:value columns.
        for junk in ('-en ', ' ', '{', '}'):
            stats_text = stats_text.replace(junk, '')
        stats_text = stats_text.replace(',', '\t').replace('\'', '').replace('\"', '')
        print(name_text + '\t' + stats_text)
  2. main.py
# coding:utf-8
import requests
import json
import time
import gitlabUtil as git
# NOTE(review): private_token and private_host are not referenced by the
# main.py code shown here; the API requests are made by gitlabUtil, which
# defines its own copies.
private_token = '1KpjiCRyt2ywJq3tdfdfjD'
private_host = 'http://10.200.3.3'
# Month to report on, formatted 'YYYY-MM'. An empty string switches the
# script to full-history statistics.
private_date = '2019-01'

def skipProject(pro):
    """Tell whether *pro* is outside the hand-picked project list.

    Args:
        pro: Project dict; only its ``name`` entry is inspected.

    Returns:
        False when the name is one of the listed projects, True otherwise.
    """
    selected = ['app1','app2','app3','app4','app5']
    return pro.get('name') not in selected
def skipXYProject(pro):
    """Tell whether the project name starts with ``xy`` (case-insensitive).

    Args:
        pro: Project dict; only its ``name`` entry is inspected.

    Returns:
        True for names beginning with ``xy`` in any letter case, otherwise
        False (an explicit bool rather than an implicit ``None``).
    """
    return str(pro.get('name')).lower().startswith('xy')
if __name__ == '__main__':
    # Walk all GitLab projects, gather per-contributor line statistics for
    # the selected ones, then print the summed totals and the elapsed time.
    sT = time.time()
    projects = git.getAllProjects()
    i = 1
    total = {}
    for pro in projects:
        if pro.get('group') != 'java':
            continue
        # Restrict the run to the hand-picked projects. Removing this check
        # processes every project in the group, which takes much longer.
        if skipProject(pro):
            continue
        # Leave out projects whose name starts with "xy".
        if skipXYProject(pro):
            continue
        print('start project:::' + pro.get('name')+',index:::'+str(i))
        if private_date != '':
            # A month was requested: projects with no activity since the
            # first day of that month cannot contribute, so skip the clone.
            activitytime = pro.get('activitytime')
            if activitytime < str(private_date+'-01'):
                print('skip unactiviry project:::' + pro.get('name'))
                continue
            r = git.coutProjectBydate(pro,private_date)
        else:
            r = git.coutProject(pro)
        # Turn the raw shell output into a list of record dicts.
        rl = git.converStr(r)
        # No ``encoding`` argument: UTF-8 is already the Python 2 default and
        # Python 3 rejects the keyword.
        print('finish project:::' + json.dumps(rl, ensure_ascii=False))
        # Add this project's numbers to the running totals.
        git.mcount(total,rl)
        i += 1
    print('-------total------\n')
    # Sort by lines added, most first, ties broken by name. Comparing the
    # stats dicts themselves only works on Python 2.
    ranked = sorted(total.items(),
                    key=lambda item: (item[1]['added'], item[0]),
                    reverse=True)
    git.report(ranked)
    eT = time.time()
    exT = eT-sT
    print('Time used:::'+str(int(exT))+'S')

不知道 GitHub 有没有类似的 API,有的话能不能用同样的办法统计出一些有意思的东西呢?

你可能感兴趣的:(Python)