Python读取某个目录下的zip压缩包，解压后计算其中每个小文件的md5值，并将压缩包名字、里面小文件的名字以及对应的md5值写入csv文件

#coding=utf-8
import csv
import hashlib
import os,sys
import zipfile
# Python 2 idiom: re-import sys, then set the interpreter-wide default string
# encoding to UTF-8.
# NOTE(review): the builtin `reload` and `sys.setdefaultencoding` exist only in
# Python 2, so this file will not run on Python 3 as written.
reload(sys)
sys.setdefaultencoding('UTF-8')

def CalcMD5(filepath):
    """Return the hexadecimal MD5 digest of the file at ``filepath``.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    large files are never loaded into memory in one piece.

    Args:
        filepath: Path of the file to hash.

    Returns:
        The MD5 digest as a 32-character lowercase hexadecimal string.

    Raises:
        IOError: If the file cannot be opened or read (OSError on Python 3).
    """
    md5obj = hashlib.md5()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            md5obj.update(chunk)
    return md5obj.hexdigest()

def get_md5_value(src):
    """Return the hexadecimal MD5 digest of an in-memory string.

    Args:
        src: The data to hash; it is handed directly to ``hashlib.md5``.

    Returns:
        The MD5 digest as a hexadecimal string.
    """
    return hashlib.md5(src).hexdigest()

def writetocsv(datalist):
    """Write the rows in ``datalist`` to a CSV file.

    The output file name is the first command-line argument followed by
    ``_zhilianzipinfo.csv``; the file is opened in binary write mode.

    Args:
        datalist: Iterable of rows, each row being a sequence of fields.
    """
    target = sys.argv[1] + '_zhilianzipinfo.csv'
    with open(target, 'wb') as out:
        csv.writer(out).writerows(datalist)

if __name__=='__main__':
    # Entry point: hash every entry of the matching zip archives found in the
    # backup directory and write (archive name, entry name, md5) rows to CSV.
    #
    # sys.argv[1] is required. It is read up front so a missing argument fails
    # before any archive is processed; writetocsv() uses it as the output
    # file-name prefix.
    folder = sys.argv[1]
    # NOTE(review): the scanned directory is hard-coded and `folder` is not
    # used to build it. An earlier variant used '/data/backup/' + folder;
    # confirm which one is intended.
    backup_dir = "C:\\Users\\Administrator\\Desktop\\data\\backup\\2016-08-01"
    print(backup_dir)

    # Archive entries that are never reported in the CSV.
    skipped_entries = ('index.htm', 'source.html')
    rows = []
    for index, archive_name in enumerate(os.listdir(backup_dir)):
        if '.zip' in archive_name and 'from_1' in archive_name:
            # Re-encode the archive name from UTF-8 to GBK for the CSV output.
            # This is invariant per archive, so it is done once, not per entry.
            archive_label = archive_name.decode('utf-8').encode('gbk')
            archive_path = os.path.join(backup_dir, archive_name)
            # The context manager closes the archive handle even on error.
            with zipfile.ZipFile(archive_path, 'r') as archive:
                for entry_name in archive.namelist():
                    # Compare by value. The previous `is` test compared object
                    # identity, which is not guaranteed to match for equal
                    # strings, so these entries could slip through.
                    if entry_name in skipped_entries:
                        continue
                    entry_md5 = get_md5_value(archive.read(entry_name))
                    rows.append([archive_label, entry_name, entry_md5])
        # Progress indicator: index of the directory entry just handled
        # (printed for every entry, matching or not).
        print(index)
    writetocsv(rows)
    print('end.')

你可能感兴趣的:(Python)