最近在尝试实现 Common Lisp,想起 CL 的常用操作符 car、cdr、defparameter 之类,如果把它们改短些,输入时不就省事么?不过动手前先找准目标比较妥当,于是写了个 Python 脚本,统计 Clozure CL、CLISP 源码中出现的所有标识符,按出现次数挑出排名靠前的看看。
# Count the most frequently used identifiers in Common Lisp implementations,
# together with how often each one occurs.
#
# Usage: python <this script> [BASE_DIR ...]
# With no arguments, the directories in DEFAULT_BASE_DIRS are scanned.
import os
import re
import string  # not used below; kept from the original import list
import sys

# A token starts with one or more word characters and may continue with
# letters, digits, '-' or '_', so a hyphenated Lisp name such as
# "with-open-file" is counted as a single identifier.
IDENT_RE = re.compile(r"\w+[a-zA-Z0-9-_]*")

# Source trees scanned when no directory is given on the command line.
# Raw strings keep the Windows backslashes literal.
DEFAULT_BASE_DIRS = [r"C:\wz\greenware\ccl-1.7", r"C:\wz\greenware\clisp-2.49"]

TOP_N = 100       # number of rows in the printed ranking
NAME_WIDTH = 20   # a fixed column is enough: the top-ranked identifiers are short
COUNT_WIDTH = 8


def stat_file(file_path, dic):
    """Count identifier occurrences in one file, accumulating into ``dic``.

    Args:
        file_path: Path of the text file to scan.
        dic: Mapping of identifier -> occurrence count; updated in place.

    Returns:
        Length of the longest identifier seen in this file, or 0 when no
        identifier was found or the file could not be opened.
    """
    longest = 0
    try:
        # errors="replace" keeps a stray non-UTF-8 byte from discarding the
        # whole file; the replacement character never matches IDENT_RE.
        with open(file_path, encoding="utf-8", errors="replace") as fp:
            for line in fp:
                for ident in IDENT_RE.findall(line):
                    longest = max(longest, len(ident))
                    dic[ident] = dic.get(ident, 0) + 1
    except OSError as err:
        # Best effort: report the unreadable file and carry on with the rest.
        print("skipping %s: %s" % (file_path, err), file=sys.stderr)
    return longest


def iter_lisp_files(base_dir):
    """Yield the path of every ``*.lisp`` file below ``base_dir``.

    Directories named ``.svn`` are not descended into, and file names that
    contain ``.svn`` are skipped.
    """
    for root, dirs, files in os.walk(base_dir):
        # Prune in place so os.walk never enters Subversion metadata.
        dirs[:] = [d for d in dirs if d != ".svn"]
        for file_name in files:
            if file_name.endswith(".lisp") and ".svn" not in file_name:
                yield os.path.join(root, file_name)


def count_identifiers(base_dirs):
    """Scan every Lisp file under ``base_dirs`` and return identifier counts.

    Each scanned path is printed to stdout as progress output.
    """
    counts = {}
    for base_dir in base_dirs:
        for file_path in iter_lisp_files(base_dir):
            print(file_path)
            stat_file(file_path, counts)
    return counts


def top_identifiers(counts, limit=TOP_N):
    """Return up to ``limit`` ``(identifier, count)`` pairs, most frequent first.

    The sort is stable, so identifiers with equal counts keep the order in
    which ``counts`` yields them.
    """
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main(argv=None):
    """Scan the given (or default) directories and print the ranking table."""
    args = sys.argv[1:] if argv is None else list(argv)
    counts = count_identifiers(args or DEFAULT_BASE_DIRS)
    for name, count in top_identifiers(counts):
        print(f"{name:<{NAME_WIDTH}}{count:>{COUNT_WIDTH}}")


if __name__ == "__main__":
    main()
the 25622 if 17399 let 15485 nil 14565 and 12769 0 12449 defun 11855 a 11716 t 10312 is 9790 1 9649 of 9405 to 9294 when 8918 or 7997 setf 7791 setq 7688 name 7195 s 6686 in 6640 i 6408 declare 5964 eq 5714 not 5586 x 5573 type 5558 fixnum 5395 form 5379 seg 4996 stream 4905 list 4836 for 4668 string 4334 unless 4329 dest 4043 car 4009 value 3975 imm0 3949 n 3657 2 3598 p 3594 l 3582 target 3580 buffer 3478 u 3421 be 3409 that 3403 cdr 3395 class 3301 start 3288 as 3249 arg_z 3245 S 3155 end 3130 ppc 3093 args 2991 values 2970 key 2948 it 2947 length 2783 return 2777 with 2753 src 2745 vreg 2736 logior 2722 null 2701 function 2678 error 2650 index 2639 The 2603 object 2578 val 2566 file 2522 code 2507 do 2500 arm 2475 4 2468 this 2407 eql 2401 rest 2397 ash 2394 optional 2366 progn 2360 format 2343 y 2342 x8664 2339 b 2310 cond 2277 8 2274 body 2243 apply 2235 line 2221 vector 2218 result 2209 lambda 2199 are 2198 3 2197 idx 2188 lisp 2161 defconstant 2128 x8632 2121