Part 2: Advanced Topics

Python aliasing and object references:
>>> a=[1,'qwe','hadoop']
>>> a
[1, 'qwe', 'hadoop']
>>> b=a
>>> b
[1, 'qwe', 'hadoop']
>>> a is b
True
>>>

The __str__ special method of a class:
# -*- coding: utf-8 -*-
class Time(object):
    def __init__(self, hour=0, minute=0, second=0):
        self.hour = hour
        self.minute = minute
        self.second = second
    def __str__(self):
        return "%.2d:%.2d:%.2d" % (self.hour, self.minute, self.second)

time = Time(10, 42)
print time    # __str__ is called automatically and prints 10:42:00

Infinite recursion:
>>> def test():
    test()

>>> test()
Traceback (most recent call last):
  File "<pyshell#30>", line 1, in <module>
    test()
  File "<pyshell#29>", line 2, in test
    test()
  File "<pyshell#29>", line 2, in test
    test()
  ...
  File "<pyshell#29>", line 2, in test
    test()
RuntimeError: maximum recursion depth exceeded

Counting elements with the list count() method:
>>> a=[1,1,'hadoop',[2,3],[2,3],'spark','spark']
>>> a.count(1)
2
>>> a.count([]2,3)
SyntaxError: invalid syntax
>>> a.count([2,3])
2
>>> a.count('hadoop')
1
>>>

The Boolean type:
>>> 5==5
True
>>> 5==6
False
>>> type(true)
Traceback (most recent call last):
  File "<pyshell#78>", line 1, in <module>
    type(true)
NameError: name 'true' is not defined
>>> type(True)
<type 'bool'>
>>> type
<type 'type'>
>>> type(False)
<type 'bool'>
>>>

Common Python exception types:
AttributeError: raised when an attribute reference or assignment fails.
NameError: raised when a name being accessed does not exist.
SyntaxError: raised when the code is malformed.
Exception: the base class from which almost all built-in exceptions derive; user-defined exceptions should inherit from it. In Python 2 the built-in exceptions are defined in the exceptions module.
IOError: an input/output error, most commonly seen when trying to open a file that does not exist.
KeyError: raised when a key is not found in a mapping (dictionary).
IndexError: raised when a sequence index does not exist, typically because it is outside the range of the sequence.
TypeError: raised when a built-in operation or function is applied to an object of the wrong type.
ZeroDivisionError: raised when the second operand of a division is zero.
ValueError: raised when an argument has the right type but an unsuitable value, for example passing a string that does not represent a number to int().

The key and reverse arguments of list.sort():
>>> x=['hadoop','spark','storm','hive','hbase']
>>> x
['hadoop', 'spark', 'storm', 'hive', 'hbase']
>>> x.sort(key=len)
>>> x
['hive', 'spark', 'storm', 'hbase', 'hadoop']
>>> a=['1','4','45','2','34']
>>> a.sort(key=len)
>>> a
['1', '4', '2', '45', '34']
>>> a.sort(key=int)
>>> a
['1', '2', '4', '34', '45']
>>> a.sort(key=int,reverse=True)
>>> a
['45', '34', '4', '2', '1']
>>>

Parallel iteration with the zip() function:
>>> a=[3,4,5]
>>> b=[7,8,9]
>>> zip(a,b)
[(3, 7), (4, 8), (5, 9)]
>>> c=(1,2,3)
>>> d=(4,5,6)
>>> e=(7,8,9)
>>> zip(c,d,e)
[(1, 4, 7), (2, 5, 8), (3, 6, 9)]
>>> f='asd'
>>> g='qwertyy'
>>> zip(f,g)
[('a', 'q'), ('s', 'w'), ('d', 'e')]
>>> for (x,y) in zip(a,b):
    print x,y,'-->',x*y

3 7 --> 21
4 8 --> 32
5 9 --> 45

if/elif/else statements:
>>> if x>y:
    print 'sdf'
elif x<y:
    print 'asf'
else:
    print 'sdfg'

asf
>>>

The built-in cmp() function and sorting with a comparison function:
>>> cmp(23,56)
-1
>>> cmp(67,23)
1
>>> cmp(33,33)
0
>>> cmp('asd','a')
1
>>> cmp('asd','bs')
-1
>>> num=[345,56,7,32]
>>> num.sort(cmp)
>>> num
[7, 32, 56, 345]
>>>

The dictionary get() method:
>>> info={'name':'baozi'}
>>> info
{'name': 'baozi'}
>>> print info['age']
Traceback (most recent call last):
  File "<pyshell#210>", line 1, in <module>
    print info['age']
KeyError: 'age'
>>> print info.get('age')
None
>>> print info.get('age','不存在')
不存在
>>>

The dictionary items() and iteritems() methods:
>>> x={'hadoop':'hdfe','spark':'rdd'}
>>> x
{'spark': 'rdd', 'hadoop': 'hdfe'}
>>> x.items()
[('spark', 'rdd'), ('hadoop', 'hdfe')]
>>> x
{'spark': 'rdd', 'hadoop': 'hdfe'}
>>> a=x.items()
>>> a
[('spark', 'rdd'), ('hadoop', 'hdfe')]
>>> type(a)
<type 'list'>
>>> q=x.iteritems()
>>> q
<dictionary-itemiterator object at 0x02E3DED0>
>>> type(q)
<type 'dictionary-itemiterator'>
>>> list(q)
[('spark', 'rdd'), ('hadoop', 'hdfe')]
>>>
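The get() and iteritems() methods combine naturally. The sketch below (plain Python 2 script syntax to match the transcripts above; the words list and the counts dictionary are made-up names used only for illustration) counts how often each word occurs, with get(key, 0) supplying a default so the first occurrence never raises a KeyError:

# Tally occurrences: get(w, 0) returns 0 when w is not in the dictionary yet.
words = ['hadoop', 'spark', 'hadoop', 'storm', 'spark', 'hadoop']
counts = {}
for w in words:
    counts[w] = counts.get(w, 0) + 1
# iteritems() yields the (key, value) pairs one at a time instead of building a list.
for word, n in counts.iteritems():
    print word, '-->', n

Note that in Python 3 iteritems() is gone: items() itself returns a lazy view, so the second loop would simply call items().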
The range() function:
>>> range(5)
[0, 1, 2, 3, 4]
>>> range(1,5)
[1, 2, 3, 4]
>>> range(1,10,2)
[1, 3, 5, 7, 9]
>>> range(4,-4)
[]
>>> range(4,-4,-1)
[4, 3, 2, 1, 0, -1, -2, -3]
>>> for i in range(5):
    print (i,'---->spark')

(0, '---->spark')
(1, '---->spark')
(2, '---->spark')
(3, '---->spark')
(4, '---->spark')
>>> str='hadoop-spark'
>>> for i in str:
    print i,

h a d o o p - s p a r k
>>> for i in range(len(str)):
    print str[i],

h a d o o p - s p a r k
>>>

The dictionary clear(), pop() and popitem() methods:
>>> p={}
>>> p['name']='blb'
>>> p['age']='age'
>>> p
{'age': 'age', 'name': 'blb'}
>>> l=p
>>> l
{'age': 'age', 'name': 'blb'}
>>> p.clear()
>>> p
{}
>>> l
{}
>>> x
{'age': 23, 'spark': 'rdd', 'hadoop': 'hdfe', 'name': 'blb'}
>>> y=x
>>> y
{'age': 23, 'spark': 'rdd', 'hadoop': 'hdfe', 'name': 'blb'}
>>> x.pop('age')
23
>>> x
{'spark': 'rdd', 'hadoop': 'hdfe', 'name': 'blb'}
>>> y
{'spark': 'rdd', 'hadoop': 'hdfe', 'name': 'blb'}
>>> x.popitem()
('spark', 'rdd')
>>> y
{'hadoop': 'hdfe', 'name': 'blb'}
>>>

Python list comprehensions:
>>> a=[1,2,3,4,5,6,7,8,9,10,11,12]
>>> [3*f for f in a]
[3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36]
>>> [3*f for f in range(12)]
[0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33]
>>> [f for f in a if f % 2 ==0]
[2, 4, 6, 8, 10, 12]
>>> [[f,g] for f in range(5) for g in range(6)]
[[0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5], [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [1, 5], [2, 0], [2, 1], [2, 2], [2, 3], [2, 4], [2, 5], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5], [4, 0], [4, 1], [4, 2], [4, 3], [4, 4], [4, 5]]
>>>

List ordering with reverse(), sort() and sorted():
>>> a=[1,4,7,34,87,3,16]
>>> a.reverse()
>>> a
[16, 3, 87, 34, 7, 4, 1]
>>> a.sort()
>>> a
[1, 3, 4, 7, 16, 34, 87]
>>> b=a.sort()
>>> a
[1, 3, 4, 7, 16, 34, 87]
>>> b
>>> print b
None
>>> b=sorted(a)
>>> a
[1, 3, 4, 7, 16, 34, 87]
>>> b
[1, 3, 4, 7, 16, 34, 87]
>>> sorted('hadoop-spark.storm')
['-', '.', 'a', 'a', 'd', 'h', 'k', 'm', 'o', 'o', 'o', 'p', 'p', 'r', 'r', 's', 's', 't']
>>>

Importing modules with from ... import:
>>> import math
>>> print math
<module 'math' (built-in)>
>>> print math.pi
3.14159265359
>>> print pi
Traceback (most recent call last):
  File "<pyshell#336>", line 1, in <module>
    print pi
NameError: name 'pi' is not defined
>>> from math import pi
>>> print pi
3.14159265359
>>> from math import *
>>> pi
3.141592653589793
>>> cos(pi)
-1.0
>>>

Python sequences:
Lists and tuples are the most commonly used sequence types; strings, Unicode strings, buffer objects and xrange objects are sequences as well.
Some operations common to all sequence types:

1. Indexing:
>>> x='hadoop-spark'
>>> x[0]
'h'
>>> x[-1]
'k'
>>>

2. Slicing:
>>> x[4:-4]
'op-s'
>>> x[:-1]
'hadoop-spar'
>>> x[1:]
'adoop-spark'
>>>

3. Concatenation:
>>> [1,2]+[2,3,4]
[1, 2, 2, 3, 4]
>>> 'hadoop'+'spark'
'hadoopspark'
>>> (23,45)+(5,234)
(23, 45, 5, 234)
>>>

4. Repetition:
>>> a='hadoop'
>>> a*4
'hadoophadoophadoophadoop'
>>> s=[23,45]
>>> s*4
[23, 45, 23, 45, 23, 45, 23, 45]
>>>

5. Membership:
>>> x
'hadoop-spark'
>>> 't' in x
False
>>> 's' in x
True
>>>

6. Built-in sequence functions:
>>> x
'hadoop-spark'
>>> len(x)
12
>>> min(x)
'-'
>>> max(x)
's'
>>> num=[23,4,76,8,9,54,76]
>>> len(num)
7
>>> min(num)
4
>>> max(num)
76
>>>

Sequence unpacking in assignment statements:
>>> a,b,c=1,2,3
>>> print a,b,c
1 2 3
>>> a,b=b,a
>>> print a,b,c
2 1 3
>>> val=1,2,3
>>> val
(1, 2, 3)
>>> x,y,z=val
>>> print x,y,z
1 2 3
>>>

The dictionary setdefault() method:
>>> a={}
>>> a['name']='blb'
>>> a
{'name': 'blb'}
>>> a.setdefault('name','hadoop')
'blb'
>>> a
{'name': 'blb'}
>>> a.setdefault('name1','spark')
'spark'
>>> a
{'name': 'blb', 'name1': 'spark'}
>>> b={}
>>> b
{}
>>> b.setdefault('storm')
>>> b
{'storm': None}
>>>

The difference between the is identity operator and the == equality operator:
Every object in Python has three basic properties: id (identity), type and value. Both is and == compare objects, but they do not compare the same thing:
The == equality operator tests whether two objects have equal values;
The is identity operator tests whether two names refer to the same object, i.e. whether their ids are equal.
>>> a=b='hadoop-spark'
>>> c='hadoop-spark'
>>> a==b
True
>>> a==c
True
>>> a is b
True
>>> a is c
False
>>> print id(a)
48410160
>>> print id(b)
48410160
>>> print id(c)
48410120
>>>
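Because is compares identity while == compares value, the aliasing behaviour shown at the start of this part follows directly: plain assignment binds a second name to the same object, while a full slice builds a new list. A minimal sketch (Python 2 syntax; the variable names are invented for the example):

original = [1, 'qwe', 'hadoop']
alias = original        # a second name for the same object (same id)
copy = original[:]      # a full slice creates a new list with equal contents
print alias == original, alias is original    # True True
print copy == original, copy is original      # True False
alias.append('spark')   # mutating through the alias changes the shared object
print original          # [1, 'qwe', 'hadoop', 'spark']
print copy              # [1, 'qwe', 'hadoop'] -- the copy is unaffected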
The list pop() method removes and returns an element:
>>> a=[1,2,3,4,5,6,7,8,9]
>>> print a.pop()
9
>>> a.pop()
8
>>> a
[1, 2, 3, 4, 5, 6, 7]
>>> a.pop(2)
3
>>> a
[1, 2, 4, 5, 6, 7]
>>>

The list insert() method (with extend() and append()):
>>> a=[1,2,3,4,5,6,7,8]
>>> a.insert(4,'hadoop')
>>> a
[1, 2, 3, 4, 'hadoop', 5, 6, 7, 8]
>>> a.extend('spark')
>>> a
[1, 2, 3, 4, 'hadoop', 5, 6, 7, 8, 's', 'p', 'a', 'r', 'k']
>>> a.append('storm')
>>> a
[1, 2, 3, 4, 'hadoop', 5, 6, 7, 8, 's', 'p', 'a', 'r', 'k', 'storm']
>>>

Looking up positions with the list index() method:
>>> a=['hadoop','spark','storm','hive','hbase']
>>> a.index('storm')
2
>>> a.index('docker')
Traceback (most recent call last):
  File "<pyshell#449>", line 1, in <module>
    a.index('docker')
ValueError: 'docker' is not in list
>>>

The difference between list extend() and the + concatenation operator:
>>> list1=[1,2,3,4]
>>> list2=[5,6,7]
>>> list1.extend(list2)
>>> list1
[1, 2, 3, 4, 5, 6, 7]
>>> l1=[1,2,3]
>>> l2=[4,5,6]
>>> l1+l2
[1, 2, 3, 4, 5, 6]
>>> l1
[1, 2, 3]
>>>

The while statement:
>>> x=1
>>> while x<10:
    x+=1
    print x,

2 3 4 5 6 7 8 9 10
>>>

The for loop:
>>> a='hadoop-spark'
>>> for i in a:
    print i,

h a d o o p - s p a r k
>>> b=[1,2,3,4,5]
>>> for i in b:
    print i,

1 2 3 4 5
>>> c=[('hadoop','hdfs'),('spark','rdd')]
>>> for (i,j) in c:
    print(i,j),

('hadoop', 'hdfs') ('spark', 'rdd')
>>>

The break statement:
>>> x=1
>>> while True:
    x+=1
    print x
    if(x>=5):
        break

2
3
4
5
>>>

The continue statement:
>>> x=10
>>> while x:
    x-=1
    if x % 2!=0:
        continue
    print x

8
6
4
2
0
>>>

The pass statement:
>>> while True:
    pass

KeyboardInterrupt
>>>

The return statement:
>>> def fun1(x,y):
    return x+y

>>> x=fun1(3,4)
>>> x
7
>>>

Using the global statement:
>>> x=6
>>> def fun2():
    x=1

>>> fun2()
>>> x
6
>>> x=6
>>> def fun3():
    global x
    x=1

>>> fun3()
>>> x
1
>>>

The assert statement:
An assert statement declares that a condition must be true; if an AssertionError is raised, the expression was false. Think of assert as "raise-if-not": it evaluates an expression, and if the result is false it raises an exception.
>>> assert 1==1
>>> assert 2+2==2*2
>>> assert 2+2==2*2+1
Traceback (most recent call last):
  File "<pyshell#570>", line 1, in <module>
    assert 2+2==2*2+1
AssertionError
>>> assert len(['my age',12])<10
>>> assert len(['my age',12])>10
Traceback (most recent call last):
  File "<pyshell#572>", line 1, in <module>
    assert len(['my age',12])>10
AssertionError
>>> assert range(4)==[1,2,3,4]
Traceback (most recent call last):
  File "<pyshell#573>", line 1, in <module>
    assert range(4)==[1,2,3,4]
AssertionError
>>> assert range(4)==[0,1,2,3]
>>>
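As a closing sketch (Python 2 syntax; the average() function and its error message are hypothetical, not part of the original notes), an assert can also carry a message that is attached to the AssertionError, which makes it handy for checking arguments inside a function:

def average(numbers):
    # The expression after the comma becomes the AssertionError message.
    assert len(numbers) > 0, 'average() needs a non-empty list'
    return float(sum(numbers)) / len(numbers)

print average([23, 4, 76, 8, 9])    # 24.0
print average([])                   # AssertionError: average() needs a non-empty list

Keep in mind that assertions are stripped when Python runs with the -O option, so they are meant to catch programming errors rather than to validate user input.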