Python 是一种高级、面向对象、通用的编程语言,由Guido van Rossum发明,于1991年首次发布。Python 的设计哲学强调代码的可读性和简洁性,同时也非常适合于大型项目的开发。Python 语言被广泛用于Web开发、科学计算、人工智能、自动化测试、游戏开发等各个领域,并且拥有丰富的第三方库和工具,使得Python成为广泛应用的语言之一。同时,由于其开放性和可移植性,Python在跨平台应用、开源软件开发和云计算等领域也被广泛使用。
9.1 系统操作模块 OS/SYS模块是python中最基本的模块,它们提供了使用操作系统相关功能的便捷途径,这里将简单演示针对目录文件的各种操作函数与操作技巧.
OS文件目录操作: OS模块提供了多数操作系统功能的接口函数,编程时经常要和文件、目录打交道,所以开发中离不开该模块.
方法
说明
os.getcwd()
获取当前工作目录,即当前python脚本工作的目录路径
os.chdir(“dirname”)
改变当前脚本工作目录,相当于shell下cd
os.curdir
返回当前目录: (‘.’)
os.pardir
获取当前目录的父目录字符串名:(‘..’)
os.makedirs(‘dir1/dir2’)
生成多层递归目录,此处递归生成./dir1/dir2
os.removedirs(‘dirname’)
若目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推
os.mkdir(‘dirname’)
创建目录,创建一个新的目录
os.rmdir(‘dirname’)
删除空目录,若目录不为空则无法删除,报错
os.listdir(‘dirname’)
列出指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印
os.walk(‘dirname’)
遍历所有目录,包括子目录
os.remove()
删除一个文件
os.rename(“oldname”,”new”)
重命名文件/目录
os.stat(‘path/filename’)
获取文件/目录信息
os.sep
查系统特定的路径分隔符,win下为”\“; Linux下为”/“
os.name
查看字符串指示当前使用平台.win->’nt’; Linux->’posix’
os.linesep
查看平台使用的行终止符,win下为”\r\n”; Linux下为”\n”
os.pathsep
查看当前,用于分割文件路径的字符串
os.system(“shell”)
运行shell命令,直接显示,不能保存执行结果
os.popen(“shell”).read()
运行shell命令,可以保存执行结果
os.environ
获取系统环境变量
OS文件与目录处理: 通过使用该模块我们可以将文件与目录进行切割拼接等.
os.path.abspath(path) os.path.split(path) os.path.dirname(path) os.path.basename(path) os.path.exists(path) os.path.isabs(path) os.path.isfile(path) os.path.isdir(path) os.path.join(path) os.path.getatime(path) os.path.getmtime(path)
SYS系统命令行模块: SYS模块提供访问解释器使用或维护的变量,和与解释器进行交互的函数.
import sys sys.argv sys.exit(n) sys.version sys.path sys.modules.keys() sys.platform sys.stdin sys.stdout sys.stderr
判断文件目录权限: 查看文件或者目录是否有指定权限,有则返回True否则返回False
>>> os.access("/etc/passwd" ,os.F_OK) True >>> os.access("/etc/passwd" ,os.R_OK) True >>> os.access("/etc/passwd" ,os.W_OK) True >>> os.access("/etc/passwd" ,os.X_OK) False
设置文件目录权限: 设置目录或文件的各种权限,注意修改权限会消除以前的权限,只保留修改的权限.
>>> import stat,os >>> os.chmod("/etc/passwd" ,stat.S_IXGRP) >>> os.chmod("/etc/passwd" ,stat.S_IXOTH) >>> os.chmod("/etc/passwd" ,stat.S_IWOTH) >>> os.chmod("/etc/passwd" ,stat.S_IROTH) >>> os.chmod("/etc/passwd" ,stat.S_IRWXO) >>> >>> os.chmod("/etc/passwd" ,stat.S_IWGRP) >>> os.chmod("/etc/passwd" ,stat.S_IRGRP) >>> os.chmod("/etc/passwd" ,stat.S_IRWXG) >>> os.chmod("/etc/passwd" ,stat.S_IXUSR) >>> os.chmod("/etc/passwd" ,stat.S_IWUSR) >>> os.chmod("/etc/passwd" ,stat.S_IRUSR) >>> os.chmod("/etc/passwd" ,stat.S_IRWXU) >>> os.chown("/etc/passwd" ,0 ,0 )
文件拷贝/删除/移动/归档: shutil模块对文件和文件集合提供了许多高级操作,该模块也是python中默认自带的标准库.
>>> import shutil>>> >>> shutil.chown("/etc/passwd" ,user="root" ,group="root" ) >>> shutil.copy("/etc/passwd" ,"/tmp/passwd" ) >>> shutil.copy2("/etc/passwd" ,"/tmp/passwd" ) >>> shutil.copyfile("/etc/shadow" ,"/tmp/shadow" ) >>> shutil.copyfileobj(open ("/etc/passwd" ,"r" ),open ("/tmp/passwd" ,"w" ))>>> shutil.move("/etc/passwd" ,"/tmp/" ) >>> shutil.rmtree("/tmp/" ) >>> >>> shutil.copytree("/etc" ,"/tmp" , ignore=shutil.ignore_patterns('*.conf' , 'tmp*' ))>>> shutil.make_archive("/etc/" ,"gztar" ,root_dir='/home/' )
ZIP文件压缩: 通过ZipFile模块,压缩指定目录下的指定文件,与解压缩操作.
import os
import zipfile


def ordinary_all_file(rootdir):
    """Collect every file and directory path found beneath ``rootdir``.

    The tree is walked bottom-up (``topdown=False``).  For each visited
    directory its files are listed first, followed by its sub-directories.
    Backslashes in the resulting paths are replaced with forward slashes.

    Args:
        rootdir: Directory to scan.

    Returns:
        list[str]: The collected paths; empty when nothing is found.
    """
    paths = []
    for root, dirs, files in os.walk(rootdir, topdown=False):
        # Files first, then directories, matching the original ordering.
        for name in files + dirs:
            paths.append(os.path.join(root, name).replace("\\", "/"))
    return paths


if __name__ == "__main__":
    # Compress everything below d://python into lyshark.zip.  The ``with``
    # statement closes the archive, so no explicit close() call is needed.
    with zipfile.ZipFile("lyshark.zip", "w") as fp:
        for each in ordinary_all_file("d://python"):
            fp.write(each)

    # Unpack the archive that was just written.
    with zipfile.ZipFile("lyshark.zip", "r") as fp:
        fp.extractall("c://")
9.2 文本处理模块 在python中常见的文本处理方式是,通过内置的re模块提供对正则表达式的支持,正则表达式会被编译成一系列的字节码,然后由通过C编写的正则表达式引擎进行执行,该引擎自从python这门语言诞生以来,近20年时间未有发生过变化.
基本的通用匹配符: 基本的通用正则匹配符号,下面的通配符是最基础也是最常用的几种符号序列.
>>> re.search("hel.o" ,"hello lyshark,hello world" ).group()'hello' >>> re.findall("hel.o" ,"hello lyshark hello world" )['hello' , 'hello' ] >>> re.findall("hel.o" ,"hello lyshark hello world" ,flags=re.DOTALL)['hello' , 'hello' ] >>> re.findall("ab*" ,"abccba23acbcabb" )['ab' , 'a' , 'a' , 'abb' ] >>> re.findall("ab+" ,"abccba23acbcabb" )['ab' , 'abb' ] >>> re.findall("ab?" ,"ab,abc,abb,abcd,a,acd,abc" )['ab' , 'ab' , 'ab' , 'ab' , 'a' , 'a' , 'ab' ] >>> re.findall("ab?" ,"ab,a,abc,abcde" )['ab' , 'a' , 'ab' , 'ab' ] >>> re.search(r"^h" ,"hello world" ).group()'h' >>> re.search(r"world$" ,"hello\nworld" ).group()'world' >>> re.search(r"^a" ,"\nabc\ndef" ,flags=re.MULTILINE).group()'a' >>> re.search("foo$" ,"bfoo\nsdfsf" ,flags=re.MULTILINE).group()'foo'
转义字符与选择性匹配: 转义字符(脱意字符)用于取消字符原有的特殊含义,选择匹配则是在给定的多个候选中匹配其中之一.
>>> re.search("..\\t" ,"hello\t lyshark\n" ).group()'lo\t' >>> re.search("\\t" ,"hello\t lyshark\n" ).group()'\t' >>> re.search("\t" ,"hello\t lyshark\n" ).group()'\t' >>> re.search(r"\\" ,"hello\\lyshark" ).group()'\\' >>> re.search("\s+" ,"ab\tc1\n3" ).group()'\t' >>> re.search("\s+" ,"ab c1\n3" ).group()' ' >>> re.search("abc|ABC" ,"ABCBabcCD" ).group()'ABC' >>> re.findall("abc|ABC" ,"ABCBabcCD" )['ABC' , 'abc' ]
字串的范围匹配与分组输出: 通过给定范围对文本进行正则匹配,并且还可以将匹配到的结果进行分组输出.
>>> re.search("hello{2}" ,"hello,helloo,hellooo,helloooo" ).group()'helloo' >>> re.search("hello{3}" ,"hello,helloo,hellooo,helloooo" ).group()'hellooo' >>> re.search("hello{1,2}" ,"hello,helloo,hellooo,helloooo" ).group()'hello' >>> re.findall("hello{1,2}" ,"hello,helloo,hellooo,helloooo" )['hello' , 'helloo' , 'helloo' , 'helloo' ] >>> re.search("[0-9]" ,"hello 1,2,3,4,5" ).group() '1' >>> re.search("[0-9]" ,"hello a12 b23 34a 45t" ).group()'1' >>> re.findall("[0-9]" ,"hello 1,2,3,4,5" )['1' , '2' , '3' , '4' , '5' ] >>> re.findall("[0-9]" ,"hello b23 34a 45t wan" )['2' , '3' , '3' , '4' , '4' , '5' ] >>> re.search("[^0-9]" ,"hello 1,2,3,4,5" ).group()'h' >>> re.search("[^0-9]*" ,"hello 1,2,3,4,5" ).group()'hello' >>> re.search(r"[aeiou]" ,"Hello LyShark" ).group()'e' >>> number = "371481199306143242" >>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})" ,number).groupdict(){'province' : '3714' , 'city' : '81' , 'birthday' : '1993' } >>> >>> re.search("(?P<name>[a-zA-Z]+)(?P<age>[0-9]+)" ,"lyshark22" ).groupdict(){'name' : 'lyshark' , 'age' : '22' }
regex.match: 从起始位置开始匹配,匹配成功返回一个对象,未匹配成功返回None.
match (pattern,string,flags=0 )>>> origin = "hello alex bcd abcd lge acd 19" >>> >>> ret = re.match ("h\w+" ,origin)>>> print (ret.group()) >>> print (ret.groups()) >>> print (ret.groupdict()) >>> ret = re.match ("h(\w+).*(?P<name>\d)$" ,origin)>>> print (ret.group()) >>> print (ret.groups()) >>> print (ret.groupdict())
regex.search: 搜索整个字符串去匹配第一个符合条件的数据,未匹配成功返回None.
>>> origin = "hello alex bcd abcd lge acd 19" >>> re.search("^h\w+" ,origin).group()'hello' >>> re.search("a\w+" ,origin).group()'alex' >>> re.search("(?P<name>a\w+)" ,origin).groupdict(){'name' : 'alex' } >>> re.search("(?P<姓名>[a-zA-Z]+)(?P<年龄>[0-9]+)" ,"lyshark22" ).groupdict(){'姓名' : 'lyshark' , '年龄' : '22' }
regex.findall: 获取所有非重叠的匹配列表,且每一个匹配均是字符串,空的匹配也会包含在结果中.
>>> origin = "hello alex bcd abcd lge acd 19" >>> re.findall("al\w+" ,origin)['alex' ] >>> re.findall("a\w+" ,origin)['alex' , 'abcd' , 'acd' ]
regex.sub: 先匹配查找结果,然后进行字串的替换,也就是替换匹配成功的指定位置字符串.
sub(pattern,repl,string,count=0 ,flags=0 ) >>> origin = "hello alex bcd abcd lge acd 19" >>> re.sub("a[a-z]+" ,"999999" ,origin,1 )'hello 999999 bcd abcd lge acd 19' >>> re.sub("a[a-z]+" ,"999999" ,origin,2 )'hello 999999 bcd 999999 lge acd 19' >>> origin = "hello alex bcd abcd lge acd 19 !@#" >>> re.sub('[!|@|#]' ,"" ,origin)'hello alex bcd abcd lge acd 19 '
regex.split: 字符串切割函数,用来实现对指定字符串的分割工作,根据正则匹配分割字符串.
split(pattern,string,maxsplit=0 ,flags=0 ) >>> origin = "hello alex bcd abcd lge acd 19" >>> re.split("alex" ,origin,1 )['hello ' , ' bcd abcd lge acd 19' ] >>> re.split("(alex)" ,origin,1 )['hello ' , 'alex' , ' bcd abcd lge acd 19' ]
regex.compile: 用于将字符串编译到类中,直接调用这个类进行过滤,用于多处调用场合.
>>> string = "Hello LyShark !" >>> >>> obj = re.compile (r"[A-Z][a-z]" )>>> obj.findall(string)['He' , 'Ly' , 'Sh' ] >>> string = "the number is 20.5 -> 30.6" >>> obj = re.compile (r''' ... \d+ # 整数部分... \.? # 小数点... \d* # 小数部分... ''' ,re.VERBOSE)>>> obj.findall(string)['20.5' , '30.6' ]
regex.other: 除了上面介绍的几种常用的匹配模式以外,正则模块还支持使用保留关键字匹配.
>>> re.match (r'.*' , 'abc\nedf' ).group()'abc' >>> re.match (r'.*' , 'abc\nedf' ,re.DOTALL).group()'abc\nedf' >>> re.findall(r'^abc' , 'abc\nedf' )['abc' ] >>> re.findall(r'^abc' , 'abc\nabc' ,re.MULTILINE)['abc' , 'abc' ] >>> re.findall(r'abc\d$' , 'abc1\nabc2' )['abc2' ] >>> re.findall(r'abc\d$' , 'abc1\nabc2' ,re.MULTILINE)['abc1' , 'abc2' ] >>> re.match (r'(Name)\s*:\s*(\w+)' ,'NAME : Joey' ,re.IGNORECASE).groups()('NAME' , 'Joey' )
(案例) 匹配IP地址与MAC地址: 这里提供了不同的匹配正则表达式,来实现对IPv4/IPv6以及对MAC地址的匹配公式.
>>> re.search("^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$" ,"192.168.1.1" )<re.Match object ; span=(0 , 11 ), match ='192.168.1.1' > >>> re.match (r"^\s*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s*$" ,"192.168.1.100" )<re.Match object ; span=(0 , 13 ), match ='192.168.1.100' > >>> >>> string_ip = "is this 236.168.192.1 ip 12321" >>> result = re.findall(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b" , string_ip)>>> result['236.168.192.1' ] >>> >>> string=re.compile (r'((1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){3}(1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)' )>>> print (string.search('245.255.256.25asdsa10.11.244.10' ).group())10.11 .244 .10 >>> string_IPv6="1050:0:0:0:5:600:300c:326b" >>> re.match (r"^(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}$" , string_IPv6, re.I)<re.Match object ; span=(0 , 26 ), match ='1050:0:0:0:5:600:300c:326b' > >>> >>> re.findall(r"(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])" , string_IPv6, re.I)['1050:0:0:0:5:600:300c:326b' ] >>> re.match (r"^\s*([0-9a-fA-F]{2,2}:){5,5}[0-9a-fA-F]{2,2}\s*$" ,"AB:1F:44:5B:3B:4A" )<re.Match object ; span=(0 , 17 ), match ='AB:1F:44:5B:3B:4A' >
(案例) 匹配网址与端口: 正则匹配单纯的网址,或者是网址加端口,或者是IP加端口等特殊格式.
>>> re.search(r"^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$" ,"https://www.baidu.com" )<re.Match object ; span=(0 , 21 ), match ='https://www.baidu.com' > >>> re.findall(r"([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])" ,"hello 443" )['4' , '4' , '3' ] >>> re.search(r'^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?( :([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$' ,"http://www.baidu.com:80" )<re.Match object ; span=(0 , 23 ), match ='http://www.baidu.com:80' > >>> re.search(r'^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\. (\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])( :([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$' ,"192.168.1.100:443" )<re.Match object ; span=(0 , 17 ), match ='192.168.1.100:443' >
(案例) 匹配时间与时间戳: 正则匹配各种时间格式,与时间戳等,基本上囊括了所有的匹配格式.
>>> re.search('[0-9]{10}\.[0-9]{6,7}' ,"1585553108.7385645" )<re.Match object ; span=(0 , 18 ), match ='1585553108.7385645' > >>> >>> re.search(r"(\d{4}-\d{1,2}-\d{1,2})" ,"2019-01-12" )<re.Match object ; span=(0 , 10 ), match ='2019-01-12' > >>> >>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2})" ,"2019-01-12,2010-12-11" )['2019-01-12' , '2010-12-11' ] >>> >>> re.findall(r"\d{4}[-/]\d{2}[-/]\d{2}" ,"2019-01-12,2010/12/11" )['2019-01-12' , '2010/12/11' ] >>> >>> re.search(r"(\d{1,2}/(Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d{4})" ,"2019-01-12,21/Nov/2019" ).group()'21/Nov/2019' >>> >>> re.findall(r"(\d{1,2}:\d{1,2})" ,"2010-12-11 12:11" )['12:11' ] >>> >>> re.findall(r"(\d{1,2}:\d{1,2}:\d{1,2})" ,"2010-12-11 12:11:22,09:25:30" )['12:11:22' , '09:25:30' ] >>> >>> re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})" ,"2010-12-11 12:11" )<re.Match object ; span=(0 , 16 ), match ='2010-12-11 12:11' > >>> >>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})" ,"2010-12-11 12:11" )['2010-12-11 12:11' ]
(案例) 匹配邮箱/手机号/身份证: 正则匹配验证邮箱手机号身份证等常用居民证件等.
>>> re.search("^1[3458]\d{9}$" ,"18264856987" )<re.Match object ; span=(0 , 11 ), match ='18264856987' > >>> re.search("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+" ,"182648@qq.com" )<re.Match object ; span=(0 , 13 ), match ='182648@qq.com' > >>> re.search("[a-zA-Z0-9_-]+@(qq|163)(\.[a-zA-Z0-9_-]+)+" ,"182648@qq.com" )<re.Match object ; span=(0 , 13 ), match ='182648@qq.com' > >>> re.findall(r'(^[1-8][0-7]{2}\d{3}([12]\d{3})(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])\d{3}([0-9X])$)' ,"33070219630306041X" )[('33070219630306041X' , '1963' , '03' , '06' , 'X' )]
(案例) 匹配密码验证: 该匹配规则通常用于验证用户输入的账号密码是否符合规范.
>>> re.findall("[\u4e00-\u9fa5]" ,"你好" )['你' , '好' ] >>> re.findall("^[\u4e00-\u9fa5_a-zA-Z0-9]{4,10}$" ,"1233" )['1233' ] >>> re.findall(r"^[a-zA-Z][a-zA-Z0-9_]{4,15}$" ,"password" )['password' ] >>> re.findall(r"^[a-zA-Z]\w{5,17}$" ,"passw3" )['passw3' ] >>> re.findall("^(?!_)(?!.*?_$)[a-zA-Z0-9_\u4e00-\u9fa5]+$" ,"1233" )['1233' ]
(案例) 匹配字符串密码: 该匹配规则用于检测用户输入的账号密码是否存在特殊字符,且必须包括(大写,小写,数字)
三种组合.
>>> if re.match ("^(?:(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])).*$" ,"Admin123" ) == None :>>> print ("验证失败" )>>> else :>>> print ("验证通过" ) >>> if re.match ("^(?:(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])).{5,10}$" ,"Adm2" ) == None :>>> print ("验证失败" )>>> else :>>> print ("验证通过" )>>> if re.match ("^(?=.*[a-z][A-Z][0-9]).*$" ,"admin23" ) == None :>>> print ("验证失败" )>>> else :>>> print ("验证通过" )
9.3 加密解密模块 python里面的hashlib模块提供了很多加密的算法,该模块实现了许多不同安全散列和消息摘要算法的通用接口,包括FIPS安全散列算法SHA1,SHA224,SHA256,SHA384和SHA512以及RSA的MD5算法等现代算法.
MD5加密: MD5消息摘要算法,被广泛使用的密码散列函数,可产生出一个128位的散列值(hash value).
>>> import hashlib>>> >>> hash = hashlib.md5()>>> hash .update(bytes ("lyshark" , encoding="utf-8" ))>>> >>> print (hash .hexdigest())a68aecb8fba3b8c68284937395a7db6f >>> print (hash .digest())b'"\xa6\x8a\xec\xb8\xfb\xa3\xb8\xc6\x82\x84\x93s\x95\xa7\xdbo"'
SHA1加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA1会产生一个160位的消息摘要(已被淘汰).
>>> import hashlib>>> >>> hash = hashlib.sha1()>>> hash .update(bytes ("lyshark" , encoding="utf-8" ))>>> >>> print (hash .hexdigest())e2a52d00b620d46370b177dcb21777a46c1d4f13 >>> print (hash .digest_size)20
SHA256加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA256算法的哈希值大小为256位.
>>> import hashlib>>> >>> hash = hashlib.sha256()>>> hash .update(bytes ("lyshark" , encoding="utf-8" ))>>> print (hash .hexdigest())9850380d33d64c1bad671b12fe971eb07aad6ee7f1df98eb8338c749ef5e1bc3 >>> >>> print (hash .block_size)64
SHA384加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA384算法的哈希值大小为384位.
>>> import hashlib>>> >>> hash = hashlib.sha384()>>> hash .update(bytes ("lyshark" , encoding="utf-8" ))>>> print (hash .hexdigest())
SHA512加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA512算法的哈希值大小为512位.
>>> import hashlib>>> >>> hash = hashlib.sha512()>>> hash .update(bytes ("lyshark" , encoding="utf-8" ))>>> print (hash .hexdigest())
MD5加盐: 以上的几个加密算法通过撞库可被破解,所以有必要对加密算法中添加自定义KEY再来做双重加密.
>>> import hashlib>>> >>> hash = hashlib.md5(bytes ('898oaFs09f' ,encoding="utf-8" )) >>> print (hash .hexdigest())c7fd0ceb70e0fe300c554887e36f5270 >>> >>> hash .update(bytes ("lyshark" ,encoding="utf-8" ))>>> print (hash .hexdigest())3503908e79a5b8d74b6bc697634d01b9
PKCS加密: 该函数提供了基于PKCS5密码的密钥派生函数,它使用HMAC作为伪随机函数.
>>> import hashlib>>> dk = hashlib.pbkdf2_hmac('sha256' , b'password' , b'salt' , 100000 )>>> dk.hex ()'0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a5'
blake2b加密: 针对64位平台进行了优化,可生成1到64字节之间任意大小的摘要.
>>> from hashlib import blake2b>>> >>> hash = blake2b(key=b"password" , digest_size=17 )>>> hash .update(b"lyshark" )>>> print (hash .hexdigest())662f3f4e2c21b1a04e3b18d521fed55f03
HASH摘要计算: 我们可以通过读取指定文件到内存,并通过Hash算法对其生成指定Hash摘要.
>>> import hashlib>>> >>> hash = hashlib.md5()>>> with open ("dump.json" ,"rb" ) as fp:... for item in fp:... hash .update(item)... >>> print (hash .hexdigest())ee68b99bf5c930090d13412f2d49f6ea
Base64编码: Base64是一种任意二进制到文本字符串的编码方法,常用于在URL、Cookie、网页中传输少量二进制数据.
>>> import base64>>> >>> base64.b64encode(b"hello \x00 lyshark" )b'aGVsbG8gACBseXNoYXJr' >>> base64.b64decode("aGVsbG8gACBseXNoYXJr" )b'hello \x00 lyshark' >>> >>> base64.urlsafe_b64encode(b"https://www.baidu.com" )b'aHR0cHM6Ly93d3cuYmFpZHUuY29t' >>> base64.urlsafe_b64decode("aHR0cHM6Ly93d3cuYmFpZHUuY29t" )b'https://www.baidu.com'
9.4 取随机数模块 Random模块实现了一个伪随机数生成器,可用来生成随机数以及完成与随机数相关的功能.对于整数,可从指定范围中均匀选取;对于序列,可均匀地随机选取元素,并提供了将列表就地随机重排的函数,以及不放回随机抽样的函数.
import randomrandom.shuffle() random.randint(1 ,20 ) random.uniform(10 ,20 ) random.randrange(1 ,10 ) random.choice() random.triangular(low, high, mode) random.gauss(mu, sigma) random.betavariate(alpha, beta) random.expovariate(lambd) random.gammavariate(alpha, beta) random.lognormvariate(mu, sigma) random.normalvariate(mu, sigma) random.vonmisesvariate(mu, kappa) random.paretovariate(alpha) random.weibullvariate(alpha, beta)
生成随机数: 通过使用random.randint()
函数,可以实现随机生成整数,配合chr还可以实现生成a-z等符号.
>>> import random>>> >>> random.randint(1 ,10 ) 6 >>> random.random() 0.4055420309111927 >>> >>> random.randrange(1 ,10 ,2 ) 3 >>> >>> random.uniform(1 ,10 ) 9.880034105803746 >>> round (random.uniform(100 ,600 ),2 ) 269.89 >>> >>> chr (random.randint(97 ,122 )) >>> chr (random.randint(65 ,90 ))
随机打乱列表数据: 通过使用random.shuffle()
函数,可以实现随机的打乱一个列表中的数据.
>>> import random>>> >>> lists = [1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ]>>> print (lists)[1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 ] >>> >>> random.shuffle(lists)>>> print (lists)[4 , 7 , 1 , 8 , 3 , 9 , 5 , 6 , 2 ]
随机获取一个数据: 通过使用random.choice()
函数,该函数可实现从指定的序列中获取一个随机元素.
>>> import random>>> >>> lists=[1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ]>>> string=["admin" ,"guest" ,"lyshark" ]>>> >>> random.choice(lists)2 >>> random.choice(string)'lyshark'
随机获取多个数据: 通过使用random.sample()
函数,可以实现从指定的序列中随机获取指定长度的片断并随机排列.
>>> import random>>> >>> lists=[1 ,2 ,3 ,4 ,5 ,6 ,7 ,8 ,9 ]>>> random.sample(lists,3 )[2 , 6 , 9 ] >>> >>> string = "hello lyshark" >>> random.sample(string,4 )['s' , 'e' , 'k' , 'r' ]
随机生成验证码: 通过random()
函数,配合循环语句,和选择语句来实现随机生成验证码或密码.
import random
import string


def Generateverification(digit):
    """Build a random verification code of ``digit`` characters.

    Each position is a decimal digit with probability 2/5 and an upper-case
    ASCII letter otherwise.

    Args:
        digit: Number of characters to generate.

    Returns:
        str: The generated code; empty when ``digit`` is 0.
    """
    rand = []
    for _ in range(digit):
        y = random.randrange(0, 5)
        if y == 2 or y == 4:
            # The upper bound is exclusive, so 10 is required for the digit
            # 9 to be reachable (the previous bound of 9 never produced it).
            rand.append(str(random.randrange(0, 10)))
        else:
            # Code points 65..90 are 'A'..'Z'.
            rand.append(chr(random.randrange(65, 91)))
    return "".join(rand)


def getRandChar(count):
    """Build a random password of ``count`` characters.

    Characters are drawn uniformly from ASCII letters, digits and the
    punctuation set ``!@#$%^&*()-+=.``.

    NOTE(review): ``random`` is not a cryptographically secure source; use
    the ``secrets`` module for real credentials.

    Args:
        count: Number of characters to generate.

    Returns:
        str: The generated password; empty when ``count`` is 0.
    """
    # random.choice is already uniform, so shuffling the alphabet first
    # (as the earlier random.sample call did) adds nothing.
    alphabet = string.ascii_letters + string.digits + '!@#$%^&*()-+=.'
    return "".join(random.choice(alphabet) for _ in range(count))


if __name__ == "__main__":
    ret = Generateverification(5)
    print("本次生成的随机验证码是: {}".format(ret))

    ret = getRandChar(15)
    print("本次生成的随机密码是: {}".format(ret))
9.5 日期时间模块 Time 模块是通过调用C标准库time.h实现的,尽管此模块始终可用,但并非所有平台上都提供所有功能,此模块中定义的大多数函数调用具有相同名称的平台C库函数,因为这些函数的语义因平台而异.
import time time.sleep(4 ) time.perf_counter() time.process_time() time.time() time.ctime() time.ctime(time.time()-86640 ) time.gmtime() time.gmtime(time.time()-86640 ) time.localtime(time.time()-86640 ) time.mktime(time.localtime()) time.strftime("%Y-%m-%d %H:%M:%S" ,time.gmtime()) time.strptime("2019-09-20" ,"%Y-%m-%d" )
DateTime 模块提供了处理日期和时间的类,其实现的重点是为输出格式化和操作提供高效的属性提取功能,该模块提供了以简单和复杂的方式操作日期和时间的类,虽然支持日期和时间算法,但实现的重点是用于输出格式化.
import datetimedatetime.date.today() datetime.datetime.now() datetime.datetime.now().timetuple() datetime.date.fromtimestamp(time.time()-864400 ) temp = datetime.datetime.now() temp.replace(2019 ,10 ,10 )
Calendar 是与日历相关的模块,这个模块让你可以输出像Unix cal那样的日历,它还提供了其它与日历相关的实用函数,默认情况下,这些日历把星期一当作一周的第一天,星期天为一周的最后一天.
import calendarcalen = calendar.calendar(2018 ) calen = calendar.month(2018 ,8 ) calen = calendar.isleap(2008 ) calen = calendar.leapdays(1997 ,2018 ) calen = calendar.monthrange(2018 ,8 ) calen = calendar.weekday(2018 ,11 ,22 ) calen = calendar.timegm((2018 ,8 ,27 ,11 ,35 ,0 ,0 ,0 ))
基本的时间戳互转: 将一个指定的时间格式转换为秒级时间戳和毫秒级时间戳.
>>> import time,datetime>>> >>> now = time.time()>>> print (now) 1575785965.2278268 >>> >>> print (int (now)) 1575785965 >>> >>> print (int (round (now * 1000 )))1575785965228 >>> >>> local_time = time.localtime() >>> print (local_time)time.struct_time(tm_year=2020 , tm_mon=4 , tm_mday=12 , tm_hour=10 , tm_min=5 , tm_sec=29 , tm_wday=6 , tm_yday=103 , tm_isdst=0 ) >>> >>> utc_time = time.gmtime() >>> print (utc_time)time.struct_time(tm_year=2020 , tm_mon=4 , tm_mday=12 , tm_hour=2 , tm_min=6 , tm_sec=31 , tm_wday=6 , tm_yday=103 , tm_isdst=0 )
时间戳与日期时间互转: 将时间日期转换为特定的时间戳,或者是将特定时间戳转换为日期格式.
>>> import time,datetime>>> >>> date = "2019-01-01 11:22:30" >>> times = int (time.mktime(time.strptime(date,"%Y-%m-%d %H:%M:%S" )))>>> print (times)1546312950 >>> >>> date = 1546312950 >>> times = time.strftime("%Y-%m-%d %H:%M:%S" ,time.localtime(date))>>> print (times)2019 -01-01 11 :22 :30 >>> >>> date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S" )>>> print (date)2019 -12 -08 14 :22 :50 >>> >>> timeStamp = int (time.time())>>> dateArray = datetime.datetime.fromtimestamp(timeStamp)>>> otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S" )>>> otherStyleTime'2019-12-08 14:22:50'
时间格式与时间格式互转: 将一种特定的时间格式转换为另外一种时间格式.
>>> import time,datetime >>> >>> date = "12/13/2019 10:25" >>> date_temp = datetime.datetime.strptime(date,'%m/%d/%Y %H:%M' ) >>> print (date_temp) 2019-12-13 10:25:00 >>> >>> new_date = date_temp.strftime('%Y-%m-%d %H:%M:%S' ) >>> print (new_date) 2019-12-13 10:25:00 >>> >>> date = datetime.datetime.strptime("19/10/05 12:30" , "%y/%m/%d %H:%M" ) >>> print (date) 2019-10-05 12:30:00 >>> >>> date = datetime.datetime.now() >>> datetime.datetime.strftime(date,"%A %B %d,%Y" ) 'Monday March 30,2020'
时间格式的换算与加减: 利用datetime模块来完成不同时间单位间的换算,timedelta实例则可以完成时间间隔换算.
>>> import datetime>>> from datetime import timedelta>>> >>> date = datetime.datetime.now() + datetime.timedelta(days=10 ) >>> date = datetime.datetime.now() + datetime.timedelta(days=-10 ) >>> date = datetime.datetime.now() + datetime.timedelta(hours=-10 ) >>> date = datetime.datetime.now() + datetime.timedelta(seconds=120 ) >>> print ("日期: {} {} {}" .format (date.year,date.month,date.day))日期: 2020 3 30 >>> >>> date = datetime.datetime(2020 ,5 ,24 ,12 ,22 ) >>> date + timedelta(days=10 ) >>> >>> date_1 = datetime.datetime.now() + datetime.timedelta(days=10 )>>> date_2 = datetime.datetime.now() + datetime.timedelta(days=365 )>>> >>> date_xor = date_2 - date_1 >>> date_xordatetime.timedelta(days=355 , seconds=8 , microseconds=949992 )
字符串与时间戳格式互转: 除了上方的标准格式以外,在编程中还会遇到其他的特殊时间格式的互转.
>>> import time,datetime>>> >>> date = "17/Mar/2020 10:25" >>> item = time.mktime(time.strptime(date,"%d/%b/%Y %H:%M" ))>>> item1584411900.0 >>> time.strftime("%Y-%m-%d %H:%M" ,time.localtime(item))'2020-03-17 10:25' >>> date = "Mar 05,2020" >>> item = time.mktime(time.strptime(date,"%b %d,%Y" ))>>> item1583337600.0 >>> time.strftime("%Y-%m-%d" ,time.localtime(item))'2020-03-05' >>> date = "2020-03-11" >>> item = time.mktime(time.strptime(date,"%Y-%m-%d" ))>>> item1583856000.0 >>> time.strftime("%d/%b/%Y" ,time.localtime(item))'11/Mar/2020' >>> local_time = time.localtime(time.time())>>> time.strftime("%Y-%m-%d, %H:%M:%S, %W" ,local_time)'2020-04-12, 10:00:12, 14'
计算当月的日期范围: 通过编程实现计算出2020年2月这个时间范围内有多少天,并列出来.
from datetime import datetime, date, timedelta
import calendar


def get_month_range(start_date=None):
    """Return every day of the month that contains ``start_date``.

    Args:
        start_date: Any date inside the month of interest.  When ``None``
            the current month (``date.today()``) is used.

    Returns:
        list: One entry per day of the month, in ascending order, starting
        at the first day of the month.
    """
    if start_date is None:
        start_date = date.today()
    first_day = start_date.replace(day=1)
    # monthrange() returns (weekday of the 1st, number of days in the month).
    _, days_in_month = calendar.monthrange(first_day.year, first_day.month)
    return [first_day + timedelta(days=offset) for offset in range(days_in_month)]


# Demo: list every day of February 2020.
day = get_month_range(date(2020, 2, 12))
for item in day:
    print(item)
9.6 持久存储模块 有时候我们需要对字符串,列表,字典等数据进行持久化存储,方便以后使用,而不是简单的放入内存中关机断电就丢失数据,python中提供了多种方式来实现数据的持久化存储,下面将逐个介绍.
JSON 是一种轻量级的数据交换格式,其简洁和清晰的层次结构使得JSON成为理想的数据交换语言,易于人阅读和编写,同时也易于机器解析,有效地提升网络传输效率,JSON实现了字符串和编程语言之间的数据共享与交互,通用各种编程语言中.
JSON字符串序列互转: 使用json.dumps
将基本数据类型转成字符串,使用json.loads
将字符串转化成基本数据类型.
>>> import json>>> >>> Mydict = {"admin" :"123456" ,"guest" :"guest" ,"lyshark" :"123321" }>>> type (Mydict)<class 'dict' > >>> >>> result = json.dumps(Mydict)>>> type (result) <class 'str' > >>> string = '{"admin": "123456", "guest": "guest", "lyshark": "123321"}' >>> >>> Mydict = json.loads(string)>>> type (Mydict) <class 'dict' > >>> string = '{"admin": "123456", "guest": "guest", "lyshark": "123321"}' >>> >>> Mydict = eval (string)>>> type (Mydict) <class 'dict' >
JSON 序列化/反序列化: 使用json.dump
可以将数据进行序列化存储,使用json.load
可以将数据读入变量中.
>>> import json>>> >>> MyList = [1 ,2 ,3 ,4 ,5 ,6 ,7 ]>>> >>> with open ("db.json" ,"w" ,encoding="utf-8" ) as fp:... json.dump(MyList,fp) >>> with open ("db.json" ,"r" ,encoding="utf-8" ) as fp:... json.load(fp) ... [1 , 2 , 3 , 4 , 5 , 6 , 7 ]
pickle 模块实现了python的所有数据序列和反序列化,与JSON不同的是pickle不是用于多种语言间的数据传输,它仅作为python对象的持久化或者python程序间进行互相传输对象的方法,因此它只支持python所有的数据类型.
Pickle序列化/反序列化: 使用pickle.dumps
将列表序列化为二进制字串,使用pickle.loads
反序列化为正常数据.
>>> import pickle>>> >>> data = [1 ,2 ,3 ,4 ,5 ]>>> >>> dest_str = pickle.dumps(data)>>> dest_strb'\x80\x04\x95\x0f\x00\x00\x00\x00\x00\x00\x00]\x94(K\x01K\x02K\x03K\x04K\x05e.' >>> >>> with open ("db.pickle" ,"wb" ) as fp:... data = {'k1' :'python' ,'k2' :'java' }... fp.write(pickle.dumps(data))... fp.close()... 42 >>> with open ("db.pickle" ,"rb" ) as fp:... data = pickle.loads(fp.read())... >>> data{'k1' : 'python' , 'k2' : 'java' }
shelve与pickle类似用来持久化数据的,不过shelve是以键值对的形式,将内存中的数据通过文件持久化,其支持任何pickle支持的所有python数据格式,在开启回写功能后,其灵活程度远远高于Pickle/JSON这两种类型,使用代码如下.
>>> import shelve>>> >>> sh = shelve.open ("shelve.db" ,writeback=True )>>> sh["user1" ] = { "username" :"admin" ,"passwd" :123123 }>>> sh["user2" ] = { "username" :"guest" ,"passwd" :123456 }>>> sh.close()>>> >>> sh = shelve.open ("shelve.db" ,writeback=True )>>> >>> sh["user1" ]{'username' : 'admin' , 'passwd' : 123123 } >>> sh["user1" ].get("passwd" )123123 >>> sh["user1" ]["passwd" ] = 888888 >>> sh["user1" ]{'username' : 'admin' , 'passwd' : 888888 }
9.7 INI解析模块 ConfigParser 模块用来读取配置文件,配置文件的格式跟windows下的ini配置文件相似,可以包含一个或多个节,每个节可以有多个参数(键=值),使用的配置文件的好处就是一些参数无需写死,可以使程序更灵活的配置一些参数.
为了方便演示以下的例子,请在python所在目录创建一个test.ini配置文件,写入以下内容.
[db]
db_host = 127.0.0.1
db_port = 69
db_user = root
db_pass = 123123
host_port = 69

[concurrent]
thread = 10
processor = 20
获取所有节点: 通过使用以下方式,我们可以获取到指定文件的所有主节点名称.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> >>> result=config.sections()>>> print (result)['db' , 'concurrent' ]
获取指定键值: 使用以下方式遍历,来获取指定节点(concurrent)
下的所有键值对.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> >>> result=config.items("concurrent" )>>> print (result)[('thread' , '10' ), ('processor' , '20' )]
获取指定键: 使用以下方式遍历,来获取指定节点(concurrent)
下的所有的键.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> >>> result=config.options("concurrent" )>>> print (result)['thread' , 'processor' ]
获取指定值: 使用以下方式遍历,来获取指定节点下指定键
的对应值.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> >>> result=config.get("concurrent" ,"thread" )>>> print (result)10
检查&添加&删除主节点: 检查、添加、删除指定的主节点数据.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> has_sec=config.has_section("db" )>>> print (has_sec)True >>> config.add_section("lyshark" )>>> config.write(open ("test.ini" ,"w" ))>>> config.remove_section("lyshark" )True >>> config.write(open ("test.ini" ,"w" ))
检查&添加&删除指定键值对: 检查、删除、设置指定组内的键值对.
>>> import configparser>>> >>> config=configparser.ConfigParser()>>> config.read("test.ini" ,encoding="utf-8" )>>> has_opt=config.has_option("db" ,"db_host" )>>> print (has_opt)True >>> config.set ("db" ,"db_host" ,"8888888888" )>>> config.write(open ("test.ini" ,"w" ))>>> config.remove_option("db" ,"db_host" )True >>> config.write(open ("test.ini" ,"w" ))
9.8 XML处理模块 XML(可扩展标记语言)的宗旨是传输数据,它是一种用于在不同语言或程序之间进行数据交换的格式,XML曾是数据交换领域最通用的公共语言之一,至今很多传统公司(如金融行业)的系统接口仍主要使用XML作为数据通信格式.
为了方便演示后续内容,请自行在python当前目录下创建lyshark.xml以下XML文档.
<?xml version="1.0" encoding="UTF-8" ?> <data > <country name ="Liechtenstein" > <rank updated ="yes" > 2</rank > <year > 2019</year > <gdppc > 141100</gdppc > <neighbor direction ="E" name ="Austria" /> <neighbor direction ="W" name ="Switzerland" /> </country > <country name ="Singapore" > <rank updated ="yes" > 5</rank > <year > 2020</year > <gdppc > 59900</gdppc > <neighbor direction ="N" name ="Malaysia" /> </country > <country name ="Panama" > <rank updated ="yes" > 69</rank > <year > 2029</year > <gdppc > 13600</gdppc > <neighbor direction ="W" name ="Costa Rica" /> <neighbor direction ="E" name ="Colombia" /> </country > </data >
创建XML文档: 通过使用XML函数,创建一个XML文档,原生保存的XML时默认无缩进.
<root> <son name="1号儿子" > <grand name="1号孙子" ></grand> </son> <son name="2号儿子" > <grand name="2号孙子" ></grand> </son> </root> >>> import xml.etree.ElementTree as ET>>> >>> root=ET.Element("root" )>>> >>> son1=ET.Element("son" ,{"name" :"1号儿子" })>>> son2=ET.Element("son" ,{"name" :"2号儿子" })>>> >>> grand1=ET.Element("grand" ,{"name" :"1号孙子" })>>> grand2=ET.Element("grand" ,{"name" :"2号孙子" })>>> >>> son1.append(grand1)>>> son2.append(grand2)>>> >>> root.append(son1)>>> root.append(son2)>>> >>> tree=ET.ElementTree(root)>>> tree.write('lyshark.xml' ,encoding='utf-8' ,short_empty_elements=False )
打开XML文档: 通过使用xml.etree.ElementTree
,来实现打开要XML文件.
>>> import xml.etree.ElementTree as ET>>> >>> tree = ET.parse("lyshark.xml" )>>> root = tree.getroot()>>> print (root.tag)
遍历XML文档(单层): 通过使用循环的方式,来实现对XML文件子树的遍历.
>>> import xml.etree.ElementTree as ET>>> >>> tree=ET.parse("lyshark.xml" )>>> root=tree.getroot()>>> >>> for child in root:... print (child.tag,child.attrib)... country {'name' : 'Liechtenstein' } country {'name' : 'Singapore' } country {'name' : 'Panama' }
遍历XML文档(多层): 通过使用循环的方式遍历root
下面的目录,来实现对XML文件子树的子树进行遍历.
>>> import xml.etree.ElementTree as ET>>> >>> tree=ET.parse("lyshark.xml" )>>> root=tree.getroot()>>> >>> for x in root: ... print ("主目录: %s" %x.tag) ... for y in x: ... print (y.tag,y.attrib,y.text)... 主目录: country rank {'updated' : 'yes' } year {} gdppc {} neighbor {'direction' : 'E' , 'name' : 'Austria' } neighbor {'direction' : 'W' , 'name' : 'Switzerland' } 主目录: country rank {'updated' : 'yes' } year {} gdppc {} neighbor {'direction' : 'N' , 'name' : 'Malaysia' } 主目录: country rank {'updated' : 'yes' } year {} gdppc {} neighbor {'direction' : 'W' , 'name' : 'Costa Rica' } neighbor {'direction' : 'E' , 'name' : 'Colombia' }
遍历指定节点: 通过循环的方式,配合root.iter()
来实现只遍历XML文档中的year节点.
>>> import xml.etree.ElementTree as ET>>> >>> tree=ET.parse("lyshark.xml" )>>> root=tree.getroot()>>> >>> for node in root.iter ("year" ):... print (node.tag,node.text)... year 2019 year 2020 year 2029
修改XML字段: 通过遍历的方式,找到节点为year
的数据行,并将其内容自动加1
,并回写到XML文档.
>>> import xml.etree.ElementTree as ET>>> >>> tree=ET.parse("lyshark.xml" )>>> root=tree.getroot()>>> >>> for node in root.iter ("year" ): ... new_year=int (node.text) + 1 ... node.text=str (new_year) ... node.set ("updated" ,"yes" ) ... >>> tree.write("lyshark.xml" ) >>> del node.attrib["name" ]
删除XML字段: 通过遍历的方式,查找所有的country
节点,并判断如果内部rank>50
则删除这个country
节点.
>>> import xml.etree.ElementTree as ET>>> >>> tree=ET.parse("lyshark.xml" )>>> root=tree.getroot()>>> >>> for country in root.findall("country" ): ... rank=int (country.find("rank" ).text)... if rank > 50 : ... root.remove(country)... >>> tree.write("output.xml" ,encoding="utf-8" )
9.9 Ctypes混编模块 运用Ctypes库我们可以实现和任意语言进行连接,混合编程的本质是python调用C/C++编译的动态链接库,或反过来C/C++直接使用python中的模块,如下总结了python与C语言如何衔接。
调用标准输出: 调用标准动态库实现打印输出,默认情况下Windows系统会调用msvcrt.dll
而Linux系统则会调用libc.so.6
其中的cdll代表调用约定为cdecl
而windll则代表stdcall
约定.
import platform
import ctypes

if __name__ == "__main__":
    # Load the C runtime that provides printf for the current platform.
    system = platform.system()
    if system == 'Windows':
        # ctypes.cdll.msvcrt is the equivalent attribute-style spelling.
        libc = ctypes.cdll.LoadLibrary("msvcrt.dll")
    elif system == 'Linux':
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    else:
        # Fail with a clear message instead of a NameError on ``libc`` below.
        raise OSError("unsupported platform: {}".format(system))

    # printf expects a C ``char *``, so the text is passed as bytes.
    string = "hello lyshark \n"
    string = string.encode("utf-8")
    libc.printf(string)
如果需要调用WindowsAPI函数同样可以使用该方式实现,代码如下.
from ctypes import *

if __name__ == "__main__":
    # Demo 1: bind MessageBoxA to a local name, call it and show what it returns.
    dll = windll.LoadLibrary("user32.dll")
    show_box = dll.MessageBoxA
    body = "hello lyshark".encode("utf-8")
    title = "msgbox".encode("utf-8")
    result = show_box(0, body, title, 0)
    print("返回值: ", result)

    # Demo 2: load the library again and call the function as an attribute.
    dll = windll.LoadLibrary("user32.dll")
    body = "hello lyshark \n".encode("utf-8")
    dll.MessageBoxA(0, body, "ctypes".encode("utf-8"), 0)
定义函数指针调用弹窗代码.
from ctypes import c_int, WINFUNCTYPE, windll
from ctypes.wintypes import HWND, LPCSTR, UINT,LPCWSTR
import locale


def Ascii():
    """Call user32.MessageBoxA through a WINFUNCTYPE prototype with defaults."""
    codec = locale.getpreferredencoding(False)
    greeting = "hello lyshark".encode(codec)
    signature = WINFUNCTYPE(c_int, HWND, LPCSTR, LPCSTR, UINT)
    # Each entry is (direction flag, parameter name, default value).
    defaults = (
        (1, "hwnd", 0),
        (1, "text", "MsgBox".encode(codec)),
        (1, "caption", None),
        (1, "flags", 0),
    )
    show = signature(("MessageBoxA", windll.user32), defaults)
    show()
    status = show(text=greeting)
    print("输出返回值: ", status)
    show(flags=2, text=greeting)


def Unicode():
    """Call user32.MessageBoxW through a WINFUNCTYPE prototype with defaults."""
    signature = WINFUNCTYPE(c_int, HWND, LPCWSTR, LPCWSTR, UINT)
    defaults = (
        (1, "hwnd", 0),
        (1, "text", "MsgBox"),
        (1, "caption", None),
        (1, "flags", 0),
    )
    show = signature(("MessageBoxW", windll.user32), defaults)
    show()
    show(text="hello lyshark")
    show(flags=2, text="hello lyshark")


if __name__ == "__main__":
    Ascii()
创建自定义数据类型: Ctypes 会自动去搜索自定义数据的_as_parameter_
属性,并将其值作为参数传递给C函数.
import ctypes


class MyType(object):
    """Custom argument type: ctypes passes ``_as_parameter_`` to the C function.

    Args:
        x: first factor.
        y: second factor.
    """

    def __init__(self, x, y):
        # The product is what the foreign function receives for this object.
        self._as_parameter_ = x * y


if __name__ == "__main__":
    # A single load is enough; cdll.msvcrt resolves msvcrt.dll by itself.
    libc = ctypes.cdll.msvcrt
    ref = MyType(10, 20)
    libc.printf("计算结果: %d \n".encode("utf-8"), ref)
定义结构体/联合体: 结构体需要继承Structure
类,默认情况下数据会放在_fields_
中.
from ctypes import *


class MyStruct(Structure):
    """C-layout record with a fixed-size name buffer, an age and a sex code."""

    _fields_ = [
        ("username", c_char * 10),
        ("age", c_int),
        ("sex", c_long),
    ]


class MyUnion(Union):
    """C union whose three members share the same storage."""

    _fields_ = [
        ("a_long", c_long),
        ("a_int", c_int),
        ("a_char", c_char * 10),
    ]


if __name__ == "__main__":
    # Fields live on an instance. Assigning to the class itself would replace
    # the field descriptors instead of storing data in a structure.
    person = MyStruct()
    # A c_char array field accepts bytes, not str.
    person.username = "lyshark".encode("utf-8")
    person.age = 24
    person.sex = 1
    print("姓名: {} 年龄: {}".format(person.username.decode("utf-8"), person.age))
定义多层数组: ctypes提供了对数组的支持,且数组可以内外层嵌套使用.
from ctypes import *


class PointEx(Structure):
    """Two-dimensional integer point."""

    _fields_ = [('x', c_int), ('y', c_int)]


class MyStruct(Structure):
    """Record holding an id and an embedded array of four PointEx values."""

    _fields_ = [('uuid', c_int), ('pointex_array', PointEx * 4)]


def MyArray():
    """Build a ten-element c_int array and print its items on one line."""
    numbers = (c_int * 10)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    print("".join("{} ".format(value) for value in numbers))


def processArray():
    """Build a MyStruct from nested tuples and print each embedded point."""
    record = MyStruct(1001, ((1, 1), (2, 2), (3, 3), (4, 4)))
    for point in record.pointex_array:
        print("(item.x, item.y) = (%d, %d)" % (point.x, point.y))
    print()


if __name__ == "__main__":
    MyArray()
    processArray()
数组与指针也可以相互引用,代码如下
from ctypes import *

if __name__ == "__main__":
    # Writing through a pointer changes the object it points at.
    number = c_int(100)
    print("输出元素: ", number.value)
    number_ptr = pointer(number)
    number_ptr[0] = 200
    print("修改后元素:", number.value)

    # A pointer can refer to a whole array object as well.
    ten_ints = (c_int * 10)(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    print(pointer(ten_ints))

    # A pointer type instantiated without arguments is NULL and tests false.
    print("状态:", bool(POINTER(c_int)()))
数组之间类型转换: 类型转换主要通过使用cast
实现转换,如下将c_byte
数组转换为指向c_int的指针.
from ctypes import *


class MyStruct(Structure):
    """Counted buffer: ``count`` says how many ints ``value`` points at."""

    _fields_ = [('count', c_int), ('value', POINTER(c_int))]


def zeroed_int_pointer(count):
    """Return a POINTER(c_int) over a fresh zero-filled byte buffer.

    The buffer is sized in bytes for ``count`` ints, so reading ``count``
    elements through the pointer stays inside the allocation.

    Args:
        count: number of c_int elements the pointer must be able to address.

    Returns:
        A ``POINTER(c_int)`` produced by ``cast`` from a ``c_byte`` array.
    """
    raw = (c_byte * (sizeof(c_int) * count))()
    return cast(raw, POINTER(c_int))


if __name__ == "__main__":
    ptr = MyStruct()
    ptr.count = 5
    ptr.value = (c_int * 10)(1, 2, 3, 4, 5, 6, 7, 8, 9, 0)
    for index in range(ptr.count):
        print("old [%d] = %d " % (index, ptr.value[index]), end="")
    print()

    # A 10-byte buffer is too small for five 4-byte ints; size it by count.
    ptr.value = zeroed_int_pointer(ptr.count)
    for index in range(ptr.count):
        print("new[%d] = %d " % (index, ptr.value[index]), end="")
    print()
使用回调函数: 通过使用CFUNCTYPE
可以定义并指定一个回调函数.
from ctypes import *
from ctypes.util import find_library
import platform


def cmp_func(a, b):
    """Three-way comparison of the ints that two pointers refer to.

    Returns 1 when ``a[0]`` is greater, -1 when it is smaller, 0 when equal.
    """
    if a[0] > b[0]:
        return 1
    elif a[0] < b[0]:
        return -1
    else:
        return 0


def load_libc():
    """Return a handle to the C runtime library of the current platform.

    Raises:
        OSError: if the library cannot be located or loaded.
    """
    system = platform.system()
    if system == "Windows":
        return cdll.msvcrt
    if system == "Linux":
        return cdll.LoadLibrary("libc.so.6")
    name = find_library("c")
    if name is None:
        raise OSError("cannot locate the C runtime library on " + system)
    return cdll.LoadLibrary(name)


def sort_c_ints(array):
    """Sort a ctypes c_int array in place with the C library's qsort.

    Args:
        array: a ``c_int`` array instance; it is modified in place.
    """
    CmpFuncType = CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int))
    qsort = load_libc().qsort
    # Declare the real C signature: element count and size are size_t, and
    # qsort returns void. Without this ctypes falls back to C int for both.
    qsort.argtypes = [c_void_p, c_size_t, c_size_t, CmpFuncType]
    qsort.restype = None
    # Keep the callback object referenced for the whole duration of the call.
    cmpfunc = CmpFuncType(cmp_func)
    qsort(array, len(array), sizeof(c_int), cmpfunc)


if __name__ == "__main__":
    IntArray = c_int * 10
    IntArrayPtr = IntArray(5, 6, 8, 9, 3, 2, 6, 7, 9, 0)
    sort_c_ints(IntArrayPtr)
    for i in IntArrayPtr:
        print(i, end="")
增加数组长度: 使用resize()
可以增大数组底层缓冲区(新的大小以字节为单位),但只能增加不能减小.
from ctypes import *


def grow_int_array(array, new_length):
    """Enlarge the buffer of a c_int array and return a view of the new length.

    ``resize`` takes the new size in BYTES and does not change the array's
    type, so ``len(array)`` stays the same afterwards. The extra elements
    become reachable through a second array object mapped onto the same
    memory.

    Args:
        array: a ``c_int`` array instance whose buffer is enlarged in place.
        new_length: desired number of ``c_int`` elements.

    Returns:
        A ``c_int * new_length`` array sharing memory with ``array``. It does
        not own that memory, so ``array`` must outlive it.

    Raises:
        ValueError: if the requested size is smaller than the array already is.
    """
    resize(array, sizeof(c_int) * new_length)
    return (c_int * new_length).from_address(addressof(array))


if __name__ == "__main__":
    IntArray = (c_int * 3)(1, 2, 3)
    for index in IntArray:
        print(" {}".format(index), end="")

    grown = grow_int_array(IntArray, 12)
    for index in grown:
        print(" {}".format(index), end="")
C混编(返回字符串): 首先我们使用C语言编写一个DLL文件,并导出GetPing
测试函数,Dll代码如下.
#include <iostream> #include <Windows.h> #include <string> extern "C" __declspec(dllexport) char * GetPing (char *Addr, int Port) { char * ref = "{'Address' : '192.168.1.1','Port': 22}" ; return ref; } BOOL APIENTRY DllMain (HANDLE hModule, DWORD dwReason, void * lpReserved) { return true ; }
接着使用python调用这个DLL中的导出函数,并传入参数.
from ctypes import *

if __name__ == "__main__":
    engine = CDLL("./engine.dll")

    # Declare the C signature once, then call through the local name.
    get_ping = engine.GetPing
    get_ping.argtypes = [c_char_p, c_int]
    get_ping.restype = c_char_p

    address = c_char_p("127.0.0.1".encode("utf-8"))
    port = c_int(3200)
    print("返回字典: ", get_ping(address, port))
C混编(传递数组): 我们使用C语言编写一个DLL文件,并导出一个一维数组,和一个二维数组,Dll代码如下.
#include <iostream> #include <Windows.h> extern "C" __declspec(dllexport) int get_array_elem (int Array[], int index) { return Array[index]; } extern "C" __declspec(dllexport) int get_array_2_elem (int Array[][11 ], int row, int col) { return Array[row][col]; } BOOL APIENTRY DllMain (HANDLE hModule, DWORD dwReason, void * lpReserved) { return true ; }
接着使用python调用一维数组get_array_elem
处理函数,并传入参数.
from ctypes import *

if __name__ == "__main__":
    engine = CDLL("./engine.dll")
    numbers = (c_int * 10)(1, 2, 3, 4, 5, 6, 7, 8, 9, 0)

    # Fetch every element through the DLL and print all pairs on one line.
    pairs = (
        "[%d] => %d" % (position, engine.get_array_elem(numbers, position))
        for position in range(10)
    )
    print("".join(pairs))
使用python调用二维数组get_array_2_elem
处理函数,并传入参数.
from ctypes import *

if __name__ == "__main__":
    engine = CDLL("./engine.dll")

    # A 3x3 matrix is an array of three 3-int rows.
    Row = c_int * 3
    Matrix = Row * 3
    grid = Matrix(Row(1, 2, 3), Row(8, 9, 4), Row(7, 6, 5))

    # One output line per matrix row, each cell read back through the DLL.
    for row in range(3):
        cells = (" %d " % engine.get_array_2_elem(grid, row, col) for col in range(3))
        print("".join(cells))
C混编(返回数组): 通过使用c_byte * x
声明数组空间,返回数组结果输出,先写DLL.
#include <iostream> #include <Windows.h> #include <string> extern "C" __declspec(dllexport) int GetArray (char * Data, int Number, char * OutData) { for (int i = 0 ; i < Number; ++i) { OutData[i] = Data[i] + 100 ; } return Number; } BOOL APIENTRY DllMain (HANDLE hModule, DWORD dwReason, void * lpReserved) { return true ; }
使用python调用GetArray
处理函数,并传入参数.
from ctypes import *

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")

    # One source of truth for the element count, used for both buffers and
    # for the length argument handed to the DLL.
    number = 10
    data_in = (c_byte * number)(*range(number))
    data_out = (c_byte * number)()

    ref = pdll.GetArray(data_in, c_int(number), data_out)
    print("返回值: ", ref)

    for i in data_out:
        print("{} ".format(i), end="")
C混编(传递结构): 我们继续增加功能,这次让python传入结构体,DLL收到后输出内容,先来写DLL.
#include <iostream> #include <Windows.h> typedef struct MyStruct { char uname[10 ]; int age; float score; }MyStruct; extern "C" __declspec(dllexport) char * get_struct (MyStruct* ptr) { printf ("[dll print] name: %s -> age: %d -> score: %f \n" , ptr->uname, ptr->age, ptr->score); return ptr->uname; } BOOL APIENTRY DllMain (HANDLE hModule, DWORD dwReason, void * lpReserved) { return true ; }
使用python调用get_struct
处理函数,并传入参数.
from ctypes import *


class MyStruct(Structure):
    """Mirror of the DLL's MyStruct: name buffer, age and score."""

    _fields_ = [
        ("uname", c_char * 10),
        ("age", c_int),
        ("score", c_float),
    ]


if __name__ == "__main__":
    engine = CDLL("./engine.dll")

    record = MyStruct()
    record.uname = "lyshark".encode("utf-8")
    record.age = 24
    record.score = 98.4

    # The DLL returns a char*, so declare that before making the call.
    fetch = engine.get_struct
    fetch.restype = c_char_p
    name = fetch(byref(record))
    print("返回值: {}".format(name))
C混编(返回结构): 先定义DLL文件代码,编写一个get_struct
函数,用于获取返回值.
#include <iostream> #include <Windows.h> typedef struct MyStruct { char uname[10 ]; int age; }MyStruct,*MyStructPointer; extern "C" __declspec(dllexport) MyStruct* get_struct (char *uname,int age) { MyStructPointer ptr = (MyStructPointer)malloc (sizeof (MyStruct)); strcpy (ptr->uname, uname); ptr->age = age; return ptr; } BOOL APIENTRY DllMain (HANDLE hModule, DWORD dwReason, void * lpReserved) { return true ; }
python部分则定义MyStructPointer
结构指针,并获取返回值即可.
from ctypes import *


class MyStructPointer(Structure):
    """Mirror of the DLL's MyStruct: name buffer followed by age."""

    _fields_ = [
        ("uname", c_char * 10),
        ("age", c_int),
    ]


if __name__ == "__main__":
    engine = CDLL("./engine.dll")

    # Describe the C signature: (char*, int) -> MyStruct*.
    fetch = engine.get_struct
    fetch.argtypes = [c_char_p, c_int]
    fetch.restype = POINTER(MyStructPointer)

    name = c_char_p("lyshark".encode("utf-8"))
    age = c_int(24)
    record = fetch(name, age).contents
    print("返回姓名: {} -> 年龄: {}".format(record.uname, record.age))
C混编(C中调用python): 让C语言调用python文件,并让python文件返回一个字符串结果,充分利用python三方库.
#include <iostream> #include <Windows.h> using namespace std ; std ::string GetValue (char *pyname, char *function, char *argv[]) { string command; command.append(pyname); command.append(" " ); command.append(function); command.append(" " ); command.append(argv[0 ]); FILE *fp; char buf[8196 ] = { 0 }; if ((fp = _popen(command.c_str(), "r" )) == NULL ) { exit (1 ); } while (fgets(buf, 255 , fp) != NULL ) { printf ("%s" , buf); } _pclose(fp); return buf; } int main (int argc, char * argv[]) { char *time[] = { "1024" }; GetValue("python pytest.py" , "get_value" , time); getchar(); return 0 ; }
python代码中我们直接判断传入参数,并根据参数的不同来执行不同的流程.
import sys


def dispatch(argv):
    """Choose the reply for a command line of the form ``script command value``.

    Args:
        argv: argument list laid out like ``sys.argv``.

    Returns:
        ``"<value> ok"`` when the command is ``get_value`` and a value is
        present; ``None`` for an unknown command or too few arguments.
    """
    # Check the length first so a short command line cannot raise IndexError.
    if len(argv) >= 3 and argv[1] == "get_value":
        return "{} ok".format(argv[2])
    return None


if __name__ == "__main__":
    reply = dispatch(sys.argv)
    if reply is not None:
        print(reply)