9.0 Python 内置模块应用

发表于 2023-08-12 分类于《灰帽黑客：攻守道》

Python 是一种高级、面向对象、通用的编程语言，由Guido van Rossum发明，于1991年首次发布。Python 的设计哲学强调代码的可读性和简洁性，同时也非常适合于大型项目的开发。Python 语言被广泛用于Web开发、科学计算、人工智能、自动化测试、游戏开发等各个领域，并且拥有丰富的第三方库和工具，使得Python成为广泛应用的语言之一。同时，由于其开放性和可移植性，Python在跨平台应用、开源软件开发和云计算等领域也被广泛使用。

9.1 系统操作模块

python中最基本的模块,OS/SYS模块提供了一种使用与操作系统相关的功能的便捷式途径,这里将简单演示针对目录文件的各种操作函数与操作技巧.

OS文件目录操作: OS模块提供了多数操作系统的功能接口函数,编程时经常和文件、目录打交道,所以开发中离不开该模块.

方法	说明
os.getcwd()	获取当前工作目录,即当前python脚本工作的目录路径
os.chdir(“dirname”)	改变当前脚本工作目录,相当于shell下cd
os.curdir	返回当前目录: (‘.’)
os.pardir	获取当前目录的父目录字符串名：(‘..’)
os.makedirs(‘dir1/dir2’)	生成多层递归目录,此处递归生成./dir1/dir2
os.removedirs(‘dirname’)	若目录为空,则删除,并递归到上一级目录,如若也为空,则删除,依此类推
os.mkdir(‘dirname’)	创建目录,创建一个新的目录
os.rmdir(‘dirname’)	删除空目录,若目录不为空则无法删除,报错
os.listdir(‘dirname’)	列出指定目录下的所有文件和子目录,包括隐藏文件,并以列表方式打印
os.walk(‘dirname’)	遍历所有目录,包括子目录
os.remove()	删除一个文件
os.rename(“oldname”,”new”)	重命名文件/目录
os.stat(‘path/filename’)	获取文件/目录信息
os.sep	查系统特定的路径分隔符,win下为”\“; Linux下为”/“
os.name	查看字符串指示当前使用平台.win->’nt’; Linux->’posix’
os.linesep	查看平台使用的行终止符,win下为”\r\n”; Linux下为”\n”
os.pathsep	查看当前,用于分割文件路径的字符串
os.system(“shell”)	运行shell命令,直接显示,不能保存执行结果
os.popen(“shell”).read()	运行shell命令,可以保存执行结果
os.environ	获取系统环境变量

OS文件与目录处理: 通过使用该模块我们可以将文件与目录进行切割拼接等.

os.path.abspath(path)       #返回path规范化的绝对路径
os.path.split(path)         #将path分割成目录和文件名二元组返回
os.path.dirname(path)       #返回path的目录,其实就是os.path.split(path)的第一个元素
os.path.basename(path)      #返回path最后的文件名,如果path以／或\结尾,那么就会返回空值.
os.path.exists(path)        #如果path存在,返回True.如果path不存在,返回False
os.path.isabs(path)         #如果path是绝对路径,返回True
os.path.isfile(path)        #如果path是一个存在的文件,返回True,否则返回False
os.path.isdir(path)         #如果path是一个存在的目录,则返回True,否则返回False
os.path.join(path1,path2,...) #将多个路径组合后返回,第一个绝对路径之前的参数将被忽略
os.path.getatime(path)      #返回path所指向的文件或者目录的最后存取时间
os.path.getmtime(path)      #返回path所指向的文件或者目录的最后修改时间

SYS系统命令行模块: SYS模块提供访问解释器使用或维护的变量,和与解释器进行交互的函数.

import sys

sys.argv              #命令行参数列表,第一个元素是程序本身路径
sys.exit(n)           #退出程序,正常退出时exit(0)
sys.version           #获取python解释程序的版本信息
sys.path              #返回模块的搜索路径,初始化时使用PYTHONPATH环境变量的值
sys.modules.keys()    #返回所有已经导入的模块列表
sys.platform          #返回操作系统平台名称
sys.stdin             #输入相关
sys.stdout            #输出相关
sys.stderr            #错误相关

判断文件目录权限: 查看文件或者目录是否有指定权限,有则返回True否则返回False

>>> os.access("/etc/passwd",os.F_OK)  # 是否存在
True
>>> os.access("/etc/passwd",os.R_OK)  # 是否可读
True
>>> os.access("/etc/passwd",os.W_OK)  # 是否可写
True
>>> os.access("/etc/passwd",os.X_OK)  # 是否可执行
False

设置文件目录权限: 设置目录或文件的各种权限,注意修改权限会消除以前的权限,只保留修改的权限.

>>> import stat,os

>>> os.chmod("/etc/passwd",stat.S_IXGRP)  # 组用户有执行权限
>>> os.chmod("/etc/passwd",stat.S_IXOTH)  # 其他用户有可执行权限
>>> os.chmod("/etc/passwd",stat.S_IWOTH)  # 写权限
>>> os.chmod("/etc/passwd",stat.S_IROTH)  # 读权限
>>> os.chmod("/etc/passwd",stat.S_IRWXO)  # 其他用户有全部权限
>>>
>>> os.chmod("/etc/passwd",stat.S_IWGRP)  # 组用户有写权限
>>> os.chmod("/etc/passwd",stat.S_IRGRP)  # 组用户有读权限
>>> os.chmod("/etc/passwd",stat.S_IRWXG)  # 组用户有所有权限
>>> os.chmod("/etc/passwd",stat.S_IXUSR)  # 拥有者有执行权限
>>> os.chmod("/etc/passwd",stat.S_IWUSR)  # 拥有者有写权限
>>> os.chmod("/etc/passwd",stat.S_IRUSR)  # 拥有者有读权限
>>> os.chmod("/etc/passwd",stat.S_IRWXU)  # 拥有者有所有权限
>>> os.chown("/etc/passwd",0,0)           # 设置文件的UID为0/GID为0

文件拷贝/删除/移动/归档: shutil模块对文件和文件集合提供了许多高级操作,该模块也是python中默认自带的标准库.

>>> import shutil
>>>
>>> shutil.chown("/etc/passwd",user="root",group="root")   # 改变文件的属主和属组
>>> shutil.copy("/etc/passwd","/tmp/passwd")               # 只拷贝文件
>>> shutil.copy2("/etc/passwd","/tmp/passwd")              # 拷贝文件并复制所有统计信息
>>> shutil.copyfile("/etc/shadow","/tmp/shadow")           # 如果是链接文件将复制新文件,不复制链接
>>> shutil.copyfileobj(open("/etc/passwd","r"),open("/tmp/passwd","w"))
>>> shutil.move("/etc/passwd","/tmp/")                     # 文件移动
>>> shutil.rmtree("/tmp/")                                 # 删除/tmp目录
>>>                                                        # 递归目录拷贝,忽略.conf/tmp文件
>>> shutil.copytree("/etc","/tmp", ignore=shutil.ignore_patterns('*.conf', 'tmp*'))
>>> shutil.make_archive("/home/etc","gztar",root_dir='/etc/') # 将/etc/下的文件打包为/home/etc.tar.gz

ZIP文件压缩: 通过ZipFile模块,压缩指定目录下的指定文件,与解压缩操作.

import os,zipfile

def ordinary_all_file(rootdir):
    """Return every file and directory path found beneath ``rootdir``.

    The tree is walked bottom-up (``topdown=False``), so the entries of a
    sub-directory appear before the sub-directory itself.  For each visited
    directory its files are listed first, followed by its sub-directories.

    Args:
        rootdir: Directory to walk.

    Returns:
        A list of path strings in which every backslash has been replaced by
        a forward slash.  The list is empty when ``rootdir`` has no entries
        (``os.walk`` also yields nothing for a path that does not exist).
    """
    _file = []
    for root, dirs, files in os.walk(rootdir, topdown=False):
        # Normalise each path once, as it is collected, instead of
        # re-scanning the whole accumulated list for every directory.
        for name in files + dirs:
            _file.append(os.path.join(root, name).replace("\\", "/"))
    return _file

# Compress every path found under the given directory into one archive.
# The with-statement closes the archive on exit, so no explicit close() is needed.
with zipfile.ZipFile("lyshark.zip","w") as fp:
    for each in ordinary_all_file("d://python"):
        fp.write(each)

# Extract the archive to the root of drive C.
# NOTE(review): extractall() should only be used on archives from a trusted source.
with zipfile.ZipFile("lyshark.zip","r") as fp:
    fp.extractall("c://")

9.2 文本处理模块

在python中常见的文本处理方式是,通过内置的re模块提供对正则表达式的支持,正则表达式会被编译成一系列的字节码,然后由通过C编写的正则表达式引擎进行执行,该引擎自从python这门语言诞生以来,近20年时间未有发生过变化.

基本的通用匹配符: 基本的通用正则匹配符号,下面的通配符是最基础也是最常用的几种符号序列.

# 符号 =>. <= 匹配除换行符之外的任意一个字符,若flag=DOTALL则匹配包括换行在内的字符.
>>> re.search("hel.o","hello lyshark,hello world").group()
'hello'
>>> re.findall("hel.o","hello lyshark hello world")
['hello', 'hello']
>>> re.findall("hel.o","hello lyshark hello world",flags=re.DOTALL)
['hello', 'hello']

# 符号 => * <= 匹配前一个字符出现0次或任意多次.
>>> re.findall("ab*","abccba23acbcabb")
['ab', 'a', 'a', 'abb']

# 符号 => + <= 匹配前一个字符出现1次或任意多次,至少出现一次.
>>> re.findall("ab+","abccba23acbcabb")
['ab', 'abb']

# 符号 => ? <= 匹配前一个字符出现过1次或0次,允许出现0次.
>>> re.findall("ab?","ab,abc,abb,abcd,a,acd,abc")
['ab', 'ab', 'ab', 'ab', 'a', 'a', 'ab']
>>> re.findall("ab?","ab,a,abc,abcde")
['ab', 'a', 'ab', 'ab']

# 符号 => ^$ <= 匹配开头与结尾,^匹配指定字符开头的数据,$匹配指定字符结尾的数据.
>>> re.search(r"^h","hello world").group()
'h'
>>> re.search(r"world$","hello\nworld").group()
'world'
>>> re.search(r"^a","\nabc\ndef",flags=re.MULTILINE).group()
'a'
>>> re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()
'foo'

脱意字符与选择性匹配: 脱意字符就是转义字符,用于将原有的特殊含义过滤掉;选择匹配则是在给定列表中选择其中之一.

# 符号 => \ <= 转义字符,通常情况下使后一个字符改变原来的意思.
>>> re.search("..\\t","hello\t lyshark\n").group()
'lo\t'
>>> re.search("\\t","hello\t lyshark\n").group()
'\t'
>>> re.search("\t","hello\t lyshark\n").group()
'\t'
>>> re.search(r"\\","hello\\lyshark").group()
'\\'

# 符号 => \s <= 匹配空白字符
>>> re.search("\s+","ab\tc1\n3").group()
'\t'
>>> re.search("\s+","ab c1\n3").group()
' '

# 符号 => | <= 匹配选择竖线左边,或者右边的任意一种情况.
>>> re.search("abc|ABC","ABCBabcCD").group()
'ABC'
>>> re.findall("abc|ABC","ABCBabcCD")
['ABC', 'abc']

字串的范围匹配与分组输出: 通过给定范围对文本进行正则匹配,并且还可以将匹配到的结果进行分组输出.

# 符号 => x{m} <= 匹配前一个字符X,出现过M次的行.
>>> re.search("hello{2}","hello,helloo,hellooo,helloooo").group()
'helloo'
>>> re.search("hello{3}","hello,helloo,hellooo,helloooo").group()
'hellooo'

# 符号 => x{n,m} <= 匹配前一个字符X,最少出现过N次,最多出现过M次的行.
>>> re.search("hello{1,2}","hello,helloo,hellooo,helloooo").group()
'hello'
>>> re.findall("hello{1,2}","hello,helloo,hellooo,helloooo")
['hello', 'helloo', 'helloo', 'helloo']

# 符号 => [..] <= 匹配查找指定的数据范围,通常使用[0-9] [a-z] [A-Z]这几个匹配格式.
>>> re.search("[0-9]","hello 1,2,3,4,5").group()   # 匹配第一次出现数字的行
'1'
>>> re.search("[0-9]","hello a12 b23 34a 45t").group()
'1'

# 匹配所有出现数字的行
>>> re.findall("[0-9]","hello 1,2,3,4,5")
['1', '2', '3', '4', '5']
>>> re.findall("[0-9]","hello  b23 34a 45t wan")
['2', '3', '3', '4', '4', '5']

# 匹配开头不是0-9的单个字符
>>> re.search("[^0-9]","hello 1,2,3,4,5").group()
'h'

# 匹配开头连续不是0-9的字符串(注意结果包含数字前的空格)
>>> re.search("[^0-9]*","hello 1,2,3,4,5").group()
'hello '
>>> re.search(r"[aeiou]","Hello LyShark").group()
'e'

# 符号 => (?P<name>...) <= 分组匹配:匹配并自动分组,其中?P<..>是固定写法,后面紧跟正则规则.
>>> number = "371481199306143242"
>>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",number).groupdict()
{'province': '3714', 'city': '81', 'birthday': '1993'}
>>>
>>> re.search("(?P<name>[a-zA-Z]+)(?P<age>[0-9]+)","lyshark22").groupdict()
{'name': 'lyshark', 'age': '22'}

regex.match: 从起始位置开始匹配,匹配成功返回一个对象,未匹配成功返回None.

match(pattern,string,flags=0)
# pattern： 正则模型
# string ： 要匹配的字符串
# flags  ： 匹配模式

#  未分组情况下.
>>> origin = "hello alex bcd abcd lge acd 19"
>>>
>>> ret = re.match("h\w+",origin)
>>> print(ret.group())                 #获取匹配到的所有结果
>>> print(ret.groups())                #获取模型中匹配到的分组结果
>>> print(ret.groupdict())             #获取模型中匹配到的分组结果

#  有分组情况下. 提取匹配成功的指定内容(先匹配成功全部正则,再匹配成功的局部内容提取出来)
>>> ret = re.match("h(\w+).*(?P<name>\d)$",origin)
>>> print(ret.group())                 #获取匹配到的所有结果
>>> print(ret.groups())                #获取模型中匹配到的分组结果
>>> print(ret.groupdict())             #获取模型中匹配到的分组中所有执行了key的组

regex.search: 搜索整个字符串去匹配第一个符合条件的数据,未匹配成功返回None.

>>> origin = "hello alex bcd abcd lge acd 19"

# 匹配开头是h的后面是任意字符的
>>> re.search("^h\w+",origin).group()
'hello'

# 匹配a开头后面是任意字符的
>>> re.search("a\w+",origin).group()
'alex'

# 分组匹配并过滤出alex
>>> re.search("(?P<name>a\w+)",origin).groupdict()
{'name': 'alex'}

# 匹配字符串,并分组打印出结果
>>> re.search("(?P<姓名>[a-zA-Z]+)(?P<年龄>[0-9]+)","lyshark22").groupdict()
{'姓名': 'lyshark', '年龄': '22'}

regex.findall: 获取非重叠的匹配列表,且每一个匹配均是字符串,空的匹配也会包含在结果中.

>>> origin = "hello alex bcd abcd lge acd 19"

# 匹配到单个结果,则以单列表返回
>>> re.findall("al\w+",origin)
['alex']

# 匹配到多个结果,则以列表形式返回
>>> re.findall("a\w+",origin)
['alex', 'abcd', 'acd']

regex.sub: 先匹配查找结果,然后进行字串的替换,也就是替换匹配成功的指定位置字符串.

sub(pattern,repl,string,count=0,flags=0)
# pattern： 正则模型
# repl   ： 要替换的字符串或可执行对象
# string ： 要匹配的字符串
# count  ： 指定匹配个数
# flags  ： 匹配模式

>>> origin = "hello alex bcd abcd lge acd 19"

# 匹配以a开头的字串,并替换成999999,替换1次
>>> re.sub("a[a-z]+","999999",origin,1)
'hello 999999 bcd abcd lge acd 19'

# 匹配以a开头的字串,并替换成999999,替换2次
>>> re.sub("a[a-z]+","999999",origin,2)
'hello 999999 bcd 999999 lge acd 19'

>>> origin = "hello alex bcd abcd lge acd 19 !@#"
>>> re.sub('[!|@|#]',"",origin)
'hello alex bcd abcd lge acd 19 '

regex.split: 字符串切割函数,用来实现对指定字符串的分割工作,根据正则匹配分割字符串.

split(pattern,string,maxsplit=0,flags=0)
# pattern： 正则模型
# string ： 要匹配的字符串
# maxsplit：指定分割个数
# flags  ： 匹配模式

>>> origin = "hello alex bcd abcd lge acd 19"

# 无分组切割
>>> re.split("alex",origin,1)
['hello ', ' bcd abcd lge acd 19']

# 有分组,以alex作为分隔符,切割字符串
>>> re.split("(alex)",origin,1)
['hello ', 'alex', ' bcd abcd lge acd 19']

regex.compile: 用于将字符串编译到类中,直接调用这个类进行过滤,用于多处调用场合.

>>> string = "Hello LyShark !"
>>>
>>> obj = re.compile(r"[A-Z][a-z]")
>>> obj.findall(string)
['He', 'Ly', 'Sh']

# VERBOSE => 标识位允许在re.compile中添加注释
>>> string = "the number is 20.5 -> 30.6"
>>> obj = re.compile(r'''
...                     \d+   # 整数部分
...                     \.?   # 小数点
...                     \d*   # 小数部分
...                     ''',re.VERBOSE)
>>> obj.findall(string)
['20.5', '30.6']

regex.other: 除了上面介绍的几种常用的匹配模式以外,正则模块还支持使用保留关键字匹配.

# re.DOTALL => 匹配包括换行在内的字符串
>>> re.match(r'.*', 'abc\nedf').group()
'abc'
>>> re.match(r'.*', 'abc\nedf',re.DOTALL).group()
'abc\nedf'

# re.MULTILINE => 多行模式,^ 可匹配每一行的开头
>>> re.findall(r'^abc', 'abc\nedf')
['abc']
>>> re.findall(r'^abc', 'abc\nabc',re.MULTILINE)
['abc', 'abc']

# re.MULTILINE => 多行模式,$ 可匹配每一行的结尾
>>> re.findall(r'abc\d$', 'abc1\nabc2')
['abc2']
>>> re.findall(r'abc\d$', 'abc1\nabc2',re.MULTILINE)
['abc1', 'abc2']

# re.IGNORECASE => 匹配时忽略大小写,并将匹配到的结果分组
>>> re.match(r'(Name)\s*:\s*(\w+)','NAME : Joey',re.IGNORECASE).groups()
('NAME', 'Joey')

(案例) 匹配IP地址与MAC地址: 这里提供了不同的匹配正则表达式,来实现对IPv4/IPv6以及对MAC地址的匹配公式.

# 匹配IP地址(严格匹配模式)
>>> re.search("^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$","192.168.1.1")
<re.Match object; span=(0, 11), match='192.168.1.1'>

# 匹配IP地址(松散匹配模式)
>>> re.match(r"^\s*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s*$","192.168.1.100")
<re.Match object; span=(0, 13), match='192.168.1.100'>
>>>
>>> string_ip = "is this 236.168.192.1 ip 12321"
>>> result = re.findall(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", string_ip)
>>> result
['236.168.192.1']
>>>
>>> string=re.compile(r'((1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){3}(1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)')
>>> print(string.search('245.255.256.25asdsa10.11.244.10').group())
10.11.244.10

# 匹配IPV6地址(大小写不敏感)
>>> string_IPv6="1050:0:0:0:5:600:300c:326b"
>>> re.match(r"^(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}$", string_IPv6, re.I)
<re.Match object; span=(0, 26), match='1050:0:0:0:5:600:300c:326b'>
>>>
>>> re.findall(r"(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])", string_IPv6, re.I)
['1050:0:0:0:5:600:300c:326b']

# 匹配一个MAC地址
>>> re.match(r"^\s*([0-9a-fA-F]{2,2}:){5,5}[0-9a-fA-F]{2,2}\s*$","AB:1F:44:5B:3B:4A")
<re.Match object; span=(0, 17), match='AB:1F:44:5B:3B:4A'>

(案例) 匹配网址与端口: 正则匹配单纯的网址,或者是网址加端口,或者是IP加端口等特殊格式.

# 单纯只匹配网址
>>> re.search(r"^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$","https://www.baidu.com")
<re.Match object; span=(0, 21), match='https://www.baidu.com'>

# 单纯只匹配端口号
>>> re.findall(r"([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])","hello 443")
['4', '4', '3']

# 匹配网址加端口的组合
>>> re.search(r'^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?(
    :([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"http://www.baidu.com:80")
<re.Match object; span=(0, 23), match='http://www.baidu.com:80'>

# 匹配IP地址加端口的组合
>>> re.search(r'^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.
    (\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])(
    :([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{4}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"192.168.1.100:443")
<re.Match object; span=(0, 17), match='192.168.1.100:443'>

(案例) 匹配时间与时间戳: 正则匹配各种时间格式,与时间戳等,基本上囊括了所有的匹配格式.

>>> re.search('[0-9]{10}\.[0-9]{6,7}',"1585553108.7385645")
<re.Match object; span=(0, 18), match='1585553108.7385645'>
>>>
>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12")
<re.Match object; span=(0, 10), match='2019-01-12'>
>>>
>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12,2010-12-11")
['2019-01-12', '2010-12-11']
>>>
>>> re.findall(r"\d{4}[-/]\d{2}[-/]\d{2}","2019-01-12,2010/12/11")
['2019-01-12', '2010/12/11']
>>>
>>> re.search(r"(\d{1,2}/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d{4})","2019-01-12,21/Nov/2019").group()
'21/Nov/2019'
>>>
>>> re.findall(r"(\d{1,2}:\d{1,2})","2010-12-11 12:11")
['12:11']
>>>
>>> re.findall(r"(\d{1,2}:\d{1,2}:\d{1,2})","2010-12-11 12:11:22,09:25:30")
['12:11:22', '09:25:30']
>>>
>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
<re.Match object; span=(0, 16), match='2010-12-11 12:11'>
>>>
>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
['2010-12-11 12:11']

(案例) 匹配邮箱/手机号/身份证: 正则匹配验证邮箱手机号身份证等常用居民证件等.

# 匹配手机号
>>> re.search("^1[3|4|5|8]\d{9}$","18264856987")
<re.Match object; span=(0, 11), match='18264856987'>

# 匹配全部域的邮箱
>>> re.search("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+","182648@qq.com")
<re.Match object; span=(0, 13), match='182648@qq.com'>

# 只匹配qq.163这两个域的邮箱
>>> re.search("[a-zA-Z0-9_-]+@[qq|163]+(\.[a-zA-Z0-9_-]+)+","182648@qq.com")
<re.Match object; span=(0, 13), match='182648@qq.com'>

# 匹配身份证号
>>> re.findall(r'(^[1-8][0-7]{2}\d{3}([12]\d{3})(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])\d{3}([0-9X])$)',"33070219630306041X")
[('33070219630306041X', '1963', '03', '06', 'X')]

(案例) 匹配密码验证: 该匹配规则通常用于验证用户输入的账号密码是否符合规范.

# 匹配中文字符
>>> re.findall("[\u4e00-\u9fa5]","你好")
['你', '好']

# 单纯限制字符的输入长度
>>> re.findall("^[\u4e00-\u9fa5_a-zA-Z0-9]{4,10}$","1233")
['1233']

# 以字母开头,总长度5-16个字符的密码,允许使用下划线.
>>> re.findall(r"^[a-zA-Z][a-zA-Z0-9_]{4,15}$","password")
['password']

# 以字母开头,长度在6~18之间,只能包含字母、数字和下划线
>>> re.findall(r"^[a-zA-Z]\w{5,17}$","passw3")
['passw3']

# 限制不能以下划线开头和结尾
>>> re.findall("^(?!_)(?!.*?_$)[a-zA-Z0-9_\u4e00-\u9fa5]+$","1233")
['1233']

(案例) 匹配字符串密码: 该匹配规则用于检测用户输入的账号密码是否存在特殊字符,且必须包括(大写,小写,数字)三种组合.

# 验证字符串序列（必须包含，字母，数字，大小写）
>>> if re.match("^(?:(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])).*$","Admin123") == None:
>>>     print("验证失败")
>>> else:
>>>     print("验证通过")
    
# 验证字符串序列 (必须包含，字母，数字，大小写 并且长度 最小5 最大10)
>>> if re.match("^(?:(?=.*[A-Z])(?=.*[a-z])(?=.*[0-9])).{5,10}$","Adm2") == None:
>>>     print("验证失败")
>>> else:
>>>     print("验证通过")

# 验证字符串序列 (必须包含，只允许，小写，大写，数字组合)
>>> if re.match("^(?=.*[a-z][A-Z][0-9]).*$","admin23") == None:
>>>     print("验证失败")
>>> else:
>>>     print("验证通过")

9.3 加密解密模块

python里面的hashlib模块提供了很多加密的算法,该模块实现了许多不同安全散列和消息摘要算法的通用接口,包括FIPS安全散列算法SHA1,SHA224,SHA256,SHA384和SHA512以及RSA的MD5算法等现代算法.

MD5加密: MD5消息摘要算法,被广泛使用的密码散列函数,可产生出一个128位的散列值(hash value).

>>> import hashlib
>>>
>>> hash = hashlib.md5()
>>> hash.update(bytes("lyshark", encoding="utf-8"))
>>>
>>> print(hash.hexdigest())
a68aecb8fba3b8c68284937395a7db6f
>>> print(hash.digest())
b'\xa6\x8a\xec\xb8\xfb\xa3\xb8\xc6\x82\x84\x93s\x95\xa7\xdbo'

SHA1加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA1会产生一个160位的消息摘要(已被淘汰).

>>> import hashlib
>>>
>>> hash = hashlib.sha1()
>>> hash.update(bytes("lyshark", encoding="utf-8"))
>>>
>>> print(hash.hexdigest())
e2a52d00b620d46370b177dcb21777a46c1d4f13
>>> print(hash.digest_size)
20

SHA256加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA256算法的哈希值大小为256位.

>>> import hashlib
>>> 
>>> hash = hashlib.sha256()
>>> hash.update(bytes("lyshark", encoding="utf-8"))
>>> print(hash.hexdigest())
9850380d33d64c1bad671b12fe971eb07aad6ee7f1df98eb8338c749ef5e1bc3
>>>
>>> print(hash.block_size)
64

SHA384加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA384算法的哈希值大小为384位.

>>> import hashlib
>>> 
>>> hash = hashlib.sha384()
>>> hash.update(bytes("lyshark", encoding="utf-8"))
>>> print(hash.hexdigest())

SHA512加密: SHA安全哈希算法主要适用于数字签名DSA算法,SHA512算法的哈希值大小为512位.

>>> import hashlib
>>> 
>>> hash = hashlib.sha512()
>>> hash.update(bytes("lyshark", encoding="utf-8"))
>>> print(hash.hexdigest())

MD5加盐: 以上的几个加密算法通过撞库可被破解,所以有必要对加密算法中添加自定义KEY再来做双重加密.

>>> import hashlib
>>> 
>>> hash = hashlib.md5(bytes('898oaFs09f',encoding="utf-8"))  # 加盐
>>> print(hash.hexdigest())
c7fd0ceb70e0fe300c554887e36f5270
>>> 
>>> hash.update(bytes("lyshark",encoding="utf-8"))
>>> print(hash.hexdigest())
3503908e79a5b8d74b6bc697634d01b9

PKCS加密: 该函数提供了基于PKCS5密码的密钥派生函数,它使用HMAC作为伪随机函数.

>>> import hashlib
>>> dk = hashlib.pbkdf2_hmac('sha256', b'password', b'salt', 100000)
>>> dk.hex()
'0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a5'

blake2b加密: 针对64位平台进行了优化,可生成1到64字节之间任意大小的摘要.

>>> from hashlib import blake2b
>>>
>>> hash = blake2b(key=b"password", digest_size=17)
>>> hash.update(b"lyshark")
>>> print(hash.hexdigest())
662f3f4e2c21b1a04e3b18d521fed55f03

HASH摘要计算: 我们可以通过读取指定文件到内存,并通过Hash算法对其生成指定Hash摘要.

>>> import hashlib
>>>
>>> hash = hashlib.md5()
>>> with open("dump.json","rb") as fp:
...     for item in fp:
...             hash.update(item)
...
>>> print(hash.hexdigest())
ee68b99bf5c930090d13412f2d49f6ea

Base64编码: Base64是一种任意二进制到文本字符串的编码方法,常用于在URL、Cookie、网页中传输少量二进制数据.

>>> import base64
>>>
>>> base64.b64encode(b"hello \x00 lyshark")
b'aGVsbG8gACBseXNoYXJr'
>>> base64.b64decode("aGVsbG8gACBseXNoYXJr")
b'hello \x00 lyshark'
>>>
>>> base64.urlsafe_b64encode(b"https://www.baidu.com")
b'aHR0cHM6Ly93d3cuYmFpZHUuY29t'
>>> base64.urlsafe_b64decode("aHR0cHM6Ly93d3cuYmFpZHUuY29t")
b'https://www.baidu.com'

9.4 取随机数模块

Random模块实现了一个伪随机数生成器,可用来生成随机数以及完成与随机数相关的功能,对于整数,从范围中统一选择,对于序列,随机元素的统一选择,用于生成列表的随机排列的函数,以及用于随机抽样而无需替换的函数.

import random

random.shuffle()                           #随机打乱列表元素排列
random.randint(1,20)                       #生成1到20的整数包括20
random.uniform(10,20)                      #生成10到20之间的浮点数
random.randrange(1,10)                     #生成1到10的整数不包括10
random.choice()                            #从序列中随机选择数据

random.triangular(low, high, mode)         #三角分布的随机数 
random.gauss(mu, sigma)                    #高斯分布的随机数
random.betavariate(alpha, beta)            #beta β分布的随机数
random.expovariate(lambd)                  #指数分布的随机数
random.gammavariate(alpha, beta)           #伽马分布的随机数
random.lognormvariate(mu, sigma)           #对数正态分布的随机数
random.normalvariate(mu, sigma)            #正态分布的随机数
random.vonmisesvariate(mu, kappa)          #冯米塞斯分布的随机数
random.paretovariate(alpha)                #帕累托分布的随机数
random.weibullvariate(alpha, beta)         #韦伯分布的随机数

生成随机数: 通过使用random.randint()函数,可以实现随机生成整数,配合chr还可以实现生成a-z等符号.

>>> import random
>>>
>>> random.randint(1,10)             #获取1-10之间的随机数
6
>>> random.random()                  #随机生成一个大于0小于1的随机数
0.4055420309111927
>>>
>>> random.randrange(1,10,2)         #相当于从1,3,5,7,9中随机获取一个数
3
>>>
>>> random.uniform(1,10)             #生成一个指定范围内的随机浮点数
9.880034105803746
>>> round(random.uniform(100,600),2) #随机生成浮点数,并保留两位小数
269.89
>>>
>>> chr(random.randint(97,122))      #随机生成a-z
>>> chr(random.randint(65,90))       #随机生成A-Z

随机打乱列表数据: 通过使用random.shuffle()函数,可以实现随机的打乱一个列表中的数据.

>>> import random
>>>
>>> lists = [1,2,3,4,5,6,7,8,9]
>>> print(lists)
[1, 2, 3, 4, 5, 6, 7, 8, 9]
>>>
>>> random.shuffle(lists)
>>> print(lists)
[4, 7, 1, 8, 3, 9, 5, 6, 2]

随机获取一个数据: 通过使用random.choice()函数,该函数可实现从指定的序列中获取一个随机元素.

>>> import random
>>>
>>> lists=[1,2,3,4,5,6,7,8,9]
>>> string=["admin","guest","lyshark"]
>>>
>>> random.choice(lists)
2
>>> random.choice(string)
'lyshark'

随机获取多个数据: 通过使用random.sample()函数,可以实现从指定的序列中随机获取指定长度的片断并随机排列.

>>> import random
>>>
>>> lists=[1,2,3,4,5,6,7,8,9]
>>> random.sample(lists,3)
[2, 6, 9]
>>>
>>> string = "hello lyshark"
>>> random.sample(string,4)
['s', 'e', 'k', 'r']

随机生成验证码: 通过random()函数,配合循环语句,和选择语句来实现随机生成验证码或密码.

import random,string

# 生成随机验证码
def Generateverification(digit):
    """Return a random verification code made of ``digit`` characters.

    Each position is a decimal digit (0-9) with probability 2/5 and an
    uppercase ASCII letter (A-Z) otherwise.

    Args:
        digit: Number of characters to generate; 0 yields an empty string.

    Returns:
        The generated code as a string.

    Note:
        The ``random`` module is not a cryptographically secure generator, so
        the codes are unsuitable where unpredictability is a security
        requirement.
    """
    rand = []
    for _ in range(digit):
        # randrange(0, 5) yields 0..4; the values 2 and 4 select a digit.
        if random.randrange(0, 5) in (2, 4):
            # The upper bound is exclusive, so 10 is needed for '9' to be reachable.
            rand.append(str(random.randrange(0, 10)))
        else:
            # 65..90 are the code points of 'A'..'Z'.
            rand.append(chr(random.randrange(65, 91)))
    return "".join(rand)

# 生成随机密码
def getRandChar(count):
    """Return a random password made of ``count`` characters.

    Characters are drawn uniformly, with replacement, from the ASCII letters,
    the decimal digits and the symbols ``!@#$%^&*()-+=.``.

    Args:
        count: Number of characters to generate; 0 yields an empty string.

    Returns:
        The generated password as a string.
    """
    # Imported locally so the block stays self-contained.  Passwords are
    # security-sensitive, so they are drawn from the OS-backed CSPRNG rather
    # than the predictable generator behind ``random``.
    import secrets

    # Shuffling the alphabet before a uniform choice does not change the
    # distribution, so the alphabet is used in its natural order.
    alphabet = string.ascii_letters + string.digits + '!@#$%^&*()-+=.'
    return ''.join(secrets.choice(alphabet) for _ in range(count))

if __name__ == "__main__":
    # Demo: run each generator once and print its result with its own label.
    demos = (
        ("本次生成的随机验证码是: {}", Generateverification, 5),
        ("本次生成的随机密码是: {}", getRandChar, 15),
    )
    for template, generator, length in demos:
        ret = generator(length)
        print(template.format(ret))

9.5 日期时间模块

Time 模块是通过调用C标准库time.h实现的,尽管此模块始终可用,但并非所有平台上都提供所有功能,此模块中定义的大多数函数调用具有相同名称的平台C库函数,因为这些函数的语义因平台而异.

import time

time.sleep(4)                                    #暂停程序执行4秒
time.perf_counter()                              #返回高精度计时器的值(time.clock()已在Python 3.8中移除)
time.process_time()                              #返回处理器时间
time.time()                                      #返回当前系统时间戳
time.ctime()                                     #当前系统时间,输出字符串格式化
time.ctime(time.time()-86640)                    #将时间戳转为字符串格式
time.gmtime()                                    #获取结构化时间
time.gmtime(time.time()-86640)                   #将时间戳转换成结构化格式
time.localtime(time.time()-86640)                #将时间戳转换成结构格式,但返回本地时间
time.mktime(time.localtime())                    #与localtime()功能相反,将结构时间转换为时间戳
time.strftime("%Y-%m-%d %H:%M:%S",time.gmtime()) #将struct_time格式转成指定的字符串格式
time.strptime("2019-09-20","%Y-%m-%d")           #将字符串格式转换成struct_time格式

DateTime 模块提供了处理日期和时间的类,其实现的重点是为输出格式化和操作提供高效的属性提取功能,该模块提供了以简单和复杂的方式操作日期和时间的类,虽然支持日期和时间算法,但实现的重点是用于输出格式化.

import datetime

datetime.date.today()                             #格式化输出今天时间
datetime.datetime.now()                           #格式化输出当前的时间
datetime.datetime.now().timetuple()               #以struct_time格式输出当前时间
datetime.date.fromtimestamp(time.time()-864400)   #将时间戳转成日期格式
#-----------------------------------------------------------------------------------
temp = datetime.datetime.now()                    #输出当前时间,并赋值给变量
temp.replace(2019,10,10)                          #替换输出内容中的,年月日为2019-10-10

Calendar 是与日历相关的模块,这个模块让你可以输出像Unix cal那样的日历,它还提供了其它与日历相关的实用函数,默认情况下,这些日历把星期一当作一周的第一天,星期天为一周的最后一天.

import calendar

calen = calendar.calendar(2018)      #获取2018年的日历
calen = calendar.month(2018,8)       #指定获取2018的月份
calen = calendar.isleap(2008)        #检测该年份是平年还是闰年
calen = calendar.leapdays(1997,2018) #检测1997-2018年限内闰年的数量
calen = calendar.monthrange(2018,8)  #获取指定月份的信息
calen = calendar.weekday(2018,11,22) #根据指定的年月日计算星期几
calen = calendar.timegm((2018,8,27,11,35,0,0,0)) #将时间元组转化为时间戳

基本的时间戳互转: 将一个指定的时间格式转换为秒级时间戳和毫秒级时间戳.

>>> import time,datetime
>>>
>>> now = time.time()
>>> print(now)                       # 原始的时间戳
1575785965.2278268
>>>
>>> print(int(now))                  # 将时间戳转为整数(秒级)
1575785965
>>>
>>> print(int(round(now * 1000)))
1575785965228                        # 转换为毫秒级时间戳
>>>
>>> local_time = time.localtime()    # 本地时间信息,返回结构
>>> print(local_time)
time.struct_time(tm_year=2020, tm_mon=4, tm_mday=12, tm_hour=10, tm_min=5, tm_sec=29, tm_wday=6, tm_yday=103, tm_isdst=0)
>>>
>>> utc_time = time.gmtime()         # struct_time类型的utc时间,协调世界时
>>> print(utc_time)
time.struct_time(tm_year=2020, tm_mon=4, tm_mday=12, tm_hour=2, tm_min=6, tm_sec=31, tm_wday=6, tm_yday=103, tm_isdst=0)

时间戳与日期时间互转: 将时间日期转换为特定的时间戳,或者是将特定时间戳转换为日期格式.

>>> import time,datetime
>>>
>>> date = "2019-01-01 11:22:30"
>>> times = int(time.mktime(time.strptime(date,"%Y-%m-%d %H:%M:%S")))
>>> print(times)
1546312950
>>>
>>> date = 1546312950
>>> times = time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(date))
>>> print(times)
2019-01-01 11:22:30
>>>
>>> date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
>>> print(date)
2019-12-08 14:22:50
>>>
>>> timeStamp = int(time.time())
>>> dateArray = datetime.datetime.fromtimestamp(timeStamp)
>>> otherStyleTime = dateArray.strftime("%Y-%m-%d %H:%M:%S")
>>> otherStyleTime
'2019-12-08 14:22:50'

时间格式与时间格式互转: 将一种特定的时间格式转换为另外一种时间格式.

>>> import time,datetime
>>>
>>> date = "12/13/2019 10:25"
>>> date_temp = datetime.datetime.strptime(date,'%m/%d/%Y %H:%M')
>>> print(date_temp)
2019-12-13 10:25:00
>>>
>>> new_date = date_temp.strftime('%Y-%m-%d %H:%M:%S')
>>> print(new_date)
2019-12-13 10:25:00
>>>
>>> date = datetime.datetime.strptime("19/10/05 12:30", "%y/%m/%d %H:%M")
>>> print(date)
2019-10-05 12:30:00
>>>
>>> date = datetime.datetime.now()
>>> datetime.datetime.strftime(date,"%A %B %d,%Y")
'Monday March 30,2020'

时间格式的换算与加减: 利用datetime模块来完成不同时间单位间的换算,timedelta实例则可以完成时间间隔换算.

# 时间格式关键字: [ year(年),month(月),day(天),hour(时),minute(分),second(秒),microsecond(微秒) ]
>>> import datetime
>>> from datetime import timedelta
>>>
>>> date = datetime.datetime.now() + datetime.timedelta(days=10)          # 在当前基础上加10天
>>> date = datetime.datetime.now() + datetime.timedelta(days=-10)         # 在当前基础上减10天
>>> date = datetime.datetime.now() + datetime.timedelta(hours=-10)        # 在当前基础上减10小时
>>> date = datetime.datetime.now() + datetime.timedelta(seconds=120)      # 在当前基础上加120秒
>>> print("日期: {} {} {}".format(date.year,date.month,date.day))
日期: 2020 3 30
>>> 
>>> date = datetime.datetime(2020,5,24,12,22)         # 实例化时间日期,定义一个日期
>>> date + timedelta(days=10)                         # 在上面实例的基础上加10天
>>> 
>>> date_1 = datetime.datetime.now() + datetime.timedelta(days=10)
>>> date_2 = datetime.datetime.now() + datetime.timedelta(days=365)
>>>
>>> date_xor = date_2 - date_1                        # 计算两个时间之间的差值
>>> date_xor
datetime.timedelta(days=355, seconds=8, microseconds=949992)

字符串与时间戳格式互转: 除了上方的标准格式以外,在编程中还会遇到其他的特殊时间格式的互转.

>>> import time,datetime
>>>
>>> date = "17/Mar/2020 10:25"
>>> item = time.mktime(time.strptime(date,"%d/%b/%Y %H:%M"))
>>> item
1584411900.0
>>> time.strftime("%Y-%m-%d %H:%M",time.localtime(item))
'2020-03-17 10:25'

>>> date = "Mar 05,2020"
>>> item = time.mktime(time.strptime(date,"%b %d,%Y"))
>>> item
1583337600.0
>>> time.strftime("%Y-%m-%d",time.localtime(item))
'2020-03-05'

>>> date = "2020-03-11"
>>> item = time.mktime(time.strptime(date,"%Y-%m-%d"))
>>> item
1583856000.0
>>> time.strftime("%d/%b/%Y",time.localtime(item))
'11/Mar/2020'

>>> local_time = time.localtime(time.time())
>>> time.strftime("%Y-%m-%d, %H:%M:%S, %W",local_time)
'2020-04-12, 10:00:12, 14'

计算当月的日期范围: 通过编程实现计算出2020年2月这个时间范围内有多少天,并列出来.

from datetime import datetime,date,timedelta
import calendar

def get_month_range(start_date=None):
    """Return every day of the month that contains ``start_date``.

    Args:
        start_date: A ``date`` (or ``datetime``) anywhere inside the wanted
            month. Defaults to today when omitted.

    Returns:
        A list with one entry per day of that month, in ascending order,
        starting at day 1. Entries have the same type as ``start_date``.
    """
    anchor = date.today() if start_date is None else start_date
    first_day = anchor.replace(day=1)  # snap to the first day of the month
    # monthrange() returns (weekday of the 1st, number of days in the month);
    # only the day count is needed here.
    month_length = calendar.monthrange(first_day.year, first_day.month)[1]
    month_end = first_day + timedelta(days=month_length)  # exclusive upper bound
    span = (month_end - first_day).days
    return [first_day + timedelta(days=offset) for offset in range(span)]

day = get_month_range(date(2020,2,12))
for item in day:
    print(item)

9.6 持久存储模块

有时候我们需要对字符串,列表,字典等数据进行持久化存储,方便以后使用,而不是简单的放入内存中关机断电就丢失数据,python中提供了多种方式来实现数据的持久化存储,下面将逐个介绍.

JSON 是一种轻量级的数据交换格式,其简洁和清晰的层次结构使得JSON成为理想的数据交换语言,易于人阅读和编写,同时也易于机器解析,有效地提升网络传输效率,JSON实现了字符串与编程语言内部数据之间的共享与交互,通用于各种编程语言.

JSON字符串序列互转: 使用json.dumps将基本数据类型转成字符串,使用json.loads将字符串转化成基本数据类型.

>>> import json
>>>
>>> Mydict = {"admin":"123456","guest":"guest","lyshark":"123321"}
>>> type(Mydict)
<class 'dict'>
>>>
>>> result = json.dumps(Mydict)
>>> type(result)   # 将序列转为字符串
<class 'str'>

>>> string = '{"admin": "123456", "guest": "guest", "lyshark": "123321"}'
>>>
>>> Mydict = json.loads(string)
>>> type(Mydict)  # 将字符串反序列化为字典
<class 'dict'>

>>> string = '{"admin": "123456", "guest": "guest", "lyshark": "123321"}'
>>>
>>> Mydict = eval(string)
>>> type(Mydict)  # eval也能完成转换,但它会执行任意代码且不识别true/false/null,仅可用于可信数据,推荐使用json.loads
<class 'dict'>

JSON 序列化/反序列化: 使用json.dump可以将数据进行序列化存储,使用json.load可以将数据读入变量中.

>>> import json
>>>
>>> MyList = [1,2,3,4,5,6,7]
>>>
>>> with open("db.json","w",encoding="utf-8") as fp:
...     json.dump(MyList,fp)  # 将列表序列化保存到文件

>>> with open("db.json","r",encoding="utf-8") as fp:
...     json.load(fp)         # 从文件中加载列表
...
[1, 2, 3, 4, 5, 6, 7]

pickle 模块实现了python对象的序列化和反序列化,与JSON不同的是pickle不是用于多种语言间的数据传输,它仅作为python对象的持久化或者python程序间进行互相传输对象的方法,因此它支持绝大多数python数据类型,但序列化结果只能被python读取.注意: 反序列化不可信来源的pickle数据可能导致任意代码执行,切勿对不可信数据调用pickle.loads.

Pickle序列化/反序列化: 使用pickle.dumps将列表序列化为二进制字串,使用pickle.loads反序列化为正常数据.

>>> import pickle
>>>
>>> data = [1,2,3,4,5]
>>>
>>> dest_str = pickle.dumps(data)
>>> dest_str
b'\x80\x04\x95\x0f\x00\x00\x00\x00\x00\x00\x00]\x94(K\x01K\x02K\x03K\x04K\x05e.'
>>>
>>> with open("db.pickle","wb") as fp:
...     data = {'k1':'python','k2':'java'}
...     fp.write(pickle.dumps(data))
...     fp.close()
...
42
>>> with open("db.pickle","rb") as fp:
...     data = pickle.loads(fp.read())
...
>>> data
{'k1': 'python', 'k2': 'java'}

shelve与pickle类似用来持久化数据的,不过shelve是以键值对的形式,将内存中的数据通过文件持久化,其支持任何pickle支持的所有python数据格式,在开启回写功能后,其灵活程度远远高于Pickle/JSON这两种类型,使用代码如下.

>>> import shelve
>>>
>>> sh = shelve.open("shelve.db",writeback=True)
>>> sh["user1"] = { "username":"admin","passwd":123123 }
>>> sh["user2"] = { "username":"guest","passwd":123456 }
>>> sh.close()
>>>
>>> sh = shelve.open("shelve.db",writeback=True)
>>>
>>> sh["user1"]
{'username': 'admin', 'passwd': 123123}
>>> sh["user1"].get("passwd")
123123
>>> sh["user1"]["passwd"] = 888888
>>> sh["user1"]
{'username': 'admin', 'passwd': 888888}

9.7 INI解析模块

configparser 模块用来读取配置文件,配置文件的格式跟windows下的ini配置文件相似,可以包含一个或多个节,每个节可以有多个参数(键=值),使用配置文件的好处就是一些参数无需写死,可以使程序更灵活的配置一些参数.

为了方便演示以下的例子,请在python所在目录创建一个test.ini配置文件,写入以下内容.

[db]
db_host = 127.0.0.1
db_port = 69
db_user = root
db_pass = 123123
host_port = 69

[concurrent]
thread = 10
processor = 20

获取所有节点: 通过使用以下方式,我们可以获取到指定文件的所有主节点名称.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.sections()
>>> print(result)
['db', 'concurrent']

获取指定键值: 使用以下方式遍历,来获取指定节点(concurrent)下的所有键值对.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.items("concurrent")
>>> print(result)
[('thread', '10'), ('processor', '20')]

获取指定键: 使用以下方式遍历,来获取指定节点(concurrent)下的所有的键.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.options("concurrent")
>>> print(result)
['thread', 'processor']

获取指定值: 使用以下方式遍历,来获取指定节点下指定键的对应值.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")
>>>
>>> result=config.get("concurrent","thread")
# result = config.getint("concurrent","thread")
# result = config.getfloat("concurrent","thread")
# result = config.getboolean("concurrent","thread")
>>> print(result)
10

检查&添加&删除主节点: 检查、添加、删除指定的主节点数据.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查主节点---------------------------------------------
>>> has_sec=config.has_section("db")
>>> print(has_sec)
True
#--添加主节点---------------------------------------------
>>> config.add_section("lyshark")
>>> config.write(open("test.ini","w"))
#--删除主节点---------------------------------------------
>>> config.remove_section("lyshark")
True
>>> config.write(open("test.ini","w"))

检查&添加&删除指定键值对: 检查、删除、设置指定组内的键值对.

>>> import configparser
>>> 
>>> config=configparser.ConfigParser()
>>> config.read("test.ini",encoding="utf-8")

#--检查节点中的键值对--------------------------------------
>>> has_opt=config.has_option("db","db_host")
>>> print(has_opt)
True
#--设置节点中的键值对--------------------------------------
>>> config.set("test.ini","db_host","8888888888")
>>> config.write(open("test.ini","w"))
#--删除节点中的键值对--------------------------------------
>>> config.remove_option("db","db_host")
True
>>> config.write(open("test.ini","w"))

9.8 XML处理模块

XML(可扩展标记语言)的宗旨是传输数据,它是实现不同语言或程序之间进行数据交换的一种通用格式,XML曾是最主要的数据交换格式之一,至今很多传统公司如金融行业的很多系统的接口还主要是XML作为数据通信接口.

为了方便演示后续内容,请自行在python当前目录下创建lyshark.xml文件,并写入以下XML文档.

<?xml version="1.0" encoding="UTF-8"?>
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2019</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2020</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2029</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>

创建XML文档: 通过使用XML函数,创建一个XML文档,原生保存的XML默认无缩进.注意: 下面的示例同样写入lyshark.xml,会覆盖上文创建的示例文档,继续后续示例前请恢复该文件或改用其他文件名.

<root>
    <son name="1号儿子">
        <grand name="1号孙子"></grand>
    </son>
    <son name="2号儿子">
        <grand name="2号孙子"></grand>
    </son>
</root>
#--以下代码则可创建如上格式-------------------------------------------------
>>> import xml.etree.ElementTree as ET
>>>
>>> root=ET.Element("root")
>>>
>>> son1=ET.Element("son",{"name":"1号儿子"})
>>> son2=ET.Element("son",{"name":"2号儿子"})
>>>
>>> grand1=ET.Element("grand",{"name":"1号孙子"})
>>> grand2=ET.Element("grand",{"name":"2号孙子"})
>>>
>>> son1.append(grand1)
>>> son2.append(grand2)
>>>
>>> root.append(son1)
>>> root.append(son2)
>>>
>>> tree=ET.ElementTree(root)
>>> tree.write('lyshark.xml',encoding='utf-8',short_empty_elements=False)

打开XML文档: 通过使用xml.etree.ElementTree,来实现打开XML文件.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree = ET.parse("lyshark.xml")
>>> root = tree.getroot()
>>> print(root.tag)

遍历XML文档(单层): 通过使用循环的方式,来实现对XML文件子树的遍历.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for child in root:
...     print(child.tag,child.attrib)
...
country {'name': 'Liechtenstein'}
country {'name': 'Singapore'}
country {'name': 'Panama'}

遍历XML文档(多层): 通过使用循环的方式遍历root下面的目录,来实现对XML文件子树的子树进行遍历.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历XML文档的第二层
>>> for x in root:
        # 第二层节点的标签名称和标签属性
...     print("主目录: %s"%x.tag)
        # 遍历XML文档的第三层
...     for y in x:
        # 第三层节点的标签名称和内容
...             print(y.tag,y.attrib,y.text)
...
主目录: country
rank {'updated': 'yes'} 2
year {} 2019
gdppc {} 141100
neighbor {'direction': 'E', 'name': 'Austria'} None
neighbor {'direction': 'W', 'name': 'Switzerland'} None
主目录: country
rank {'updated': 'yes'} 5
year {} 2020
gdppc {} 59900
neighbor {'direction': 'N', 'name': 'Malaysia'} None
主目录: country
rank {'updated': 'yes'} 69
year {} 2029
gdppc {} 13600
neighbor {'direction': 'W', 'name': 'Costa Rica'} None
neighbor {'direction': 'E', 'name': 'Colombia'} None

遍历指定节点: 通过循环的方式,配合root.iter()来实现只遍历XML文档中的year节点.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):
...     print(node.tag,node.text)
...
year 2019
year 2020
year 2029

修改XML字段: 通过遍历的方式,找到节点为year的数据行,并将其内容自动加1,并回写到XML文档.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>
>>> for node in root.iter("year"):     #遍历并修改每个字段内容
...     new_year=int(node.text) + 1    #先将node.text变成整数,实现加法
...     node.text=str(new_year)        #然后变成字符串,复制给内存中的text
...     node.set("updated","yes")      #在每个year字段上加上一段属性,updated=yes
...
>>> tree.write("lyshark.xml")          #回写到配置文件中,覆盖成最新的数据
>>> del node.attrib["name"]            #删除节点中的指定属性字段

删除XML字段: 通过遍历的方式,查找所有的country节点,并判断如果内部rank>50则删除这个country节点.

>>> import xml.etree.ElementTree as ET
>>> 
>>> tree=ET.parse("lyshark.xml")
>>> root=tree.getroot()
>>>     # 遍历data下的所有country节点
>>> for country in root.findall("country"):
        # 获取每一个country节点下rank节点的内容
...     rank=int(country.find("rank").text)
...     if rank > 50:
        # 删除指定country节点
...             root.remove(country)
...
>>> tree.write("output.xml",encoding="utf-8")

9.9 Ctypes混编模块

运用Ctypes库我们可以实现和任意语言进行连接，混合编程的本质是python调用C/C++编译的动态链接库，或反过来C/C++直接使用python中的模块，如下总结了python与C语言如何衔接。

调用标准输出: 调用标准动态库实现打印输出,默认情况下Windows系统会调用msvcrt.dll而Linux系统则会调用libc.so.6其中的cdll代表调用约定为cdecl而windll则代表stdcall约定.

import platform
import ctypes

if __name__ == "__main__":

    # Load the C runtime that matches the current platform.
    system = platform.system()
    if system == 'Windows':
        # cdll.msvcrt is shorthand for cdll.LoadLibrary("msvcrt.dll"),
        # so a single load is enough.
        libc = ctypes.cdll.msvcrt
    elif system == 'Linux':
        libc = ctypes.cdll.LoadLibrary("libc.so.6")
    else:
        # Other platforms (e.g. macOS) previously left libc undefined and the
        # printf call below failed with NameError; look the C library up instead.
        from ctypes.util import find_library
        libc_path = find_library("c")
        if libc_path is None:
            raise OSError("no C runtime found for platform: {}".format(system))
        libc = ctypes.CDLL(libc_path)

    # printf expects a C char*, so pass bytes rather than str
    string = "hello lyshark \n".encode("utf-8")
    libc.printf(string)

如果需要调用WindowsAPI函数同样可以使用该方式实现,代码如下.

from ctypes import *

if __name__ == "__main__":
    # Variant 1: bind the function to a name first, call it, and keep the return value.
    # MessageBoxA is called with byte strings, hence the .encode() on every text argument.
    user32 = windll.LoadLibrary("user32.dll")
    MessageBox = user32.MessageBoxA
    ref = MessageBox(0, "hello lyshark".encode("utf-8"), "msgbox".encode("utf-8"), 0)
    print("返回值: ", ref)

    # Variant 2: load the library and call the function directly as an attribute.
    user32 = windll.LoadLibrary("user32.dll")
    string = "hello lyshark \n"
    string = string.encode("utf-8")
    user32.MessageBoxA(0, string, "ctypes".encode("utf-8"), 0)

定义函数指针调用弹窗代码.

from ctypes import c_int, WINFUNCTYPE, windll
from ctypes.wintypes import HWND, LPCSTR, UINT,LPCWSTR
import locale

# Message box demo using the multi-byte (MessageBoxA) entry point
def Ascii():
    """Call user32.MessageBoxA through a WINFUNCTYPE prototype.

    Text arguments are encoded with the locale's preferred encoding because
    the prototype declares them as LPCSTR (byte strings). Shows three message
    boxes and prints the return value of the second one.
    """
    preferred_encoding = locale.getpreferredencoding(False)

    # Function prototype: the first argument is the return type, the rest are the parameter types
    prototype = WINFUNCTYPE(c_int, HWND, LPCSTR, LPCSTR, UINT)

    # One (flag, name, default) tuple per parameter; per the ctypes
    # documentation flag 1 marks an input parameter.
    paramflags = ((1, "hwnd", 0),
                  (1, "text", "MsgBox".encode(preferred_encoding)),
                  (1, "caption", None),
                  (1, "flags", 0))

    # First call style: bind the exported function, then call it with all defaults
    MessageBox = prototype(("MessageBoxA", windll.user32), paramflags)
    MessageBox()

    # Second call style: override individual parameters by keyword
    ref = MessageBox(text="hello lyshark".encode(preferred_encoding))
    print("输出返回值: ",ref)

    MessageBox(flags=2, text="hello lyshark".encode(preferred_encoding))

# Message box demo using the wide-character (MessageBoxW) entry point
def Unicode():
    """Call user32.MessageBoxW through a WINFUNCTYPE prototype.

    The prototype declares the text parameters as LPCWSTR, so plain ``str``
    values are passed without encoding. Shows three message boxes.
    """
    # Function prototype: return type first, then the four parameter types
    prototype = WINFUNCTYPE(c_int, HWND, LPCWSTR, LPCWSTR, UINT)
    # One (flag, name, default) tuple per parameter; flag 1 marks an input parameter
    paramflags = ((1, "hwnd", 0),
                  (1, "text", "MsgBox"),
                  (1, "caption", None),
                  (1, "flags", 0))
    MessageBox = prototype(("MessageBoxW", windll.user32), paramflags)
    # Call with every default
    MessageBox()

    # Override selected parameters by keyword
    MessageBox(text="hello lyshark")
    MessageBox(flags=2, text="hello lyshark")

if __name__ == "__main__":
    Ascii()

创建自定义数据类型: Ctypes 会自动去搜索自定义数据的_as_parameter_属性,并将其作为参数传递给C函数.

import ctypes

# Custom argument type: ctypes passes an object's _as_parameter_ attribute to the C function
class MyType(object):
    """Wrap the product of two values so it can be handed to a foreign function."""

    def __init__(self, x, y):
        # The product is the value ctypes forwards in place of this object.
        self._as_parameter_ = x * y

if __name__ == "__main__":
    libc = ctypes.cdll.LoadLibrary("msvcrt.dll")
    libc = ctypes.cdll.msvcrt

    # 调用自定义类型
    ref = MyType(10, 20)
    libc.printf("计算结果: %d \n".encode("utf-8"),ref)

定义结构体/联合体: 结构体需要继承Structure类,默认情况下数据会放在_fields_中.

from ctypes import *

# Structure definition
class MyStruct(Structure):
    """C struct with a 10-byte name buffer, an int age and a long sex flag."""
    _fields_ = [
        ("username", c_char * 10),
        ("age", c_int),
        ("sex", c_long)
    ]

# Union definition
class MyUnion(Union):
    """C union whose members (long, int, 10-byte char buffer) share one storage area."""
    _fields_ = [
        ("a_long", c_long),
        ("a_int", c_int),
        ("a_char", c_char * 10)
    ]

def main():
    """Fill a MyStruct instance, print its name and age, and return it."""
    # Fields have to be set on an INSTANCE. Assigning to MyStruct.<field>
    # replaces the class-level field descriptor with a plain Python object
    # and never touches the underlying C buffer.
    person = MyStruct()
    person.username = "lyshark".encode("utf-8")  # c_char arrays accept bytes, not str
    person.age = 24
    person.sex = 1
    print("姓名: {} 年龄: {}".format(person.username.decode("utf-8"), person.age))
    return person

if __name__ == "__main__":
    main()

定义多层数组: ctypes提供了对数组的支持,且数组可以内外层嵌套使用.

from ctypes import *

# 定义内层嵌套数组
class PointEx(Structure):
    _fields_ = [('x', c_int), ('y', c_int)]

# 定义外层结构
class MyStruct(Structure):
    _fields_ = [('uuid', c_int), ('pointex_array', PointEx * 4)]

# Build a flat C int array and print its elements
def MyArray():
    """Create a ten-element ``c_int`` array holding 1..10 and print it on one line."""
    numbers = (c_int * 10)(*range(1, 11))
    # Each element is followed by a single space; print() adds the final newline.
    print("".join("{} ".format(value) for value in numbers))

# Build a structure containing an array of structures and print each entry
def processArray():
    """Create a MyStruct with uuid 1001 and four points (1,1)..(4,4), then print them."""
    points = tuple((n, n) for n in range(1, 5))
    record = MyStruct(1001, points)

    for point in record.pointex_array:
        print("(item.x, item.y) = (%d, %d)" % (point.x, point.y))
    print()

if __name__ == "__main__":
    MyArray()
    processArray()

数组与指针也可以相互引用,代码如下

from ctypes import *

if __name__ == "__main__":
    i = c_int(100)
    print("输出元素: ", i.value)

    ptr = pointer(i)
    ptr[0] = 200
    print("修改后元素:", i.value)

    # 数组指针
    IntArrayType = c_int * 10
    Array = IntArrayType(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
    ArrayPtr = pointer(Array)
    print(ArrayPtr)

    # 空指针
    null_ptr = POINTER(c_int)()
    print("状态:" , bool(null_ptr))

数组之间类型转换: 类型转换主要通过使用cast实现转换,如下将c_byte数组转换为c_int指针.

from ctypes import *

class MyStruct(Structure):
    _fields_ = [('count', c_int), ('value', POINTER(c_int))]

if __name__ == "__main__":
    ptr = MyStruct()
    ptr.count = 5
    ptr.value = (c_int * 10)(1,2,3,4,5,6,7,8,9,0)

    # Print the first `count` elements of the int array
    for index in range(ptr.count):
        print("old [%d] = %d " %(index, ptr.value[index]),end="")
    print()

    # Reinterpret a zero-filled byte buffer as int*. The buffer must hold
    # `count` ints: a fixed 10-byte buffer is too small for the 5 ints read
    # below, so the loop would read past its end.
    raw = (c_byte * (ptr.count * sizeof(c_int)))()
    ptr.value = cast(raw, POINTER(c_int))
    for index in range(ptr.count):
        print("new[%d] = %d " %(index, ptr.value[index]),end="")
    print()

使用回调函数: 通过使用CFUNCTYPE可以定义并指定一个回调函数.

from ctypes import *

# Three-way comparison callback
def cmp_func(a, b):
    """Compare the first items of ``a`` and ``b``.

    Returns 1 when ``a[0]`` is larger, -1 when it is smaller, and 0 otherwise.
    """
    left, right = a[0], b[0]
    # bool arithmetic: (True - False) == 1, (False - True) == -1, equal -> 0
    return (left > right) - (left < right)

if __name__ == "__main__":
    libc = cdll.LoadLibrary("msvcrt.dll")

    # 定义数组
    IntArray = c_int * 10
    IntArrayPtr = IntArray(5,6,8,9,3,2,6,7,9,0)

    # 定义并指定回调函数
    CmpFuncType = CFUNCTYPE(c_int, POINTER(c_int), POINTER(c_int))
    cmpfunc = CmpFuncType(cmp_func)

    # 调用msvcrt标准库中的排序函数
    libc.qsort(IntArrayPtr, len(IntArrayPtr), sizeof(c_int), cmpfunc)

    for i in IntArrayPtr:
        print(i,end="")

增加数组长度: 使用resize()可以增大数组底层缓冲区,其第二个参数是以字节为单位的新大小,且只能增加不能减小;数组对象本身的类型(元素个数)不会随之改变.

from ctypes import *

if __name__ == "__main__":
    # Array of three C ints
    IntArray = (c_int * 3)(1,2,3)

    # Print the original elements
    for index in IntArray:
        print(" {}".format(index),end="")
    print()

    # resize() takes the new size in BYTES. Room for 12 ints is therefore
    # 12 * sizeof(c_int); a bare 12 equals the current size of three 4-byte ints.
    resize(IntArray, sizeof(c_int) * 12)

    # IntArray is still typed c_int * 3, so iterating it would again yield only
    # three items. View the enlarged buffer through a 12-element array type.
    # The view does not own the memory: IntArray must stay alive while it is used.
    Grown = (c_int * 12).from_address(addressof(IntArray))
    for index in Grown:
        print(" {}".format(index),end="")
    print()

C混编(返回字符串): 首先我们使用C语言编写一个DLL文件,并导出GetPing测试函数,Dll代码如下.

#include <iostream>
#include <Windows.h>
#include <string>

extern "C"__declspec(dllexport) char * GetPing(char *Addr, int Port)
{
  char * ref = "{'Address' : '192.168.1.1','Port': 22}";
  return ref;
}

BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, void* lpReserved)
{
  return true;
}

接着使用python调用这个DLL中的导出函数,并传入参数.

from ctypes import *

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")
    pdll.GetPing.argtypes = [c_char_p, c_int]

    arg1 = c_char_p(bytes("127.0.0.1", "utf-8"))
    arg2 = c_int(3200)

    pdll.GetPing.restype = c_char_p
    
    ref = pdll.GetPing(arg1,arg2)
    print("返回字典: ", ref)

C混编(传递数组): 我们使用C语言编写一个DLL文件,并导出一个一维数组,和一个二维数组,Dll代码如下.

#include <iostream>
#include <Windows.h>

extern "C"__declspec(dllexport) int get_array_elem(int Array[], int index) {
  return Array[index];
}

extern "C"__declspec(dllexport) int get_array_2_elem(int Array[][11], int row, int col) {
  return Array[row][col];
}

BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, void* lpReserved)
{
  return true;
}

接着使用python调用一维数组get_array_elem处理函数,并传入参数.

from ctypes import *

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")

    IntArrayType = c_int * 10
    intArray = IntArrayType(1,2,3,4,5,6,7,8,9,0)

    for idx in range(10):
        print("[%d] => %d" %(idx, pdll.get_array_elem(intArray, idx)),end="")
    print()

使用python调用二维数组get_array_2_elem处理函数,并传入参数.

from ctypes import *

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")

    # The DLL declares the parameter as `int Array[][11]`, so the C side
    # locates Array[row][col] with a row stride of 11 ints. The Python rows
    # must use the same width; narrower rows make rows 1 and 2 read outside
    # the buffer. Unset trailing elements of each row are zero.
    COLS = 11
    RowType = c_int * COLS
    GridType = RowType * 3
    arr2d = GridType(RowType(1, 2, 3), RowType(8, 9, 4), RowType(7, 6, 5))

    # Only the first three columns of each row carry data
    for r in range(3):
        for c in range(3):
            print(" %d " %pdll.get_array_2_elem(arr2d, r, c),end="")
        print()

C混编(返回数组): 通过使用c_byte * x声明数组空间,返回数组结果输出,先写DLL.

#include <iostream>
#include <Windows.h>
#include <string>

extern "C"__declspec(dllexport) int GetArray(char* Data, int Number, char* OutData)
{
  for (int i = 0; i < Number; ++i)
  {
    OutData[i] = Data[i] + 100;
  }
  return Number;
}

BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, void* lpReserved)
{
  return true;
}

使用python调用GetArray处理函数,并传入参数.

from ctypes import *

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")
    callBuf = pdll.GetArray

    number = 10
    numbytes = c_int(10)

    # 声明数组并循环赋值
    data_in = (c_byte * number)()
    for i in range(number):
        data_in[i] = i

    # 用户保存输出结果的数组
    data_out = (c_byte * number)()

    # 调用DLL中的函数
    ref = pdll.GetArray(data_in,numbytes,data_out)
    print("返回值: ", ref)
    for i in data_out:
        print("{} ".format(i),end="")

C混编(传递结构): 我们继续增加功能,这次让python传入结构体,DLL收到后输出内容,先来写DLL.

#include <iostream>
#include <Windows.h>

typedef struct MyStruct
{
  char uname[10];
  int age;
  float score;
}MyStruct;

extern "C"__declspec(dllexport) char* get_struct(MyStruct* ptr)
{
  printf("[dll print] name: %s -> age: %d -> score: %f \n", ptr->uname, ptr->age, ptr->score);
  return ptr->uname;
}

BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, void* lpReserved)
{
  return true;
}

使用python调用get_struct处理函数,并传入参数.

from ctypes import *

class MyStruct(Structure):
    _fields_ = [
        ("uname",c_char * 10),
        ("age",c_int),
        ("score",c_float)
    ]

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")
    ptr = MyStruct()

    # 设置参数
    ptr.uname = "lyshark".encode("utf-8")
    ptr.age = 24
    ptr.score = 98.4

    # 设置返回值与指针
    get_struct_ptr = pdll.get_struct
    get_struct_ptr.restype = c_char_p

    # 调用
    ref = get_struct_ptr(byref(ptr))
    print("返回值: {}".format(ref))

C混编(返回结构): 先定义DLL文件代码,编写一个get_struct函数,用于获取返回值.

#include <iostream>
#include <Windows.h>

typedef struct MyStruct
{
  char uname[10];
  int age;
}MyStruct,*MyStructPointer;

extern "C"__declspec(dllexport) MyStruct* get_struct(char *uname,int age)
{
  MyStructPointer ptr = (MyStructPointer)malloc(sizeof(MyStruct));
  strcpy(ptr->uname, uname);
  ptr->age = age;
  return ptr;
}

BOOL APIENTRY DllMain(HANDLE hModule, DWORD dwReason, void* lpReserved)
{
  return true;
}

python部分则定义MyStructPointer结构指针,并获取返回值即可.

from ctypes import *

class MyStructPointer(Structure):
    _fields_ = [
        ("uname",c_char * 10),
        ("age",c_int)
    ]

if __name__ == "__main__":
    pdll = CDLL("./engine.dll")

    # 定义参数
    pdll.get_struct.argtypes = [c_char_p,c_int]
    arg1 = c_char_p(bytes("lyshark", "utf-8"))
    arg2 = c_int(24)

    # 定义返回值类型
    pdll.get_struct.restype = POINTER(MyStructPointer)

    # 调用并获取返回值
    ref = pdll.get_struct(arg1,arg2)
    print("返回姓名: {} -> 年龄: {}".format(ref.contents.uname,ref.contents.age))

C混编(C中调用python): 让C语言调用python文件,并让python文件返回一个字符串结果,充分利用python三方库.

#include <iostream>
#include <Windows.h>

using namespace std;

std::string GetValue(char *pyname, char *function, char *argv[])
{
  string command;
  command.append(pyname);
  command.append(" ");
  command.append(function);
  command.append(" ");
  command.append(argv[0]);

  FILE *fp;
  char buf[8196] = { 0 };
  if ((fp = _popen(command.c_str(), "r")) == NULL)
  {
    exit(1);
  }
  while (fgets(buf, 255, fp) != NULL)
  {
    printf("%s", buf);
  }
  _pclose(fp);
  return buf;
}

int main(int argc, char * argv[])
{
  char *time[] = { "1024" };
  GetValue("python pytest.py", "get_value", time);
  getchar();
  return 0;
}

python代码中我们直接判断传入参数,并根据参数的不同来执行不同的流程.

import sys

if __name__ == "__main__":
    # Expected command line: <script> get_value <value>
    args = sys.argv[1:]
    if args[:1] == ["get_value"]:
        if len(args) < 2:
            # Without this check a missing value raised IndexError.
            sys.exit("usage: pytest.py get_value <value>")
        value = args[1]
        print("{} ok".format(value))