本文用到的知识点是正则表达式(真香)。
GetMiddleStr2 函数来自我之前的一篇博客,这里直接给出它的实现。
import re
def GetMiddleStr2(content, startStr, endStr):
    """Return the shortest text found between a start and an end delimiter.

    Every element of *startStr* is paired with every element of *endStr*.
    Both arguments are iterated, so a plain string is treated as a
    collection of single-character delimiters; pass a list of strings to
    use multi-character delimiters.

    Delimiters are matched literally (regex metacharacters are escaped).
    The captured text must be at least one character long and cannot span
    a newline.

    Args:
        content: The text to search.
        startStr: Iterable of start delimiters.
        endStr: Iterable of end delimiters.

    Returns:
        The shortest captured text over all delimiter pairs (the first one
        found wins a tie), or '' when no pair matches.
    """
    goalStr = ''
    for sStr in startStr:
        for eStr in endStr:
            # The greedy leading [\s\S]* makes the regex prefer the
            # right-most usable occurrence of the start delimiter.
            # re.escape keeps delimiters such as '(' or '.' literal.
            pattern = r'[\s\S]*%s(.+?)%s' % (re.escape(sStr), re.escape(eStr))
            matched = re.match(pattern, content)
            if not matched:
                continue
            candidate = matched.group(1)
            # Replace only on a strictly shorter candidate, so the first
            # match is kept on ties.
            if not goalStr or len(candidate) < len(goalStr):
                goalStr = candidate
    return goalStr
def date_extract_transform(para):
    """Convert the first 'YYYY年M月D日' date in *para* to 'YYYYMMDD'.

    Month and day are left-padded with '0' to two characters. The numeric
    values are not range-checked.

    Args:
        para: Text that may contain a date such as '2022年3月19日'.

    Returns:
        The eight-character date string, or '00000000' when *para*
        contains no date in that format.
    """
    # Raw string avoids invalid-escape warnings for \d; the three groups
    # capture year, month and day in a single pass.
    found = re.search(r"(\d{4})年(\d{1,2})月(\d{1,2})日", para)
    if found is None:
        return '00000000'
    year, month, day = found.groups()
    return year + month.zfill(2) + day.zfill(2)
# Demo: extract the date from a sample sentence and print it as YYYYMMDD.
data = '截至2022年3月19日XXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
print(date_extract_transform(para=data))
输出结果是:20220319