import re
# Matches http:// or https:// followed by one or more permitted characters or
# %XX escapes.  Note that the `$-_` range inside the character class spans
# ASCII `$` through `_`, so it also admits `/`, `:`, `?`, `=` and similar.
# A stricter alternative that was considered:
#     r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
URL_PATTERN = re.compile(
    r"http[s]?://"
    r"(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
)


def extract_urls(text):
    """Return every http(s) URL found in *text*, in order of appearance.

    Args:
        text: The string to scan.

    Returns:
        A list of matched URL strings; empty when nothing matches.
    """
    return URL_PATTERN.findall(text)


if __name__ == "__main__":
    data_path = r"C:\Users\HuJun\PycharmProjects\pythonProject\daily_tesy\data.txt"
    # `with` guarantees the file handle is closed even if reading fails.
    with open(data_path, "r", encoding="utf-8") as f:
        data = f.read()
    url = extract_urls(data)
    print(url)
import re
import urllib
# Case-insensitive match for an <a ...href=...>...</a> element on one line
# (`.` does not cross newlines because re.S is not set).
ANCHOR_PATTERN = re.compile(r"<a.*?href=.*?<\/a>", re.I)


def find_anchor_tags(html):
    """Return the anchor elements found in *html* after removing all spaces.

    Every space character is stripped before matching, so the returned
    strings contain no spaces (e.g. ``<a href="x">`` comes back as
    ``<ahref="x">``).

    Args:
        html: Page markup as a ``str``.

    Returns:
        A list of matched anchor strings; empty when nothing matches.
    """
    return ANCHOR_PATTERN.findall(html.replace(" ", ""))


if __name__ == "__main__":
    # In Python 3 `urlopen` lives in `urllib.request`; a bare `import urllib`
    # does not provide it.
    import urllib.request

    url = "https://www.jb51.net"
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        # read() yields bytes; decode so the str-based replace/regex work.
        # Fall back to UTF-8 when the server declares no charset, replacing
        # undecodable bytes rather than aborting.
        charset = response.headers.get_content_charset() or "utf-8"
        data = response.read().decode(charset, errors="replace")

    urls = find_anchor_tags(data)
    # NOTE(review): prints the literal 1 once per match, as the original did;
    # confirm whether printing the matched tag was intended instead.
    for _ in urls:
        print(1)
    # The original used for/else with no `break`, so this always runs.
    print('this is over')