一、工具:VS2015 + Python 3.5
import re
import urllib.error
import urllib.request

# Header pair that makes the request look like it comes from a desktop browser.
USER_AGENT = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
)

# NOTE(review): the listing this script was recovered from had the pattern
# '(.*?)', which matches the empty string at every position, so findall()
# returned nothing but empty strings. The HTML tags around the capture group
# appear to have been stripped from the listing. The pattern below is a
# reconstruction -- confirm it against the live page markup before relying on it.
CONTENT_PATTERN = '<div class="content">.*?<span>(.*?)</span>.*?</div>'

# Compiled once; re.S lets '.' span the newlines inside a post.
_CONTENT_RE = re.compile(CONTENT_PATTERN, re.S)

BASE_URL = "https://www.qiushibaike.com/8hr/page/"
FIRST_PAGE = 1
LAST_PAGE = 28


def getcontent(url, page):
    """Fetch ``url`` and print every text fragment matched by CONTENT_PATTERN.

    Args:
        url: Address of the page to download. The body is decoded as UTF-8.
        page: Page number supplied by the caller. It is not used by this
            function and is kept only so existing call sites keep working.

    Returns:
        None. Matches are written to standard output, one ``print`` per match.
        If the request fails with ``urllib.error.URLError`` the error's
        ``reason`` is printed instead and nothing else is output.
    """
    opener = urllib.request.build_opener()
    opener.addheaders = [USER_AGENT]
    # Kept from the original script: installs the opener globally so that
    # plain urllib.request.urlopen() calls elsewhere send the same header.
    urllib.request.install_opener(opener)

    try:
        # The context manager closes the connection even if read/decode fails.
        with opener.open(url) as response:
            data = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        print(e.reason)
        return

    for cont in _CONTENT_RE.findall(data):
        print(cont)


def main():
    """Crawl pages FIRST_PAGE..LAST_PAGE (inclusive) under BASE_URL."""
    for i in range(FIRST_PAGE, LAST_PAGE + 1):
        getcontent(BASE_URL + str(i), i)


if __name__ == "__main__":
    main()
模仿浏览器访问,用正则表达式匹配内容,并打印结果。