# -*- coding:utf-8 -*-
# Python 3 (uses urllib.request and print(); the original header said "python 2.7")
# Author: XiaoDeng
# Source: http://tieba.baidu.com/p/2460150866
# Topic: tag operations with BeautifulSoup
#
# Parses a small sample document and prints (1) every element whose ``href``
# attribute matches the regex "lacie" and (2) every text node matching "Lacie".

import re
import urllib.request

from bs4 import BeautifulSoup

# If the input is a URL rather than an inline string, the page can be read
# like this instead:
# html_doc = "http://tieba.baidu.com/p/2460150866"
# req = urllib.request.Request(html_doc)
# webpage = urllib.request.urlopen(req)
# html = webpage.read()

# NOTE(review): the sample text below contains no HTML tags as it stands, so
# the href search further down is expected to find nothing. Presumably the
# markup was stripped at some point -- restore the original document if so.
html = """The Dormouse's story The Dormouse's story
Once upon a time there were three little sisters; and their names were ,Lacie andTillie;Lacieand they lived at the bottom of a well.
...
"""

soup = BeautifulSoup(html, 'html.parser')  # parsed document object

# Use re.compile to match the href addresses that should be scraped.
for k in soup.find_all(href=re.compile("lacie")):
    print(k)

# Same idea, but matching against text nodes instead of an attribute.
for k in soup.find_all(string=re.compile("Lacie")):
    print(k)