import sys

try:
    # Python 2 location of the parser class.
    from HTMLParser import HTMLParser
except ImportError:
    # Python 3 moved the class into the html package.
    from html.parser import HTMLParser
class MyParser(HTMLParser):
    """Skeleton HTML parser with placeholder event handlers.

    The three ``handle_*`` methods override the callbacks that the base
    ``HTMLParser`` invokes while ``feed()`` consumes markup. Each one is an
    intentionally empty placeholder: replace ``pass`` with real processing.
    """

    def handle_starttag(self, tag, attrs):
        """Process an opening tag.

        Args:
            tag: Name of the tag that was opened.
            attrs: Attributes of the tag; the base class documents this as a
                list of ``(name, value)`` pairs.
        """
        # code to process start tag
        pass

    def handle_data(self, data):
        """Process text found between a start tag and an end tag.

        Args:
            data: The text content reported by the parser.
        """
        # code to process data between start and end tag
        pass

    def handle_endtag(self, tag):
        """Process a closing tag.

        Args:
            tag: Name of the tag that was closed.
        """
        # code to process end tag
        pass
# Parse the HTML file whose path is given as the first command-line argument.
mp = MyParser()
# The with-block closes the file even if reading or parsing raises.
with open(sys.argv[1]) as fd:
    # feed() drives the handle_* callbacks as it consumes the markup.
    mp.feed(fd.read())
# close() forces any text still buffered by the parser through the handlers,
# as if the input were followed by an end-of-file mark.
mp.close()
`mp.feed()` 會依據解析到的資料,呼叫對應的 handle 方法(`handle_starttag`、`handle_data`、`handle_endtag`),如此就可以處理 HTML 文件了。