Balance the HTML regenerated by the sgmllib-based processor.

Start tags that need an end tag are pushed on self.tags.  An end tag is
emitted only when it matches the innermost open tag (stray or mis-nested end
tags are dropped), and output() appends end tags for whatever is still open,
innermost first, so the result is properly nested.

--- feedparser.py~ 2004-09-16 18:43:10.000000000 +0200
+++ feedparser.py 2004-09-17 18:36:21.000000000 +0200
@@ -1246,6 +1246,7 @@
 
     def reset(self):
         self.pieces = []
+        self.tags = []  # stack of start tags still awaiting their end tag
         sgmllib.SGMLParser.reset(self)
 
     def feed(self, data):
@@ -1276,12 +1277,16 @@
             self.pieces.append("<%(tag)s%(strattrs)s />" % locals())
         else:
             self.pieces.append("<%(tag)s%(strattrs)s>" % locals())
+            self.tags.append(tag)
 
     def unknown_endtag(self, tag):
         # called for each end tag, e.g. for </pre>, tag will be "pre"
         # Reconstruct the original end tag.
         if tag not in self.elements_no_end_tag:
-            self.pieces.append("</%(tag)s>" % locals())
+            # only emit an end tag that closes the innermost open tag
+            if self.tags and self.tags[-1] == tag:
+                self.tags.pop()
+                self.pieces.append("</%(tag)s>" % locals())
 
     def handle_charref(self, ref):
         # called for each character reference, e.g. for "&#160;", ref will be "160"
@@ -1337,7 +1342,10 @@
 
     def output(self):
         """Return processed HTML as a single string"""
-        return "".join([str(p) for p in self.pieces])
+        # close tags that were never closed, innermost first
+        unclosed = self.tags[:]
+        unclosed.reverse()
+        return "".join([str(p) for p in self.pieces]) + "".join([("</%s>" % p) for p in unclosed])
 
 class _LooseFeedParser(_FeedParserMixin, _BaseHTMLProcessor):
     def __init__(self, baseuri, baselang, encoding):