HTML-ListScraper
view release on metacpan or search on metacpan
testdata/reddit.html view on Meta::CPAN
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<!-- Document metadata: declares a UTF-8 content type, the page title, scripts, the stylesheet, the favicon and an RSS alternate link. -->
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8' />
<title>reddit.com: what's new online</title>
<!-- Two external scripts served from /static. -->
<script src="/static/psrs.js" language="javascript" type="text/javascript"></script>
<script src="/static/reddit.js" language="javascript" type='text/javascript'></script>
<!-- Sets the global variable logged to false. NOTE(review): presumably a "user is logged in" flag read by the external scripts; confirm. -->
<script language='javascript'>var logged = false </script>
<!-- Assigns init as the window load handler. init is not defined in the visible markup; presumably it comes from one of the external scripts above. -->
<script language='javascript'> window.onload = init </script>
<link rel='stylesheet' href='/static/styles.css' type='text/css' />
<link rel='shortcut icon' href='/favicon.ico' type="image/x-icon" />
<!-- Creates two Image objects and points them at aupmod.png and adownmod.png. NOTE(review): looks like an image preload for the vote arrows; confirm. -->
<script language='javascript'>
var a = new Image();
a.src ="http://static.reddit.com/aupmod.png";
var b = new Image();
b.src = "http://static.reddit.com/adownmod.png";
</script>
<!-- Advertises an RSS feed for this page at /.rss. -->
<link rel="alternate" type="application/rss+xml" title="RSS" href="/.rss" />
</head>
<body>
<table id="topbar">
<!-- Page header laid out as a two-row table; the first cell (logo) spans both rows. -->
<tr>
<td rowspan='2'>
<!-- Logo image bound to the "header" image map, which defines two rectangular link areas: one to "/" and one to http://reddit.com. -->
<img id="header" src="http://static.reddit.com/reddit.com.header.png" alt="reddit" usemap="#header" />
<map name="header">
<area shape="rect" coords="0,0,33,40" href="/"/>
<area shape="rect" coords="34,0,120,40" href="http://reddit.com"/>
</map>
</td>
<td colspan='2' width='100%' class='topmenu menu' nowrap="nowrap">
<!-- Top menu: register (links to /login), submit, help and blog, separated by literal "|" characters. -->
want to join? <a href="/login">register</a> in seconds |<a href='/submit'>submit</a>|<a href="/help">help</a>|<a href="http://blog.reddit.com">blog</a>
</td>
</tr>
<tr>
<td valign="top" width='100%'>
<!-- Listing links: "hot" carries class sel-menu-item, the other three carry menu-item. NOTE(review): sel-menu-item presumably marks the currently selected listing; confirm against styles.css. -->
<div id="topstrip">
<a class='sel-menu-item' href='/hot'>hot</a><a class='menu-item' href='/new'>new</a><a class='menu-item' href='/browse'>browse</a><a class='menu-item' href='/stats'>stats</a>
</div>
</td>
<td valign="top" nowrap='nowrap'>
<!-- Search form: submits the text field "q" to /search via GET. -->
<form id='searchform' action='/search' method="get">
<input class='txt' style="vertical-align: bottom" type="text" name="q" />
<button class='btn' type='submit'>search</button>
</form>
</td>
</tr>
</table>
<div id='right'>
<div class="pbox">
<form onsubmit="return login(this)" method="post" action="/login">
<input type="hidden" name="url" value="/" />
<input type="hidden" name="op" value="login" />
<div class="loginform">
<p>username:</p>
<p><input class="logtxt" name="user" type="text" maxlength="20" /> </p>
<p>password:</p>
<p><input class="logtxt" name="passwd" type="password" maxlength="20" /></p>
<p class="error">
( run in 0.590 second using v1.01-cache-2.11-cpan-411bb0df24b )