HTML-ListScraper

 view release on metacpan or  search on metacpan

testdata/reddit.html  view on Meta::CPAN

    </td>
    <td colspan="3" id="titlerow2605462">
        <a id="title2605462" class="title" href="http://www.cnn.com/2007/US/04/21/blueangels.crash/index.html"                                   >          Blue Angels Crash</a>
        <span class="little"> (cnn.com)</span> 
    </td>
</tr>
<!-- Byline row for story 2605462: score, age, submitter link and comments link.
     The one-cell "spacing" row after it ends this entry. -->
<tr class="evenRow">
    <td valign="top" class="wide little" colspan="3">

<span id="score2605462">96 points</span>
posted 18 hours ago by <a href="/user/newton_dave">newton_dave </a>
    <a href="/info/1judy/comments" class="bylink"          >         36 comments     </a>
</td>
</tr>
<tr ><td colspan="3" class="evenRow spacing"></td></tr>

            
<!-- Top spacer row for story 2605490; its id carries the story number used by the
     up/down/titlerow/score ids in the rows that follow. -->
<tr id="site2605490">
    <td colspan="1" class="oddRow spacing top"></td>
    <td colspan="4" class="oddRow spacing top"></td>
    </tr>
<!-- Main row for story 2605490: rank number, vote arrows, then title link and source domain.
     The rank and arrow cells use rowspan="3". -->
<tr class="oddRow">
    <td valign="top" class="numbercol" rowspan="3">
            26.
    </td>
    <td valign="top" rowspan="3">
        <!-- Vote arrows: both call mod() with the story id; the second argument is 1 for up, 0 for down.
             mod() is not defined in this part of the file. -->
        <div id="up2605490" class="arrow up" onclick="javascript:mod(2605490, 1, '')"> </div>
        <!-- NOTE(review): the "down" div below is written in self-closing form. div is not a void
             element, so an HTML parser treats it as an open tag. Left as captured. -->
        <div id="down2605490" class="arrow down" onclick="javascript:mod(2605490, 0, '')"/>
    </td>
    <td colspan="3" id="titlerow2605490">
        <!-- NOTE(review): the title anchor below ends in "..." with no closing tag visible on its
             line; possibly cut off by the viewer this text was copied from. Confirm against the
             original fixture. -->
        <a id="title2605490" class="title" href="http://www.vanityfair.com/politics/features/2007/05/texaco200705"                                   >          Decades of oil drilling in the Ecuadorean Amazon has turned hundreds of square miles of ra...
        <span class="little"> (vanityfair.com)</span> 
    </td>
</tr>
<!-- Byline row for story 2605490: score, age, submitter link and comments link.
     The one-cell "spacing" row after it ends this entry. -->
<tr class="oddRow">
    <td valign="top" class="wide little" colspan="3">

<span id="score2605490">92 points</span>
posted 18 hours ago by <a href="/user/keen75">keen75 </a>
    <a href="/info/1jueq/comments" class="bylink"          >         10 comments     </a>
</td>
</tr>
<tr ><td colspan="3" class="oddRow spacing"></td></tr>

    </table>
        <!-- Pagination: a single "next" link that requests the listing from offset 26. -->
        <p class="menu"> view more:
        
        
         <a href="/?offset=26">next &raquo;</a>
        </p>

</div>
<!-- Page footer: a "|"-separated menu of site links, then the legal notice with links to the
     User Agreement and Privacy Policy.
     NOTE(review): the long menu line and the legal line both end in "..." and the closing tag of
     the second paragraph is not visible; possibly cut off by the viewer this text was copied
     from. Confirm against the original fixture. -->
<div id='footer'>
<p class="menu">
    <a href="http://sub.reddit.com">
    subreddits</a>|<a href="/feedback">feedback</a>|<a href="/bookmarklets">bookmarklets</a>|<a href="/.rss">rss</a>|<a href="http://reddit.com/buttons">buttons</a>|<a href="http://reddit.com/widget">widget</a>|<a href="http://store.reddit.com/index....
</p>
<p class="bottommenu">Use of this site constitutes acceptance of our <a href="http://reddit.com/help/useragreement">User Agreement</a> and <a href="http://reddit.com/help/privacypolicy">Privacy Policy</a>. (c) 2007 CondeNet, Inc. All rights reserved....
</div>

<!-- Empty element that starts hidden (display:none). Clicking it calls
     hide('cover', 'loginpopup'); hide() is not defined in this part of the file. -->
<div id="cover" style="display:none" onclick="hide('cover', 'loginpopup')">
</div>

<!-- Login / registration popup; starts hidden (display: none).
     Layout is a two-column table: left cell holds the "create a new account" form, right cell
     holds the "login" form. Both forms POST to /login and differ by the hidden "op" field
     ("reg" or "login"). Both run login(this) on submit; login() is not defined in this part
     of the file. -->
<div id="loginpopup" style="display: none">
<table>
<!-- Prompt shown across both columns. -->
<tr><td colspan="2" style="text-align: center"><h2 class="error">
please log in or register in to vote for the links you like or dislike. this will affect their ranking <i>and</i> help customize reddit for you.
</h2></td></tr>
<tr>
<!-- Left column: registration form (op=reg) with username, password and password confirmation. -->
<td style="border-right: 2px solid #D3D3D3; vertical-align: top; padding-right:10px">
    <h1>create a new account</h1>
    <p>registration only requires a username and a password.</p>
    
    <form onsubmit="return login(this)" method="post" action="/login">
    <input type="hidden" name="url" value="/" />
    <input type="hidden" name="op" value="reg" />
    <div class="loginform">
    <!-- NOTE(review): the field captions here are plain paragraphs, not label elements;
         only the "remember me" checkbox has an associated label. -->
    <p>username:</p>
    <p><input class="logtxt" name="user" type="text" maxlength="20" /> </p>
    <p>password:</p>
    <p><input class="logtxt" name="passwd" type="password" maxlength="20" /></p>
    <p>verify password:</p>
    <p><input class="logtxt" name="passwd2" type="password" maxlength="20" /></p>
    <!-- Empty paragraph with class "error"; presumably filled in by script on failure (verify). -->
    <p class="error">
    </p>
    <p><input type="checkbox" id="remTruereg" name="rem" /><label for="remTruereg">remember me</label></p>
    <p><button class="btn" type="submit">create account</button></p>
    </div>
</form>

    
    <p><span style="color:orangered">is it really that easy? only one way to find out...</span></p>
</td>
<!-- Right column: login form (op=login) with username and password only. -->
<td style="padding-left: 10px; vertical-align: top">
    <h1>login</h1>
    <p>already have an account and just want to login?</p>

    <form onsubmit="return login(this)" method="post" action="/login">
    <input type="hidden" name="url" value="/" />
    <input type="hidden" name="op" value="login" />
    <div class="loginform">
    <p>username:</p>
    <p><input class="logtxt" name="user" type="text" maxlength="20" /> </p>
    <p>password:</p>
    <p><input class="logtxt" name="passwd" type="password" maxlength="20" /></p>
    <!-- Empty paragraph with class "error"; presumably filled in by script on failure (verify). -->
    <p class="error">
    </p>
    <p><input type="checkbox" id="remTruelogin" name="rem" /><label for="remTruelogin">remember me</label></p>
    <p><button class="btn" type="submit">login</button></p>
    </div>
</form>


    <p style="color:orangered">forgot your password? click <a href="/password">here</a></p>
</td>
</tr>
</table>

<!-- Close control: a javascript: link that calls hide('cover','loginpopup'). -->
<div class="center">
<a href="javascript:hide('cover','loginpopup')">close this window</a>
</div>
</div>

</body></html>



( run in 1.248 second using v1.01-cache-2.11-cpan-d7f47b0818f )