HTML-ExtractMain

 view release on metacpan or  search on metacpan

t/test_case_data/google_short_blog.html  view on Meta::CPAN

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>
<head>
<meta content='iitJxuWLjtoK2cUdZtHd8yn6yWLcf5HRPezdIAwXW50=' name='verify-v1'/>
<meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>
<script type="text/javascript">(function() { var a=window;function c(b){this.t={};this.tick=function(d,i,e){e=e?e:(new Date).getTime();this.t[d]=[e,i]};this.tick("start",null,b)}var f=new c;a.jstiming={Timer:c,load:f};try{var g=null;if(a.chrome&&a.ch...
<meta content='true' name='MSSmartTagsPreventParsing'/>
<meta content='blogger' name='generator'/>
<link href='http://www.blogger.com/favicon.ico' rel='icon' type='image/vnd.microsoft.icon'/>
<link href='http://googleblog.blogspot.com/2010/03/introducing-google-ad-innovations.html' rel='canonical'/>
<link rel="alternate" type="application/atom+xml" title="Official Google Blog - Atom" href="http://googleblog.blogspot.com/feeds/posts/default" />
<link rel="alternate" type="application/rss+xml" title="Official Google Blog - RSS" href="http://googleblog.blogspot.com/feeds/posts/default?alt=rss" />
<link rel="service.post" type="application/atom+xml" title="Official Google Blog - Atom" href="http://www.blogger.com/feeds/10861780/posts/default" />
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://www.blogger.com/rsd.g?blogID=10861780" />
<link rel="alternate" type="application/atom+xml" title="Official Google Blog - Atom" href="http://googleblog.blogspot.com/feeds/4607702883382327739/comments/default" />
<!--[if IE]> <script> (function() { var html5 = ("abbr,article,aside,audio,canvas,datalist,details," + "figure,footer,header,hgroup,mark,menu,meter,nav,output," + "progress,section,time,video").split(','); for (var i = 0; i < html5.length; i++) { doc...
<title>Official Google Blog: Introducing Google Ad Innovations</title>
<link href='http://www2.blogger.com/widgets/3319451950-blogarchive.css' rel='stylesheet' type='text/css'/><link href='http://www2.blogger.com/widgets/2791266615-blog.css' rel='stylesheet' type='text/css'/><!-- --><link type='text/css' rel='stylesheet...
 <!-- Blogger authorization stylesheet; the query-string ampersand is escaped so the attribute value is well-formed XHTML. -->
 <link rel="stylesheet" type="text/css" href="http://www.blogger.com/dyn-css/authorization.css?targetBlogID=10861780&amp;zx=8676e8ae-f25e-41c2-97d9-8e9fc7fa3458"/>
<style type="text/css">#navbar-iframe { display:block }
</style>

<style type='text/css'>@import url('http://www2.blogger.com/css/blog_controls.css');
@import url('http://www2.blogger.com/dyn-css/authorization.css?blogID=8975829421217267474');
</style>
<link href='http://www.google.com/uds/css/gsearch.css' rel='stylesheet' type='text/css'/>
<!-- bit.ly JavaScript API loader; query-string ampersands are escaped so the attribute value is well-formed XHTML. -->
<script charset='utf-8' src='http://bit.ly/javascript-api.js?version=latest&amp;login=tweettrackjs&amp;apiKey=R_7e9987b2fd13d7e4e881f9cbb168f523' type='text/javascript'></script>
<script charset='utf-8' src='http://s.bit.ly/TweetAndTrack.js?v=1.01' type='text/javascript'></script>
<style id='page-skin-1' type='text/css'><!--
/*
/* Variable definitions
====================
<Variable name="bgcolor" description="Page Background Color"
type="color" default="#fff">
<Variable name="textcolor" description="Text Color"
type="color" default="#333">
<Variable name="linkcolor" description="Link Color"
type="color" default="#58a">
<Variable name="pagetitlecolor" description="Blog Title Color"
type="color" default="#929292">
<Variable name="pagesubtitlecolor" description="Blog Sub Title Color"
type="color" default="#929292">
<Variable name="descriptioncolor" description="Blog Description Color"
type="color" default="#666666">
<Variable name="titlecolor" description="Post Title Color"
type="color" default="#c60">
<Variable name="bordercolor" description="Border Color"
type="color" default="#ccc">
<Variable name="sidebarcolor" description="Sidebar Title Color"
type="color" default="#000000">
<Variable name="sidebartextcolor" description="Sidebar Text Color"
type="color" default="#666">
<Variable name="visitedlinkcolor" description="Visited Link Color"
type="color" default="#999">
<Variable name="bodyfont" description="Text Font"
type="font" default="normal normal 100% Arial, sans-serif">
<Variable name="headerfont" description="Sidebar Title Font"
type="font"
default="normal normal 95% Arial, sans-serif">
<Variable name="pagetitlefont" description="Blog Title Font"
type="font"



( run in 0.314 second using v1.01-cache-2.11-cpan-b50b6a40fd4 )