Last active
October 13, 2017 09:27
-
-
Save Terryhung/fc7e0d606e227d975106e23718c9fde2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//JAVA | |
public static void AddDocHead(Elements doc) throws Exception { | |
Element head = doc.first(); | |
head.prepend("<style>.article-header{content: '';background: linear-gradient(to top, #000 1rem,#fff 40rem,#fff 100%);height: 35rem;display: block;}.main-image{width: 100%;max-height: 700px;opacty: 0.9}.article-content{background-color: #fff;position: absolute;width: 90%;z-index: 1;margin-left: 4%; margin-right: 5%;margin-top: -2rem;font-size: 25px; padding-left: 5px; padding-right: 5px}.article-header:after {content: '';background: #000;background: -moz-linear-gradient(top, #000 28%, #fff 58%, #fff 100%);background: -webkit-linear-gradient(top, #000 22rem,#fff 40rem,#fff 100%);background: linear-gradient(to bottom, #000 22rem,#fff 40rem,#fff 100%);height: 40rem;display: block;} .article-content img{width: 100%}</style>"); | |
} | |
public static void Parsing() throws Exception { | |
String url = "http://www.cna.com.tw/news/aloc/201710030325-1.aspx"; | |
Document doc = Jsoup.connect(url) | |
.header("User-Agent", "Mozilla/5.0 (Linux; Android 7.0; SAMSUNG SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/5.2 Chrome/51.0.2704.106 Mobile Safari/537.36") | |
.get(); | |
String target = "div.news_article"; | |
Elements divs = doc.select(target); | |
String remove = "script, button"; | |
Elements removed = doc.select(remove); | |
String image_url = "http://img.appledaily.com.tw/images/twapple/640pix/20170925/BN02/BN02_005.jpg"; | |
String news_title = "苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業"; | |
removed.remove(); | |
/* Add Title to first position*/ | |
divs.first().prependElement("h2").text(news_title); | |
/*Wrap New Tag*/ | |
String child_html = divs.html(); | |
divs.html(String.format("<div class='article-content'>%s<div>", child_html)); | |
System.out.println(divs); | |
divs.prepend(String.format("<div class='article-header'><img src='%s' class='main-image'></div></div>", image_url)); | |
Whitelist wl = Whitelist.relaxed(); | |
wl.addTags("div", "span", "p", "br", "article", "section", "style"); | |
wl.addAttributes("div", "class"); | |
wl.addAttributes("img", "class"); | |
AddDocHead(divs); | |
String mProcessedHtml = Jsoup.clean(divs.outerHtml(),wl); | |
CreateHTML(Arrays.asList(mProcessedHtml)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment