第一句子网 - 唯美句子、句子迷、好句子大全
第一句子网 > xpath java html_Java根据XPath提取HTML

xpath java html_Java根据XPath提取HTML

时间:2021-01-04 00:34:23

相关推荐

xpath java html_Java根据XPath提取HTML

有这样一段HTML:

<html><body><div><table><tr><td id='foo'>Hello</td></tr></table></div></body></html>

希望通过这个XPath提取出Hello:

//div//td[contains(@id, 'foo')]/text()

先导入maven依赖:

<dependency>
    <groupId>net.sourceforge.htmlcleaner</groupId>
    <artifactId>htmlcleaner</artifactId>
    <version>2.21</version>
</dependency>

main函数:

package com.my.demo;

import javax.xml.xpath.XPath;

import javax.xml.xpath.XPathConstants;

import javax.xml.xpath.XPathFactory;

import org.htmlcleaner.CleanerProperties;

import org.htmlcleaner.DomSerializer;

import org.htmlcleaner.HtmlCleaner;

import org.htmlcleaner.TagNode;

import org.w3c.dom.Document;

public class HtmlXpathJava {

public static void main(String[] args) {

String sampleHtml = "

String sampleXpath = "//div//td[contains(@id, 'foo')]/text()";

System.out.println(getValueByXpath(sampleXpath, sampleHtml));

}

/**

* Extract value by xPath from HTML.

*/

private static String getValueByXpath(String xPath, String html) {

TagNode tagNode = new HtmlCleaner().clean(html);

String value = null;

try {

Document doc = new DomSerializer(new CleanerProperties()).createDOM(tagNode);

XPath xpath = XPathFactory.newInstance().newXPath();

value = (String) xpath.evaluate(xPath, doc, XPathConstants.STRING);

} catch (Exception e) {

System.out.println("Extract value error. " + e.getMessage());

e.printStackTrace();

}

return value;

}

}

输出:

Hello

参考:

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。