htmlcleaner-xpath解析工具
<dependency> <groupId>net.sourceforge.htmlcleaner</groupId> <artifactId>htmlcleaner</artifactId> <version>2.24</version></dependency>
example
/** Shared cleaner instance used by {@link #toTagNode(String)}; never reassigned. */
private static final HtmlCleaner htmlCleaner = new HtmlCleaner();

/**
 * Converts HTML text into a {@link TagNode} that the XPath helpers below can query.
 *
 * @param html the HTML text
 * @return the node returned by {@code HtmlCleaner.clean(html)}
 */
public static TagNode toTagNode(String html) {
    return htmlCleaner.clean(html);
}

/**
 * Returns the string form of the first XPath match, trimmed.
 *
 * @param tn    the node to evaluate the expression against
 * @param xpath the XPath expression
 * @return the trimmed {@code toString()} of the first match; an empty string when the
 *         first match is {@code null}; {@code StrPool.EMPTY} when there is no match or
 *         the lookup yields no result
 */
public static String getFirstTextByXPath(TagNode tn, String xpath) {
    return getByXPath(tn, xpath)
            .filter(matches -> matches.length > 0)
            // trimToEmpty is fed a non-null string here, exactly as in the explicit-if form.
            .map(matches -> StringUtils.trimToEmpty(null == matches[0] ? "" : matches[0].toString()))
            .orElse(StrPool.EMPTY);
}

/**
 * Evaluates an XPath expression against a node without propagating failures.
 *
 * @param tn    the node to evaluate the expression against; may be {@code null}
 * @param xpath the XPath expression; may be {@code null}
 * @return the array produced by {@code tn.evaluateXPath(xpath)}, or an empty
 *         {@link Optional} when either argument is {@code null}, when the evaluation
 *         returns {@code null}, or when it throws {@link XPatherException}
 */
public static Optional<Object[]> getByXPath(TagNode tn, String xpath) {
    // Treat missing input as "no result" instead of failing with a NullPointerException.
    if (tn == null || xpath == null) {
        return Optional.empty();
    }
    try {
        return Optional.ofNullable(tn.evaluateXPath(xpath));
    } catch (XPatherException ignored) {
        // Deliberately best-effort: an expression that cannot be evaluated is reported
        // to the caller as an absent result rather than as an exception.
        return Optional.empty();
    }
}