XML/HTML parsing in Java using Apache Commons

Project structure
























Download the project from here or here


SubstringBetweenTest1.java


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
package getSubstringFromString;

import java.io.File;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;

//How to get a substring between tags in a String?

/**
 * Demonstrates extracting the text between a pair of tags from a file's
 * contents using Apache Commons Lang ({@code StringUtils.substringBetween} and
 * {@code StringUtils.substringsBetween}) after loading the file with Apache
 * Commons IO ({@code FileUtils.readFileToString}).
 */
public class SubstringBetweenTest1 {

 /** Input file used when no path is supplied on the command line. */
 private static final String DEFAULT_FILE =
   "D:/Workspace_Juno/XmlParsing_Apache/WebContent/FileToParse/test.html";

 /**
  * Charset used to decode the input file. Stated explicitly so the result
  * does not depend on the platform default; ISO-8859-1 is what the sample
  * test.html declares in its Content-Type meta tag.
  */
 private static final String FILE_ENCODING = "ISO-8859-1";

 /**
  * Reads the input file, then prints its title, every table-cell value, and
  * the second cell value when there is one.
  *
  * @param args optional; {@code args[0]} is the path of the file to parse.
  *             When absent, {@link #DEFAULT_FILE} is used.
  * @throws Exception if the file cannot be read
  */
 public static void main(String[] args) throws Exception {

  String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
  File file = new File(path);
  String testHtml = FileUtils.readFileToString(file, FILE_ENCODING); // from commons io

  // substringBetween yields null when either tag is missing; it is printed as-is.
  String title = StringUtils.substringBetween(testHtml, "<title>", "</title>");
  System.out.println("title:" + title);

  // substringsBetween yields null (not an empty array) when nothing matches,
  // so normalise it before iterating or indexing.
  String[] tds = StringUtils.substringsBetween(testHtml, "<td>", "</td>");
  if (tds == null) {
   tds = new String[0];
  }
  for (String td : tds) {
   System.out.println("td value:" + td);
  }

  // Only index the second cell when the document actually has one.
  if (tds.length > 1) {
   System.out.println(" \n tds[1] value:" + tds[1] + "\n");
  }

  // passing values of tds variable to a method. Here tds is an array
  display(tds);

 }

 /**
  * Prints each value of the given array on its own line.
  *
  * @param tds the values to print; {@code null} is treated as empty
  */
 public static void display(String[] tds) {
  if (tds == null) {
   return;
  }
  for (String td : tds) {
   System.out.println("td value in method::" + td);
  }
 }
}




test.html (File to parse)


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>My Title</title>
</head>
<body>
 <!-- Sample input for the substring demo: the page title above and the four
      table cells below are the values the Java program extracts and prints. -->
 <table>
  <tr>
   <td>One</td>
   <td>Two</td>
  </tr>
  <tr>
   <td>Three</td>
   <td>Four</td>
  </tr>
 </table>
</body>
</html>


Output
title:My Title
td value:One
td value:Two
td value:Three
td value:Four

 tds[1] value:Two

td value in method::One
td value in method::Two
td value in method::Three
td value in method::Four

No comments:

Post a Comment