1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  package com.quiotix.html.parser;
17  
18  import java.util.ArrayList;
19  import java.util.Iterator;
20  import java.util.List;
21  
22  
23  
24  
25  
26  
27  
28  
29  
30  
31  
32  
33  
34  public class HtmlDocument implements Visitable {
35      ElementSequence elements;
36  
37      
38      public HtmlDocument(ElementSequence s) {
39          elements = s;
40      }
41  
42      public void accept(HtmlVisitor v) {
43          v.visit(this);
44      }
45  
46      private static String dequote(String s) {
47          if (s == null)
48              return "";
49          if ((s.startsWith("\"") && s.endsWith("\"")) || 
50              (s.startsWith("'") && s.endsWith("'")))
51              return s.substring(1, s.length()-1);
52          else
53              return s;
54      }
55  
56      
57      
58      
59      
60      
61  
62      
63      
64  
65      
66  
67  
68      public static abstract class HtmlElement implements Visitable, Sized {
69          public abstract void accept(HtmlVisitor v);
70      }
71  
72      
73  
74  
75      public static class Tag extends HtmlElement {
76          
77          public String tagName;
78          
79          public AttributeList attributeList;
80  
81          
82  
83  
84  
85          public boolean emptyTag = false;
86  
87          
88          public Tag(String t, AttributeList a) {
89              tagName = t;
90              attributeList = a;
91          }
92  
93          
94          public void setEmpty(boolean b) {
95              emptyTag = b;
96          }
97  
98          public void accept(HtmlVisitor v) {
99              v.visit(this);
100         }
101 
102         
103         public boolean hasAttribute(String name) {
104             return attributeList.contains(name);
105         }
106 
107         
108 
109 
110 
111         public boolean hasAttributeValue(String name) {
112             return attributeList.hasValue(name);
113         }
114 
115         
116 
117 
118         public String getAttributeValue(String name) {
119             return attributeList.getValue(name);
120         }
121 
122         public int getLength() {
123             int length = 0;
124             for (Iterator iterator = attributeList.attributes.iterator(); iterator.hasNext();) {
125                 Attribute attribute = (Attribute) iterator.next();
126                 length += 1 + (attribute.getLength());
127             }
128             return length + tagName.length() + 2 + (emptyTag ? 1 : 0);
129         }
130 
131         public String toString() {
132             StringBuffer s = new StringBuffer();
133             s.append("<");
134             s.append(tagName);
135             for (Iterator iterator = attributeList.attributes.iterator(); iterator.hasNext();) {
136                 Attribute attribute = (Attribute) iterator.next();
137                 s.append(" ");
138                 s.append(attribute.toString());
139             }
140             if (emptyTag) s.append("/");
141             s.append(">");
142             return s.toString();
143         }
144     }
145 
146     
147 
148 
149     public static class EndTag extends HtmlElement {
150 
151         
152         public String tagName;
153 
154         
155         public EndTag(String t) {
156             tagName = t;
157         }
158 
159         public void accept(HtmlVisitor v) {
160             v.visit(this);
161         }
162 
163         public int getLength() {
164             return 3 + tagName.length();
165         }
166 
167         public String toString() {
168             return "</" + tagName + ">";
169         }
170     }
171 
172     
173 
174 
175 
176     public static class TagBlock extends HtmlElement {
177         
178         public Tag startTag;
179         
180         public EndTag endTag;
181         
182         public ElementSequence body;
183 
184         
185         public TagBlock(String name, AttributeList aList, ElementSequence b) {
186             startTag = new Tag(name, aList);
187             endTag = new EndTag(name);
188             body = b;
189         }
190 
191         public void accept(HtmlVisitor v) {
192             v.visit(this);
193         }
194         
195         public int getLength() { 
196             int bodyLength = 0;
197             for (Iterator iterator = body.iterator(); iterator.hasNext();) {
198                 HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
199                 bodyLength += htmlElement.getLength();    
200             }
201             return startTag.getLength() + bodyLength + endTag.getLength();
202         }
203         
204         public String toString() {
205           StringBuffer sb = new StringBuffer();
206           sb.append(startTag.toString());
207           for (Iterator iterator = body.iterator(); iterator.hasNext();) {
208             HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
209             sb.append(htmlElement.toString());
210           }
211           sb.append(endTag.toString());
212           return sb.toString();
213         }
214         
215         
216 
217 
218         public String text() {
219           StringBuffer sb = new StringBuffer();
220           for (Iterator iterator = body.iterator(); iterator.hasNext();) {
221             HtmlDocument.HtmlElement htmlElement = (HtmlDocument.HtmlElement) iterator.next();
222             if (htmlElement instanceof Text) {
223               sb.append(htmlElement.toString());
224             } else if(htmlElement instanceof TagBlock)
225               sb.append(((TagBlock)htmlElement).text());
226           }
227           return sb.toString();
228         }
229     }
230 
231     
232 
233 
234     public static class Comment extends HtmlElement {
235         
236 
237 
238         public String comment;
239 
240         
241         public Comment(String c) {
242             comment = c;
243         }
244 
245         public void accept(HtmlVisitor v) {
246             v.visit(this);
247         }
248 
249         public int getLength() {
250             return 3 + comment.length();
251         }
252 
253         public String toString() {
254             return "<!" + comment + ">";
255         }
256     }
257 
258     
259 
260 
261     public static class Text extends HtmlElement {
262         
263         public String text;
264 
265         
266         public Text(String t) {
267             text = t;
268         }
269 
270         public void accept(HtmlVisitor v) {
271             v.visit(this);
272         }
273 
274         public int getLength() {
275             return text.length();
276         }
277 
278         public String toString() {
279             return text;
280         }
281     }
282 
283     
284 
285 
286     public static class Newline extends HtmlElement {
287         
288         public static final String NL = System.getProperty("line.separator");
289 
290         public void accept(HtmlVisitor v) {
291             v.visit(this);
292         }
293 
294         public int getLength() {
295             return NL.length();
296         }
297 
298         public String toString() {
299             return NL;
300         }
301     }
302 
303     
304 
305 
306     public static class ElementSequence {
307         private List elements;
308 
309         
310         public ElementSequence(int n) {
311             elements = new ArrayList(n);
312         }
313 
314         
315         public ElementSequence() {
316             elements = new ArrayList();
317         }
318 
319         
320         public void addElement(HtmlElement o) {
321             elements.add(o);
322         }
323 
324         
325 
326 
327         public int size() {
328             return elements.size();
329         }
330 
331         
332 
333 
334         public Iterator iterator() {
335             return elements.iterator();
336         }
337 
338         
339 
340 
341 
342 
343         public void setElements(List collection) {
344             elements.clear();
345             elements.addAll(collection);
346         }
347     }
348 
349     
350 
351 
352 
353 
354 
355 
356 
357 
358     public static class Annotation extends HtmlElement {
359         String type, text;
360 
361         
362         public Annotation(String type, String text) {
363             this.type = type;
364             this.text = text;
365         }
366 
367         public void accept(HtmlVisitor v) {
368             v.visit(this);
369         }
370 
371         public int getLength() {
372             return 14 + type.length() + text.length();
373         }
374 
375         public String toString() {
376             return "<!--NOTE(" + type + ") " + text + "-->";
377         }
378     }
379 
380     
381 
382 
383     public static class Attribute implements Sized {
384         
385         public String name;
386         
387         public String value;
388         
389         public boolean hasValue;
390 
391         
392         public Attribute(String n) {
393             name = n;
394             hasValue = false;
395         }
396 
397         
398         public Attribute(String n, String v) {
399             name = n;
400             if (v != null) {
401                 value = v;
402                 hasValue = true;
403             }
404         }
405 
406         
407 
408 
409 
410 
411 
412         public int getLength() {
413             return (hasValue ? name.length() + 1 + value.length() : name.length());
414         }
415 
416         public String toString() {
417             return (hasValue ? name + "=" + value : name);
418         }
419         
420         
421 
422 
423         public String getValue() { 
424             return dequote(value);
425         }
426         
427         
428 
429 
430         public void setValue(String v) {
431             value = v;
432             if (v == null)  
433                 hasValue = false;
434             else 
435                 hasValue = true;
436         }
437     }
438 
439     
440 
441 
442     public static class AttributeList {
443         
444         public List attributes = new ArrayList();
445 
446         
447         public void addAttribute(Attribute a) {
448             attributes.add(a);
449         }
450 
451         
452         public boolean contains(String name) {
453             for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
454                 Attribute attribute = (Attribute) iterator.next();
455                 if (attribute.name.equalsIgnoreCase(name))
456                     return true;
457             }
458             return false;
459         }
460 
461         
462 
463 
464 
465         public boolean hasValue(String name) {
466             for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
467                 Attribute attribute = (Attribute) iterator.next();
468                 if (attribute.name.equalsIgnoreCase(name) && attribute.hasValue)
469                     return true;
470             }
471             return false;
472         }
473 
474         
475 
476 
477 
478         public String getValue(String name) {
479             for (Iterator iterator = attributes.iterator(); iterator.hasNext();) {
480                 Attribute attribute = (Attribute) iterator.next();
481                 if (attribute.name.equalsIgnoreCase(name) && attribute.hasValue)
482                     return dequote(attribute.value);
483             }
484             return null;
485         }
486     }
487 }
488 
489 
490