Find HTML <!-- comment tags.

@JunkDrawStuffFeb 02.2010

I want to find any <!-- --> comment tags in a page and then read the contents. I would also like to distinguish between those in the body and those elsewhere in the HTML document.

What do I use to get an array of these elements?

to post a comment

JavaScript

10 Comments(s) _↴

@justinbarneskinFeb 03.2010 — #You can paste the source code into a textarea and then read it as a string.

Split the string at "<!" to make an Array and then use shift() to remove the 0 index, it won't be a comment.

Next, loop through the array to define each member as substring(0, indexOf('-->'))

Now you can use an alert or innerHTML to view the comments using join('\n') or with innerHTML join('<br>')

@toicontienFeb 03.2010 — #Loop through all the elements in the document:

[CODE]var els = document.getElementsByTagName( "*" );
 
 for ( var i = 0, length = els.length; i < length; i++ ) {
 [B]if ( els[i].nodeName === "#comment" ) {[/B]
 // it's a comment node
 }
 }[/CODE]

@rpg2009Feb 03.2010 — #or if (node.nodeType === 8)

Unfortunately it ignores script comments.

RPG

@rpg2009Feb 03.2010 — #Needs work, but a start. Unfortunately not fixed for IE.

[CODE]<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <!-- First comment in the head-->
 <!-- This comment is in the head and takes up 
 a couple of lines -->
 <title>Get Comments</title>
 <script type="text/javascript">
 /* A javascript comment */
 // A second javascript comment.
 /**
  * Gathers every HTML comment in the document, together with the JavaScript
  * comments found in the text of SCRIPT elements, and shows them in one alert.
  * Each entry reads "<parent node name> - <comment text>", one entry per line.
  * Comments written inside this script are collected too when the page runs.
  * Known limit: a "//" inside a string literal (a URL, say) is taken for a comment.
  */
 function init(){
  var comments = [];
  // A "//" comment up to the end of its line, or a single "/* ... */" block.
  // Non-greedy, so two separate block comments are not merged into one match.
  var regCom1 = /\/{2}[^\n]+|\/\*[\s\S]*?\*\//g;
  // The comment delimiters plus the whitespace right next to them.
  var regDelims = /^\/{2}\s*|^\/\*+\s*|\s*\*\/$/g;

  // Records the node when it is an HTML comment or the source text of a script.
  function isComment(node){
   var owner = node.parentNode.nodeName;
   if (node.nodeType === 8){
    comments[comments.length] = owner+' - '+node.data.replace(/[\n\r]/g,"");
   } else if (node.data && owner === "SCRIPT") {
    // match() returns null when the script text holds no comment at all
    var temp = node.data.match(regCom1) || [];
    for (var i = 0, len = temp.length; i < len; i++){
     comments[comments.length] = owner+' - '+temp[i].replace(regDelims,"");
    }
   }
  }

  // Depth-first walk: the children of a node are handled before the node itself.
  function walkDom(node){
   for (var child = node.firstChild; child; child = child.nextSibling){
    walkDom(child);
    isComment(child);
   }
  }

  walkDom(document);
  var output = "";
  for (var i = 0, len = comments.length; i < len; i+=1){
   output+=comments[i]+'\n';
  }
  alert (output);
 }
 window.onload = init;
 
 </script>
 </head>
 <body>
 <!-- First comment in the body -->
 <div>
 <!-- 
 This comment 
 is in a DIV element
 and takes up 
 a few lines 
 -->
 </div>
 </body>
 </html>[/CODE]

Using PHP formatting seems to screw with your regex's

RPG

@rnd_meFeb 03.2010 — #I want to find any <!-- --> comment tags in a page and then read the contents. I would also like to distinguish between those in the body and those elsewhere in the HTML document.

What do I use to get an array of these elements?[/QUOTE]

harvesting comments is why i created the core of this function, which spun off into a generic attribute selector:

[CODE]
 //public domain
 /**
  * Collects the nodes below a root whose property `prop` matches `val`.
  *
  * @param {string} prop    name of the node property to examine
  * @param {*} val          value compared with the property using loose equality;
  *                         when val is null or undefined (or any other key that is
  *                         flagged true on getNodes) every truthy property matches
  * @param {string} meth    optional name of a String method; when given, the hit is
  *                         decided by the truthiness of String(node[prop])[meth](val)
  * @param {Node|Array} nd  root node, or an Array of nodes to use as the children;
  *                         defaults to document.documentElement
  * @param {boolean} useSelf when truthy, only the direct children of nd are examined
  * @returns {Array} matching nodes; descendants are listed before their parent
  */
 function getNodes(prop, val, meth, nd, useSelf ){
  var root = nd || document.documentElement;
  if (root.constructor === Array) {
   root = { childNodes: root };
  }

  var wildcard = getNodes[val] === true;
  var found = [];
  var kids = root.childNodes;
  var total = kids.length;

  for (var idx = 0; idx < total; idx++) {
   var child = kids[idx];

   // descend first, so nested hits are listed ahead of the child itself
   if (child.childNodes.length && !useSelf) {
    found = found.concat(getNodes(prop, val, meth, child, useSelf));
   }

   var hit;
   if (wildcard) {
    hit = child[prop];
   } else if (child[prop] === undefined) {
    hit = false;
   } else if (meth) {
    // ""[meth] makes sure the named String method exists before it is called
    hit = ""[meth] && String(child[prop])[meth](val);
   } else {
    hit = child[prop] == val;
   }

   if (hit) {
    found.push(child);
   }
  }

  return found;
 }
 // a missing needle (null / undefined) switches the wildcard mode on
 getNodes[null] = true;
 getNodes[undefined] = true;
 
 
 
 getNodes("nodeType", 8) //all
 
 getNodes("nodeType", 8, null, document.body) //body only
 
 [/CODE]

more uses can be found on this post describing my node getter function.

@JunkDrawStuffauthorFeb 03.2010 — #Wow, thanks. So many ways.

I am looking to do this in a simple, fairly painless way. The tags I am looking for will be in the document.body.

Can I adapt the var els = document.getElementsByTagName( "*" ); option to be var els = document.body.getElementsByTagName( "*" ); and get a list of the elements in the body and then use a simple loop and if statement to build a list of them.

Say do something like this.

[CODE]var els = document.body.getElementsByTagName( "*" );
 for ( var i = 0, length = els.length; i < length; i++ ) {
 if ( els[i].nodeName === "#comment" || els[i].nodeType === 8) {
 val =  els[i].nodeValue; // get the string
 // do something
 }
 }
 [/CODE]

To get the contents of the tag if anything is present.

@rnd_me, what exactly does your script do? Does it return the contents of the elements, or a list of the nodes that I can harvest information from? It looks complicated.

@rnd_meFeb 03.2010 — #
@rnd_me, what exactly does your script do? Does it return the contents of the elements, or a list of the nodes that I can harvest information from? It looks complicated.[/QUOTE]

the script itself is complicated, but using it shouldn't be.

[CODE]getNodes("nodeType", 8, null, document.body)[/CODE]
returns an array of all the comment nodes in body.

I believe that's exactly what you asked for...

if you want the text contents, the .data property of each element in the array contains the string version of the text inside each comment.

@rpg2009Feb 04.2010 — #For what it's worth my contribution. A bit of challenge and maybe a little OTT?

Could still do with a couple of tweaks for IE, and maybe the addition of a comments removal method.

[CODE]<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html xmlns="http://www.w3.org/1999/xhtml">
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 <title>getComments</title>
 <!-- ******** Get Comments Tool Utility ********* -->
 <style type="text/css">
 /* Basic style sheet comments here */
 body {background:grey}
 </style>
 <script type="text/javascript">
 /*------- In theory this script------------
 ------ should build a comments array -------
 --------- out of the pages comments --------*/
 alert
 var comms = (function(){
 // Define variables
 // and regular expressions
 var match, i;
 var stripChars = /\t/g;
 // this one took bloody ages, and still needs work
 var regScript = /\/{2}\s?([\s\S]+?)(?=\n)|\/\*\s?([\s\S]+?)(?=\*\/)/g;
 var regType = /^(?:SCRIPT|STYLE)$/;
 var comments = {};
 // a few private methods
 function _isComment(node){
  if (node.nodeType === 8){
   if (!comments[node.parentNode.nodeName]) { comments[node.parentNode.nodeName] = [];}
   comments[node.parentNode.nodeName].push(node.data.replace(stripChars,""));
  } else if (regType.test(node.nodeName)) { _scriptComs(node); }
  return;
 }
 
  function _scriptComs(node){
   if (!comments[node.nodeName]) { comments[node.nodeName] = [];}
   while ((match = regScript.exec(node.innerHTML)) != null){
    i = (match[1])? 1 : 2;
    comments[node.nodeName].push(match[i].replace(stripChars," "));
   }
  }
 
  return { // public methods here
   get : function(node){
    node = node.firstChild;
    while (node){
     this.get(node); _isComment(node); node = node.nextSibling;
    }
    return this;
   },
   output : function(){
    var output = "";
    for (var props in comments){
     output+=props+"\n";
     for (var x = 0, count = comments[props].length; x < count; x++){
      output+=comments[props][x]+"\n";
     }
    }
    return output;
   } 
  };
 })();
 window.onload = function(){alert(comms.get(document).output())}; // can change to document.body
 </script>
 </head>
 <body>
 <!--Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus a auctor sem. Quisque lorem urna, tempus sed iaculis vel, sodales a quam. In tincidunt enim est. Donec et ante sem, non mattis arcu. Proin luctus aliquet magna commodo aliquet. Integer ac velit tortor, in viverra erat. Praesent sem nisi, vulputate eu imperdiet vel, tempor sed lorem. Pellentesque eget libero in odio congue bibendum. Fusce tempor tellus at nunc tincidunt at consectetur sapien malesuada.-->
 </body>
 </html>[/CODE]

Output from above script just for testing

[CODE]HEAD
 ******** Get Comments Tool Utility ********* 
 STYLE
 Basic style sheet comments here 
 SCRIPT
 ------- In theory this script------------
 ------ should build a comments array -------
 --------- out of the pages comments --------
 Define variables
 and regular expressions
 this one took bloody ages, and still needs work
 a few private methods
 public methods here
 can change to document.body
 BODY
 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus a auctor sem. Quisque lorem urna, tempus sed iaculis vel, sodales a quam. In tincidunt enim est. Donec et ante sem, non mattis arcu. Proin luctus aliquet magna commodo aliquet. Integer ac velit tortor, in viverra erat. Praesent sem nisi, vulputate eu imperdiet vel, tempor sed lorem. Pellentesque eget libero in odio congue bibendum. Fusce tempor tellus at nunc tincidunt at consectetur sapien malesuada.[/CODE]

Note you can just change comms.get(document).output() to comms.get(document.body).output()

Edit: just script

[CODE]var comms = (function(){
 var match, i;
 var stripChars = /t/g;
 var regScript = //{2}s?([sS]+?)(?=n)|/*s?([sS]+?)(?=*/)/g;
 var regType = /SCRIPT|STYLE/ 
 var comments = {};
 function _isComment(node){
 if (node.nodeType === 8){
 if (!comments[node.parentNode.nodeName]) { comments[node.parentNode.nodeName] = [];}
 comments[node.parentNode.nodeName].push(node.data.replace(stripChars,""));
 } else if (regType.test(node.nodeName)) { _scriptComs(node); }
 return;
 } 
 function _scriptComs(node){
 if (!comments[node.nodeName]) { comments[node.nodeName] = [];}
 while ((match = regScript.exec(node.innerHTML)) != null){
 i = (match[1])? 1 : 2;
 comments[node.nodeName].push(match[i].replace(stripChars," "));
 }
 } 
 return {
 get : function(node){
 node = node.firstChild;
 while (node){
 this.get(node); _isComment(node); node = node.nextSibling;
 }
 return this;
 },
 output : function(){
 var output = "";
 for (props in comments){
 output+=props+"n";
 var i = comments[props].length, x = 0;
 while (i--){ output+=comments[props][x++]+"n"; }
 }
 return output;
 } 
 };
 })();
 window.onload = function(){alert(comms.get(document).output())};[/CODE]

RPG

@JunkDrawStuffauthorFeb 04.2010 — #the script itself is complicated, but using it shouldn't be.

[CODE]getNodes("nodeType", 8, null, document.body)[/CODE]
returns an array of all the comment nodes in body.

I believe that's exactly what you asked for...

if you want the text contents, the .data property of each element in the array contains the string version of the text inside each comment.[/QUOTE]
Yes thanks, I am just confused by it, I like to understand how it operates to appreciate what it does and it makes me wonder why functions do not already exist for this purpose...

SO... Is that <!-- HTML comments or does that include // --- and /* --- */ comment blocks as well? Not that I want to find those comment blocks. but I am not picky. Still have to try your code, hope this weekend to get a demo page up, see what happens.

@rnd_meFeb 05.2010 — #Yes thanks, I am just confused by it, I like to understand how it operates to appreciate what it does and it makes me wonder why functions do not already exist for this purpose...

SO... Is that <!-- HTML comments or does that include // --- and /* --- */ comment blocks as well? [/QUOTE]

It gathers HTML comments only.

perhaps a rewrite will prove itself more readable:

[CODE]function getNodes(prop, needle, blnMatch, node){
 var results=[], any=(needle==null); 
 node=node||document.documentElement;
 if(node.splice){ node={childNodes:node}; }
 for(var it, i=0, kids=node.childNodes;it=kids[i];i++){
 if(it.childNodes.length){
 results=results.concat(getNodes(prop, needle, blnMatch, it));
 }
 switch(true){
 case 	any && it[prop]:
 case 	it[prop]===needle: 

 case blnMatch && !!String(it[prop]).match(needle):
 results[results.length]=it; 
 }
 }
 return results;
 }//end getNodes()
 
 function getNodes(prop, needle, blnMatch, node){
 /* Collects the nodes below `node` whose property `prop` matches `needle`.
    prop:     name of the node property to examine
    needle:   value the property must be strictly equal to; when it is left out
              (null or undefined) every node with a truthy property is a hit
    blnMatch: when truthy, a property that is set also hits if its string form
              matches needle (String.prototype.match)
    node:     root node, or an Array of nodes to use as the children;
              defaults to document.documentElement
    Returns an Array of the matching nodes, descendants before their parent. */
 var results=[], any=(needle==null); 
 node=node||document.documentElement;
 if(node.splice){ node={childNodes:node}; }
 for(var it, i=0, kids=node.childNodes;it=kids[i];i++){
 if(it.childNodes.length){
 results=results.concat(getNodes(prop, needle, blnMatch, it));
 }
 // A switch(true) compares each case with ===, which only fires for a literal
 // true, so the three rules are spelled out as plain boolean tests instead.
 var value=it[prop], hit;
 if(any){ hit=!!value; }
 else if(value===needle){ hit=true; }
 else{ hit=!!blnMatch && value!=null && !!String(value).match(needle); }
 if(hit){ results[results.length]=it; }
 }
 return results;
 }//end getNodes()
 
 //usage example: get all comments from the body
 getNodes("nodeType", 8, false, document.body) //body only[/CODE]

prop: a property of each node to examine

needle: a value to compare the node's property to.

blnMatch: if true, uses a string-match instead of an equivalency compare

node: the root node containing all the nodes to be examined

all arguments except prop are optional.

if you don't pass a needle, any node whose property is set to something truthy (anything besides 0, false, null, undefined or an empty string) will hit.

Also in #JavaScript _↴

Call function from another function Validating form please help... annoying loop!

Success!

Help @JunkDrawStuff spread the word by sharing this article on Twitter...

Tweet This

Find HTML <!-- comment tags.

10 Comments(s) _↴

Also in #JavaScript _↴

Success!

Social

Version

Find HTML <!-- comment tags.

10 Comments(s) ↴

Also in #JavaScript ↴

Success!

The web is an endless sea of information. Don't miss the boat... Subscribe!

Social

Version

10 Comments(s) _↴

Also in #JavaScript _↴