Validate XML syntax

Ask questions about how to create a script or swap scripts with other users.
Post Reply
User avatar
Rickard Johansson
Site Admin
Posts: 6577
Joined: 19 Jul 2006 14:29

Validate XML syntax

Post by Rickard Johansson »

Here is a Pascal script that validates the syntax of an XML or (X)HTML file. It only works in v6.10 beta 1 or later.

Code: Select all

// Validate XML syntax
// Written by Rickard Johansson

// Global variables
var
   w,s: WideString;
   i,n,nc: Integer;
   len: Integer;
   bComment: Boolean;

// Extract the tag name that starts at position index in ws.
//   index - in: position of the first character of the name.
//           out: position of the character that ended the name (blank, tab,
//           line break, '>' or '/'), or Length(ws)+1 if the text ran out.
//   ws    - the text being scanned.
// Returns the characters between the start position and the terminator.
function GetTagName(var index: Integer; const ws: WideString): WideString;
var
   nStart: Integer;
begin
   nStart := index;
   // A name ends at any whitespace, not only at a blank. Otherwise a tag whose
   // attributes start on the next line (or after a tab) would yield a name
   // that contains the line break and part of the attribute text.
   while (index <= Length(ws))
      and not ScriptUtils.WPosAt(' ',ws,index)
      and not ScriptUtils.WPosAt(#9,ws,index)
      and not ScriptUtils.WPosAt(#13,ws,index)
      and not ScriptUtils.WPosAt(#10,ws,index)
      and not ScriptUtils.WPosAt('>',ws,index)
      and not ScriptUtils.WPosAt('/',ws,index) do
      Inc(index);
   Result := Copy(ws,nStart,index-nStart);
end;

// Advance index to the next '>' in ws and tell whether the tag closes itself.
//   index - in: a position inside the tag.
//           out: position of the '>' (or Length(ws)+1 if none was found).
//   ws    - the text being scanned.
// Returns True when the character directly before the final index is '/',
// i.e. the tag is written as <name ... />.
function IsTagEnded(var index: Integer; const ws: WideString): Boolean;
begin
   while (index <= Length(ws)) and not ScriptUtils.WPosAt('>',ws,index) do
      Inc(index);
   Result := (index > 1) and ScriptUtils.WPosAt('/',ws,index-1);
end;

// Convert a 1-based character position in ws into a line/column pair.
//   nPos - 1-based index into ws.
//   x    - out: column as an offset from the first character of the line (0 = first column).
//   y    - out: number of CR/LF line breaks before nPos (0 = first line).
//   ws   - the text nPos refers to.
// NOTE(review): the zero-based column is chosen to match the zero-based line
// count; confirm against the cursor convention of Document.CursorX/CursorY.
procedure GetPosXY(const nPos: Integer; var x,y: Integer; const ws: WideString);
var
   n,nLineStart: Integer;
begin
   y := 0;
   n := 1;
   // Index of the first character of the current line. Starting at 1 (not 0)
   // keeps the column on the first line consistent with all following lines.
   nLineStart := 1;
   while (n <= Length(ws)) and (n < nPos) do
   begin
      if (n < Length(ws)) and ScriptUtils.WPosAt(#13#10,ws,n) then
      begin
         // Step over the line break and remember where the next line begins
         Inc(n,2);
         Inc(y);
         nLineStart := n;
      end
      else
         Inc(n);
   end;
   x := nPos - nLineStart;
   // Never hand back a negative column (e.g. when nPos is -1 or points into a line break)
   if x < 0 then
      x := 0;
end;

// Show a validation error and, when a position is known, move the cursor there.
//   nType - 0: stop tag missing, 1: stop tag not found, 2: tag misspelled.
//   wTag  - the tag text appended to the message.
//   ws    - the document text that nPos refers to.
//   nPos  - 1-based position of the problem in ws; a value below 1 means
//           "no position" and leaves the cursor where it is.
procedure ShowError(const nType: Integer; const wTag, ws: WideString; const nPos: Integer);
var
   x,y: Integer;
begin
   case nType of
      0: ShowMessage('Stop tag missing' + ':  ' + wTag);
      1: ShowMessage('Could not find the stop tag' + ':  ' + wTag);
      2: ShowMessage('Start or stop tag is misspelled' + ':  ' + wTag);
   end;
   // Callers pass -1 when there is no meaningful location; translating that
   // would produce a negative column, so only reposition for real positions.
   if nPos > 0 then
   begin
      GetPosXY(nPos,x,y,ws);
      Document.CursorY := y;
      Document.CursorX := x;
   end;
end;

begin
   // Validate the tag nesting of the active document. WStrings serves as a
   // stack holding the names of start tags that have not been closed yet.
   WStrings.Clear;
   bComment := False;
   i := 1;
   n := 1;
   w := Document.Text;
   len := Length(w);
   while (i > 0) and (i < len) do
   begin
      // Inside a comment only its terminator is of interest;
      // otherwise search for the next tag opener.
      if bComment then
         i := ScriptUtils.WPosFrom('-->',w,n)
      else
         i := ScriptUtils.WPosFrom('<',w,n);

      if bComment and (i > 0) then
      begin
         // End of the comment found; continue after '-->'
         bComment := False;
         Inc(i,2);
      end
      else if i > 0 then
      begin
         if ScriptUtils.WPosAt('<?',w,i) or ScriptUtils.WPosAt('<!DOCTYPE',w,i) then
         begin
            // Ignore <?xml ... ?> and <!DOCTYPE > tags
            n := i+1;
            i := ScriptUtils.WPosFrom('>',w,n);
         end
         else if ScriptUtils.WPosAt('<!--',w,i) then
         begin
            // Comment was found
            bComment := True;
            Inc(i,3);
         end
         else if ScriptUtils.WPosAt('<![CDATA[',w,i) then
         begin
            // CDATA sections hold character data, not markup: skip to the
            // terminating ']]>' instead of treating the section as a start tag
            n := i+1;
            i := ScriptUtils.WPosFrom(']]>',w,n);
            if i > 0 then
               Inc(i,2);
         end
         else if (i+1 <= len) and not ScriptUtils.WPosAt('/',w,i+1) then
         begin
            // Get start tag name
            Inc(i);
            s := GetTagName(i,w);

            // Only tags that are not closed with /> need a matching stop tag
            if not IsTagEnded(i,w) then
               WStrings.Add(s);
         end
         else if i <= len then
         begin
            // Get stop tag name; n keeps the position of '<' for error reporting
            n := i;
            Inc(i,2);
            s := GetTagName(i,w);

            // Check against the last stored start tag
            nc := WStrings.GetCount;
            if nc = 0 then
            begin
               // ERROR: Stop tag without any open start tag. Tested first so
               // that the (empty) stack is never indexed with -1.
               s := '</' + s + '>';
               ShowError(2,s,w,n);
               exit;
            end
            else if WStrings.GetString(nc-1) = s then
            begin
               // All is ok. Remove the stored tag
               WStrings.Delete(nc-1);
            end
            else if Lowercase(WStrings.GetString(nc-1)) = LowerCase(s) then
            begin
               // ERROR: Names differ only in letter case
               s := '<' + WStrings.GetString(nc-1) + '></' + s + '>';
               ShowError(2,s,w,n);
               exit;
            end
            else
            begin
               // ERROR: Could not find stop tag for the innermost open tag
               s := '</' + WStrings.GetString(nc-1) + '>';
               ShowError(1,s,w,n);
               exit;
            end;
         end;
      end;
      n := i+1;
   end;

   // ERROR: Stop tag(s) missing. Checked after the loop so that it also
   // triggers when the scan stopped because the last tag ends exactly at the
   // end of the document, not only when a search came back empty.
   nc := WStrings.GetCount;
   if nc > 0 then
   begin
      s := '</' + WStrings.GetString(nc-1) + '>';
      ShowError(0,s,w,-1);
   end;
end.
Post Reply