I had a number of musings and thoughts that I had written down in various places over the past 6 months, and I wanted to collect them and organize them into some sort of blog form.

So which blog engine should I use? After looking around for a while, I decided to build my own. I know that will surprise and dismay a number of people (including myself) – but hear me out. I am doing this precisely because I am *not* a web developer. Wait a minute – why would this make me *more* likely to develop a web application? Because I need to hone my skills, of course. Part of being a good developer is working on lots of different types of applications, and frankly it has been quite a while since I have played in the web world. I could use a standard blog engine and invent some other sort of project for myself, but why not kill two birds with one stone?

I chose an older, lengthier entry as the first one to port, which, as it turns out, may have been a mistake. For this particular entry, I had a large amount of code that I needed to annotate in addition to colorizing. I wanted to make it easy to post the entry and have the code automatically look like it does inside Visual Studio. After looking around a bit I found a site that offered a code control, but not one that let you annotate the code (e.g. highlight certain sections, or cross out certain lines that weren’t needed anymore). I didn’t spend a ton of time looking because, as I mentioned, one of the goals was to see what was involved in writing a bigger web application.

The ultimate goal was to paste in a block of code like this:

<code>
class Program
{
	static void Main(string[] args)
	{
		Console.WriteLine("Hello world");
	}
}
</code>

and it would look great when rendered.

However, if I wanted to talk about how the args parameter wasn’t needed, I could indicate this by surrounding the args with a span carrying some CSS styling, like this:

<code>
class Program
{
	static void Main(<span style="text-decoration: line-through;">string[] args</span>)
	{
		Console.WriteLine("Hello world");
	}
}
</code>

and it would render in the correct coloring, but with a strikeout of the args like this:

class Program
{
	static void Main(string[] args)
	{
		Console.WriteLine("Hello world");
	}
}

In order to support this I had to come up with an easy way to parse/recognize code. I didn’t need a professional-grade parser; I just wanted a simple coloring mechanism. I decided to use some simple regular expressions to do the trick. These regular expressions are built from a set of keywords read from a config file, like so:

	  <add key="C#Keywords" value="#region.*\n,#endregion.*\n,abstract,event,new,struct,
explicit,null,switch,base,extern,object,this,bool,false,operator,
throw,break,finally,out,true,byte,fixed,override,try,case,float,
params,typeof,catch,private,uint,char,foreach,protected,ulong,
checked,goto,public,unchecked,class,readonly,unsafe,const,
implicit,ushort,continue,return,using,decimal,sbyte,virtual,
default,interface,sealed,volatile,delegate,internal,short,void,
sizeof,while,double,lock,stackalloc,else,long,static,enum,
namespace,string,ref,int,for,if,else if,do,is,in,as"/>

The other requirement that I had (because I chose this initial entry to port) was the need to do the same set of annotations for xml files. This just meant supporting two different languages.

As with most things, I was able to get 80% of the functionality in 20% of the time, while the last 20% of the functionality took much longer. Here is the final result. It is weird to think about, but it is actually colorizing itself :)
[Update: when I switched to using WordPress I also changed any code that did not have bolding or line-through to use SyntaxHighlighter]

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Configuration;
using System.Text;
using System.Text.RegularExpressions;
using System.Web.UI;
using BrainHz.Blog;

/// <summary>
/// User control that renders its inner text inside a &lt;pre class="code"&gt;
/// element, wrapping recognized tokens in &lt;span class="..."&gt; elements so
/// that a style sheet can colorize them.
/// Any &lt;span&gt; / &lt;/span&gt; tags already present in the content are matched
/// by the regular expression but written out unchanged, which lets the author
/// annotate the code by hand (e.g. strike-through).
/// </summary>
[ParseChildren(true)]
public partial class CodeControl : UserControl
{
	// Tokenizer for the configured language; stays null when Language is
	// missing or not recognized by Page_Load.
	private Regex regex;

	// Names of the regex groups that are emitted as CSS classes.
	private string[] groupNames;

	string language;

	/// <summary>
	/// Language of the content. Page_Load recognizes "csharp" and "xml"
	/// (compared case-insensitively).
	/// </summary>
	public string Language
	{
		get { return language; }
		set { language = value; }
	}

	string textContent;

	/// <summary>
	/// The source text to render. It is written to the output as-is (it is not
	/// HTML-encoded by this control).
	/// </summary>
	[PersistenceMode(PersistenceMode.InnerDefaultProperty)]
	[DesignerSerializationVisibility(DesignerSerializationVisibility.Content)]
	public string Content
	{
		get { return textContent; }
		set { textContent = value; }
	}

	private List<KnownType> knownTypes;

	/// <summary>
	/// Optional extra type names that are highlighted with the "knownType"
	/// class when Language is "csharp".
	/// </summary>
	public List<KnownType> KnownTypes
	{
		get { return knownTypes; }
		set { knownTypes = value; }
	}

	// Regex alternative (with trailing '|') that matches opening and closing
	// span tags; assigned in Page_Load before the language regex is built.
	string spanString;

	/// <summary>
	/// Builds the tokenizer regex and the list of group names for the
	/// configured language. For any other language nothing is built and the
	/// content is later rendered without colorization.
	/// </summary>
	protected void Page_Load(object sender, EventArgs e)
	{
		spanString = string.Format("(?<{0}></*{0}[^>]*>)|", "span");

		if (string.Equals(language, "csharp", StringComparison.OrdinalIgnoreCase))
		{
			regex = CreateCodeRegex();
			groupNames = new string[] { "comment", "quotated", "keyword", "knownType" };
		}
		else if (string.Equals(language, "xml", StringComparison.OrdinalIgnoreCase))
		{
			regex = CreateXmlRegex();
			groupNames = new string[] { "elementName", "attributeName", "attributeValue" };
		}
	}

	/// <summary>
	/// Creates the regular expression used for XML content: span tags, element
	/// names following an encoded "&amp;lt;", and name="value" attributes whose
	/// quotes are either literal or encoded as "&amp;quot;".
	/// </summary>
	/// <returns>Regex object</returns>
	private Regex CreateXmlRegex()
	{
		// Verbatim strings keep \S and \w as regex escapes; "" is a literal quote.
		string pattern = spanString +
			@"&lt;/?(?<elementName>[\S]+)|" +
			@"(?<attribute>(?<attributeName>\w+)=(&quot;|"")(?<attributeValue>[^&""]*)(&quot;|""))";
		return new Regex(pattern);
	}

	/// <summary>
	/// Creates the regular expression used for C# content: span tags, string
	/// literals, line comments, keywords and known types.
	/// The keywords are read from the "C#Keywords" application setting
	/// (comma separated); the known types come from the KnownTypes property
	/// of this control.
	/// </summary>
	/// <returns>Regex object</returns>
	private Regex CreateCodeRegex()
	{
		StringBuilder expression = new StringBuilder(spanString);

		// A string literal ends at the first unescaped quote, so two literals on
		// the same line are not merged into one match.
		expression.Append(@"(?<quotated>(""(?:\\.|[^""\\])*""))|(?<comment>(//.*))");

		string keywords = ConfigurationManager.AppSettings["C#Keywords"];
		if (!string.IsNullOrEmpty(keywords))
		{
			// The setting may be wrapped over several lines in the config file,
			// so strip surrounding whitespace and skip empty entries.
			List<string> keywordList = new List<string>();
			foreach (string keyword in keywords.Split(','))
			{
				string trimmed = keyword.Trim();
				if (trimmed.Length > 0)
				{
					keywordList.Add(trimmed);
				}
			}
			expression.Append(GetRegexForSpecificWords("keyword", keywordList));
		}

		if (knownTypes != null && knownTypes.Count > 0)
		{
			List<string> types = new List<string>();
			foreach (KnownType type in knownTypes)
			{
				if (type != null)
				{
					types.Add(type.Name);
				}
			}
			expression.Append(GetRegexForSpecificWords("knownType", types));
		}

		return new Regex(expression.ToString());
	}

	/// <summary>
	/// Builds a regex alternative of the form "|(?&lt;name&gt;\b(?:w1|w2|...)\b)".
	/// The words are inserted unescaped, so an entry may itself be a regex
	/// fragment.
	/// </summary>
	/// <param name="collectionName">name of the regex group to create</param>
	/// <param name="words">words (or regex fragments) to match as whole words</param>
	/// <returns>the alternative, or an empty string when there are no usable words</returns>
	private static string GetRegexForSpecificWords(string collectionName, ICollection<string> words)
	{
		if (words == null || words.Count == 0)
		{
			return string.Empty;
		}

		StringBuilder alternatives = new StringBuilder();
		foreach (string word in words)
		{
			// An empty alternative would produce zero-length matches.
			if (string.IsNullOrEmpty(word))
			{
				continue;
			}
			if (alternatives.Length > 0)
			{
				alternatives.Append('|');
			}
			alternatives.Append(word);
		}

		if (alternatives.Length == 0)
		{
			return string.Empty;
		}

		// Verbatim string: \b must reach the regex engine as a word-boundary
		// anchor (in a regular C# string "\b" is a backspace character).
		// Both anchors sit outside the alternation so they apply to every word.
		return string.Format(@"|(?<{0}>\b(?:{1})\b)", collectionName, alternatives.ToString());
	}

	/// <summary>
	/// Pairs a capture with the name of the regex group that produced it.
	/// </summary>
	class CaptureInfo
	{
		public string GroupName;
		public Capture Capture;
		public CaptureInfo(string groupName, Capture capture)
		{
			GroupName = groupName;
			Capture = capture;
		}
	}

	/// <summary>
	/// This method takes an input string from a source file and
	/// outputs the string with the spans and classes.
	/// </summary>
	/// <param name="writer">place to write to</param>
	/// <param name="line">single line of source code</param>
	private void Colorize(HtmlTextWriter writer, string line)
	{
		// Without a tokenizer (unknown or missing Language) emit the line as-is.
		if (regex == null || groupNames == null)
		{
			writer.Write(line);
			return;
		}

		int idx = 0;
		Match m = regex.Match(line);

		while (m.Success)
		{
			// text between the previous match and this one
			writer.Write(line.Substring(idx, m.Index - idx));
			idx = m.Index;

			// collect the captures of the styled groups, ordered by position;
			// when two captures start at the same index the later group wins
			SortedDictionary<int, CaptureInfo> captures = new SortedDictionary<int, CaptureInfo>();
			foreach (string groupName in groupNames)
			{
				Group group = m.Groups[groupName];
				if (!group.Success)
				{
					continue;
				}
				foreach (Capture cap in group.Captures)
				{
					captures[cap.Index] = new CaptureInfo(groupName, cap);
				}
			}

			foreach (KeyValuePair<int, CaptureInfo> kv in captures)
			{
				string groupName = kv.Value.GroupName;
				Capture cap = kv.Value.Capture;

				if (idx != cap.Index)
				{
					// write any non-formatted stuff
					writer.Write(line.Substring(idx, cap.Index - idx));
					idx = cap.Index;
				}

				writer.AddAttribute(HtmlTextWriterAttribute.Class, groupName);
				writer.RenderBeginTag(HtmlTextWriterTag.Span);
				writer.Write(cap.Value);
				idx += cap.Length;
				writer.RenderEndTag();
			}

			// rest of the match that belongs to no styled group
			// (e.g. a hand-written span tag)
			writer.Write(line.Substring(idx, m.Index + m.Length - idx));
			idx = m.Index + m.Length;

			m = m.NextMatch();
		}

		// text after the last match
		writer.Write(line.Substring(idx));
	}

	/// <summary>
	/// Renders the content line by line inside a &lt;pre class="code"&gt; element.
	/// </summary>
	/// <param name="writer">place to write to</param>
	protected override void Render(HtmlTextWriter writer)
	{
		writer.AddAttribute(HtmlTextWriterAttribute.Class, "code");
		writer.RenderBeginTag(HtmlTextWriterTag.Pre);

		if (textContent != null)
		{
			// Split on real line breaks; "\r\n" is listed first so a Windows line
			// ending is consumed as a single separator.
			string[] lines = textContent.Split(new string[] { "\r\n", "\n" }, StringSplitOptions.None);
			foreach (string line in lines)
			{
				Colorize(writer, line);
				writer.WriteLine();
			}
		}

		writer.RenderEndTag();
	}
}