4 files changed, 496 insertions, 13 deletions
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 0fe705a..185411f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,28 +1,30 @@
 lib_LTLIBRARIES = libopkele.la
 
+DEFAULT_INCLUDES = -I${top_builddir}
 INCLUDES = \
 	   -I${top_srcdir}/include/ \
 	   ${KONFORKA_CFLAGS} \
 	   ${OPENSSL_CFLAGS} \
 	   ${LIBCURL_CPPFLAGS} \
-	   ${PCRE_CFLAGS}
+	   ${PCRE_CFLAGS} ${EXPAT_CFLAGS}
 libopkele_la_LIBADD = \
 	${LIBCURL} \
-	${PCRE_LIBS} \
+	${PCRE_LIBS} ${EXPAT_LIBS} \
 	${OPENSSL_LIBS} \
 	${KONFORKA_LIBS}
 
 libopkele_la_SOURCES = \
 		       params.cc \
 		       util.cc \
 		       server.cc \
 		       secret.cc \
 		       data.cc \
 		       consumer.cc \
 		       exception.cc \
 		       extension.cc \
 		       sreg.cc \
 		       extension_chain.cc \
-		       curl.cc
+		       curl.cc expat.cc \
+		       discovery.cc
 libopkele_la_LDFLAGS = \
 	-version-info 2:0:0
diff --git a/lib/discovery.cc b/lib/discovery.cc
new file mode 100644
index 0000000..a35ce32
--- /dev/null
+++ b/lib/discovery.cc
@@ -0,0 +1,375 @@
+#include <iostream>
+using namespace std;
+#include <list>
+#include <opkele/curl.h>
+#include <opkele/expat.h>
+#include <opkele/uris.h>
+#include <opkele/discovery.h>
+#include <opkele/exception.h>
+#include <opkele/util.h>
+
+#include "config.h"
+
+#define XRDS_HEADER "X-XRDS-Location"
+#define CT_HEADER "Content-Type"
+
+namespace opkele {
+    using std::list;
+    using xrd::XRD_t;
+    using xrd::service_t;
+
+    static const char *whitespace = " \t\r\n";
+    static const char *i_leaders = "=@+$!(";
+
+    static inline bool is_qelement(const XML_Char *n,const char *qen) {
+	return strcasecmp(n,qen)==0; /* whole-name match, case ignored */
+    }
+    static inline bool is_element(const XML_Char *n,const char *en) {
+	/* matches either the bare name or "<anything>\t<en>", ignoring case */
+	if(strcasecmp(n,en)==0) return true;
+	const int name_len = strlen(n), want_len = strlen(en);
+	if(name_len<=want_len) return false; /* no room for a '\t' separator */
+	const XML_Char *tail = n+(name_len-want_len);
+	return tail[-1]=='\t' && strcasecmp(tail,en)==0;
+    }
+
+    static long element_priority(const XML_Char **a) {
+	for(;a[0];a+=2) { /* a holds name,value pairs and ends with a null name */
+	    if(strcasecmp(a[0],"priority")) continue;
+	    long prio;
+	    return (sscanf(a[1],"%ld",&prio)==1)?prio:-1;
+	}
+	return -1;
+    }
+
+    class idigger_t : public util::curl_t, public util::expat_t {
+	public:
+	    string xri_proxy; /* URL prefix the url-encoded XRI is appended to in discover() */
+
+	    enum {
+		xmode_html = 1, xmode_xrd = 2
+	    };
+	    int xmode; /* bitwise OR of the xmode_* flags; set by discover_at() */
+
+	    string xrds_location; /* taken from the X-XRDS-Location header or meta http-equiv */
+	    string http_content_type; /* Content-Type header value up to the first ';' */
+	    service_t html_openid1; /* filled from openid.server/openid.delegate link rels */
+	    service_t html_openid2; /* filled from openid2.provider/openid2.local_id link rels */
+	    string cdata_buf; /* scratch buffer collecting the text of an xrd Type element */
+	    long status_code; /* reset to 100 per request; overwritten from the code attribute of an xrd Status element */
+	    string status_string; /* text of a Status element whose code is not 100 */
+
+	    typedef list<string> pt_stack_t;
+	    pt_stack_t pt_stack; /* names of the XRD/Service/Status elements pushed while parsing */
+	    int skipping; /* 0: parse; >0: depth of ignored elements; <0: ignore the rest of the document */
+
+	    XRD_t *xrd; /* parse target, pointed at result.xrd by discover_at() */
+	    service_t *xrd_service; /* Service entry being filled, null outside of one */
+	    string* cdata; /* where character_data() appends, null when text is ignored */
+
+	    idigger_t()
+		: util::curl_t(easy_init()),
+		util::expat_t(0),
+		xri_proxy(XRI_PROXY_URL) {
+		    /* apply the option setters in order, stopping at the
+		     * first one that reports a non-zero CURLcode */
+		    CURLcode r = misc_sets();
+		    if(!r) r = set_write();
+		    if(!r) r = set_header();
+		    if(r)
+			throw exception_curl(OPKELE_CP_ "failed to set curly options",r);
+		}
+	    ~idigger_t() throw() { } /* nothing of its own to release */
+
+	    /* Resolve identity into result: XRIs (a leading =@+$!( after an optional xri://) go
+	     * through xri_proxy, anything else is fetched as a URL. Throws on bad input or failure. */
+	    void discover(idiscovery_t& result,const string& identity) {
+		result.clear();
+		string::size_type fsc = identity.find_first_not_of(whitespace);
+		if(fsc==string::npos)
+		    throw bad_input(OPKELE_CP_ "whitespace-only identity");
+		string::size_type lsc = identity.find_last_not_of(whitespace);
+		assert(lsc!=string::npos);
+		if(!strncasecmp(identity.c_str()+fsc,"xri://",sizeof("xri://")-1))
+		    fsc += sizeof("xri://")-1;
+		if((fsc+1)>=lsc)
+		    throw bad_input(OPKELE_CP_ "not a character of importance in identity");
+		string id(identity,fsc,lsc-fsc+1);
+		if(strchr(i_leaders,id[0])) {
+		    result.normalized_id = id; /* TODO: further canonicalize xri identity? Like folding case or whatever... */
+		    discover_at(result, xri_proxy + util::url_encode(id)+
+			    "?_xrd_r=application/xrd+xml;sep=false", xmode_xrd);
+		    if(status_code!=100)
+			throw failed_xri_resolution(OPKELE_CP_
+				"XRI resolution failed with '"+status_string+"' message",status_code);
+		    if(result.xrd.canonical_ids.empty())
+			throw opkele::failed_discovery(OPKELE_CP_ "No CanonicalID for XRI identity found");
+		}else{
+		    if(id.find("://")==string::npos)
+			id.insert(0,"http://");
+		    string::size_type fp = id.find('#');
+		    if(fp!=string::npos) { /* drop the fragment, keeping whatever follows a later '?' */
+			string::size_type qp = id.find('?');
+			if(qp==string::npos || qp<fp)
+			    id.erase(fp);
+			else if(qp>fp)
+			    id.erase(fp,qp-fp);
+		    }
+		    result.normalized_id = util::rfc_3986_normalize_uri(id);
+		    discover_at(result,id,xmode_html|xmode_xrd);
+		    const char * eu = 0;
+		    CURLcode r = easy_getinfo(CURLINFO_EFFECTIVE_URL,&eu);
+		    if(r || !eu) /* a null url must not reach the std::string constructor */
+			throw exception_curl(OPKELE_CP_ "failed to get CURLINFO_EFFECTIVE_URL",r);
+		    result.canonicalized_id = util::rfc_3986_normalize_uri(eu); /* XXX: strip fragment part? */
+		    if(xrds_location.empty()) {
+			html2xrd(result.xrd);
+		    }else{
+			const string xrds_url(xrds_location); /* discover_at() clears xrds_location, so pass a copy */
+			discover_at(result,xrds_url,xmode_xrd);
+			if(result.xrd.empty())
+			    html2xrd(result.xrd);
+		    }
+		}
+	    }
+
+	    /* Fetch url and feed it to a fresh parser; xm is a mask of xmode_* flags.
+	     * Throws exception_curl if the URL can't be set or the transfer fails. */
+	    void discover_at(idiscovery_t& result,const string& url,int xm) {
+		CURLcode r = easy_setopt(CURLOPT_URL,url.c_str());
+		if(r)
+		    throw exception_curl(OPKELE_CP_ "failed to set culry urlie",r);
+
+		(*(expat_t*)this) = parser_create_ns();
+		set_user_data(); set_element_handler();
+		set_character_data_handler();
+
+		xrds_location.clear(); http_content_type.clear();
+		xmode = xm;
+		if(xmode&xmode_html) {
+		    html_openid1.clear(); html_openid2.clear();
+		}
+		xrd = &result.xrd;
+		cdata = 0; xrd_service = 0; skipping = 0;
+		pt_stack.clear(); /* don't inherit element nesting from a previous document */
+		status_code = 100; status_string.clear();
+		r = easy_perform();
+		if(r && r!=CURLE_WRITE_ERROR) /* write() returning 0 on purpose is not a failure */
+		    throw exception_curl(OPKELE_CP_ "failed to perform curly request",r);
+		parse(0,0,true);
+	    }
+
+	    void html2xrd(XRD_t& x) {
+		/* html-discovered endpoints become priority -1 services, OpenID 1.1 first */
+		service_t *const found[] = { &html_openid1, &html_openid2 };
+		for(int i=0;i<2;++i) {
+		    if(found[i]->uris.empty()) continue;
+		    if(i==0) found[i]->types.insert(STURI_OPENID11);
+		    else found[i]->types.insert(STURI_OPENID20);
+		    x.services.add(-1,*found[i]);
+		}
+	    }
+
+	    size_t write(void *p,size_t s,size_t nm) {
+		if(skipping<0) return 0; /* parsing is over: report nothing consumed */
+		/* TODO: limit total size */
+		const size_t total = s*nm;
+		parse(static_cast<const char*>(p),total,false);
+		return total;
+	    }
+	    /* Header callback: a status line resets what was captured, X-XRDS-Location and
+	     * Content-Type values are remembered; the line is then passed on to curl_t::header(). */
+	    size_t header(void *p,size_t s,size_t nm) {
+		size_t bytes = s*nm;
+		const char *h = (const char*)p;
+		const char *colon = (const char*)memchr(p,':',bytes);
+		const char *space = (const char*)memchr(p,' ',bytes);
+		if(space && ( (!colon) || space<colon ) ) {
+		    xrds_location.clear(); http_content_type.clear();
+		}else if(colon) {
+		    const char *hv = ++colon;
+		    int hnl = colon-h; /* header name length including the ':' */
+		    int rb; /* bytes of value left once the final byte of the line is dropped */
+		    for(rb = bytes-hnl-1;rb>0 && isspace((unsigned char)*hv);++hv,--rb);
+		    while(rb>0 && isspace((unsigned char)hv[rb-1])) --rb;
+		    if(rb>0) { /* rb is negative when the line ends right at the ':' */
+			if( (hnl>=(int)sizeof(XRDS_HEADER))
+				&& !strncasecmp(h,XRDS_HEADER":",sizeof(XRDS_HEADER)) ) {
+			    xrds_location.assign(hv,rb);
+			}else if( (hnl>=(int)sizeof(CT_HEADER))
+				&& !strncasecmp(h,CT_HEADER":",sizeof(CT_HEADER)) ) {
+			    const char *sc = (const char*)memchr(
+				    hv,';',rb);
+			    http_content_type.assign(hv,sc?(sc-hv):rb);
+			}
+		    }
+		}
+		return curl_t::header(p,s,nm);
+	    }
+
+	    /* Element-open callback: tracks XRDS/XRD structure in pt_stack, selects where the
+	     * element text goes (cdata) and hands unrecognized elements to html parsing or skips them. */
+	    void start_element(const XML_Char *n,const XML_Char **a) {
+		if(skipping<0) return;
+		if(skipping) {
+		    if(xmode&xmode_html)
+			html_start_element(n,a);
+		    ++skipping; return;
+		}
+		if(pt_stack.empty()) {
+		    if(is_qelement(n,NSURI_XRDS "\tXRDS"))
+			return;
+		    if(is_qelement(n,NSURI_XRD "\tXRD")) {
+			assert(xrd);
+			xrd->clear();
+			pt_stack.push_back(n);
+		    }else if(xmode&xmode_html) {
+			html_start_element(n,a);
+		    }else{
+			skipping = -1;
+		    }
+		}else{
+		    int pt_s = pt_stack.size();
+		    if(pt_s==1) { /* TODO: xrd:XRD/xrd:Expires */
+			if(is_qelement(n,NSURI_XRD "\tCanonicalID")) {
+			    assert(xrd);
+			    cdata = &(xrd->canonical_ids.add(element_priority(a),string()));
+			}else if(is_qelement(n,NSURI_XRD "\tLocalID")) {
+			    assert(xrd);
+			    cdata = &(xrd->local_ids.add(element_priority(a),string()));
+			}else if(is_qelement(n,NSURI_XRD "\tService")) {
+			    assert(xrd);
+			    xrd_service = &(xrd->services.add(element_priority(a),service_t()));
+			    pt_stack.push_back(n);
+			}else if(is_qelement(n,NSURI_XRD "\tStatus")) {
+			    for(;*a;a+=2) { /* step over name,value pairs so names and values never mix */
+				if(!strcasecmp(a[0],"code")) {
+				    if(sscanf(a[1],"%ld",&status_code)==1 && status_code!=100) {
+					cdata = &status_string;
+					pt_stack.push_back(n);
+					break;
+				    }
+				}
+			    }
+			}else if(xmode&xmode_html) {
+			    html_start_element(n,a);
+			}else{
+			    skipping = 1;
+			}
+		    }else if(pt_s==2) {
+			if(is_qelement(pt_stack.back().c_str(), NSURI_XRD "\tService")) {
+			    if(is_qelement(n,NSURI_XRD "\tType")) {
+				assert(xrd); assert(xrd_service);
+				cdata_buf.clear();
+				cdata = &cdata_buf;
+			    }else if(is_qelement(n,NSURI_XRD "\tURI")) {
+				assert(xrd); assert(xrd_service);
+				cdata = &(xrd_service->uris.add(element_priority(a),string()));
+			    }else if(is_qelement(n,NSURI_XRD "\tLocalID")
+				    || is_qelement(n,NSURI_OPENID10 "\tDelegate") ) {
+				assert(xrd); assert(xrd_service);
+				cdata = &(xrd_service->local_ids.add(element_priority(a),string())); /* a local id, not an endpoint uri */
+			    }else{
+				skipping = 1;
+			    }
+			}else
+			    skipping = 1;
+		    }else if(xmode&xmode_html) {
+			html_start_element(n,a);
+		    }else{
+			skipping = 1;
+		    }
+		}
+	    }
+	    /* Element-close callback: unwinds skipping, commits a finished Type, pops Service and
+	     * Status off pt_stack, ends html parsing at </head>; always deselects the cdata target. */
+	    void end_element(const XML_Char *n) {
+		if(skipping<0) return;
+		if(skipping) {
+		    --skipping; return;
+		}
+		if(is_qelement(n,NSURI_XRD "\tType")) {
+		    assert(xrd); assert(xrd_service); assert(cdata==&cdata_buf);
+		    xrd_service->types.insert(cdata_buf);
+		}else if(is_qelement(n,NSURI_XRD "\tService")) {
+		    assert(xrd); assert(xrd_service); assert(!pt_stack.empty());
+		    assert(pt_stack.back()==(NSURI_XRD "\tService"));
+		    pt_stack.pop_back();
+		    xrd_service = 0;
+		}else if(is_qelement(n,NSURI_XRD "\tStatus")) {
+		    assert(xrd);
+		    if(!pt_stack.empty() && is_qelement(pt_stack.back().c_str(),n)) { /* back() on an empty list is undefined */
+			assert(cdata==&status_string);
+			pt_stack.pop_back();
+			if(status_code!=100) skipping = -1;
+		    }
+		}else if((xmode&xmode_html) && is_element(n,"head")) {
+		    skipping = -1;
+		}
+		cdata = 0;
+	    }
+	    void character_data(const XML_Char *s,int l) {
+		/* text is kept only while not skipping and a target string is selected */
+		if(!skipping && cdata) cdata->append(s,l);
+	    }
+
+	    /* Html-side element handler: picks up meta http-equiv X-XRDS-Location and the openid
+	     * link rels (server, delegate, provider, local_id); a body element stops all parsing. */
+	    void html_start_element(const XML_Char *n,const XML_Char **a) {
+		if(is_element(n,"meta")) {
+		    bool heq = false;
+		    string l;
+		    for(;*a;a+=2) {
+			if(!( strcasecmp(a[0],"http-equiv")
+				|| strcasecmp(a[1],XRDS_HEADER) ))
+			    heq = true;
+			else if(!strcasecmp(a[0],"content"))
+			    l.assign(a[1]);
+		    }
+		    if(heq)
+			xrds_location = l;
+		}else if(is_element(n,"link")) {
+		    string rels, href;
+		    for(;*a;a+=2) {
+			if( !strcasecmp(a[0],"rel") ) {
+			    rels.assign(a[1]);
+			}else if( !strcasecmp(a[0],"href") ) {
+			    const char *ns = a[1];
+			    for(;*ns && isspace((unsigned char)*ns);++ns); /* isspace() needs an unsigned char value */
+			    href.assign(ns);
+			    href.erase(href.find_last_not_of(whitespace)+1); /* npos+1 wraps to 0: erases all */
+			}
+		    }
+		    for(string::size_type ns=rels.find_first_not_of(whitespace);
+			    ns!=string::npos; ns=rels.find_first_not_of(whitespace,ns)) {
+			string::size_type s = rels.find_first_of(whitespace,ns);
+			string rel;
+			if(s==string::npos) {
+			    rel.assign(rels,ns,string::npos);
+			    ns = string::npos;
+			}else{
+			    rel.assign(rels,ns,s-ns);
+			    ns = s;
+			}
+			if(rel=="openid.server")
+			    html_openid1.uris.add(-1,href);
+			else if(rel=="openid.delegate")
+			    html_openid1.local_ids.add(-1,href);
+			else if(rel=="openid2.provider")
+			    html_openid2.uris.add(-1,href);
+			else if(rel=="openid2.local_id")
+			    html_openid2.local_ids.add(-1,href);
+		    }
+		}else if(is_element(n,"body")) {
+		    skipping = -1;
+		}
+	    }
+
+    };
+
+    void idiscover(idiscovery_t& result,const string& identity) {
+	/* one-shot digger: the temporary lives only for this call */
+	idigger_t().discover(result,identity);
+    }
+
+}
diff --git a/lib/expat.cc b/lib/expat.cc
new file mode 100644
index 0000000..fa6fdde
--- /dev/null
+++ b/lib/expat.cc
@@ -0,0 +1,96 @@
+#include <opkele/expat.h>
+
+namespace opkele {
+
+    namespace util {
+
+	expat_t::~expat_t() throw() {
+	    if(!_x) return; /* no parser attached */
+	    XML_ParserFree(_x);
+	}
+
+	expat_t& expat_t::operator=(XML_Parser x) { /* adopt x, freeing the parser held so far */
+	    if(_x && _x!=x) XML_ParserFree(_x); /* re-assigning the same handle must not free it */
+	    _x = x;
+	    return *this; /* falling off the end of a non-void function is undefined behaviour */
+	}
+
+	static void _start_element(void* self,const XML_Char *n,const XML_Char **a) {
+	    static_cast<expat_t*>(self)->start_element(n,a); /* forward to the object */
+	}
+	static void _end_element(void *self,const XML_Char *n) {
+	    static_cast<expat_t*>(self)->end_element(n); /* forward to the object */
+	}
+
+	void expat_t::set_element_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetElementHandler(_x,_start_element,_end_element);
+	}
+
+	static void _character_data(void *self,const XML_Char *s,int l) {
+	    static_cast<expat_t*>(self)->character_data(s,l); /* forward to the object */
+	}
+
+	void expat_t::set_character_data_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetCharacterDataHandler(_x,_character_data);
+	}
+
+	static void _processing_instruction(void *self,const XML_Char *t,const XML_Char *d) {
+	    static_cast<expat_t*>(self)->processing_instruction(t,d); /* forward to the object */
+	}
+
+	void expat_t::set_processing_instruction_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetProcessingInstructionHandler(_x,_processing_instruction);
+	}
+
+	static void _comment(void *self,const XML_Char *d) {
+	    static_cast<expat_t*>(self)->comment(d); /* forward to the object */
+	}
+
+	void expat_t::set_comment_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetCommentHandler(_x,_comment);
+	}
+
+	static void _start_cdata_section(void *self) {
+	    static_cast<expat_t*>(self)->start_cdata_section(); /* forward to the object */
+	}
+	static void _end_cdata_section(void *self) {
+	    static_cast<expat_t*>(self)->end_cdata_section(); /* forward to the object */
+	}
+
+	void expat_t::set_cdata_section_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetCdataSectionHandler(_x,_start_cdata_section,_end_cdata_section);
+	}
+
+	static void _default_handler(void *self,const XML_Char *s,int l) {
+	    static_cast<expat_t*>(self)->default_handler(s,l); /* forward to the object */
+	}
+
+	void expat_t::set_default_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetDefaultHandler(_x,_default_handler);
+	}
+	void expat_t::set_default_handler_expand() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetDefaultHandlerExpand(_x,_default_handler);
+	}
+
+	static void _start_namespace_decl(void *self,const XML_Char *p,const XML_Char *u) {
+	    static_cast<expat_t*>(self)->start_namespace_decl(p,u); /* forward to the object */
+	}
+	static void _end_namespace_decl(void *self,const XML_Char *p) {
+	    static_cast<expat_t*>(self)->end_namespace_decl(p); /* forward to the object */
+	}
+
+	void expat_t::set_namespace_decl_handler() {
+	    assert(_x); /* a parser must be attached first */
+	    XML_SetNamespaceDeclHandler(_x,_start_namespace_decl,_end_namespace_decl);
+	}
+
+    }
+
+}
diff --git a/lib/util.cc b/lib/util.cc
index 416e2cc..4600576 100644
--- a/lib/util.cc
+++ b/lib/util.cc
@@ -1,295 +1,305 @@
 #include <errno.h>
 #include <cassert>
 #include <cctype>
 #include <cstring>
 #include <vector>
 #include <string>
 #include <stack>
 #include <openssl/bio.h>
 #include <openssl/evp.h>
 #include <curl/curl.h>
 #include "opkele/util.h"
 #include "opkele/exception.h"
 
 namespace opkele {
     using namespace std;
 
     namespace util {
 
 	/*
 	 * base64
 	 */
 	string encode_base64(const void *data,size_t length) {
 	    BIO *b64 = 0, *bmem = 0;
 	    try {
 		b64 = BIO_new(BIO_f_base64());
 		if(!b64)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 encoder");
 		BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL);
 		bmem = BIO_new(BIO_s_mem());
 		BIO_set_flags(b64,BIO_CLOSE);
 		if(!bmem)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_new() memory buffer");
 		BIO_push(b64,bmem);
 		if(((size_t)BIO_write(b64,data,length))!=length)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_write()");
 		if(BIO_flush(b64)!=1)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_flush()");
 		char *rvd;
 		long rvl = BIO_get_mem_data(bmem,&rvd);
 		string rv(rvd,rvl);
 		BIO_free_all(b64);
 		return rv;
 	    }catch(...) {
 		if(b64) BIO_free_all(b64);
 		throw;
 	    }
 	}
 
 	void decode_base64(const string& data,vector<unsigned char>& rv) {
 	    BIO *b64 = 0, *bmem = 0;
 	    rv.clear();
 	    try {
 		bmem = BIO_new_mem_buf((void*)data.data(),data.size());
 		if(!bmem)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_new_mem_buf()");
 		b64 = BIO_new(BIO_f_base64());
 		if(!b64)
 		    throw exception_openssl(OPKELE_CP_ "failed to BIO_new() base64 decoder");
 		BIO_set_flags(b64,BIO_FLAGS_BASE64_NO_NL);
 		BIO_push(b64,bmem);
 		unsigned char tmp[512];
 		size_t rb = 0;
 		while((rb=BIO_read(b64,tmp,sizeof(tmp)))>0)
 		    rv.insert(rv.end(),tmp,&tmp[rb]);
 		BIO_free_all(b64);
 	    }catch(...) {
 		if(b64) BIO_free_all(b64);
 		throw;
 	    }
 	}
 
 	/*
 	 * big numerics
 	 */
 
 	BIGNUM *base64_to_bignum(const string& b64) {
 	    vector<unsigned char> bin;
 	    decode_base64(b64,bin);
 	    BIGNUM *rv = BN_bin2bn(&(bin.front()),bin.size(),0);
 	    if(!rv)
 		throw failed_conversion(OPKELE_CP_ "failed to BN_bin2bn()");
 	    return rv;
 	}
 
 	BIGNUM *dec_to_bignum(const string& dec) {
 	    BIGNUM *rv = 0;
 	    if(!BN_dec2bn(&rv,dec.c_str()))
 		throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()");
 	    return rv;
 	}
 
 	string bignum_to_base64(const BIGNUM *bn) {
 	    vector<unsigned char> bin(BN_num_bytes(bn)+1);
 	    unsigned char *binptr = &(bin.front())+1;
 	    int l = BN_bn2bin(bn,binptr);
 	    if(l && (*binptr)&0x80){
 		(*(--binptr)) = 0; ++l;
 	    }
 	    return encode_base64(binptr,l);
 	}
 
 	/*
 	 * w3c times
 	 */
 
 	string time_to_w3c(time_t t) {
 	    struct tm tm_t;
 	    if(!gmtime_r(&t,&tm_t))
 		throw failed_conversion(OPKELE_CP_ "failed to BN_dec2bn()");
 	    char rv[25];
 	    if(!strftime(rv,sizeof(rv)-1,"%Y-%m-%dT%H:%M:%SZ",&tm_t))
 		throw failed_conversion(OPKELE_CP_ "failed to strftime()");
 	    return rv;
 	}
 
 	time_t w3c_to_time(const string& w) {
 	    struct tm tm_t;
 	    memset(&tm_t,0,sizeof(tm_t));
 	    if(
 		    sscanf(
 			w.c_str(),
 			"%04d-%02d-%02dT%02d:%02d:%02dZ",
 			&tm_t.tm_year,&tm_t.tm_mon,&tm_t.tm_mday,
 			&tm_t.tm_hour,&tm_t.tm_min,&tm_t.tm_sec
 		    ) != 6 )
 		throw failed_conversion(OPKELE_CP_ "failed to sscanf()");
 	    tm_t.tm_mon--;
 	    tm_t.tm_year-=1900;
 	    time_t rv = mktime(&tm_t);
 	    if(rv==(time_t)-1)
 		throw failed_conversion(OPKELE_CP_ "failed to mktime()");
 	    return rv;
 	}
 
 	/*
 	 *
 	 */
 
 	string url_encode(const string& str) {
 	    char * t = curl_escape(str.c_str(),str.length());
 	    if(!t)
 		throw failed_conversion(OPKELE_CP_ "failed to curl_escape()");
 	    string rv(t);
 	    curl_free(t);
 	    return rv;
 	}
 
 	string long_to_string(long l) {
 	    char rv[32];
 	    int r=snprintf(rv,sizeof(rv),"%ld",l);
 	    if(r<0 || r>=(int)sizeof(rv))
 		throw failed_conversion(OPKELE_CP_ "failed to snprintf()");
 	    return rv;
 	}
 
 	long string_to_long(const string& s) {
 	    char *endptr = 0;
 	    long rv = strtol(s.c_str(),&endptr,10);
 	    if((!endptr) || endptr==s.c_str())
 		throw failed_conversion(OPKELE_CP_ "failed to strtol()");
 	    return rv;
 	}
 
 	/*
 	 * Normalize URL according to the rules, described in rfc 3986, section 6
 	 *
 	 * - uppercase hext triplets (e.g. %ab -> %AB)
 	 * - lowercase scheme and host
 	 * - decode %-encoded characters, specified as unreserved in rfc 3986, section 2.3,
 	 *   that is - [:alpha:][:digit:]._~-
 	 * - remove dot segments
 	 * - remove empty and default ports
 	 * - if there's no path component, add '/'
 	 */
 	 string rfc_3986_normalize_uri(const string& uri) {
+	     static const char *whitespace = " \t\r\n";
 	     string rv;
-	     string::size_type colon = uri.find(':');
+	     string::size_type ns = uri.find_first_not_of(whitespace);
+	     if(ns==string::npos)
+		 throw bad_input(OPKELE_CP_ "Can't normalize empty URI");
+	     string::size_type colon = uri.find(':',ns);
 	     if(colon==string::npos)
 		 throw bad_input(OPKELE_CP_ "No scheme specified in URI");
 	     transform(
-		     uri.begin(), uri.begin()+colon+1,
+		     uri.begin()+ns, uri.begin()+colon+1,
 		     back_inserter(rv), ::tolower );
 	     bool s;
-	     if(rv=="http:")
-		 s = false;
-	     else if(rv=="https:")
-		 s = true;
-	     else
-		 throw not_implemented(OPKELE_CP_ "Only http(s) URIs can be normalized here");
-	     string::size_type ul = uri.length();
+	     string::size_type ul = uri.find_last_not_of(whitespace)+1;
 	     if(ul <= (colon+3))
 		 throw bad_input(OPKELE_CP_ "Unexpected end of URI being normalized encountered");
 	     if(uri[colon+1]!='/' || uri[colon+2]!='/')
 		 throw bad_input(OPKELE_CP_ "Unexpected input in URI being normalized after scheme component");
+	     if(rv=="http:")
+		 s = false;
+	     else if(rv=="https:")
+		 s = true;
+	     else{
+		 /* TODO: support more schemes.
+		  * e.g. xri. How do we normalize
+		  * xri?
+		  */
+		 rv.append(uri,colon+1,ul-colon-1);
+		 return rv;
+	     }
 	     rv += "//";
 	     string::size_type interesting = uri.find_first_of(":/#?",colon+3);
 	     if(interesting==string::npos) {
 		 transform(
-			 uri.begin()+colon+3,uri.end(),
+			 uri.begin()+colon+3,uri.begin()+ul,
 			 back_inserter(rv), ::tolower );
 		 rv += '/'; return rv;
 	     }
 	     transform(
 		     uri.begin()+colon+3,uri.begin()+interesting,
 		     back_inserter(rv), ::tolower );
 	     bool qf = false;
 	     char ic = uri[interesting];
 	     if(ic==':') {
 		 string::size_type ni = uri.find_first_of("/#?%",interesting+1);
 		 const char *nptr = uri.data()+interesting+1;
 		 char *eptr = 0;
 		 long port = strtol(nptr,&eptr,10);
 		 if( (port>0) && (port<65535) && port!=(s?443:80) ) {
 		     char tmp[8];
 		     snprintf(tmp,sizeof(tmp),":%ld",port);
 		     rv += tmp;
 		 }
 		 if(ni==string::npos) {
 		     rv += '/'; return rv;
 		 }
 		 interesting = ni;
 	     }else if(ic!='/') {
 		 rv += '/'; rv += ic;
 		 qf = true;
 		 ++interesting;
 	     }
 	     string::size_type n = interesting;
 	     char tmp[3] = { 0,0,0 };
 	     stack<string::size_type> psegs; psegs.push(rv.length());
 	     string pseg;
 	     for(;n<ul;) {
 		 string::size_type unsafe = uri.find_first_of(qf?"%":"%/?#",n);
 		 if(unsafe==string::npos) {
 		     pseg.append(uri,n,ul-n-1); n = ul-1;
 		 }else{
 		     pseg.append(uri,n,unsafe-n);
 		     n = unsafe;
 		 }
 		 char c = uri[n++];
 		 if(c=='%') {
 		     if((n+1)>=ul)
 			 throw bad_input(OPKELE_CP_ "Unexpected end of URI encountered while parsing percent-encoded character");
 		     tmp[0] = uri[n++];
 		     tmp[1] = uri[n++];
 		     if(!( isxdigit(tmp[0]) && isxdigit(tmp[1]) ))
 			 throw bad_input(OPKELE_CP_ "Invalid percent-encoded character in URI being normalized");
 		     int cc = strtol(tmp,0,16);
 		     if( isalpha(cc) || isdigit(cc) || strchr("._~-",cc) )
 			 pseg += cc;
 		     else{
 			 pseg += '%';
 			 pseg += toupper(tmp[0]); pseg += toupper(tmp[1]);
 		     }
 		 }else if(qf) {
 		     rv += pseg; rv += c;
 		     pseg.clear();
 		 }else if(n>=ul || strchr("?/#",c)) {
 		     if(pseg.empty() || pseg==".") {
 		     }else if(pseg=="..") {
 			 if(psegs.size()>1) {
 			     rv.resize(psegs.top()); psegs.pop();
 			 }
 		     }else{
 			 psegs.push(rv.length());
 			 if(c!='/') {
 			     pseg += c;
 			     qf = true;
 			 }
 			 rv += '/'; rv += pseg;
 		     }
 		     if(c=='/' && (n>=ul || strchr("?#",uri[n])) ) {
 			 rv += '/';
 			 if(n<ul)
 			     qf = true;
 		     }else if(strchr("?#",c)) {
 			 if(psegs.size()==1 && psegs.top()==rv.length())
 			     rv += '/';
 			 if(pseg.empty())
 			     rv += c;
 			 qf = true;
 		     }
 		     pseg.clear();
 		 }else{
 		     pseg += c;
 		 }
 	     }
 	     if(!pseg.empty()) {
 		 rv += '/'; rv += pseg;
 	     }
 	     return rv;
 	 }
 
     }
 
 }