<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>AK Tech Blog</title>
	<atom:link href="http://blog.aggregateknowledge.com/feed/" rel="self" type="application/rss+xml" />
	<link>http://blog.aggregateknowledge.com</link>
	<description>Making Media Accountable</description>
	<lastBuildDate>Thu, 17 May 2012 01:56:07 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='blog.aggregateknowledge.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://1.gravatar.com/blavatar/f7c9c0215768938a158a2d13f6937ade?s=96&#038;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>AK Tech Blog</title>
		<link>http://blog.aggregateknowledge.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://blog.aggregateknowledge.com/osd.xml" title="AK Tech Blog" />
	<atom:link rel='hub' href='http://blog.aggregateknowledge.com/?pushpress=hub'/>
		<item>
		<title>Sketching the last year</title>
		<link>http://blog.aggregateknowledge.com/2012/05/13/sketching-the-last-year/</link>
		<comments>http://blog.aggregateknowledge.com/2012/05/13/sketching-the-last-year/#comments</comments>
		<pubDate>Sun, 13 May 2012 13:07:31 +0000</pubDate>
		<dc:creator>mattcurcio</dc:creator>
				<category><![CDATA[Data Science]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[Big Data]]></category>
		<category><![CDATA[Distinct Value Counting]]></category>
		<category><![CDATA[Sketching]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1850</guid>
		<description><![CDATA[Sketching is an area of big-data science that has been getting a lot of attention lately. I personally am very excited about this.  Sketching analytics has been a primary focus of our platform and one of my personal interests for quite a while now. Sketching as an area of big-data science has been slow to [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1850&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/05/13/sketching-the-last-year/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/f386aae94d0e1706ada7b8ea7176ef7b?s=96&#38;d=http%3A%2F%2F1.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">wwkae</media:title>
		</media:content>
	</item>
		<item>
		<title>Netty&#8217;s CodecEmbedder</title>
		<link>http://blog.aggregateknowledge.com/2012/05/01/nettys-codecembedder/</link>
		<comments>http://blog.aggregateknowledge.com/2012/05/01/nettys-codecembedder/#comments</comments>
		<pubDate>Tue, 01 May 2012 16:48:18 +0000</pubDate>
		<dc:creator>rgrzywinski</dc:creator>
				<category><![CDATA[Programming]]></category>
		<category><![CDATA[ChannelBuffer]]></category>
		<category><![CDATA[CodecEmbedder]]></category>
		<category><![CDATA[DecoderEmbedder]]></category>
		<category><![CDATA[EncoderEmbedder]]></category>
		<category><![CDATA[Netty]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1811</guid>
		<description><![CDATA[We love Netty. It&#8217;s a great full-featured network framework for Java. One of the features that rounds out the framework is the CodecEmbedder. It allows you to test your encoders and decoders without any fuss using a offer-poll paradigm. For example, to test our Rsyslog decoder, we simply: One gotcha to watch out for (which [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1811&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/05/01/nettys-codecembedder/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/88926c0300b20080425391bbfbf02483?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">rgrzywinski</media:title>
		</media:content>
	</item>
		<item>
		<title>Statistical Toolbox: The Kolmogorov-Smirnov Test</title>
		<link>http://blog.aggregateknowledge.com/2012/04/18/statistical-toolbox-the-kolmogorov-smirnov-test/</link>
		<comments>http://blog.aggregateknowledge.com/2012/04/18/statistical-toolbox-the-kolmogorov-smirnov-test/#comments</comments>
		<pubDate>Wed, 18 Apr 2012 16:28:26 +0000</pubDate>
		<dc:creator>cpesyna</dc:creator>
				<category><![CDATA[Data Science]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[Kolmogorov-Smirnov]]></category>
		<category><![CDATA[KS test]]></category>
		<category><![CDATA[Statistics]]></category>
		<category><![CDATA[tools]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1720</guid>
		<description><![CDATA[Author&#8217;s Note: The Kolmogorov-Smirnov test is a handy tool that is conceptually clean, and can be useful in a variety of data analysis situations. I&#8217;ll introduce it in the context of a problem that I came across, and give a feel for what it does, and how it might be useful. A Question and A [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1720&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/04/18/statistical-toolbox-the-kolmogorov-smirnov-test/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/c888f0952ef0d6609643e4ceaabecb70?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">cpesyna</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/04/concept_map.png" medium="image">
			<media:title type="html">Conceptual map</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/04/ks_demo1.png" medium="image">
			<media:title type="html">KS Schematic</media:title>
		</media:content>
	</item>
		<item>
		<title>No BS Data Salon #2</title>
		<link>http://blog.aggregateknowledge.com/2012/03/12/no-bs-data-salon-2/</link>
		<comments>http://blog.aggregateknowledge.com/2012/03/12/no-bs-data-salon-2/#comments</comments>
		<pubDate>Mon, 12 Mar 2012 20:14:24 +0000</pubDate>
		<dc:creator>timonk</dc:creator>
				<category><![CDATA[General]]></category>
		<category><![CDATA[analytics]]></category>
		<category><![CDATA[Big Data Conference]]></category>
		<category><![CDATA[machine learning]]></category>
		<category><![CDATA[MetaMarkets]]></category>
		<category><![CDATA[No BS Data Salon]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1701</guid>
		<description><![CDATA[On Saturday, our illustrious Chief Scientist Matt Curcio sat on the Frameworks, Tools, and Techniques for Scaling up Machine Learning panel at the second No BS Data Salon hosted by MetaMarkets. The discussion ranged from scaling the human aspect of ML and analytics to brass tacks about the difficulties of actually performing ML on web scale data [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1701&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/03/12/no-bs-data-salon-2/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/c65848d7edcf64562c02c90946bf489c?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">timonk</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/03/crowd1.jpg?w=300" medium="image">
			<media:title type="html">crowd1</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/03/crowd2.jpg?w=300" medium="image">
			<media:title type="html">crowd2</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/03/crowd3.jpg?w=300" medium="image">
			<media:title type="html">crowd3</media:title>
		</media:content>
	</item>
		<item>
		<title>Choosing a Good Hash Function, Part 3</title>
		<link>http://blog.aggregateknowledge.com/2012/02/02/choosing-a-good-hash-function-part-3/</link>
		<comments>http://blog.aggregateknowledge.com/2012/02/02/choosing-a-good-hash-function-part-3/#comments</comments>
		<pubDate>Thu, 02 Feb 2012 23:29:19 +0000</pubDate>
		<dc:creator>cpesyna</dc:creator>
				<category><![CDATA[Data Science]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[Avalanche]]></category>
		<category><![CDATA[bigmemory]]></category>
		<category><![CDATA[chi-squared]]></category>
		<category><![CDATA[City Hash]]></category>
		<category><![CDATA[collision]]></category>
		<category><![CDATA[hash function]]></category>
		<category><![CDATA[hashing]]></category>
		<category><![CDATA[hashtable]]></category>
		<category><![CDATA[Jenkins Hash]]></category>
		<category><![CDATA[Murmur Hash]]></category>
		<category><![CDATA[Spooky Hash]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1516</guid>
		<description><![CDATA[Author&#8217;s note: Part three of a series studying hash functions. My last post identified a few candidate algorithms that are subjected to further scrutiny here today. The Story So Far The simplest attribute on which one could imagine differentiating candidate hash functions is the number of collision produced when hashing a fixed pool of keys. [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1516&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/02/02/choosing-a-good-hash-function-part-3/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/c888f0952ef0d6609643e4ceaabecb70?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">cpesyna</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/vm_plot.png" medium="image">
			<media:title type="html">vm_plot</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/aval_diagram.png" medium="image">
			<media:title type="html">Avalanche Diagram</media:title>
		</media:content>
	</item>
		<item>
		<title>Big Data Ain&#8217;t Fat Data: A Case Study</title>
		<link>http://blog.aggregateknowledge.com/2012/01/28/big-data-aint-fat-data-a-case-study/</link>
		<comments>http://blog.aggregateknowledge.com/2012/01/28/big-data-aint-fat-data-a-case-study/#comments</comments>
		<pubDate>Sat, 28 Jan 2012 23:51:00 +0000</pubDate>
		<dc:creator>blinsay</dc:creator>
				<category><![CDATA[Data Science]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[Programming]]></category>
		<category><![CDATA[bloom filter]]></category>
		<category><![CDATA[Streaming Algorithms]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1503</guid>
		<description><![CDATA[We&#8217;ve always had a hunch that our users stick to the same geographic region. Sure, there&#8217;s the occasional jet-setter that takes their laptop from New York to Los Angeles (or like Rob, goes Chicago to San Francisco) on a daily or weekly basis, but they&#8217;re the exception and not the rule. Knowing how true this [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1503&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/01/28/big-data-aint-fat-data-a-case-study/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/9b29f100095e5e013806f5099120daa7?s=96&#38;d=http%3A%2F%2F1.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">blinsay</media:title>
		</media:content>
	</item>
		<item>
		<title>Cookies</title>
		<link>http://blog.aggregateknowledge.com/2012/01/18/cookies/</link>
		<comments>http://blog.aggregateknowledge.com/2012/01/18/cookies/#comments</comments>
		<pubDate>Wed, 18 Jan 2012 18:16:53 +0000</pubDate>
		<dc:creator>mattcurcio</dc:creator>
				<category><![CDATA[General]]></category>
		<category><![CDATA[Big Data]]></category>
		<category><![CDATA[Cookies]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1461</guid>
		<description><![CDATA[At Aggregate Knowledge we are constantly concerned about our data space. And since our most basic data key is cookies (cookie ids) we are very interested in how they behave. To that end we have done a ton of research into what the cookie space looks like in the advertising world and the web in [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1461&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/01/18/cookies/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://1.gravatar.com/avatar/f386aae94d0e1706ada7b8ea7176ef7b?s=96&#38;d=http%3A%2F%2F1.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">wwkae</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/cookie_theory1.jpg" medium="image">
			<media:title type="html">cookie_theory</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/cookie_theory_ak1.jpg" medium="image">
			<media:title type="html">cookie_theory_ak1</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/cookie_theory_ak1_zoom_out.jpg" medium="image">
			<media:title type="html">cookie_theory_ak1_zoom_out</media:title>
		</media:content>
	</item>
		<item>
		<title>No BS Data Salon</title>
		<link>http://blog.aggregateknowledge.com/2012/01/07/no-bs-data-salon/</link>
		<comments>http://blog.aggregateknowledge.com/2012/01/07/no-bs-data-salon/#comments</comments>
		<pubDate>Sat, 07 Jan 2012 22:32:22 +0000</pubDate>
		<dc:creator>rgrzywinski</dc:creator>
				<category><![CDATA[General]]></category>
		<category><![CDATA[Big Data Conference]]></category>
		<category><![CDATA[Data Visualization]]></category>
		<category><![CDATA[MetaMarkets]]></category>
		<category><![CDATA[No BS Data Salon]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1494</guid>
		<description><![CDATA[After being quite disenchanted with the state of the Big Data conferences, I thought that I would reach out to some folks that do work similar to ours and plan a mini conference of our own. The first guy that I reached out to was Mike Driscoll, the CTO of MetaMarkets. I had hit the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1494&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2012/01/07/no-bs-data-salon/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/88926c0300b20080425391bbfbf02483?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">rgrzywinski</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2012/01/20120107_dataviz.jpg?w=300" medium="image">
			<media:title type="html">NoBS Data Salon -- Visualization</media:title>
		</media:content>
	</item>
		<item>
		<title>Choosing a Good Hash Function, Part 2</title>
		<link>http://blog.aggregateknowledge.com/2011/12/29/choosing-a-good-hash-function-part-2/</link>
		<comments>http://blog.aggregateknowledge.com/2011/12/29/choosing-a-good-hash-function-part-2/#comments</comments>
		<pubDate>Thu, 29 Dec 2011 18:16:56 +0000</pubDate>
		<dc:creator>cpesyna</dc:creator>
				<category><![CDATA[Data Science]]></category>
		<category><![CDATA[General]]></category>
		<category><![CDATA[bigmemory]]></category>
		<category><![CDATA[collision]]></category>
		<category><![CDATA[hash function]]></category>
		<category><![CDATA[hashing]]></category>
		<category><![CDATA[hashtable]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1407</guid>
		<description><![CDATA[Author&#8217;s note: Part two of a series in which I investigate the performance of a menagerie of hash functions on our data. In today&#8217;s episode the analysis begins in earnest with an investigation of collision rates. Hash function designers have many tools at their disposal, but at their heart, most algorithms follow the same pattern: [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1407&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2011/12/29/choosing-a-good-hash-function-part-2/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/c888f0952ef0d6609643e4ceaabecb70?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">cpesyna</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2011/12/final_bar.png" medium="image">
			<media:title type="html">Fraction of keys hashed without collision</media:title>
		</media:content>
	</item>
		<item>
		<title>Big Memory, Part 4</title>
		<link>http://blog.aggregateknowledge.com/2011/12/12/big-memory-part-4/</link>
		<comments>http://blog.aggregateknowledge.com/2011/12/12/big-memory-part-4/#comments</comments>
		<pubDate>Mon, 12 Dec 2011 22:33:46 +0000</pubDate>
		<dc:creator>timonk</dc:creator>
				<category><![CDATA[General]]></category>
		<category><![CDATA[bigmemory]]></category>
		<category><![CDATA[chaining]]></category>
		<category><![CDATA[GC]]></category>
		<category><![CDATA[hashtable]]></category>
		<category><![CDATA[open addressing]]></category>

		<guid isPermaLink="false">http://blog.aggregateknowledge.com/?p=1253</guid>
		<description><![CDATA[Author’s Note: This is part 4 of a series of posts about my adventures in building a “large”, in-memory hash table. This post is a summary of some pure Java hash table libraries. Background In my last post, I discussed the results of rerunning Nick Welch&#8217;s benchmark of C/C++ hash tables. However, since we use the JVM in production, those results [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=blog.aggregateknowledge.com&#038;blog=22289708&#038;post=1253&#038;subd=agkn&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
		<wfw:commentRss>http://blog.aggregateknowledge.com/2011/12/12/big-memory-part-4/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/c65848d7edcf64562c02c90946bf489c?s=96&#38;d=http%3A%2F%2F0.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&#38;r=G" medium="image">
			<media:title type="html">timonk</media:title>
		</media:content>

		<media:content url="http://agkn.files.wordpress.com/2011/12/comparison_small1.png" medium="image">
			<media:title type="html">Java Hash Table Throughput Comparison (small)</media:title>
		</media:content>
	</item>
	</channel>
</rss>
