<?xml version="1.0" encoding="UTF-8"?>
<!--
	RSS 2.0 feed listing the posts published under a single tag of the site
	(see the self link below for the feed's own address).
	All extension namespaces are declared once on the root element; the
	declarations are sorted by prefix, which has no effect on their meaning.
-->
<rss version="2.0"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/">
	<channel>
		<!-- Channel metadata: describes the feed as a whole. -->
		<title>Natural Language Processing &#187; Tokenization</title>
		<!-- Self reference: the address this feed document is served from. -->
		<atom:link
			href="http://language.worldofcomputing.net/tag/tokenization/feed"
			rel="self"
			type="application/rss+xml"/>
		<link>http://language.worldofcomputing.net</link>
		<description>Articles on Natural Language Processing</description>
		<lastBuildDate>Sun, 27 Nov 2011 04:29:40 +0000</lastBuildDate>
		<generator>http://wordpress.org/?v=2.8.6</generator>
		<language>en</language>
		<!-- Refresh hint for aggregators: one update per "hourly" period. -->
		<sy:updatePeriod>hourly</sy:updatePeriod>
		<sy:updateFrequency>1</sy:updateFrequency>

		<!-- Feed entries: one <item> per post. -->
		<item>
			<title>Tokenization: Overview</title>
			<link>http://language.worldofcomputing.net/tokenization/tokenization-overview.html</link>
			<comments>http://language.worldofcomputing.net/tokenization/tokenization-overview.html#comments</comments>
			<pubDate>Fri, 27 Nov 2009 10:47:26 +0000</pubDate>
			<dc:creator>Robin</dc:creator>
			<category><![CDATA[Tokenization]]></category>
			<!-- isPermaLink="false": the guid is an identifier only; <link> above carries the post URL. -->
			<guid isPermaLink="false">http://language.worldofcomputing.net/?p=29</guid>
			<!--
				Excerpt of the post; it ends with a "[...]" truncation marker.
				The CDATA payload is text content, so its line breaks and
				column-0 continuation lines are kept exactly as emitted.
			-->
			<description><![CDATA[This article presents an overview of Tokenization and the challenges associated with it.
What is Tokenization?
Tokenization is the process of breaking up the given text into units called tokens. The tokens may be words or number or punctuation mark.  Tokenization does this task by locating word boundaries. Ending point of a word and beginning of [...]]]></description>
			<!-- Per-post comment feed address and current comment count. -->
			<wfw:commentRss>http://language.worldofcomputing.net/tokenization/tokenization-overview.html/feed</wfw:commentRss>
			<slash:comments>0</slash:comments>
		</item>
	</channel>
</rss>

