{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# LDA\n", "[Tutorial Here](http://dataskunkworks.com/2018/06/06/extracting-topics-from-11000-newsgroups-posts-with-python-gensim-and-lda/)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['rec.autos' 'comp.sys.mac.hardware' 'comp.graphics' 'sci.space'\n", " 'talk.politics.guns' 'sci.med' 'comp.sys.ibm.pc.hardware'\n", " 'comp.os.ms-windows.misc' 'rec.motorcycles' 'talk.religion.misc'\n", " 'misc.forsale' 'alt.atheism' 'sci.electronics' 'comp.windows.x'\n", " 'rec.sport.hockey' 'rec.sport.baseball' 'soc.religion.christian'\n", " 'talk.politics.mideast' 'talk.politics.misc' 'sci.crypt']\n", "['rec.autos' 'comp.sys.mac.hardware' 'comp.graphics' 'sci.space'\n", " 'talk.politics.guns' 'sci.med' 'comp.sys.ibm.pc.hardware'\n", " 'comp.os.ms-windows.misc' 'rec.motorcycles' 'talk.religion.misc'\n", " 'misc.forsale' 'alt.atheism' 'sci.electronics' 'comp.windows.x'\n", " 'rec.sport.hockey' 'rec.sport.baseball' 'soc.religion.christian'\n", " 'talk.politics.mideast' 'talk.politics.misc' 'sci.crypt']\n" ] }, { "data": { "text/html": [ "
\n", " | content | \n", "target | \n", "target_names | \n", "
---|---|---|---|
0 | \n", "From: lerxst@wam.umd.edu (where's my thing)\\nS... | \n", "7 | \n", "rec.autos | \n", "
1 | \n", "From: guykuo@carson.u.washington.edu (Guy Kuo)... | \n", "4 | \n", "comp.sys.mac.hardware | \n", "
2 | \n", "From: twillis@ec.ecn.purdue.edu (Thomas E Will... | \n", "4 | \n", "comp.sys.mac.hardware | \n", "
3 | \n", "From: jgreen@amber (Joe Green)\\nSubject: Re: W... | \n", "1 | \n", "comp.graphics | \n", "
4 | \n", "From: jcm@head-cfa.harvard.edu (Jonathan McDow... | \n", "14 | \n", "sci.space | \n", "