-------------------> Session (re)starting: 03-Mar-2008 09:45:02 lsi %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... 'arch.def', ... 'ash.def', ... 'awk.def', ... 'basename.def', ... 'bash.def', ... 'bsh.def', ... 'cat.def', ... 'chgrp.def', ... 'chmod.def', ... 'chown.def', ... 'cp.def', ... 'cpio.def', ... 'csh.def', ... 'date.def', ... 'dd.def', ... 'df.def', ... 'dmesg.def', ... 'dnsdomainname.def', ... 'doexec.def', ... 'domainname.def', ... 'echo.def', ... 'ed.def', ... 'egrep.def', ... 'ex.def', ... 'false.def', ... 'fgrep.def', ... 'grep.def', ... 'gunzip.def', ... 'gzip.def', ... 'hostname.def', ... 'igawk.def', ... 'ipcalc.def', ... 'kill.def', ... 'ln.def', ... 'loadkeys.def', ... 'login.def', ... 'ls.def', ... 'mail.def', ... 'mkdir.def', ... 'mknod.def', ... 'mktemp.def', ... 'more.def', ... 'mount.def', ... 'mt.def', ... 'mv.def', ... 'netstat.def', ... 'nice.def', ... 'nisdomainname.def', ... 'ping.def', ... 'ps.def', ... 'pwd.def', ... 'red.def', ... 'rm.def', ... 'rmdir.def', ... 'rpm.def', ... 'rvi.def', ... 'rview.def', ... 'sed.def', ... 'setserial.def', ... 'sh.def', ... 'sleep.def', ... 'sort.def', ... 'stty.def', ... 'su.def', ... 'sync.def', ... 'tar.def', ... 'tcsh.def', ... 'touch.def', ... 'true.def', ... 'umount.def', ... 'uname.def', ... 'usleep.def', ... 'vi.def', ... 'view.def', ... 'vimtutor.def', ... 'ypdomainname.def', ... 'zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; ??? Error using ==> feof Invalid file identifier. Use fopen to generate a valid file identifier. Error in ==> getdoc at 15 while feof(fid) == 0 Error in ==> lsi at 40 [counts, words] = getdoc(deblank(doclist(k, :))); exit -------------------> Session (re)starting: 03-Mar-2008 09:36:11 clc ls LSI create_dict create_docs dictionary docs info manpages setup_manpages exit -------------------> Session (re)starting: 03-Mar-2008 09:47:47 lsi %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... 'arch.def', ... 'ash.def', ... 'awk.def', ... 'basename.def', ... 'bash.def', ... 'bsh.def', ... 'cat.def', ... 'chgrp.def', ... 'chmod.def', ... 'chown.def', ... 'cp.def', ... 'cpio.def', ... 'csh.def', ... 'date.def', ... 'dd.def', ... 'df.def', ... 'dmesg.def', ... 'dnsdomainname.def', ... 'doexec.def', ... 'domainname.def', ... 'echo.def', ... 'ed.def', ... 'egrep.def', ... 'ex.def', ... 'false.def', ... 'fgrep.def', ... 'grep.def', ... 'gunzip.def', ... 'gzip.def', ... 'hostname.def', ... 'igawk.def', ... 'ipcalc.def', ... 'kill.def', ... 'ln.def', ... 'loadkeys.def', ... 'login.def', ... 'ls.def', ... 'mail.def', ... 'mkdir.def', ... 'mknod.def', ... 'mktemp.def', ... 'more.def', ... 'mount.def', ... 'mt.def', ... 'mv.def', ... 'netstat.def', ... 'nice.def', ... 'nisdomainname.def', ... 'ping.def', ... 'ps.def', ... 'pwd.def', ... 'red.def', ... 'rm.def', ... 'rmdir.def', ... 'rpm.def', ... 'rvi.def', ... 'rview.def', ... 'sed.def', ... 'setserial.def', ... 'sh.def', ... 'sleep.def', ... 'sort.def', ... 'stty.def', ... 'su.def', ... 'sync.def', ... 'tar.def', ... 'tcsh.def', ... 'touch.def', ... 'true.def', ... 'umount.def', ... 'uname.def', ... 'usleep.def', ... 'vi.def', ... 'view.def', ... 'vimtutor.def', ... 'ypdomainname.def', ... 'zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; fid = -1 ??? Error using ==> feof Invalid file identifier. Use fopen to generate a valid file identifier. Error in ==> getdoc at 15 while feof(fid) == 0 Error in ==> lsi at 40 [counts, words] = getdoc(deblank(doclist(k, :))); help fopen FOPEN Open file. FID = FOPEN(FILENAME) opens the file FILENAME for read access. FILENAME is a string containing the name of the file to be opened. (On PC systems, FOPEN opens files for binary read access.) FILENAME can be a MATLABPATH relative partial pathname. If the file is not found in the current working directory, FOPEN searches for it on the MATLAB search path. On UNIX systems, FILENAME may also start with a "~/" or a "~username/", which FOPEN expands to the current user's home directory or the specified user's home directory, respectively. FID is a scalar MATLAB integer valued double, called a file identifier. You use FID as the first argument to other file input/output routines, such as FREAD and FCLOSE. If FOPEN cannot open the file, it returns -1. FID = FOPEN(FILENAME,PERMISSION) opens the file FILENAME in the mode specified by PERMISSION. PERMISSION can be: 'r' read 'w' write (create if necessary) 'a' append (create if necessary) 'r+' read and write (do not create) 'w+' truncate or create for read and write 'a+' read and append (create if necessary) 'W' write without automatic flushing 'A' append without automatic flushing FILENAME can be a MATLABPATH relative partial pathname only if the file is opened for reading. You can open files in binary mode (the default) or in text mode. In binary mode, no characters get singled out for special treatment. In text mode on the PC, the carriage return character preceding a newline character is deleted on input and added before the newline character on output. To open a file in text mode, append 't' to the permission string, for example 'rt' and 'w+t'. (On Unix, text and binary mode are the same, so this has no effect. On PC systems this is critical.) If the file is opened in update mode ('+'), you must use an FSEEK or FREWIND between an input command like FREAD, FSCANF, FGETS, or FGETL and an output command like FWRITE or FPRINTF. You must also use an FSEEK or FREWIND between an output command and an input command. Two file identifiers are automatically available and need not be opened. They are FID=1 (standard output) and FID=2 (standard error). [FID, MESSAGE] = FOPEN(FILENAME,...) returns a system dependent error message if the open is not successful. [FID, MESSAGE] = FOPEN(FILENAME,PERMISSION,MACHINEFORMAT) opens the specified file with the specified PERMISSION and treats data read using FREAD or data written using FWRITE as having a format given by MACHINEFORMAT. MACHINEFORMAT is one of the following strings: Full Precision Support: 'native' or 'n' - local machine format - the default 'ieee-le' or 'l' - IEEE floating point with little-endian byte ordering 'ieee-be' or 'b' - IEEE floating point with big-endian byte ordering 'vaxd' or 'd' - VAX D floating point and VAX ordering 'vaxg' or 'g' - VAX G floating point and VAX ordering 'ieee-le.l64' or 'a' - IEEE floating point with little-endian byte ordering and 64 bit long data type 'ieee-be.l64' or 's' - IEEE floating point with big-endian byte ordering and 64 bit long data type. Limited Precision Support: (double or equivalent) 'cray' or 'c' - Cray floating point with big-endian byte ordering [FID, MESSAGE] = FOPEN(FILENAME,PERMISSION,MACHINEFORMAT,ENCODING) opens the specified file using the specified PERMISSION and MACHINEFORMAT. ENCODING is a string that specifies the character encoding scheme associated with the file. It must be the empty string ('') or a name or alias for an encoding scheme. Some examples are 'UTF-8', 'latin1', 'US-ASCII', and 'Shift_JIS'. For common names and aliases, see the Web site http://www.iana.org/assignments/character-sets. If ENCODING is unspecified or is the empty string (''), MATLAB's default encoding scheme is used. [FILENAME,PERMISSION,MACHINEFORMAT,ENCODING] = FOPEN(FID) returns the filename, permission, machine format, and character encoding values used by MATLAB when it opened the file associated with identifier FID. MATLAB does not determine these output values by reading information from the opened file. For any of these parameters that were not specified when the file was opened, MATLAB returns its default value. The ENCODING string is a standard character encoding scheme name that may not be the same as the ENCODING argument used in the call to FOPEN that opened the file. An invalid FID returns empty strings for all output arguments. FIDS = FOPEN('all') returns a row vector containing the file identifiers for all the files currently opened by the user (but not 1 or 2). The 'W' and 'A' permissions do not automatically perform a flush of the current output buffer after output operations. See also FCLOSE, FERROR, FGETL, FGETS, FPRINTF, FREAD, FSCANF, FSEEK, FTELL, FWRITE. Overloaded functions or methods (ones with the same name in other directories) help serial/fopen.m help icinterface/fopen.m more on help fopen FOPEN Open file. FID = FOPEN(FILENAME) opens the file FILENAME for read access. FILENAME is a string containing the name of the file to be opened. (On PC systems, FOPEN opens files for binary read access.) FILENAME can be a MATLABPATH relative partial pathname. If the file is not found in the current working directory, FOPEN searches for it on the MATLAB search path. On UNIX systems, FILENAME may also start with a "~/" or a "~username/", which FOPEN expands to the current user's home directory or the specified user's home directory, respectively. FID is a scalar MATLAB integer valued double, called a file identifier. You use FID as the first argument to other file input/output routines, such as FREAD and FCLOSE. If FOPEN cannot open the file, it returns -1. FID = FOPEN(FILENAME,PERMISSION) opens the file FILENAME in the mode specified by PERMISSION. PERMISSION can be: 'r' read 'w' write (create if necessary) 'a' append (create if necessary) 'r+' read and write (do not create) 'w+' truncate or create for read and write 'a+' read and append (create if necessary) 'W' write without automatic flushing 'A' append without automatic flushing FILENAME can be a MATLABPATH relative partial pathname only if the file is opened for reading. You can open files in binary mode (the default) or in text mode. In binary mode, no characters get singled out for special treatment. In text mode on the PC, the carriage return character preceding a newline character is deleted on input and added before the newline character on output. To open a file in text mode, append 't' to the permission string, for example 'rt' and 'w+t'. (On Unix, text and binary mode are the same, so this has no effect. On PC systems this is critical.) If the file is opened in update mode ('+'), you must use an FSEEK or --more-- ccc lsi %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... --more-- 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... --more-- 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... --more-- 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... --more-- 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... --more-- 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... --more-- 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... --more-- 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... --more-- 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... --more-- 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... --more-- 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... --more-- 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... --more-- 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... --more-- 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... --more-- 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... --more-- 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... --more-- 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... --more-- 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... --more-- 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... --more-- 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... --more-- 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... '../docs/arch.def',... '../docs/ash.def',... '../docs/awk.def',... '../docs/basename.def',... '../docs/bash.def',... '../docs/bsh.def',... '../docs/cat.def',... '../docs/chgrp.def',... '../docs/chmod.def',... '../docs/chown.def',... '../docs/cp.def',... '../docs/cpio.def',... '../docs/csh.def',... '../docs/date.def',... '../docs/dd.def',... '../docs/df.def',... '../docs/dmesg.def',... '../docs/dnsdomainname.def',... '../docs/doexec.def',... '../docs/domainname.def',... '../docs/echo.def',... --more-- '../docs/ed.def',... '../docs/egrep.def',... '../docs/ex.def',... '../docs/false.def',... '../docs/fgrep.def',... '../docs/grep.def',... '../docs/gunzip.def',... '../docs/gzip.def',... '../docs/hostname.def',... '../docs/igawk.def',... '../docs/ipcalc.def',... '../docs/kill.def',... '../docs/ln.def',... '../docs/loadkeys.def',... '../docs/login.def',... '../docs/ls.def',... '../docs/mail.def',... '../docs/mkdir.def',... '../docs/mknod.def',... '../docs/mktemp.def',... '../docs/more.def',... '../docs/mount.def',... '../docs/mt.def',... '../docs/mv.def',... '../docs/netstat.def',... '../docs/nice.def',... '../docs/nisdomainname.def',... '../docs/ping.def',... '../docs/ps.def',... '../docs/pwd.def',... '../docs/red.def',... '../docs/rm.def',... '../docs/rmdir.def',... '../docs/rpm.def',... '../docs/rvi.def',... '../docs/rview.def',... '../docs/sed.def',... '../docs/setserial.def',... '../docs/sh.def',... '../docs/sleep.def',... '../docs/sort.def',... '../docs/stty.def',... --more-- '../docs/su.def',... '../docs/sync.def',... '../docs/tar.def',... '../docs/tcsh.def',... '../docs/touch.def',... '../docs/true.def',... '../docs/umount.def',... '../docs/uname.def',... '../docs/usleep.def',... '../docs/vi.def',... '../docs/view.def',... '../docs/vimtutor.def',... '../docs/ypdomainname.def',... '../docs/zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 --more-- fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 --more-- fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 --more-- fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 --more-- fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 %--------------------------------------- % What does the zero do in the SVD call? % Find out! %--------------------------------------- [U, D, V] = svd(A, 0); %-------------------------------------- % Create a query for list + all + files %-------------------------------------- qterms = strvcat('list', 'files', 'all'); disp('Hit return to see results of query') Hit return to see results of query pause; I = findrows(dict, qterms); q = zeros(nterms, 1); q(I) = 1; q = q/norm(q); cosines = A'*q; plot(cosines, '+') title('Cosines for query vector, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosines); [a, b] = max(cosines) a = 0.22491 b = 37 disp(['Best match for list, files, all: ', doclist(b, :)]) Best match for list, files, all: ../docs/ls.def %------------------------------------- % How to find the next largest cosine? %------------------------------------- echo off; ccc ccc lll %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... --more-- Error in ==> lll at 17 dictionary; ccc warning('off', 'all'); clear all; clear global; clear functions; clear java; clear classes; warning('on', 'all'); hold off; close all; clc; more off lll echo on; %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... '../docs/arch.def',... '../docs/ash.def',... '../docs/awk.def',... '../docs/basename.def',... '../docs/bash.def',... '../docs/bsh.def',... '../docs/cat.def',... '../docs/chgrp.def',... '../docs/chmod.def',... '../docs/chown.def',... '../docs/cp.def',... '../docs/cpio.def',... '../docs/csh.def',... '../docs/date.def',... '../docs/dd.def',... '../docs/df.def',... '../docs/dmesg.def',... '../docs/dnsdomainname.def',... '../docs/doexec.def',... '../docs/domainname.def',... '../docs/echo.def',... '../docs/ed.def',... '../docs/egrep.def',... '../docs/ex.def',... '../docs/false.def',... '../docs/fgrep.def',... '../docs/grep.def',... '../docs/gunzip.def',... '../docs/gzip.def',... '../docs/hostname.def',... '../docs/igawk.def',... '../docs/ipcalc.def',... '../docs/kill.def',... '../docs/ln.def',... '../docs/loadkeys.def',... '../docs/login.def',... '../docs/ls.def',... '../docs/mail.def',... '../docs/mkdir.def',... '../docs/mknod.def',... '../docs/mktemp.def',... '../docs/more.def',... '../docs/mount.def',... '../docs/mt.def',... '../docs/mv.def',... '../docs/netstat.def',... '../docs/nice.def',... '../docs/nisdomainname.def',... '../docs/ping.def',... '../docs/ps.def',... '../docs/pwd.def',... '../docs/red.def',... '../docs/rm.def',... '../docs/rmdir.def',... '../docs/rpm.def',... '../docs/rvi.def',... '../docs/rview.def',... '../docs/sed.def',... '../docs/setserial.def',... '../docs/sh.def',... '../docs/sleep.def',... '../docs/sort.def',... '../docs/stty.def',... '../docs/su.def',... '../docs/sync.def',... '../docs/tar.def',... '../docs/tcsh.def',... '../docs/touch.def',... '../docs/true.def',... '../docs/umount.def',... '../docs/uname.def',... '../docs/usleep.def',... '../docs/vi.def',... '../docs/view.def',... '../docs/vimtutor.def',... '../docs/ypdomainname.def',... '../docs/zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 %--------------------------------------- % What does the zero do in the SVD call? % Find out! %--------------------------------------- [U, D, V] = svd(A, 0); %------------------------------------------- % Try also using a rank-p approximation to A %------------------------------------------- p = 2; Ap = U(:,1:p)*D(1:p,1:p)*[V(:,1:p)]' ; %-------------------------------------- % Create a query for list + all + files %-------------------------------------- qterms = strvcat('list', 'files', 'all'); disp('Hit return to see results of query') Hit return to see results of query pause; I = findrows(dict, qterms); q = zeros(nterms, 1); q(I) = 1; q = q/norm(q); cosines = A'*q; plot(cosines, '+') title('Cosines for query vector, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosines); [a, b] = max(cosines) a = 0.22491 b = 37 disp(['Best match for list, files, all: ', doclist(b, :)]) Best match for list, files, all: ../docs/ls.def cosinesp = Ap'*q; plot(cosinesp, '+') title('Cosines for query vector using rank-p estimate, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosinesp); [a, b] = max(cosinesp) a = 0.054555 b = 60 disp(['Best match using rank-p approximate for list, files, all: ', doclist(b, :)]) Best match using rank-p approximate for list, files, all: ../docs/sh.def %------------------------------------- % How to find the next largest cosine? % Need to extract second largest value % from the vector of cosines. How? %------------------------------------- echo off; edit lll.m ccc lll %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... '../docs/arch.def',... '../docs/ash.def',... '../docs/awk.def',... '../docs/basename.def',... '../docs/bash.def',... '../docs/bsh.def',... '../docs/cat.def',... '../docs/chgrp.def',... '../docs/chmod.def',... '../docs/chown.def',... '../docs/cp.def',... '../docs/cpio.def',... '../docs/csh.def',... '../docs/date.def',... '../docs/dd.def',... '../docs/df.def',... '../docs/dmesg.def',... '../docs/dnsdomainname.def',... '../docs/doexec.def',... '../docs/domainname.def',... '../docs/echo.def',... '../docs/ed.def',... '../docs/egrep.def',... '../docs/ex.def',... '../docs/false.def',... '../docs/fgrep.def',... '../docs/grep.def',... '../docs/gunzip.def',... '../docs/gzip.def',... '../docs/hostname.def',... '../docs/igawk.def',... '../docs/ipcalc.def',... '../docs/kill.def',... '../docs/ln.def',... '../docs/loadkeys.def',... '../docs/login.def',... '../docs/ls.def',... '../docs/mail.def',... '../docs/mkdir.def',... '../docs/mknod.def',... '../docs/mktemp.def',... '../docs/more.def',... '../docs/mount.def',... '../docs/mt.def',... '../docs/mv.def',... '../docs/netstat.def',... '../docs/nice.def',... '../docs/nisdomainname.def',... '../docs/ping.def',... '../docs/ps.def',... '../docs/pwd.def',... '../docs/red.def',... '../docs/rm.def',... '../docs/rmdir.def',... '../docs/rpm.def',... '../docs/rvi.def',... '../docs/rview.def',... '../docs/sed.def',... '../docs/setserial.def',... '../docs/sh.def',... '../docs/sleep.def',... '../docs/sort.def',... '../docs/stty.def',... '../docs/su.def',... '../docs/sync.def',... '../docs/tar.def',... '../docs/tcsh.def',... '../docs/touch.def',... '../docs/true.def',... '../docs/umount.def',... '../docs/uname.def',... '../docs/usleep.def',... '../docs/vi.def',... '../docs/view.def',... '../docs/vimtutor.def',... '../docs/ypdomainname.def',... '../docs/zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 %--------------------------------------- % What does the zero do in the SVD call? % Find out! %--------------------------------------- [U, D, V] = svd(A, 0); %------------------------------------------- % Try also using a rank-p approximation to A %------------------------------------------- p = 5; Ap = U(:,1:p)*D(1:p,1:p)*[V(:,1:p)]' ; %-------------------------------------- % Create a query for list + all + files %-------------------------------------- qterms = strvcat('list', 'files', 'all'); disp('Hit return to see results of query') Hit return to see results of query pause; I = findrows(dict, qterms); q = zeros(nterms, 1); q(I) = 1; q = q/norm(q); cosines = A'*q; plot(cosines, '+') title('Cosines for query vector, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosines); [a, b] = max(cosines) a = 0.22491 b = 37 disp(['Best match for list, files, all: ', doclist(b, :)]) Best match for list, files, all: ../docs/ls.def cosinesp = Ap'*q; plot(cosinesp, '+') title('Cosines for query vector using rank-p estimate, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosinesp); [a, b] = max(cosinesp) a = 0.10589 b = 60 disp(['Best match using rank-p approximate for list, files, all: ', doclist(b, :)]) Best match using rank-p approximate for list, files, all: ../docs/sh.def %------------------------------------- % How to find the next largest cosine? % Need to extract second largest value % from the vector of cosines. How? %------------------------------------- echo off; figure; plot(sort(cosinep), 'ro') ??? Undefined function or variable 'cosinep'. figure; plot(sort(cosinesp), 'ro') ccc exit -------------------> Session (re)starting: 03-Mar-2008 10:07:06 more on help sort SORT Sort in ascending or descending order. For vectors, SORT(X) sorts the elements of X in ascending order. For matrices, SORT(X) sorts each column of X in ascending order. For N-D arrays, SORT(X) sorts the along the first non-singleton dimension of X. When X is a cell array of strings, SORT(X) sorts the strings in ASCII dictionary order. Y = SORT(X,DIM,MODE) has two optional parameters. DIM selects a dimension along which to sort. MODE selects the direction of the sort 'ascend' results in ascending order 'descend' results in descending order The result is in Y which has the same shape and type as X. [Y,I] = SORT(X,DIM,MODE) also returns an index matrix I. If X is a vector, then Y = X(I). If X is an m-by-n matrix and DIM=1, then for j = 1:n, Y(:,j) = X(I(:,j),j); end When X is complex, the elements are sorted by ABS(X). Complex matches are further sorted by ANGLE(X). When more than one element has the same value, the order of the elements are preserved in the sorted result and the indexes of equal elements will be ascending in any index matrix. Example: If X = [3 7 5 0 4 2] then sort(X,1) is [0 4 2 and sort(X,2) is [3 5 7 3 7 5] 0 2 4]; See also ISSORTED, SORTROWS, MIN, MAX, MEAN, MEDIAN. Overloaded functions or methods (ones with the same name in other directories) help cell/sort.m help ordinal/sort.m exit -------------------> Session (re)starting: 03-Mar-2008 10:13:39 lsi %------------------------------------------------------------- % dictionary.m sets up the array "dict" of words that form the % dictionary for the problem. %------------------------------------------------------------- dictionary; dict = strvcat(... 'accent',... 'access',... 'accurate',... 'active',... 'addr',... 'address',... 'addressed',... 'adjust',... 'affect',... 'all',... 'allowed',... 'allows',... 'alpha',... 'also',... 'always',... 'any',... 'append',... 'arbitrary',... 'arch',... 'architecture',... 'archive',... 'archives',... 'argument',... 'arguments',... 'argv',... 'arm',... 'arnold',... 'array',... 'ascii',... 'at',... 'attributes',... 'available',... 'awk',... 'ax',... 'backslash',... 'backup',... 'backups',... 'barnes',... 'basename',... 'bash',... 'becomes',... 'been',... 'before',... 'begins',... 'being',... 'better',... 'binary',... 'bindings',... 'bits',... 'blank',... 'blanks',... 'block',... 'blocks',... 'boot',... 'branch',... 'broadcast',... 'bsd',... 'buffer',... 'bufsize',... 'bugs',... 'bugzilla',... 'busy',... 'bytes',... 'bz',... 'calculate',... 'callout',... 'carriage',... 'case',... 'cat',... 'causes',... 'cbs',... 'change',... 'changed',... 'changes',... 'char',... 'character',... 'characters',... 'check',... 'chgrp',... 'chmod',... 'chown',... 'class',... 'classes',... 'clear',... 'cmd',... 'code',... 'com',... 'comm',... 'commandname',... 'commands',... 'compatible',... 'compose',... 'compress',... 'compressed',... 'compressing',... 'concatenate',... 'connections',... 'contents',... 'context',... 'continuous',... 'control',... 'convert',... 'copy',... 'copying',... 'copyright',... 'coreutils',... 'count',... 'cp',... 'cpio',... 'cpu',... 'crc',... 'create',... 'cs',... 'ctime',... 'current',... 'cutable',... 'data',... 'database',... 'date',... 'day',... 'dd',... 'deadline',... 'decompressing',... 'default',... 'defaults',... 'defined',... 'defines',... 'definitions',... 'defkeymap',... 'delay',... 'density',... 'dereference',... 'dest',... 'dev',... 'device',... 'devices',... 'df',... 'dir',... 'direc',... 'directories',... 'directory',... 'disk',... 'display',... 'displayed',... 'djb',... 'dmesg',... 'dns',... 'dnsdomainname',... 'doexec',... 'domainname',... 'donald',... 'dot',... 'down',... 'drive',... 'driver',... 'drivers',... 'drives',... 'dumpkeys',... 'during',... 'each',... 'ebcdic',... 'echo',... 'echoctl',... 'echoe',... 'ed',... 'effective',... 'either',... 'embedded',... 'empty',... 'enable',... 'end',... 'entered',... 'entries',... 'entry',... 'environment',... 'equivalent',... 'erase',... 'erik',... 'escape',... 'etc',... 'even',... 'every',... 'ewt',... 'ex',... 'example',... 'examples',... 'except',... 'exclude',... 'exe',... 'exec',... 'executable',... 'execute',... 'executed',... 'exist',... 'existing',... 'expr',... 'expressions',... 'ext',... 'extend',... 'extract',... 'extracting',... 'false',... 'family',... 'field',... 'fields',... 'file',... 'filename',... 'filenames',... 'files',... 'filesystem',... 'filesystems',... 'first',... 'flag',... 'folder',... 'follow',... 'following',... 'follows',... 'foo',... 'force',... 'format',... 'forward',... 'fqdn',... 'free',... 'fs',... 'fstab',... 'ftp',... 'full',... 'fully',... 'functions',... 'gawk',... 'general',... 'getopt',... 'getting',... 'gid',... 'give',... 'given',... 'gnu',... 'go',... 'gpg',... 'granted',... 'grep',... 'group',... 'groups',... 'gui',... 'gunzip',... 'gz',... 'gzip',... 'hard',... 'hat',... 'header',... 'help',... 'hh',... 'history',... 'host',... 'hostname',... 'hosts',... 'hour',... 'http',... 'ibs',... 'icmp',... 'icrnl',... 'id',... 'identical',... 'igawk',... 'ignore',... 'inc',... 'include',... 'indicating',... 'info',... 'initializations',... 'input',... 'inserted',... 'install',... 'installed',... 'instead',... 'interactive',... 'interface',... 'interval',... 'invoke',... 'ip',... 'ipcalc',... 'ipx',... 'irq',... 'iso',... 'istrip',... 'ixany',... 'jim',... 'job',... 'july',... 'just',... 'kb',... 'kernel',... 'key',... 'keyboard',... 'keymap',... 'keymaps',... 'keyword',... 'kill',... 'kth',... 'label',... 'language',... 'lar',... 'larger',... 'last',... 'later',... 'lc',... 'leading',... 'letters',... 'level',... 'lib',... 'line',... 'lines',... 'link',... 'links',... 'linux',... 'list',... 'listening',... 'listing',... 'ln',... 'load',... 'loadkeys',... 'local',... 'locale',... 'log',... 'long',... 'loop',... 'losetup',... 'lower',... 'lowest',... 'ls',... 'machine',... 'machines',... 'mackenzie',... 'macro',... 'made',... 'mail',... 'maintained',... 'make',... 'mandatory',... 'manual',... 'many',... 'map',... 'mask',... 'match',... 'matched',... 'matches',... 'matching',... 'mbox',... 'md',... 'members',... 'memory',... 'merchantability',... 'merge',... 'message',... 'messages',... 'method',... 'meyering',... 'microsecond',... 'microseconds',... 'mines',... 'minutes',... 'mips',... 'mkdir',... 'mknod',... 'mktemp',... 'mm',... 'mode',... 'modified',... 'monday',... 'month',... 'moolenaar',... 'more',... 'most',... 'mount',... 'mounted',... 'move',... 'mt',... 'mtab',... 'multi',... 'multiple',... 'multiport',... 'must',... 'mv',... 'myclass',... 'name',... 'names',... 'net',... 'netmask',... 'netstat',... 'network',... 'never',... 'new',... 'newer',... 'newline',... 'next',... 'nfs',... 'nice',... 'nisdomainname',... 'nnn',... 'node',... 'nodename',... 'nonblank',... 'noscripts',... 'note',... 'nothing',... 'nov',... 'nuls',... 'num',... 'number',... 'numbered',... 'numeric',... 'numerical',... 'obs',... 'octal',... 'old',... 'omitted',... 'open',... 'operate',... 'operating',... 'operations',... 'opost',... 'optional',... 'options',... 'opts',... 'order',... 'ordering',... 'org',... 'original',... 'originally',... 'other',... 'otherwise',... 'out',... 'output',... 'overwrite',... 'own',... 'owner',... 'ownership',... 'owns',... 'pack',... 'package',... 'packages',... 'packet',... 'packets',... 'pad',... 'parameter',... 'parameters',... 'parenb',... 'parents',... 'parity',... 'part',... 'particu',... 'partitions',... 'pass',... 'passed',... 'passwd',... 'path',... 'pattern',... 'paul',... 'people',... 'permissions',... 'personality',... 'pid',... 'pierce',... 'ping',... 'platform',... 'please',... 'point',... 'port',... 'portable',... 'ports',... 'pos',... 'positional',... 'positioned',... 'posix',... 'ppc',... 'prefix',... 'preserve',... 'previous',... 'print',... 'printed',... 'prints',... 'priority',... 'privileged',... 'probably',... 'proc',... 'process',... 'processed',... 'processes',... 'program',... 'programmer',... 'programming',... 'programs',... 'prompt',... 'properly',... 'protocol',... 'provided',... 'ps',... 'published',... 'purpose',... 'pwd',... 'qualified',... 'query',... 'quotes',... 'range',... 'rather',... 'raw',... 'rc',... 're',... 'read',... 'readable',... 'readline',... 'real',... 'received',... 'record',... 'records',... 'recursively',... 'red',... 'redhat',... 'refer',... 'reference',... 'regexp',... 'regular',... 'release',... 'remote',... 'remove',... 'removed',... 'rename',... 'repackage',... 'replaced',... 'replacement',... 'reply',... 'report',... 'reporting',... 'request',... 'require',... 'requires',... 'reset',... 'restrictions',... 'result',... 'return',... 'returned',... 'returns',... 'reverse',... 'rewind',... 'rfc',... 'rfile',... 'rh',... 'rhs',... 'ring',... 'rm',... 'rmdir',... 'robbins',... 'robert',... 'root',... 'round',... 'route',... 'routing',... 'rpm',... 'rpmb',... 'rpmrc',... 'run',... 'runs',... 'runuser',... 'rwxrwxrwx',... 'sages',... 'same',... 'sat',... 'sbufsize',... 'scheduling',... 'school',... 'screen',... 'script',... 'scriptlet',... 'scroll',... 'scsi',... 'search',... 'second',... 'seconds',... 'security',... 'sed',... 'select',... 'selects',... 'selinux',... 'send',... 'sent',... 'sep',... 'separated',... 'sequences',... 'serial',... 'server',... 'set',... 'setserial',... 'settings',... 'shell',... 'shells',... 'short',... 'should',... 'show',... 'shown',... 'shred',... 'signal',... 'signaled',... 'silent',... 'similar',... 'simple',... 'simply',... 'since',... 'single',... 'site',... 'size',... 'sized',... 'skeeve',... 'skip',... 'slashes',... 'sleep',... 'sleeps',... 'smith',... 'so',... 'sock',... 'socket',... 'sockets',... 'sole',... 'solely',... 'some',... 'sort',... 'sorted',... 'sorting',... 'source',... 'space',... 'spaces',... 'sparc',... 'sparse',... 'spd',... 'speci',... 'special',... 'specified',... 'specifies',... 'specify',... 'specifying',... 'squeeze',... 'src',... 'ss',... 'stack',... 'stamp',... 'standard',... 'start',... 'started',... 'starts',... 'state',... 'statistics',... 'status',... 'sticky',... 'still',... 'stop',... 'stream',... 'string',... 'strings',... 'strip',... 'stty',... 'style',... 'su',... 'substitute',... 'success',... 'successfully',... 'such',... 'suffix',... 'sunday',... 'super',... 'supercede',... 'superuser',... 'supp',... 'supplemental',... 'supplied',... 'supports',... 'suppress',... 'svalente',... 'swap',... 'symbolic',... 'sync',... 'syntax',... 'syslog',... 'syslogd',... 'system',... 'systems',... 'tab',... 'table',... 'tables',... 'tabs',... 'tag',... 'taken',... 'takes',... 'tape',... 'tapes',... 'tar',... 'target',... 'tcp',... 'tcsh',... 'tell',... 'tem',... 'template',... 'temporary',... 'term',... 'terminal',... 'terminate',... 'test',... 'texinfo',... 'text',... 'their',... 'them',... 'theodore',... 'there',... 'these',... 'they',... 'things',... 'those',... 'through',... 'thus',... 'time',... 'timeout',... 'times',... 'timespec',... 'timestamp',... 'timestamps',... 'tmp',... 'tmpdir',... 'tmpfile',... 'too',... 'tools',... 'torbjorn',... 'tory',... 'touch',... 'trailing',... 'translate',... 'translators',... 'traverse',... 'trip',... 'true',... 'try',... 'ts',... 'ttl',... 'tty',... 'ttys',... 'two',... 'type',... 'types',... 'tytso',... 'uart',... 'udp',... 'ugoa',... 'uid',... 'umask',... 'umount',... 'uname',... 'unblock',... 'unbuffered',... 'unchanged',... 'uncompressed',... 'underlining',... 'unique',... 'universal',... 'university',... 'unix',... 'unknown',... 'unlink',... 'unmount',... 'unmounted',... 'unset',... 'unsuccessfully',... 'until',... 'unwritable',... 'update',... 'upper',... 'usage',... 'useful',... 'user',... 'users',... 'usertty',... 'using',... 'usleep',... 'usr',... 'usually',... 'utc',... 'utilities',... 'utility',... 'valente',... 'value',... 'values',... 'var',... 'variable',... 'variables',... 'various',... 've',... 'verbose',... 'verify',... 'vertical',... 'vet',... 'vfstype',... 'vi',... 'view',... 'vim',... 'vimrc',... 'virtual',... 'visual',... 'volume',... 'vt',... 'waiting',... 'want',... 'warranty',... 'way',... 'we',... 'week',... 'weekday',... 'well',... 'what',... 'when',... 'where',... 'whether',... 'which',... 'while',... 'who',... 'whoever',... 'whose',... 'width',... 'wildcards',... 'will',... 'window',... 'without',... 'word',... 'words',... 'working',... 'works',... 'write',... 'writes',... 'written',... 'xs',... 'xxxxxxxxxx',... 'year',... 'yes',... 'your',... 'yp',... 'ypdomainname',... 'yy',... 'zcat',... 'zero',... 'zip'); %-------------------------------------------------- % docs.m lists the files containing word counts and % words for each individual document. %-------------------------------------------------- docs; doclist = strvcat(... '../docs/arch.def',... '../docs/ash.def',... '../docs/awk.def',... '../docs/basename.def',... '../docs/bash.def',... '../docs/bsh.def',... '../docs/cat.def',... '../docs/chgrp.def',... '../docs/chmod.def',... '../docs/chown.def',... '../docs/cp.def',... '../docs/cpio.def',... '../docs/csh.def',... '../docs/date.def',... '../docs/dd.def',... '../docs/df.def',... '../docs/dmesg.def',... '../docs/dnsdomainname.def',... '../docs/doexec.def',... '../docs/domainname.def',... '../docs/echo.def',... '../docs/ed.def',... '../docs/egrep.def',... '../docs/ex.def',... '../docs/false.def',... '../docs/fgrep.def',... '../docs/grep.def',... '../docs/gunzip.def',... '../docs/gzip.def',... '../docs/hostname.def',... '../docs/igawk.def',... '../docs/ipcalc.def',... '../docs/kill.def',... '../docs/ln.def',... '../docs/loadkeys.def',... '../docs/login.def',... '../docs/ls.def',... '../docs/mail.def',... '../docs/mkdir.def',... '../docs/mknod.def',... '../docs/mktemp.def',... '../docs/more.def',... '../docs/mount.def',... '../docs/mt.def',... '../docs/mv.def',... '../docs/netstat.def',... '../docs/nice.def',... '../docs/nisdomainname.def',... '../docs/ping.def',... '../docs/ps.def',... '../docs/pwd.def',... '../docs/red.def',... '../docs/rm.def',... '../docs/rmdir.def',... '../docs/rpm.def',... '../docs/rvi.def',... '../docs/rview.def',... '../docs/sed.def',... '../docs/setserial.def',... '../docs/sh.def',... '../docs/sleep.def',... '../docs/sort.def',... '../docs/stty.def',... '../docs/su.def',... '../docs/sync.def',... '../docs/tar.def',... '../docs/tcsh.def',... '../docs/touch.def',... '../docs/true.def',... '../docs/umount.def',... '../docs/uname.def',... '../docs/usleep.def',... '../docs/vi.def',... '../docs/view.def',... '../docs/vimtutor.def',... '../docs/ypdomainname.def',... '../docs/zcat.def'); nterms = length(dict); ndocs = length(doclist); %------------------------------- % The term-document matrix setup %------------------------------- A = zeros(nterms, ndocs); %---------------------------------------------------------- % Turning off echo, since it will print for every iteration % in the next loop. %---------------------------------------------------------- echo off; fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 fid = 3 %--------------------------------------- % What does the zero do in the SVD call? % Find out! %--------------------------------------- [U, D, V] = svd(A, 0); %-------------------------------------- % Create a query for list + all + files %-------------------------------------- qterms = strvcat('list', 'files', 'all'); disp('Hit return to see results of query') Hit return to see results of query pause; I = findrows(dict, qterms); q = zeros(nterms, 1); q(I) = 1; q = q/norm(q); cosines = A'*q; plot(cosines, '+') title('Cosines for query vector, by document') xlabel('Document Number') ylabel('cosine') number_cosines = length(cosines); [a, b] = max(cosines) a = 0.22491 b = 37 disp(['Best match for list, files, all: ', doclist(b, :)]) Best match for list, files, all: ../docs/ls.def %------------------------------------- % How to find the next largest cosine? %------------------------------------- echo off; nterms nterms = 845 ndocs ndocs = 77 exit