Wordlists

Closed class word types

Various wordlists, mostly for subtypes of closed-class words.

corpkit.dictionaries.wordlists.wordlists = wordlists(pronouns=[u'all', u'another', u'any', u'anybody', u'anyone', u'anything', u'both', u'each', u'each', u'other', u'either', u'everybody', u'everyone', u'everything', u'few', u'he', u'her', u'hers', u'herself', u'him', u'himself', u'his', u'it', u'i', u'its', u'itself', u'many', u'me', u'mine', u'more', u'most', u'much', u'myself', u'neither', u'no', u'one', u'nobody', u'none', u'nothing', u'one', u'another', u'other', u'others', u'ours', u'ourselves', u'several', u'she', u'some', u'somebody', u'someone', u'something', u'that', u'their', u'theirs', u'them', u'there', u'themselves', u'these', u'they', u'this', u'those', u'us', u'we', u'what', u'whatever', u'which', u'whichever', u'who', u'whoever', u'whom', u'whomever', u'whose', u'you', u'your', u'yours', u'yourself', u'yourselves'], conjunctions=[u'though', u'although', u'even though', u'while', u'if', u'only if', u'unless', u'until', u'provided that', u'assuming that', u'even if', u'in case', u'lest', u'than', u'rather than', u'whether', u'as much as', u'whereas', u'after', u'as long as', u'as soon as', u'before', u'by the time', u'now that', u'once', u'since', u'till', u'until', u'when', u'whenever', u'while', u'because', u'since', u'so that', u'why', u'that', u'what', u'whatever', u'which', u'whichever', u'who', u'whoever', u'whom', u'whomever', u'whose', u'how', u'as though', u'as if', u'where', u'wherever', u'for', u'and', u'nor', u'but', u'or', u'yet', u'so', u'however'], articles=[u'a', u'an', u'the', u'teh'], determiners=[u'all', u'anotha', u'another', u'any', u'any-and-all', u'atta', u'both', u'certain', u'couple', u'dat', u'dem', u'dis', u'each', u'either', u'enough', u'enuf', u'enuff', u'every', u'few', u'fewer', u'fewest', u'her', u'hes', u'his', u'its', u'last', u'least', u'many', u'more', u'most', u'much', u'muchee', u'my', u'neither', u'nil', u'no', u'none', u'other', u'our', u'overmuch', u'owne', u'plenty', u'quodque', u'several', u'some', u'such', 
u'sufficient', u'that', u'their', u'them', u'these', u'they', u'thilk', u'thine', u'this', u'those', u'thy', u'umpteen', u'us', u'various', u'wat', u'we', u'what', u'whatever', u'which', u'whichever', u'yonder', u'you', u'your'], prepositions=[u'about', u'above', u'across', u'after', u'against', u'along', u'among', u'around', u'at', u'before', u'behind', u'below', u'beneath', u'beside', u'between', u'by', u'down', u'during', u'except', u'for', u'from', u'front', u'in', u'inside', u'instead', u'into', u'like', u'near', u'of', u'off', u'on', u'onto', u'out', u'outside', u'over', u'past', u'since', u'through', u'to', u'top', u'toward', u'under', u'underneath', u'until', u'up', u'upon', u'with', u'within', u'without'], connectors=[u'about', u'above', u'across', u'after', u'against', u'along', u'among', u'around', u'at', u'before', u'behind', u'below', u'beneath', u'beside', u'between', u'by', u'down', u'during', u'except', u'for', u'from', u'front', u'in', u'inside', u'instead', u'into', u'like', u'near', u'of', u'off', u'on', u'onto', u'out', u'outside', u'over', u'past', u'since', u'through', u'to', u'top', u'toward', u'under', u'underneath', u'until', u'up', u'upon', u'with', u'within', u'without'], modals=[u'would', u'will', u'can', u'could', u'may', u'should', u'might', u'must', u'ca', u"'ll", u"'d", u'wo', u'ought', u'need', u'shall', u'dare', u'shalt'], closedclass=[u"'d", u"'ll", u'a', u'about', u'above', u'across', u'after', u'against', u'all', u'along', u'although', u'among', u'an', u'and', u'anotha', u'another', u'any', u'any-and-all', u'anybody', u'anyone', u'anything', u'around', u'as if', u'as long as', u'as much as', u'as soon as', u'as though', u'assuming that', u'at', u'atta', u'because', u'before', u'behind', u'below', u'beneath', u'beside', u'between', u'both', u'but', u'by', u'by the time', u'ca', u'can', u'certain', u'could', u'couple', u'dare', u'dat', u'dem', u'dis', u'down', u'during', u'each', u'either', u'enough', u'enuf', u'enuff', u'even 
if', u'even though', u'every', u'everybody', u'everyone', u'everything', u'except', u'few', u'fewer', u'fewest', u'for', u'from', u'front', u'he', u'her', u'hers', u'herself', u'hes', u'him', u'himself', u'his', u'how', u'however', u'i', u'if', u'in', u'in case', u'inside', u'instead', u'into', u'it', u'its', u'itself', u'last', u'least', u'lest', u'like', u'many', u'may', u'me', u'might', u'mine', u'more', u'most', u'much', u'muchee', u'must', u'my', u'myself', u'near', u'need', u'neither', u'nil', u'no', u'nobody', u'none', u'nor', 'not', u'nothing', u'now that', u'of', u'off', u'on', u'once', u'one', u'only if', u'onto', u'or', u'other', u'others', u'ought', u'our', u'ours', u'ourselves', u'out', u'outside', u'over', u'overmuch', u'owne', u'past', u'plenty', u'provided that', u'quodque', u'rather than', u'several', u'shall', u'shalt', u'she', u'should', u'since', u'so', u'so that', u'some', u'somebody', u'someone', u'something', u'such', u'sufficient', u'teh', u'than', u'that', u'the', u'their', u'theirs', u'them', u'themselves', u'there', u'these', u'they', u'thilk', u'thine', u'this', u'those', u'though', u'through', u'thy', u'till', u'to', u'top', u'toward', u'umpteen', u'under', u'underneath', u'unless', u'until', u'up', u'upon', u'us', u'various', u'wat', u'we', u'what', u'whatever', u'when', u'whenever', u'where', u'whereas', u'wherever', u'whether', u'which', u'whichever', u'while', u'who', u'whoever', u'whom', u'whomever', u'whose', u'why', u'will', u'with', u'within', u'without', u'wo', u'would', u'yet', u'yonder', u'you', u'your', u'yours', u'yourself', u'yourselves'], stopwords=['yeah', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'a', 'able', 'about', 'above', 'abst', 'accordance', 'according', 'accordingly', 'across', 'act', 'actually', 'added', 'adj', 'adopted', 'affected', 'affecting', 'affects', 'after', 'afterwards', 'again', 'against', 'ah', 'all', 'almost', 'alone', 'along', 'already', 'also', 'although', 
'always', 'am', 'among', 'amongst', 'an', 'and', 'announce', 'another', 'any', 'anybody', 'anyhow', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apparently', 'approximately', 'are', 'aren', 'arent', 'arise', 'around', 'as', 'aside', 'ask', 'asking', 'at', 'auth', 'available', 'away', 'awfully', 'b', 'back', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand', 'begin', 'beginning', 'beginnings', 'begins', 'behind', 'being', 'believe', 'below', 'beside', 'besides', 'between', 'beyond', 'biol', 'both', 'brief', 'briefly', 'but', 'by', 'c', 'ca', 'came', 'can', 'cannot', 'cant', 'cause', 'causes', 'certain', 'certainly', 'co', 'com', 'come', 'comes', 'contain', 'containing', 'contains', 'could', 'couldnt', 'd', 'date', 'did', 'didnt', 'different', 'do', 'does', 'doesnt', 'doing', 'done', 'dont', 'down', 'downwards', 'due', 'during', 'e', 'each', 'ed', 'edu', 'effect', 'eg', 'eight', 'eighty', 'either', 'else', 'elsewhere', 'end', 'ending', 'enough', 'especially', 'et', 'et-al', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere', 'ex', 'except', 'f', 'far', 'few', 'ff', 'fifth', 'first', 'five', 'fix', 'followed', 'following', 'follows', 'for', 'former', 'formerly', 'forth', 'found', 'four', 'from', 'further', 'furthermore', 'going', 'g', 'gave', 'get', 'gets', 'getting', 'give', 'given', 'gives', 'giving', 'go', 'goes', 'gone', 'got', 'gotten', 'h', 'had', 'happens', 'hardly', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hence', 'her', 'here', 'hereafter', 'hereby', 'herein', 'heres', 'hereupon', 'hers', 'herself', 'hes', 'hi', 'hid', 'him', 'himself', 'his', 'hither', 'home', 'how', 'howbeit', 'however', 'hundred', 'i', 'id', 'ie', 'if', 'ill', 'im', 'immediate', 'immediately', 'importance', 'important', 'in', 'inc', 'indeed', 'index', 'information', 'instead', 'into', 'invention', 'inward', 'is', 'isnt', 'it', 'itd', 'itll', 'its', 'itself', 'ive', 'j', 'just', 
'k', 'keep', 'keeps', 'kept', 'keys', 'kg', 'km', 'know', 'known', 'knows', 'l', 'largely', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let', 'lets', 'like', 'liked', 'likely', 'line', 'little', 'll', 'look', 'looking', 'looks', 'ltd', 'm', 'made', 'mainly', 'make', 'makes', 'many', 'may', 'maybe', 'me', 'mean', 'means', 'meantime', 'meanwhile', 'merely', 'mg', 'might', 'million', 'miss', 'ml', 'more', 'moreover', 'most', 'mostly', 'mr', 'mrs', 'much', 'mug', 'must', 'my', 'myself', 'n', 'na', 'name', 'namely', 'nay', 'nd', 'near', 'nearly', 'necessarily', 'necessary', 'need', 'needs', 'neither', 'never', 'nevertheless', 'new', 'next', 'nine', 'ninety', 'no', 'nobody', 'non', 'none', 'nonetheless', 'noone', 'nor', 'normally', 'nos', 'not', 'noted', 'nothing', 'now', 'nowhere', 'o', 'obtain', 'obtained', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'omitted', 'on', 'once', 'one', 'ones', 'only', 'onto', 'or', 'ord', 'other', 'others', 'otherwise', 'ought', 'our', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'owing', 'own', 'p', 'page', 'pages', 'part', 'particular', 'particularly', 'past', 'per', 'perhaps', 'placed', 'please', 'plus', 'poorly', 'possible', 'possibly', 'potentially', 'pp', 'predominantly', 'present', 'previously', 'primarily', 'probably', 'promptly', 'proud', 'provides', 'put', 'q', 'que', 'quickly', 'quite', 'qv', 'r', 'ran', 'rather', 'rd', 're', 'readily', 'really', 'recent', 'recently', 'ref', 'refs', 'regarding', 'regardless', 'regards', 'related', 'relatively', 'research', 'respectively', 'resulted', 'resulting', 'results', 'right', 'run', 's', 'said', 'same', 'saw', 'say', 'saying', 'says', 'sec', 'section', 'see', 'seeing', 'seem', 'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sent', 'seven', 'several', 'shall', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'show', 'showed', 'shown', 'showns', 'shows', 'significant', 'significantly', 'similar', 'similarly', 
'since', 'six', 'slightly', 'so', 'some', 'somebody', 'somehow', 'someone', 'somethan', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere', 'soon', 'sorry', 'specifically', 'specified', 'specify', 'specifying', 'state', 'states', 'still', 'stop', 'strongly', 'sub', 'substantially', 'successfully', 'such', 'sufficiently', 'suggest', 'sup', 'sure', 't', 'take', 'taken', 'taking', 'tell', 'tends', 'th', 'than', 'thank', 'thanks', 'thanx', 'that', 'thatll', 'thats', 'thatve', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'thence', 'there', 'thereafter', 'thereby', 'thered', 'therefore', 'therein', 'therell', 'thereof', 'therere', 'theres', 'thereto', 'thereupon', 'thereve', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'think', 'this', 'those', 'thou', 'though', 'thoughh', 'thousand', 'throug', 'through', 'throughout', 'thru', 'thus', 'til', 'tip', 'to', 'together', 'too', 'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', 'ts', 'twice', 'two', 'u', 'un', 'under', 'unfortunately', 'unless', 'unlike', 'unlikely', 'until', 'unto', 'up', 'upon', 'ups', 'us', 'use', 'used', 'useful', 'usefully', 'usefulness', 'uses', 'using', 'usually', 'v', 'value', 'various', 've', 'very', 'via', 'viz', 'vol', 'vols', 'vs', 'w', 'want', 'wants', 'was', 'wasnt', 'way', 'we', 'wed', 'welcome', 'well', 'went', 'were', 'werent', 'weve', 'what', 'whatever', 'whatll', 'whats', 'when', 'whence', 'whenever', 'where', 'whereafter', 'whereas', 'whereby', 'wherein', 'wheres', 'whereupon', 'wherever', 'whether', 'which', 'while', 'whim', 'whither', 'who', 'whod', 'whoever', 'whole', 'wholl', 'whom', 'whomever', 'whos', 'whose', 'why', 'widely', 'willing', 'wish', 'with', 'within', 'without', 'wont', 'words', 'world', 'would', 'wouldnt', 'www', 'x', 'y', 'yes', 'yet', 'you', 'youd', 'youll', 'your', 'youre', 'yours', 'yourself', 'yourselves', 'youve', 'z', 'zero', 'isn', 'doesn', 'didn', 'couldn', 'mustn', 'shoudn', 'wasn', 'woudn', 'i', 'me', 
'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', 'should', 'now', 'gonna', "n't", '-lrb-', '-rrb-', "'m", "'ll", "'re", "'s", "'ve", '&'], titles=[u'admiral', u'archbishop', u'alan', u'merrill', u'sarah', 'queen', u'king', u'sen', u'chancellor', u'prime minister', 'cardinal', u'bishop', u'father', u'hon', u'rev', u'reverend', 'pope', u'sir', u'doctor', u'professor', u'president', 'senator', u'congressman', u'congresswoman', u'mr', u'ms', 'mrs', u'miss', u'dr', u'bill', u'hillary', u'hillary rodham', 'saddam', u'osama', u'ayatollah', u'george', u'george w', 'mitt', u'malcolm', u'barack', u'ronald', u'john', u'john f', 'william', u'al', u'bob'], whpro=[u'who', u'what', u'why', u'where', u'when', u'how'])

wordlists(pronouns, conjunctions, articles, determiners, prepositions, connectors, modals, closedclass, stopwords, titles, whpro)

Systemic functional process types

Inflected verb forms for systemic functional process types.

corpkit.dictionaries.process_types.processes

Stopwords

A list of arbitrary stopwords.

corpkit.dictionaries.stopwords.stopwords

Systemic/dependency label conversion

Translation from systemic-functional roles to dependency labels: each role maps to a list of dependency label patterns (some are regular expressions).

corpkit.dictionaries.roles.roles = roles(actor=['agent', 'agent', 'csubj', 'nsubj'], adjunct=['(prep|nmod)(_|:).*', 'advcl', 'advmod', 'agent', 'tmod'], any=['acl', 'acl(_|:)relcl', 'advcl', 'advmod', 'amod', 'appos', 'aux', 'auxpass', 'case', 'cc', 'cc:preconj', 'ccomp', 'compound', 'compound:prt', 'conj', 'cop', 'csubj', 'csubjpass', 'dep', 'det', 'det:predet', 'discourse', 'dislocated', 'dobj', 'expl', 'foreign', 'goeswith', 'iobj', 'list', 'mark', 'mwe', 'name', 'neg', 'nmod', 'nmod:npmod', 'nmod:poss', 'nmod:tmod', 'nsubj', 'nsubjpass', 'nummod', 'parataxis', 'punct', 'remnant', 'reparandum', 'root', 'vocative', 'xcomp'], auxiliary=['aux', 'auxpass'], circumstance=['(prep|nmod)(_|:).*', 'advmod', 'pobj', 'tmod'], classifier=['compound', 'nn'], complement=['acomp', 'dobj', 'iobj'], deictic=['det', 'poss', 'possessive', 'preconj', 'predet'], epithet=['amod'], event=['acl', 'acl(_|:)relcl', 'advcl', 'ccomp', 'cop', 'root'], existential=['expl'], finite=['aux'], goal=['acomp', 'csubjpass', 'dobj', 'iobj', 'nsubjpass'], modal=['aux', 'auxpass'], modifier=['acl(_|:)relcl', 'advmod', 'amod', 'compound', 'nmod.*', 'nn'], numerative=['number', 'quantmod'], participant=['acomp', 'agent', 'appos', 'csubj', 'csubjpass', 'dobj', 'iobj', 'nsubj', 'nsubjpass', 'xcomp', 'xsubj'], participant1=['agent', 'csubj', 'nsubj'], participant2=['acomp', 'csubjpass', 'dobj', 'iobj', 'nsubjpass', 'xcomp'], polarity=['neg'], postmodifier=['acl(_|:)relcl', 'nmod:.*'], predicator=['ccomp', 'cop', 'root'], premodifier=['amod', 'compound', 'nmod', 'nn'], process=['acl', 'acl(_|:)relcl', 'advcl', 'aux', 'auxpass', 'ccomp', 'cop', 'prt', 'root'], qualifier=['rcmod', 'vmod'], subject=['csubj', 'csubjpass', 'nsubj', 'nsubjpass'], textual=['cc', 'mark', 'ref'], thing=['(prep|nmod)(_|:).*', 'agent', 'appos', 'csubj', 'csubjpass', 'dobj', 'iobj', 'nsubj', 'nsubjpass', 'pobj', 'tmod'])

roles(actor, adjunct, any, auxiliary, circumstance, classifier, complement, deictic, epithet, event, existential, finite, goal, modal, modifier, numerative, participant, participant1, participant2, polarity, postmodifier, predicator, premodifier, process, qualifier, subject, textual, thing)

BNC reference corpus

British National Corpus (BNC) word frequency list.

corpkit.dictionaries.bnc.bnc

Spelling conversion

A dict with U.S. English spellings as keys, U.K. spellings as values.

corpkit.dictionaries.word_transforms.usa_convert