Modularity & Documentation & Automated Testing
pip install pycodestyle
pycodestyle test.py
# Check PEP 8 compliance programmatically with the pycodestyle package.
import pycodestyle
# A StyleGuide instance exposes the checker API.
style_checker = pycodestyle.StyleGuide()
# check_files reports violations for each listed file and returns a report object.
result = style_checker.check_files(['nay_pep8.py', 'yay_pep8.py'])
# messages collected during the check (error code -> description).
print(result.messages)
An `__init__.py` file tells Python that the directory is a package.
You can expose names there, e.g. add `from .document import Document` to `__init__.py`.
It is also a handy place to surface utilities (e.g. from `util.py`) and package classes.

class Document:
    """Represent a text document and its basic word statistics.

    :param text: raw text of the document
    :ivar text: the original text, kept as given
    :ivar tokens: tokens produced from the text by the module-level tokenize()
    :ivar word_counts: Counter mapping each token to its frequency
    """

    def __init__(self, text):
        self.text = text
        # Derived attributes are populated eagerly via non-public helpers.
        self.tokens = self._tokenize()
        self.word_counts = self._count_words()

    def _tokenize(self):
        # Delegate splitting to the module-level tokenize() helper.
        return tokenize(self.text)

    def _count_words(self):
        # Tally token frequencies with collections.Counter.
        return Counter(self.tokens)
class SocialMedia(Document):
    """Analyze text data from social media.

    :param text: social media text to analyze
    :ivar hashtag_counts: Counter object containing counts of #hashtags used in text
    :ivar mention_counts: Counter object containing counts of @mentions used in text
    """

    def __init__(self, text):
        # Let the parent build tokens and word_counts first.
        Document.__init__(self, text)
        self.hashtag_counts = self._count_hashtags()
        self.mention_counts = self._count_mentions()

    def _count_hashtags(self):
        # Keep only the word counts whose token starts with '#'.
        return filter_word_counts(self.word_counts, first_char='#')

    def _count_mentions(self):
        # Keep only the word counts whose token starts with '@'.
        return filter_word_counts(self.word_counts, first_char='@')
Docstring - reStructuredText

# Tokenizer used by Document; the old default regex r'[a-zA-z]+' was buggy:
# the ASCII range A-z also matches '[', '\', ']', '^', '_' and '`'.
def tokenize(text, regex=r'[a-zA-Z]+'):
    """Split text into tokens using a regular expression.

    :param text: text to be tokenized
    :param regex: regular expression used to match tokens using re.findall
    :return: a list of resulting tokens

    >>> tokenize('the rain in spain')
    ['the', 'rain', 'in', 'spain']
    """
    # IGNORECASE makes the default pattern match regardless of letter case.
    return re.findall(regex, text, flags=re.IGNORECASE)
# Display tokenize's docstring via the built-in help system.
help(tokenize)
# Run every doctest found in this module (assumes `import doctest` elsewhere).
doctest.testmod()