"""Scrabble word-value and top-director helpers, re-flowed from a collapsed diff.

The original text is a git diff adding five files whose line breaks were lost.
The code of those files is reproduced here as one module; a banner comment
marks where each file's content starts. Imports between the files are dropped
because every name now lives in this module.
"""
import csv
import re
import unittest
from collections import defaultdict, namedtuple
from typing import Iterable, Optional

# Public API for ``import *``: the library names only, not the test suites
# (which need '../dictionary.txt' and '../movie_metadata.csv' to be present).
__all__ = [
    'DICTIONARY', 'LETTER_SCORES', 'scrabble_scores', 'CHARS_REGEX',
    'load_words', 'calc_word_value', 'max_word_value',
    'MOVIE_DATA', 'NUM_TOP_DIRECTORS', 'MIN_MOVIES', 'MIN_YEAR',
    'Director', 'Movie', 'get_movies_by_director', 'print_results', 'main',
]


# --------------------------------------------------------------------------
# 01/jread/data.py
# --------------------------------------------------------------------------
DICTIONARY = '../dictionary.txt'

# (score, space-separated letters worth that score)
scrabble_scores = [(1, "E A O I N R T L S U"), (2, "D G"), (3, "B C M P"),
                   (4, "F H V W Y"), (5, "K"), (8, "J X"), (10, "Q Z")]
# Flattened lookup: upper-case letter -> score
LETTER_SCORES = {letter: score for score, letters in scrabble_scores
                 for letter in letters.split()}


# --------------------------------------------------------------------------
# 01/jread/wordvalue.py
# --------------------------------------------------------------------------
CHARS_REGEX = re.compile(r'[a-zA-Z]+')


def load_words(path: Optional[str] = None) -> list:
    """
    Loads words from a dictionary file into a list
    (trailing newline characters are stripped)
    @param path: optional file to read; defaults to the DICTIONARY constant
    @return: the words in the file as a list, one entry per line
    """
    if path is None:
        path = DICTIONARY
    # Explicit encoding so the result does not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]


def calc_word_value(word: str) -> int:
    """
    Calculates the scrabble value of the word specified
    @param word: the word to calculate the scrabble value for; characters
                 outside a-z / A-Z are ignored, case is ignored
    @return: the scrabble value (an integer)
    """
    # CHARS_REGEX keeps only ASCII letters, so every upper-cased character is
    # guaranteed to be a key of LETTER_SCORES.
    letters = ''.join(CHARS_REGEX.findall(word))
    return sum(LETTER_SCORES[c.upper()] for c in letters)


def max_word_value(dictionary: Optional[Iterable[str]] = None) -> str:
    """
    Determine the word in dictionary which provides the highest scrabble score
    @param dictionary: an optional iterable of alternate words to check.
                       If not specified, load_words() will be used
    @return: the first word with the highest scrabble score, or '' when no
             word scores above 0 (including an empty dictionary)
    """
    words = load_words() if dictionary is None else dictionary
    best_word = ''
    best_value = 0
    for candidate in words:
        candidate_value = calc_word_value(candidate)
        # Strict '>' keeps the earliest word on ties.
        if candidate_value > best_value:
            best_word, best_value = candidate, candidate_value
    return best_word


# --------------------------------------------------------------------------
# 01/jread/test_wordvalue.py
# --------------------------------------------------------------------------
TEST_WORDS = ('bob', 'julian', 'pybites', 'quit', 'barbeque')


class TestWordValue(unittest.TestCase):
    """Checks for the word-value helpers; the full-dictionary cases read DICTIONARY."""

    def test_load_words(self):
        words = load_words()
        self.assertEqual(len(words), 235886)
        self.assertEqual(words[0], 'A')
        self.assertEqual(words[-1], 'Zyzzogeton')
        self.assertNotIn(' ', ''.join(words))

    def test_calc_word_value(self):
        self.assertEqual(calc_word_value('bob'), 7)
        self.assertEqual(calc_word_value('JuliaN'), 13)
        self.assertEqual(calc_word_value('PyBites'), 14)
        self.assertEqual(calc_word_value('benzalphenylhydrazone'), 56)

    def test_max_word_value(self):
        self.assertEqual(max_word_value(TEST_WORDS), 'barbeque')
        self.assertEqual(max_word_value(), 'benzalphenylhydrazone')


# --------------------------------------------------------------------------
# 13/jread/directors.py
# --------------------------------------------------------------------------
MOVIE_DATA = '../movie_metadata.csv'
NUM_TOP_DIRECTORS = 20
MIN_MOVIES = 4
MIN_YEAR = 1960

Director = namedtuple('Director', 'name avg_score movies')
Movie = namedtuple('Movie', 'title year score')


def get_movies_by_director(path=None):
    """
    Group the movies in a CSV file by director.

    Rows whose 'title_year' or 'imdb_score' cannot be parsed as a number are
    skipped, as are movies released before MIN_YEAR. Directors with fewer
    than MIN_MOVIES remaining movies are left out.
    @param path: optional CSV file to read; defaults to the MOVIE_DATA constant
    @return: dict mapping director name -> Director(name, avg_score, movies)
    """
    if path is None:
        path = MOVIE_DATA

    movies = defaultdict(list)
    # newline='' is what the csv module asks for; the explicit encoding keeps
    # the '\xa0' clean-up below independent of the platform default.
    with open(path, encoding='utf-8', newline='') as f:
        for rec in csv.DictReader(f):
            try:
                year = int(rec['title_year'])
                score = float(rec['imdb_score'])
            except ValueError:
                # Missing or malformed year/score: ignore the row.
                continue
            if year < MIN_YEAR:
                continue
            title = rec['movie_title'].replace('\xa0', '')
            movies[rec['director_name']].append(
                Movie(title=title, year=year, score=score))

    return {
        name: Director(name=name, avg_score=_calc_mean(films), movies=films)
        for name, films in movies.items()
        if len(films) >= MIN_MOVIES
    }


def _calc_mean(movies):
    """
    Average the scores of the given movies, rounded to one decimal place.
    @param movies: non-empty sequence of objects with a numeric 'score'
    @return: the rounded mean score
    @raise ZeroDivisionError: if movies is empty
    """
    return round(sum(movie.score for movie in movies) / len(movies), 1)


def print_results(directors, limit=NUM_TOP_DIRECTORS):
    """
    Print the best directors by average score, each followed by their movies
    (best score first).
    @param directors: dict as returned by get_movies_by_director()
    @param limit: how many directors to print (default NUM_TOP_DIRECTORS)
    """
    ranked = sorted(directors.items(),
                    key=lambda item: float(item[1].avg_score), reverse=True)
    for position, (name, director) in enumerate(ranked[:limit], start=1):
        print()
        print(f'{position:02d}. {name:<52} {director.avg_score}')
        print('-' * 60)
        by_score = sorted(director.movies,
                          key=lambda film: float(film.score), reverse=True)
        for movie in by_score:
            print(f'{movie.year}] {movie.title:<50} {movie.score}')


def main():
    """Load the movie data and print the director ranking."""
    directors = get_movies_by_director()
    print_results(directors)


# --------------------------------------------------------------------------
# 13/jread/test_directors.py
# --------------------------------------------------------------------------
def test():
    """
    Check get_movies_by_director() against known values from MOVIE_DATA.
    @return: the string "tests pass" when every assertion holds
    """
    directors = get_movies_by_director()

    assert 'Sergio Leone' in directors
    assert len(directors['Sergio Leone'].movies) == 4
    assert len(directors['Peter Jackson'].movies) == 12

    movies_sergio = directors['Sergio Leone'].movies
    movies_nolan = directors['Christopher Nolan'].movies
    assert _calc_mean(movies_sergio) == 8.5
    assert _calc_mean(movies_nolan) == 8.4

    assert 'Andrew Stanton' not in directors

    expected_directors = ['Sergio Leone', 'Christopher Nolan', 'Quentin Tarantino',
                          'Hayao Miyazaki', 'Frank Darabont', 'Stanley Kubrick']
    expected_avg_scores = [8.5, 8.4, 8.2, 8.2, 8.0, 8.0]
    expected_num_movies = [4, 8, 8, 4, 4, 7]
    report = sorted(directors.items(), key=lambda x: float(x[1].avg_score), reverse=True)
    expectations = zip(expected_directors, expected_avg_scores, expected_num_movies)
    for rank, (name, avg_score, num_movies) in enumerate(expectations):
        assert (report[rank][0], report[rank][1].avg_score) == (name, avg_score)
        assert len(report[rank][1].movies) == num_movies
        assert _calc_mean(report[rank][1].movies) == avg_score

    return "tests pass"


if __name__ == '__main__':
    # Entry points of the four runnable files, gathered under one guard.
    # unittest.main() comes last because it exits the interpreter when done.
    print(max_word_value())
    main()
    print(test())
    unittest.main()