diff options
author | Anthony Wang | 2022-02-21 14:29:48 -0600 |
---|---|---|
committer | Anthony Wang | 2022-02-21 14:29:48 -0600 |
commit | f0643a44614b68aae22b9d6d20f6d59e59509dbb (patch) | |
tree | f52ad29b56c228de4db2648c17299fb799893a42 /data.py | |
parent | ff086a4dda75a1c27982e2471de0cf2fc99a2f2d (diff) |
Reformat data.py
Diffstat (limited to 'data.py')
-rw-r--r-- | data.py | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
@@ -23,7 +23,8 @@ text = [unescape(sub(r'<[^>]*>', ' ', status[2])) for status in statuses]
 # Extract all words from statuses
 words = [word for message in text for word in message.split()]
 # Remove URLs and special characters and convert to lowercase
-words = [sub(r'[^a-z0-9]', '', word.lower()) for word in words if word.find('://') == -1]
+words = [sub(r'[^a-z0-9]', '', word.lower())
+         for word in words if word.find('://') == -1]
 # Remove empty strings
 words = [word for word in words if word != '']
 
```