about | summary | refs | log | tree | commit | diff
path: root/data.py
diff options
context:
space:
mode:
author: Anthony Wang, 2022-02-21 15:49:29 -0600
committer: Anthony Wang, 2022-02-21 15:49:29 -0600
commit: 13c502ef53e8d16689730dc3ee97a05f78b2ef1e (patch)
tree: 25fce22a08a702bd0860efcefbe0dca14eefe60c /data.py
parent: 176f6f306bc5025f9c821491f139f94fd971050b (diff)
Ignore URLs by ignoring strings that contain a '.' character
Diffstat (limited to 'data.py')
-rw-r--r-- data.py 2
1 file changed, 1 insertion, 1 deletion
diff --git a/data.py b/data.py
index 0a1fbc8..87dbc91 100644
--- a/data.py
+++ b/data.py
@@ -24,7 +24,7 @@ text = [unescape(sub(r'<[^>]*>', ' ', status[2])) for status in statuses]
words = [word for message in text for word in message.split()]
# Remove URLs and special characters and convert to lowercase
words = [sub(r'[^a-z0-9]', '', word.lower())
- for word in words if word.find('://') == -1]
+ for word in words if word.find('.') == -1]
# Remove empty strings
words = [word for word in words if word != '']