1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
|
from argparse import ArgumentParser
from random import randint, choice
from torch import float16
from transformers import AutoTokenizer, AutoModelForCausalLM
# Command-line interface. Each entry pairs an option's flags with the keyword
# arguments passed to ArgumentParser.add_argument; entries are registered in
# the order listed here.
_CLI_OPTIONS = (
    (('-b', '--backend'), {
        'choices': ['mastodon', 'misskey', 'matrix', 'none'],
        'default': 'mastodon',
        'help': 'fediverse server type',
    }),
    (('-i', '--instance'), {'help': 'Mastodon instance hosting the bot'}),
    (('-t', '--token'), {'help': 'Mastodon application access token'}),
    (('-n', '--input'), {'help': 'initial input text'}),
    (('-d', '--data'), {
        'default': 'data',
        'help': 'data for automatic input generation',
    }),
    (('-m', '--model'), {
        'default': 'model',
        'help': 'path to load saved model',
    }),
)

parser = ArgumentParser()
for _flags, _settings in _CLI_OPTIONS:
    parser.add_argument(*_flags, **_settings)

# Parsed options; the rest of the script reads (and may fill in) args.input,
# args.data, args.model, args.backend, args.instance and args.token.
args = parser.parse_args()
# The tokenizer always comes from the stock 'gpt2-large' checkpoint, while the
# model weights are loaded from the path given by --model.
tokenizer = AutoTokenizer.from_pretrained('gpt2-large')

# Load the weights as float16, then move the model to the CUDA device.
model = AutoModelForCausalLM.from_pretrained(args.model, torch_dtype=float16)
model = model.to('cuda')
if args.input is None:
    # Create random input: no prompt was supplied on the command line.
    if randint(0, 1) == 0:
        # Half of the time, start from one of these canned openers.
        args.input = choice([
            'I am',
            'My life is',
            'Computers are',
            'This is',
            'My',
            'I\'ve',
            'No one',
            'I love',
            'I will die of',
            'I',
            'The',
            'Anime',
            'I\'m going to die',
            'Hello',
            '@ta180m@exozy.me',
            'Life',
            'My favorite',
            'I\'m not',
            'I hate',
            'I think',
            'In my opinion',
            'Breaking news:',
            'Have I ever told you that',
            'I read on the news that',
            'I never knew that',
            'My dream is',
            'It\'s terrible that'
        ])
    else:
        # Otherwise, use the first two words of a random line of the data file.
        # Read as UTF-8 explicitly so the result does not depend on the locale.
        with open(args.data, 'r', encoding='utf-8') as f:
            # Keep only lines with at least two whitespace-separated words.
            # Filtering up front (instead of re-drawing until a long enough
            # line turns up) gives the same uniform choice among valid lines
            # but cannot spin forever when no such line exists.
            candidates = [
                words for words in map(str.split, f) if len(words) >= 2
            ]
        if not candidates:
            raise ValueError(
                f'{args.data!r} contains no line with at least two words'
            )
        # Remove mentions: a word holding more than one '@' is cut just before
        # its second '@', so '@user@domain' becomes '@user'.
        # NOTE(review): presumably this avoids pinging remote accounts - confirm.
        first_two = [
            '@'.join(word.split('@')[0:2]) if word.count('@') > 1 else word
            for word in choice(candidates)[:2]
        ]
        args.input = first_two[0] + ' ' + first_two[1]
# Run the input through the model
print(args.input)
inputs = tokenizer.encode(args.input, return_tensors='pt').to('cuda')
# Sample one continuation of the prompt (do_sample with top_p=0.9, bounded by
# max_length=150) and keep the first returned sequence.
generated = model.generate(
    inputs, do_sample=True, max_length=150, top_p=0.9)[0]
# skip_special_tokens keeps tokenizer markers (such as an end-of-text token
# emitted when generation stops early) out of the text that gets posted.
output = tokenizer.decode(generated, skip_special_tokens=True)
print(output)

# Prepare the post: take the first line of the generated text, and append the
# second line as well when the first one is shorter than 200 characters.
output = output.split('\n')
post = output[0]
if len(post) < 200 and len(output) > 1:
    post = output[0] + '\n' + output[1]
# Hard cap on the post length.
# NOTE(review): 500 presumably mirrors the Mastodon character limit - confirm.
post = post[:500]
# Post it!
# Each helper imports its client library lazily, so only the package for the
# selected backend has to be installed.
def _post_to_mastodon(instance, token, text):
    """Publish *text* with Mastodon.status_post on *instance* using *token*."""
    from mastodon import Mastodon
    client = Mastodon(access_token=token, api_base_url=instance)
    client.status_post(text)


def _post_to_misskey(instance, token, text):
    """Publish *text* with Misskey.notes_create on *instance* using *token*."""
    from Misskey import Misskey
    client = Misskey(instance, i=token)
    client.notes_create(text)


def _post_to_matrix(instance, token, text):
    """Run a simplematrixbotlib bot whose startup listener sends *text*."""
    import simplematrixbotlib as botlib
    # NOTE(review): 'ebooks' is presumably the bot's account name - confirm.
    credentials = botlib.Creds(instance, 'ebooks', token)
    bot = botlib.Bot(credentials)

    # Registered as a startup listener; sends the post to the room id it is
    # called with.
    @bot.listener.on_startup
    async def room_joined(room_id):
        await bot.api.send_text_message(room_id=room_id, message=text)

    bot.run()


# The 'none' backend has no entry here, so nothing is posted for it.
_BACKENDS = {
    'mastodon': _post_to_mastodon,
    'misskey': _post_to_misskey,
    'matrix': _post_to_matrix,
}

_publish = _BACKENDS.get(args.backend)
if _publish is not None:
    _publish(args.instance, args.token, post)
|