aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTa180m2020-05-30 14:55:49 +0000
committerrepl.it user2020-05-30 14:55:49 +0000
commit6e16251dae537324c57fef1b54668f07e597c671 (patch)
tree599cebbb7e50423f0d2405cd1d92ac9a4b069072
parent9895a8346b3f75a1427dcb78f36e67c440ee0139 (diff)
Latest version
-rw-r--r--.replit2
-rw-r--r--adjlist427
-rw-r--r--src/main.cpp71
3 files changed, 293 insertions, 207 deletions
diff --git a/.replit b/.replit
index 330a26e..6ba928f 100644
--- a/.replit
+++ b/.replit
@@ -1,2 +1,2 @@
language = "cpp"
-run = "g++ src/*.cpp -o compress -std=c++1y; echo Compilation done; ./compress soc1.txt 89"
+run = "g++ src/*.cpp -o compress -std=c++1y; echo Compilation done; ./compress soc1.txt 30"
diff --git a/adjlist b/adjlist
index c3254d8..5c33973 100644
--- a/adjlist
+++ b/adjlist
@@ -1,200 +1,227 @@
-good 231605
-other 215885
-more 204207
-new 184735
-many 151831
-first 122599
-great 112203
-such 96282
-own 80354
-few 79347
-same 79274
-high 78209
-last 77135
-most 69223
-different 68446
-small 61453
-large 59995
-important 58807
-next 57079
-big 56641
-little 53993
-old 53835
-social 53217
-able 52787
-available 52513
-online 52116
-free 48563
-long 47269
-easy 47229
-local 45900
-much 44002
-several 41300
-full 41250
-real 41198
-sure 39843
-public 39216
-possible 38240
-least 37647
-bad 37080
-personal 35343
-low 35335
-late 33974
-young 33739
-hard 33348
-current 31799
-only 31724
-right 31587
-second 30897
-early 30367
-special 30135
-simple 30023
-major 27928
-human 27633
-short 26420
-strong 26274
-true 25912
-open 25816
-whole 25675
-less 25493
-financial 25155
-common 25046
-due 24836
-top 24722
-past 24478
-various 23942
-certain 23903
-recent 23392
-single 23271
-political 22713
-clear 22560
-specific 22495
-main 22323
-particular 22136
-happy 21700
-similar 21546
-natural 21203
-interesting 21086
-national 21039
-American 20879
-private 20479
-international 20327
-difficult 19955
-effective 19781
-unique 19645
-professional 19607
-perfect 19566
-economic 19529
-additional 19480
-key 19379
-mobile 19355
-original 18890
-nice 18845
-medical 18745
-third 18696
-entire 18664
-likely 18598
-necessary 18475
-global 18339
-general 18271
-popular 18118
-successful 18021
-beautiful 17655
-wrong 17654
-significant 17540
-legal 17391
-enough 17266
-final 17107
-healthy 16889
-white 16680
-ready 16642
-huge 16599
-interested 16547
-wide 16201
-former 15669
-safe 15532
-close 15450
-traditional 15384
-amazing 15252
-future 15195
-individual 15147
-physical 15039
-basic 14808
-complete 14715
-positive 14316
-black 14289
-federal 14283
-digital 14120
-deep 14066
-potential 14046
-useful 13966
-regular 13955
-hot 13834
-further 13751
-previous 13639
-serious 13514
-multiple 13493
-extra 13462
-excellent 13356
-poor 13322
-responsible 13308
-wonderful 13284
-quick 13281
-modern 13223
-daily 13007
-active 12973
-critical 12920
-favorite 12918
-annual 12761
-powerful 12745
-total 12661
-creative 12500
-appropriate 12418
-green 12219
-worth 12174
-normal 11877
-actual 11801
-fresh 11796
-fine 11705
-direct 11408
-present 11321
-cheap 11305
-military 11086
-rich 10952
-primary 10817
-relevant 10754
-essential 10702
-environmental 10660
-aware 10548
-fast 10518
-cool 10513
-corporate 10490
-red 10373
-technical 10346
-overall 10329
-light 10255
-live 10218
-independent 10022
-commercial 9957
-complex 9863
-average 9636
-cultural 9401
-dark 9233
-sexual 9229
-foreign 9166
-standard 8997
-educational 8949
-awesome 8937
-expensive 8934
-numerous 8871
-clean 8840
-proper 8724
-cold 8723
-academic 8657
-heavy 8588
-mental 8586
-initial 8566
-central 8561
-video 8554
-negative 8494
-exciting 8444
+Shy
+Bad
+Odd
+Real
+Hurt
+Rich
+Drab
+Dull
+Easy
+Sore
+Kind
+Lazy
+Blue
+Evil
+Long
+Tame
+Fair
+Busy
+Calm
+Fine
+Nice
+Ugly
+Open
+Vast
+Good
+Wild
+Poor
+Cute
+Dark
+Dead
+Zany
+Alert
+Alive
+Angry
+Scary
+Dizzy
+Shiny
+Itchy
+Silly
+Eager
+Awful
+Jolly
+Black
+Light
+Super
+Bored
+Lucky
+Brave
+Tense
+Misty
+Tasty
+Fancy
+Muddy
+Mushy
+Tired
+Tough
+Nasty
+Clean
+Frail
+Clear
+Nutty
+Funny
+Upset
+Crazy
+Weary
+Plain
+Cruel
+Witty
+Happy
+Wrong
+Proud
+Homely
+Quaint
+Hungry
+Amused
+Sleepy
+Smoggy
+Elated
+Joyous
+Better
+Stormy
+Stupid
+Bloody
+Lively
+Lonely
+Lovely
+Brainy
+Tender
+Bright
+Famous
+Modern
+Fierce
+Filthy
+Clever
+Cloudy
+Clumsy
+Gentle
+Gifted
+Creepy
+Wicked
+Grumpy
+Poised
+Putrid
+Defiant
+Annoyed
+Selfish
+Anxious
+Ashamed
+Jealous
+Smiling
+Average
+Jittery
+Elegant
+Strange
+Envious
+Excited
+Careful
+Foolish
+Fragile
+Naughty
+Nervous
+Ugliest
+Frantic
+Unusual
+Uptight
+Panicky
+Perfect
+Crowded
+Curious
+Worried
+Healthy
+Prickly
+Helpful
+Puzzled
+Zealous
+Homeless
+Adorable
+Horrible
+Relieved
+Distinct
+Annoying
+Doubtful
+Innocent
+Arrogant
+Splendid
+Spotless
+Blushing
+Talented
+Faithful
+Terrible
+Thankful
+Cautious
+Charming
+Troubled
+Cheerful
+Friendly
+Obedient
+Colorful
+Gleaming
+Confused
+Glorious
+Gorgeous
+Graceful
+Grieving
+Pleasant
+Handsome
+Powerful
+Precious
+Helpless
+Defeated
+Depressed
+Agreeable
+Different
+Difficult
+Repulsive
+Disgusted
+Important
+Disturbed
+Sparkling
+Beautiful
+Energetic
+Blue-eyed
+Expensive
+Exuberant
+Breakable
+Fantastic
+Unsightly
+Obnoxious
+Combative
+Concerned
+Glamorous
+Condemned
+Vivacious
+Wandering
+Grotesque
+Wide-eyed
+Worrisome
+Dangerous
+Hilarious
+Delightful
+Aggressive
+Determined
+Impossible
+Attractive
+Enchanting
+Bewildered
+Successful
+Motionless
+Thoughtful
+Mysterious
+Frightened
+Outrageous
+Victorious
+Courageous
+Adventurous
+Inexpensive
+Inquisitive
+Embarrassed
+Encouraging
+Magnificent
+Thoughtless
+Comfortable
+Outstanding
+Cooperative
+Enthusiastic
+Uninterested
+Old-fashioned
diff --git a/src/main.cpp b/src/main.cpp
index 3d5562a..7b1546e 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -14,13 +14,50 @@ struct weightstruct
return weightstruct(word+w.word,clause+w.clause,sentence+w.sentence);
}
};
+map<string,int> wordlist;
+set<string> adjlist;
+void getlists()
+{
+ ifstream listfile("fullist");
+ for(int i=0;i<500;i++)
+ {
+ string a;int b;
+ listfile>>a>>b;
+ wordlist[a]=b;
+ }
+ ifstream listfileadj("adjlist");
+ for(int i=0;i<227;i++)
+ {
+ string a;
+ listfileadj>>a;
+ adjlist.insert(a);
+ }
+}
weightstruct getweight(string s)
{
if(s[0]=='$')
- return weightstruct(200,100,100);
- if(s=="one"||s=="two"||s=="three"||s=="four"||s=="five"||s=="six"||s=="seven"||s=="eight"||s=="nine")return weightstruct(100,50,50);
- if(s=="example")return weightstruct(-300,-200,-200);
+ return weightstruct(200,50,10);
+ if(s=="one"||s=="two"||s=="three"||s=="four"||s=="five"||s=="six"||s=="seven"||s=="eight"||s=="nine")return weightstruct(100,25,10);
+ if(s=="example")return weightstruct(-300,-200,-100);
+
+
+
+ if(s=="spi")
+ return weightstruct(-100,-10,-200);
+ if(s=="ipf")
+ return weightstruct(-100,30,50);
+ if(s=="please") return weightstruct(-300,0,0);
+ if(adjlist.find(s)!=adjlist.end())
+ {
+ return weightstruct(-100,-10,-10);
+ }
+ if(wordlist.find(s)!=wordlist.end())
+ {
+ return weightstruct(-wordlist[s]/100000,-10,-5);
+ }
+
+ /*
if(s=="a"||s=="is"||s=="are"||s=="to"||s=="so"||s=="as") return weightstruct(-200,0,0);
if(s=="the")
return weightstruct(-200,0,0);
@@ -38,6 +75,14 @@ void preprocessword(int stage,string& s,string& next,int pos)
{
if(s[0]=='$')
s=s.substr(1,s.length()-2);
+ if(s.length()>5)
+ {
+ string tmp;
+ for(char x:s)
+ if(x!='a'&&x!='e'&&x!='i'&&x!='o'&&x!='u')
+ tmp+=x;
+ s=tmp;
+ }
}
if(stage==0)
{
@@ -48,6 +93,19 @@ void preprocessword(int stage,string& s,string& next,int pos)
}
if(stage==1)
{
+ if(s=="sample"&&next=="input")
+ s="spi",next="";
+ if(s=="sample"&&next=="output")
+ s="spo",next="";
+ if(s=="input"&&next=="format")
+ s="ipf",next="";
+ if(s=="output"&&next=="format")
+ s="opf",next="";
+
+ if(s=="input")
+ s="ipt";
+ if(s=="output")
+ s="opt";
if(s=="and")
s=",";
if(s.size()>=2)
@@ -94,7 +152,7 @@ struct clause
}
else
{
- if(s.at(j)!=' ')
+ if(s.at(j)!=' '&&s.at(j)!='\"'&&s.at(j)!='\"')
word+=(s.at(j));
//if(('a'<=s.at(j)&&s.at(j)<='z')||('A'<=s.at(j)&&s.at(j)<='Z')||('0'<=s.at(j)&&s.at(j)<='9')||(iseq&&s.at(j)!=' '))
// word+=('A'<=s.at(j)&&s.at(j)<='Z')?(s.at(j)-'A'+'a'):(s.at(j));
@@ -156,7 +214,7 @@ struct sentence
for(int j=0;j<s.length();j++)
{
if(s.at(j)=='$') iseq=!iseq;
- if(s.at(j)!=','|| iseq) cl+=s.at(j);
+ if((s.at(j)!=','&&s.at(j)!='('&&s.at(j)!=')'&&s.at(j)!=';'&&s.at(j)!=':')|| iseq) cl+=s.at(j);
else
{
clause tmp;
@@ -313,6 +371,7 @@ int compressrate;
int targetsize;
int main(int argc, char *argv[]) {
+ getlists();
string input="";
string input_file = argv[1];
@@ -371,7 +430,7 @@ int main(int argc, char *argv[]) {
output.calcweight();
output.preprocess(-1);
string s = output.gettextstring(-100000);
- //cout << s << '\n';
+ cout << s << '\n';
//where can i find a library of words in categories can't find any
// not really sure