diff options
author | Ta180m | 2020-05-30 14:55:49 +0000 |
---|---|---|
committer | repl.it user | 2020-05-30 14:55:49 +0000 |
commit | 6e16251dae537324c57fef1b54668f07e597c671 (patch) | |
tree | 599cebbb7e50423f0d2405cd1d92ac9a4b069072 | |
parent | 9895a8346b3f75a1427dcb78f36e67c440ee0139 (diff) |
Latest version
-rw-r--r-- | .replit | 2 | ||||
-rw-r--r-- | adjlist | 427 | ||||
-rw-r--r-- | src/main.cpp | 71 |
3 files changed, 293 insertions, 207 deletions
@@ -1,2 +1,2 @@ language = "cpp"
-run = "g++ src/*.cpp -o compress -std=c++1y; echo Compilation done; ./compress soc1.txt 89" +run = "g++ src/*.cpp -o compress -std=c++1y; echo Compilation done; ./compress soc1.txt 30" @@ -1,200 +1,227 @@ -good 231605 -other 215885 -more 204207 -new 184735 -many 151831 -first 122599 -great 112203 -such 96282 -own 80354 -few 79347 -same 79274 -high 78209 -last 77135 -most 69223 -different 68446 -small 61453 -large 59995 -important 58807 -next 57079 -big 56641 -little 53993 -old 53835 -social 53217 -able 52787 -available 52513 -online 52116 -free 48563 -long 47269 -easy 47229 -local 45900 -much 44002 -several 41300 -full 41250 -real 41198 -sure 39843 -public 39216 -possible 38240 -least 37647 -bad 37080 -personal 35343 -low 35335 -late 33974 -young 33739 -hard 33348 -current 31799 -only 31724 -right 31587 -second 30897 -early 30367 -special 30135 -simple 30023 -major 27928 -human 27633 -short 26420 -strong 26274 -true 25912 -open 25816 -whole 25675 -less 25493 -financial 25155 -common 25046 -due 24836 -top 24722 -past 24478 -various 23942 -certain 23903 -recent 23392 -single 23271 -political 22713 -clear 22560 -specific 22495 -main 22323 -particular 22136 -happy 21700 -similar 21546 -natural 21203 -interesting 21086 -national 21039 -American 20879 -private 20479 -international 20327 -difficult 19955 -effective 19781 -unique 19645 -professional 19607 -perfect 19566 -economic 19529 -additional 19480 -key 19379 -mobile 19355 -original 18890 -nice 18845 -medical 18745 -third 18696 -entire 18664 -likely 18598 -necessary 18475 -global 18339 -general 18271 -popular 18118 -successful 18021 -beautiful 17655 -wrong 17654 -significant 17540 -legal 17391 -enough 17266 -final 17107 -healthy 16889 -white 16680 -ready 16642 -huge 16599 -interested 16547 -wide 16201 -former 15669 -safe 15532 -close 15450 -traditional 15384 -amazing 15252 -future 15195 -individual 15147 -physical 15039 -basic 14808 -complete 14715 -positive 14316 -black 14289 -federal 14283 -digital 14120 -deep 14066 -potential 14046 -useful 13966 -regular 13955 -hot 13834 -further 13751 -previous 13639 -serious 13514 -multiple 13493 -extra 13462 -excellent 13356 -poor 13322 -responsible 13308 -wonderful 13284 -quick 13281 -modern 13223 -daily 13007 -active 12973 -critical 12920 -favorite 12918 -annual 12761 -powerful 12745 -total 12661 -creative 12500 -appropriate 12418 -green 12219 -worth 12174 -normal 11877 -actual 11801 -fresh 11796 -fine 11705 -direct 11408 -present 11321 -cheap 11305 -military 11086 -rich 10952 -primary 10817 -relevant 10754 -essential 10702 -environmental 10660 -aware 10548 -fast 10518 -cool 10513 -corporate 10490 -red 10373 -technical 10346 -overall 10329 -light 10255 -live 10218 -independent 10022 -commercial 9957 -complex 9863 -average 9636 -cultural 9401 -dark 9233 -sexual 9229 -foreign 9166 -standard 8997 -educational 8949 -awesome 8937 -expensive 8934 -numerous 8871 -clean 8840 -proper 8724 -cold 8723 -academic 8657 -heavy 8588 -mental 8586 -initial 8566 -central 8561 -video 8554 -negative 8494 -exciting 8444 +Shy +Bad +Odd +Real +Hurt +Rich +Drab +Dull +Easy +Sore +Kind +Lazy +Blue +Evil +Long +Tame +Fair +Busy +Calm +Fine +Nice +Ugly +Open +Vast +Good +Wild +Poor +Cute +Dark +Dead +Zany +Alert +Alive +Angry +Scary +Dizzy +Shiny +Itchy +Silly +Eager +Awful +Jolly +Black +Light +Super +Bored +Lucky +Brave +Tense +Misty +Tasty +Fancy +Muddy +Mushy +Tired +Tough +Nasty +Clean +Frail +Clear +Nutty +Funny +Upset +Crazy +Weary +Plain +Cruel +Witty +Happy +Wrong +Proud +Homely +Quaint +Hungry +Amused +Sleepy +Smoggy +Elated +Joyous +Better +Stormy +Stupid +Bloody +Lively +Lonely +Lovely +Brainy +Tender +Bright +Famous +Modern +Fierce +Filthy +Clever +Cloudy +Clumsy +Gentle +Gifted +Creepy +Wicked +Grumpy +Poised +Putrid +Defiant +Annoyed +Selfish +Anxious +Ashamed +Jealous +Smiling +Average +Jittery +Elegant +Strange +Envious +Excited +Careful +Foolish +Fragile +Naughty +Nervous +Ugliest +Frantic +Unusual +Uptight +Panicky +Perfect +Crowded +Curious +Worried +Healthy +Prickly +Helpful +Puzzled +Zealous +Homeless +Adorable +Horrible +Relieved +Distinct +Annoying +Doubtful +Innocent +Arrogant +Splendid +Spotless +Blushing +Talented +Faithful +Terrible +Thankful +Cautious +Charming +Troubled +Cheerful +Friendly +Obedient +Colorful +Gleaming +Confused +Glorious +Gorgeous +Graceful +Grieving +Pleasant +Handsome +Powerful +Precious +Helpless +Defeated +Depressed +Agreeable +Different +Difficult +Repulsive +Disgusted +Important +Disturbed +Sparkling +Beautiful +Energetic +Blue-eyed +Expensive +Exuberant +Breakable +Fantastic +Unsightly +Obnoxious +Combative +Concerned +Glamorous +Condemned +Vivacious +Wandering +Grotesque +Wide-eyed +Worrisome +Dangerous +Hilarious +Delightful +Aggressive +Determined +Impossible +Attractive +Enchanting +Bewildered +Successful +Motionless +Thoughtful +Mysterious +Frightened +Outrageous +Victorious +Courageous +Adventurous +Inexpensive +Inquisitive +Embarrassed +Encouraging +Magnificent +Thoughtless +Comfortable +Outstanding +Cooperative +Enthusiastic +Uninterested +Old-fashioned diff --git a/src/main.cpp b/src/main.cpp index 3d5562a..7b1546e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,13 +14,50 @@ struct weightstruct return weightstruct(word+w.word,clause+w.clause,sentence+w.sentence); } }; +map<string,int> wordlist; +set<string> adjlist; +void getlists() +{ + ifstream listfile("fullist"); + for(int i=0;i<500;i++) + { + string a;int b; + listfile>>a>>b; + wordlist[a]=b; + } + ifstream listfileadj("adjlist"); + for(int i=0;i<227;i++) + { + string a; + listfileadj>>a; + adjlist.insert(a); + } +} weightstruct getweight(string s) { if(s[0]=='$') - return weightstruct(200,100,100); - if(s=="one"||s=="two"||s=="three"||s=="four"||s=="five"||s=="six"||s=="seven"||s=="eight"||s=="nine")return weightstruct(100,50,50); - if(s=="example")return weightstruct(-300,-200,-200); + return weightstruct(200,50,10); + if(s=="one"||s=="two"||s=="three"||s=="four"||s=="five"||s=="six"||s=="seven"||s=="eight"||s=="nine")return weightstruct(100,25,10); + if(s=="example")return weightstruct(-300,-200,-100); + + + + if(s=="spi") + return weightstruct(-100,-10,-200); + if(s=="ipf") + return weightstruct(-100,30,50); + if(s=="please") return weightstruct(-300,0,0); + if(adjlist.find(s)!=adjlist.end()) + { + return weightstruct(-100,-10,-10); + } + if(wordlist.find(s)!=wordlist.end()) + { + return weightstruct(-wordlist[s]/100000,-10,-5); + } + + /* if(s=="a"||s=="is"||s=="are"||s=="to"||s=="so"||s=="as") return weightstruct(-200,0,0); if(s=="the") return weightstruct(-200,0,0); @@ -38,6 +75,14 @@ void preprocessword(int stage,string& s,string& next,int pos) { if(s[0]=='$') s=s.substr(1,s.length()-2); + if(s.length()>5) + { + string tmp; + for(char x:s) + if(x!='a'&&x!='e'&&x!='i'&&x!='o'&&x!='u') + tmp+=x; + s=tmp; + } } if(stage==0) { @@ -48,6 +93,19 @@ void preprocessword(int stage,string& s,string& next,int pos) } if(stage==1) { + if(s=="sample"&&next=="input") + s="spi",next=""; + if(s=="sample"&&next=="output") + s="spo",next=""; + if(s=="input"&&next=="format") + s="ipf",next=""; + if(s=="output"&&next=="format") + s="opf",next=""; + + if(s=="input") + s="ipt"; + if(s=="output") + s="opt"; if(s=="and") s=","; if(s.size()>=2) @@ -94,7 +152,7 @@ struct clause } else { - if(s.at(j)!=' ') + if(s.at(j)!=' '&&s.at(j)!='\"'&&s.at(j)!='\"') word+=(s.at(j)); //if(('a'<=s.at(j)&&s.at(j)<='z')||('A'<=s.at(j)&&s.at(j)<='Z')||('0'<=s.at(j)&&s.at(j)<='9')||(iseq&&s.at(j)!=' ')) // word+=('A'<=s.at(j)&&s.at(j)<='Z')?(s.at(j)-'A'+'a'):(s.at(j)); @@ -156,7 +214,7 @@ struct sentence for(int j=0;j<s.length();j++) { if(s.at(j)=='$') iseq=!iseq; - if(s.at(j)!=','|| iseq) cl+=s.at(j); + if((s.at(j)!=','&&s.at(j)!='('&&s.at(j)!=')'&&s.at(j)!=';'&&s.at(j)!=':')|| iseq) cl+=s.at(j); else { clause tmp; @@ -313,6 +371,7 @@ int compressrate; int targetsize; int main(int argc, char *argv[]) { + getlists(); string input=""; string input_file = argv[1]; @@ -371,7 +430,7 @@ int main(int argc, char *argv[]) { output.calcweight(); output.preprocess(-1); string s = output.gettextstring(-100000); - //cout << s << '\n'; + cout << s << '\n'; //where can i find a library of words in categories can't find any // not really sure |