is_utf8.cpp 242 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
762786279628062816282628362846285628662876288628962906291629262936294629562966297629862996300630163026303630463056306630763086309631063116312631363146315631663176318631963206321632263236324632563266327632863296330633163326333633463356336633763386339634063416342634363446345634663476348634963506351635263536354635563566357635863596360636163626363636463656366636763686369637063716372637363746375637663776378637963806381638263836384638563866387638863896390639163926393639463956396639763986399640064016402640364046405640664076408640964106411641264136414641564166417641864196420642164226423642464256426642764286429643064316432643364346435643664376438643964406441644264436444644564466447644864496450645164526453645464556456645764586459646064616462646364646465646664676468646964706471647264736474647564766477647864796480648164826483648464856486648764886489649064916492649364946495649664976498649965006501650265036504650565066507650865096510651165126513651465156516651765186519652065216522652365246525652665276528652965306531653265336534653565366537653865396540654165426543654465456546654765486549655065516552655365546555655665576558655965606561656265636564656565666567656865696570657165726573657465756576657765786579658065816582658365846585658665876588658965906591659265936594659565966597659865996600660166026603660466056606660766086609661066116612661366146615661666176618661966206621662266236624662566266627662866296630663166326633663466356636663766386639664066416642664366446645664666476648664966506651665266536654665566566657665866596660666166626663666466656666666766686669667066716672667366746675667666776678667966806681668266836684668566866687668866896690669166926693669466956696669766986699670067016702670367046705670667076708670967106711671267136714671567166717671867196720672167226723672467256726672767286729673067316732673367346735673667376738673967406741674267436744674567466747674867496750675167526753
  1. #ifndef IS_UTF8_H
  2. #define IS_UTF8_H
  3. #include <cstring>
  #ifndef IS_UTF8_COMPILER_CHECK_H
  #define IS_UTF8_COMPILER_CHECK_H

  // This library is C++ only: stop with a readable diagnostic otherwise.
  #ifndef __cplusplus
  #error we require a C++ compiler
  #endif

  // IS_UTF8_CPLUSPLUS is the language-standard value tested below.
  // It can be predefined by the build to override the detection.
  #ifndef IS_UTF8_CPLUSPLUS
  #if defined(_MSVC_LANG) && !defined(__clang__)
  // _MSVC_LANG is available and this is not clang: use it instead of
  // __cplusplus, except that _MSC_VER 1900 is pinned to C++11.
  #define IS_UTF8_CPLUSPLUS (_MSC_VER == 1900 ? 201103L : _MSVC_LANG)
  #else
  #define IS_UTF8_CPLUSPLUS __cplusplus
  #endif
  #endif

  // Feature-level flags. Each one is defined to 1 when the detected standard
  // is at least that level, unless the flag was already defined.
  // C++ 17
  #if !defined(IS_UTF8_CPLUSPLUS17) && (IS_UTF8_CPLUSPLUS >= 201703L)
  #define IS_UTF8_CPLUSPLUS17 1
  #endif
  // C++ 14
  #if !defined(IS_UTF8_CPLUSPLUS14) && (IS_UTF8_CPLUSPLUS >= 201402L)
  #define IS_UTF8_CPLUSPLUS14 1
  #endif
  // C++ 11
  #if !defined(IS_UTF8_CPLUSPLUS11) && (IS_UTF8_CPLUSPLUS >= 201103L)
  #define IS_UTF8_CPLUSPLUS11 1
  #endif

  // C++11 is the minimum supported standard.
  #ifndef IS_UTF8_CPLUSPLUS11
  #error we require a compiler compliant with the C++11 standard
  #endif

  #endif // IS_UTF8_COMPILER_CHECK_H
  32. #ifndef IS_UTF8_COMMON_DEFS_H
  33. #define IS_UTF8_COMMON_DEFS_H
  34. #include <cassert>
  35. #ifndef IS_UTF8_PORTABILITY_H
  36. #define IS_UTF8_PORTABILITY_H
  37. #include <cassert>
  38. #include <cfloat>
  39. #include <cstddef>
  40. #include <cstdint>
  41. #include <cstdlib>
  42. #ifndef _WIN32
  43. // strcasecmp, strncasecmp
  44. #include <strings.h>
  45. #endif
  #ifdef _MSC_VER
  #define IS_UTF8_VISUAL_STUDIO 1
  /**
   * We want to differentiate carefully between clang under Visual Studio
   * and regular Visual Studio.
   *
   * Under clang for Windows, we enable:
   *  * target pragmas, so that part and only part of the code gets compiled
   *    for advanced instructions.
   */
  #ifdef __clang__
  // clang under Visual Studio
  #define IS_UTF8_CLANG_VISUAL_STUDIO 1
  #else
  // just regular Visual Studio (best guess)
  #define IS_UTF8_REGULAR_VISUAL_STUDIO 1
  #endif // __clang__
  #endif // _MSC_VER

  #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  // https://en.wikipedia.org/wiki/C_alternative_tokens
  // This header should have no effect, except maybe under Visual Studio.
  #include <iso646.h>
  #endif
  // Architecture detection: at most one of IS_UTF8_IS_X86_64,
  // IS_UTF8_IS_ARM64 or IS_UTF8_IS_32BITS is defined by this section.
  #if defined(__x86_64__) || defined(_M_AMD64)
  #define IS_UTF8_IS_X86_64 1
  #elif defined(__aarch64__) || defined(_M_ARM64)
  #define IS_UTF8_IS_ARM64 1
  #elif defined(__PPC64__) || defined(_M_PPC64)
  // No IS_UTF8_IS_PPC64 flag is defined for now; only a build-time note.
  //#define IS_UTF8_IS_PPC64 1
  #pragma message("The library does not yet support SIMD acceleration under POWER processors.")
  #else
  #define IS_UTF8_IS_32BITS 1
  // We do not support 32-bit platforms, but it can be
  // handy to identify them.
  #if defined(_M_IX86) || defined(__i386__)
  #define IS_UTF8_IS_X86_32BITS 1
  #elif defined(__arm__) || defined(_M_ARM)
  #define IS_UTF8_IS_ARM_32BITS 1
  #elif defined(__PPC__) || defined(_M_PPC)
  #define IS_UTF8_IS_PPC_32BITS 1
  #endif
  #endif // defined(__x86_64__) || defined(_M_AMD64)
  // Two-level stringification: IS_UTF8_STRINGIFY(a) macro-expands its argument
  // first and then turns the result into a string literal, whereas
  // IS_UTF8_STRINGIFY_IMPLEMENTATION_(a) stringifies the argument as written.
  #define IS_UTF8_STRINGIFY_IMPLEMENTATION_(a) #a
  #define IS_UTF8_STRINGIFY(a) IS_UTF8_STRINGIFY_IMPLEMENTATION_(a)

  // We are going to use runtime dispatch.
  // IS_UTF8_TARGET_REGION(T) / IS_UTF8_UNTARGET_REGION bracket code that must
  // be compiled for the instruction-set target named by the string T.
  #ifdef IS_UTF8_IS_X86_64
  #ifdef __clang__
  // clang does not have GCC push pop
  // warning: clang attribute push can't be used within a namespace in clang up
  // til 8.0 so IS_UTF8_TARGET_REGION and IS_UTF8_UNTARGET_REGION must be
  // *outside* of a namespace.
  #define IS_UTF8_TARGET_REGION(T) \
    _Pragma(IS_UTF8_STRINGIFY( \
        clang attribute push(__attribute__((target(T))), apply_to = function)))
  #define IS_UTF8_UNTARGET_REGION _Pragma("clang attribute pop")
  #elif defined(__GNUC__)
  // GCC is easier
  #define IS_UTF8_TARGET_REGION(T) \
    _Pragma("GCC push_options") _Pragma(IS_UTF8_STRINGIFY(GCC target(T)))
  #define IS_UTF8_UNTARGET_REGION _Pragma("GCC pop_options")
  #endif // clang then gcc
  #endif // x86

  // Default target region macros don't do anything.
  #ifndef IS_UTF8_TARGET_REGION
  #define IS_UTF8_TARGET_REGION(T)
  #define IS_UTF8_UNTARGET_REGION
  #endif
  // IS_UTF8_GCC11ORMORE is defined when __GNUC__ is defined with a value of at
  // least 11 and __clang__ is not defined.
  #if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 11)
  #define IS_UTF8_GCC11ORMORE 1
  #endif // defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 11)
  123. #endif // IS_UTF8_PORTABILITY_H
  #ifndef IS_UTF8_AVX512_H_
  #define IS_UTF8_AVX512_H_
  /*
    It is possible to override the AVX-512 settings with cmake
    -DCMAKE_CXX_FLAGS. All preprocessor definitions have the form
    `IS_UTF8_HAS_AVX512{feature}`, where {feature} is the code name of an
    extension. See the listing below for the supported ones.

    Each flag is defined to 1 when the corresponding compiler macro is defined
    and equal to 1, unless the flag was already defined by the build.
  */
  #ifndef IS_UTF8_HAS_AVX512F
  #if defined(__AVX512F__) && __AVX512F__ == 1
  #define IS_UTF8_HAS_AVX512F 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512DQ
  #if defined(__AVX512DQ__) && __AVX512DQ__ == 1
  #define IS_UTF8_HAS_AVX512DQ 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512IFMA
  #if defined(__AVX512IFMA__) && __AVX512IFMA__ == 1
  #define IS_UTF8_HAS_AVX512IFMA 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512CD
  #if defined(__AVX512CD__) && __AVX512CD__ == 1
  #define IS_UTF8_HAS_AVX512CD 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512BW
  #if defined(__AVX512BW__) && __AVX512BW__ == 1
  #define IS_UTF8_HAS_AVX512BW 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512VL
  #if defined(__AVX512VL__) && __AVX512VL__ == 1
  #define IS_UTF8_HAS_AVX512VL 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512VBMI
  #if defined(__AVX512VBMI__) && __AVX512VBMI__ == 1
  #define IS_UTF8_HAS_AVX512VBMI 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512VBMI2
  #if defined(__AVX512VBMI2__) && __AVX512VBMI2__ == 1
  #define IS_UTF8_HAS_AVX512VBMI2 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512VNNI
  #if defined(__AVX512VNNI__) && __AVX512VNNI__ == 1
  #define IS_UTF8_HAS_AVX512VNNI 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512BITALG
  #if defined(__AVX512BITALG__) && __AVX512BITALG__ == 1
  #define IS_UTF8_HAS_AVX512BITALG 1
  #endif
  #endif

  #ifndef IS_UTF8_HAS_AVX512VPOPCNTDQ
  #if defined(__AVX512VPOPCNTDQ__) && __AVX512VPOPCNTDQ__ == 1
  #define IS_UTF8_HAS_AVX512VPOPCNTDQ 1
  #endif
  #endif
  #endif // IS_UTF8_AVX512_H_
  #if defined(__GNUC__)
  // Marks a block with a name so that MCA analysis can see it: the markers
  // are emitted as assembler comments ("# LLVM-MCA-BEGIN <name>" /
  // "# LLVM-MCA-END <name>").
  #define IS_UTF8_BEGIN_DEBUG_BLOCK(name) \
    __asm volatile("# LLVM-MCA-BEGIN " #name);
  #define IS_UTF8_END_DEBUG_BLOCK(name) __asm volatile("# LLVM-MCA-END " #name);
  // Wraps `block` between a begin marker and an end marker. The markers must
  // be spelled with the IS_UTF8_ prefix; the unprefixed names are not defined
  // anywhere, so using them made every expansion fail to compile.
  #define IS_UTF8_DEBUG_BLOCK(name, block) \
    IS_UTF8_BEGIN_DEBUG_BLOCK(name);       \
    block;                                 \
    IS_UTF8_END_DEBUG_BLOCK(name);
  #else
  // No marker support: the markers vanish, but the wrapped block must still
  // be emitted, otherwise the code inside it would silently disappear.
  #define IS_UTF8_BEGIN_DEBUG_BLOCK(name)
  #define IS_UTF8_END_DEBUG_BLOCK(name)
  #define IS_UTF8_DEBUG_BLOCK(name, block) block;
  #endif
  // Alignment helpers for an N-byte boundary. All three rely on the mask
  // ((n)-1), which only isolates the low bits when n is a power of two.
  //   IS_UTF8_ROUNDDOWN_N(a, n): clears the low bits of a.
  //   IS_UTF8_ROUNDUP_N(a, n):   adds n-1 first, then clears the low bits.
  //   IS_UTF8_ISALIGNED_N(p, n): true when the low bits of the address are 0.
  #define IS_UTF8_ROUNDDOWN_N(a, n) ((a) & ~((n)-1))
  #define IS_UTF8_ROUNDUP_N(a, n) IS_UTF8_ROUNDDOWN_N((a) + ((n)-1), n)
  #define IS_UTF8_ISALIGNED_N(ptr, n) (!((uintptr_t)(ptr) & ((n)-1)))
  #if defined(IS_UTF8_REGULAR_VISUAL_STUDIO)
  // ---- Regular Visual Studio spellings ----
  // Inlining control; the "unused" and "warn unused" markers expand to
  // nothing here.
  #define is_utf8_really_inline __forceinline
  #define is_utf8_never_inline __declspec(noinline)
  #define is_utf8_unused
  #define is_utf8_warn_unused
  // Branch hints are pass-through unless the user supplied their own.
  #if !defined(is_utf8_likely)
  #define is_utf8_likely(x) x
  #endif
  #if !defined(is_utf8_unlikely)
  #define is_utf8_unlikely(x) x
  #endif
  // Warning control through __pragma(warning(...)).
  #define IS_UTF8_PUSH_DISABLE_WARNINGS __pragma(warning(push))
  #define IS_UTF8_PUSH_DISABLE_ALL_WARNINGS __pragma(warning(push, 0))
  #define IS_UTF8_POP_DISABLE_WARNINGS __pragma(warning(pop))
  #define IS_UTF8_DISABLE_VS_WARNING(WARNING_NUMBER) \
    __pragma(warning(disable : WARNING_NUMBER))
  #define IS_UTF8_DISABLE_DEPRECATED_WARNING IS_UTF8_DISABLE_VS_WARNING(4996)
  #define IS_UTF8_DISABLE_STRICT_OVERFLOW_WARNING
  // Get rid of Intellisense-only warnings (Code Analysis).
  // Though __has_include is C++17, it is supported in Visual Studio 2017 or
  // better (_MSC_VER>=1910).
  #if defined(__has_include)
  #if __has_include(<CppCoreCheck\Warnings.h>)
  #include <CppCoreCheck\Warnings.h>
  #define IS_UTF8_DISABLE_UNDESIRED_WARNINGS \
    IS_UTF8_DISABLE_VS_WARNING(ALL_CPPCORECHECK_WARNINGS)
  #endif
  #endif
  #if !defined(IS_UTF8_DISABLE_UNDESIRED_WARNINGS)
  #define IS_UTF8_DISABLE_UNDESIRED_WARNINGS
  #endif
  #else // !IS_UTF8_REGULAR_VISUAL_STUDIO
  // ---- GCC / clang spellings ----
  #define is_utf8_really_inline inline __attribute__((always_inline))
  #define is_utf8_never_inline inline __attribute__((noinline))
  #define is_utf8_unused __attribute__((unused))
  #define is_utf8_warn_unused __attribute__((warn_unused_result))
  // Branch hints map onto __builtin_expect unless the user supplied their own.
  #if !defined(is_utf8_likely)
  #define is_utf8_likely(x) __builtin_expect(!!(x), 1)
  #endif
  #if !defined(is_utf8_unlikely)
  #define is_utf8_unlikely(x) __builtin_expect(!!(x), 0)
  #endif
  // Building blocks: turn a token sequence into a _Pragma, and silence one
  // named diagnostic.
  #define IS_UTF8_PRAGMA(P) _Pragma(#P)
  #define IS_UTF8_DISABLE_GCC_WARNING(WARNING) \
    IS_UTF8_PRAGMA(GCC diagnostic ignored #WARNING)
  #define IS_UTF8_PUSH_DISABLE_WARNINGS _Pragma("GCC diagnostic push")
  #define IS_UTF8_POP_DISABLE_WARNINGS _Pragma("GCC diagnostic pop")
  // gcc doesn't seem to disable all warnings with all and extra, add warnings
  // here as necessary
  #define IS_UTF8_PUSH_DISABLE_ALL_WARNINGS \
    IS_UTF8_PUSH_DISABLE_WARNINGS \
    IS_UTF8_DISABLE_GCC_WARNING(-Weffc++) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wall) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wconversion) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wextra) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wattributes) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wimplicit-fallthrough) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wnon-virtual-dtor) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wreturn-type) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wshadow) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wunused-parameter) \
    IS_UTF8_DISABLE_GCC_WARNING(-Wunused-variable)
  #if defined(IS_UTF8_CLANG_VISUAL_STUDIO)
  #define IS_UTF8_DISABLE_UNDESIRED_WARNINGS \
    IS_UTF8_DISABLE_GCC_WARNING(-Wmicrosoft-include)
  #else
  #define IS_UTF8_DISABLE_UNDESIRED_WARNINGS
  #endif
  #define IS_UTF8_DISABLE_DEPRECATED_WARNING \
    IS_UTF8_DISABLE_GCC_WARNING(-Wdeprecated-declarations)
  #define IS_UTF8_DISABLE_STRICT_OVERFLOW_WARNING \
    IS_UTF8_DISABLE_GCC_WARNING(-Wstrict-overflow)
  #endif // IS_UTF8_REGULAR_VISUAL_STUDIO
  /**
   * Symbol import/export decoration.
   *
   * Outside Visual Studio the macro is empty. Under Visual Studio it does not
   * matter whether the regular compiler or clang is in use: the macro is
   * __declspec(dllimport) when IS_UTF8_USING_LIBRARY is non-zero and
   * __declspec(dllexport) otherwise.
   */
  #if !defined(IS_UTF8_VISUAL_STUDIO)
  #define IS_UTF8_DLLIMPORTEXPORT
  #elif IS_UTF8_USING_LIBRARY
  #define IS_UTF8_DLLIMPORTEXPORT __declspec(dllimport)
  #else
  #define IS_UTF8_DLLIMPORTEXPORT __declspec(dllexport)
  #endif
  /// Evaluates EXPR once and, when the result tests true (i.e. it is an
  /// error), returns that result from the enclosing function.
  #define IS_UTF8_TRY(EXPR) \
    { \
      if (auto _err = (EXPR)) { \
        return _err; \
      } \
    }
  301. #endif // IS_UTF8_COMMON_DEFS_H
  302. #include <string>
  namespace is_utf8_internals {
  /**
   * Encodings identified by a byte-order mark (BOM). Each encoding has its
   * own bit value; `unspecified` is zero.
   */
  enum encoding_type {
    unspecified = 0,
    UTF8 = 1,      // BOM 0xef 0xbb 0xbf
    UTF16_LE = 2,  // BOM 0xff 0xfe
    UTF16_BE = 4,  // BOM 0xfe 0xff
    UTF32_LE = 8,  // BOM 0xff 0xfe 0x00 0x00
    UTF32_BE = 16  // BOM 0x00 0x00 0xfe 0xff
  };
  /** Byte order. */
  enum endianness { LITTLE, BIG };
  /** Textual form of an encoding value. */
  std::string to_string(encoding_type bom);
  // Note that BOM for UTF8 is discouraged.
  namespace BOM {
  /**
   * Looks for a BOM at the start of the input; when none is found the result
   * is `unspecified`.
   * @param byte the string to process
   * @param length the length of the string in words
   * @return the corresponding encoding
   */
  encoding_type check_bom(const uint8_t *byte, size_t length);
  encoding_type check_bom(const char *byte, size_t length);
  /**
   * Size, in bytes, of the BOM that belongs to a given encoding type.
   * Note that UTF8 BOM are discouraged.
   * @param bom the encoding type
   * @return the size in bytes of the corresponding BOM
   */
  size_t bom_byte_size(encoding_type bom);
  } // namespace BOM
  } // namespace is_utf8_internals
  333. #ifndef ERROR_H
  334. #define ERROR_H
  335. namespace is_utf8_internals {
  336. enum error_code {
  337. SUCCESS = 0,
  338. HEADER_BITS, // Any byte must have fewer than 5 header bits.
  339. TOO_SHORT, // The leading byte must be followed by N-1 continuation bytes,
  340. // where N is the UTF-8 character length This is also the error
  341. // when the input is truncated.
  342. TOO_LONG, // The leading byte must not be a continuation byte.
  343. OVERLONG, // The decoded character must be above U+7F for two-byte characters,
  344. // U+7FF for three-byte characters, and U+FFFF for four-byte
  345. // characters.
  346. TOO_LARGE, // The decoded character must be less than or equal to U+10FFFF OR
  347. // less than or equal than U+7F for ASCII.
  348. SURROGATE, // The decoded character must be not be in U+D800...DFFF (UTF-8 or
  349. // UTF-32) OR a high surrogate must be followed by a low surrogate
  350. // and a low surrogate must be preceded by a high surrogate
  351. // (UTF-16)
  352. OTHER // Not related to validation/transcoding.
  353. };
  354. struct result {
  355. error_code error;
  356. size_t
  357. count; // In case of error, indicates the position of the error. In case
  358. // of success, indicates the number of words validated/written.
  359. is_utf8_really_inline result();
  360. is_utf8_really_inline result(error_code, size_t);
  361. };
  362. } // namespace is_utf8_internals
  363. #endif
  364. IS_UTF8_PUSH_DISABLE_WARNINGS
  365. IS_UTF8_DISABLE_UNDESIRED_WARNINGS
  366. #ifndef IS_UTF8_IMPLEMENTATION_H
  367. #define IS_UTF8_IMPLEMENTATION_H
  #include <string>
  #if !defined(IS_UTF8_NO_THREADS)
  #include <atomic>
  #endif
  #include <tuple>
  #include <utility>
  #include <vector>
  374. /* From
  375. https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
  376. Highly modified.
  377. Copyright (c) 2016- Facebook, Inc (Adam Paszke)
  378. Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
  379. Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
  380. Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
  381. Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
  382. Copyright (c) 2011-2013 NYU (Clement Farabet)
  383. Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
  384. Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
  385. (Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
  386. Samy Bengio, Johnny Mariethoz)
  387. All rights reserved.
  388. Redistribution and use in source and binary forms, with or without
  389. modification, are permitted provided that the following conditions are met:
  390. 1. Redistributions of source code must retain the above copyright
  391. notice, this list of conditions and the following disclaimer.
  392. 2. Redistributions in binary form must reproduce the above copyright
  393. notice, this list of conditions and the following disclaimer in the
  394. documentation and/or other materials provided with the distribution.
  395. 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
  396. America and IDIAP Research Institute nor the names of its contributors may be
  397. used to endorse or promote products derived from this software without
  398. specific prior written permission.
  399. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  400. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  401. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  402. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  403. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  404. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  405. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  406. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  407. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  408. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  409. POSSIBILITY OF SUCH DAMAGE.
  410. */
  411. #ifndef IS_UTF8_INTERNAL_ISADETECTION_H
  412. #define IS_UTF8_INTERNAL_ISADETECTION_H
  413. #include <cstdint>
  414. #include <cstdlib>
  415. #if defined(_MSC_VER)
  416. #include <intrin.h>
  417. #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  418. #include <cpuid.h>
  419. #endif
  420. namespace is_utf8_internals {
  421. namespace internal {
  /**
   * Bit flags naming the instruction-set extensions the library knows about.
   * Each flag is a single bit, so several can be OR-ed into one uint32_t
   * mask. Bit 1 (0x2) is not assigned.
   */
  enum instruction_set {
    DEFAULT = 0,
    NEON = 1 << 0,
    AVX2 = 1 << 2,
    SSE42 = 1 << 3,
    PCLMULQDQ = 1 << 4,
    BMI1 = 1 << 5,
    BMI2 = 1 << 6,
    ALTIVEC = 1 << 7,
    AVX512F = 1 << 8,
    AVX512DQ = 1 << 9,
    AVX512IFMA = 1 << 10,
    AVX512PF = 1 << 11,
    AVX512ER = 1 << 12,
    AVX512CD = 1 << 13,
    AVX512BW = 1 << 14,
    AVX512VL = 1 << 15,
    AVX512VBMI2 = 1 << 16
  };
  441. #if defined(__PPC64__)
  442. static inline uint32_t detect_supported_architectures() {
  443. return instruction_set::ALTIVEC;
  444. }
  445. #elif defined(__aarch64__) || defined(_M_ARM64)
  446. static inline uint32_t detect_supported_architectures() {
  447. return instruction_set::NEON;
  448. }
  449. #elif defined(__x86_64__) || defined(_M_AMD64) // x64
  namespace {
  /**
   * Feature bits returned by the CPUID instruction (see the Intel ISA
   * reference for CPUID). Each constant is a single-bit mask for the register
   * named in its comment.
   */
  namespace cpuid_bit {
  // Leaf EAX = 0x01, register ECX.
  constexpr uint32_t pclmulqdq = 0x00000002u; ///< @private bit 1 of ECX for EAX=0x1
  constexpr uint32_t sse42 = 0x00100000u;     ///< @private bit 20 of ECX for EAX=0x1
  // Leaf EAX = 0x7 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
  // See: "Table 3-8. Information Returned by CPUID Instruction"
  namespace ebx {
  constexpr uint32_t bmi1 = 0x00000008u;     // bit 3
  constexpr uint32_t avx2 = 0x00000020u;     // bit 5
  constexpr uint32_t bmi2 = 0x00000100u;     // bit 8
  constexpr uint32_t avx512f = 0x00010000u;  // bit 16
  constexpr uint32_t avx512dq = 0x00020000u; // bit 17
  constexpr uint32_t avx512cd = 0x10000000u; // bit 28
  constexpr uint32_t avx512bw = 0x40000000u; // bit 30
  constexpr uint32_t avx512vl = 0x80000000u; // bit 31
  } // namespace ebx
  namespace ecx {
  constexpr uint32_t avx512vbmi2 = 0x00000040u; // bit 6
  } // namespace ecx
  } // namespace cpuid_bit
  } // namespace
  /**
   * Executes the CPUID instruction.
   *
   * @param eax in: the leaf to query; out: EAX as returned by CPUID.
   * @param ebx out: EBX as returned by CPUID.
   * @param ecx in: the sub-leaf to query; out: ECX as returned by CPUID.
   * @param edx out: EDX as returned by CPUID.
   *
   * Every code path forwards the sub-leaf held in *ecx. Leaf 0x7 is indexed
   * by sub-leaf, so querying it without setting ECX yields unspecified
   * results.
   */
  static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                           uint32_t *edx) {
  #if defined(_MSC_VER)
    int cpu_info[4];
    // __cpuidex takes the sub-leaf explicitly; plain __cpuid has no way to
    // pass it.
    __cpuidex(cpu_info, static_cast<int>(*eax), static_cast<int>(*ecx));
    *eax = static_cast<uint32_t>(cpu_info[0]);
    *ebx = static_cast<uint32_t>(cpu_info[1]);
    *ecx = static_cast<uint32_t>(cpu_info[2]);
    *edx = static_cast<uint32_t>(cpu_info[3]);
  #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
    const uint32_t level = *eax;
    const uint32_t sublevel = *ecx;
    // __get_cpuid_count forwards the sub-leaf and returns 0 when the leaf is
    // not supported; in that case it writes nothing, so clear the outputs
    // rather than leaving values from a previous query in place.
    if (!__get_cpuid_count(level, sublevel, eax, ebx, ecx, edx)) {
      *eax = 0;
      *ebx = 0;
      *ecx = 0;
      *edx = 0;
    }
  #else
    uint32_t a = *eax, b, c = *ecx, d;
    asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
    *eax = a;
    *ebx = b;
    *ecx = c;
    *edx = d;
  #endif
  }
  496. static inline uint32_t detect_supported_architectures() {
  497. uint32_t eax;
  498. uint32_t ebx = 0;
  499. uint32_t ecx = 0;
  500. uint32_t edx = 0;
  501. uint32_t host_isa = 0x0;
  502. // EBX for EAX=0x1
  503. eax = 0x1;
  504. cpuid(&eax, &ebx, &ecx, &edx);
  505. if (ecx & cpuid_bit::sse42) {
  506. host_isa |= instruction_set::SSE42;
  507. }
  508. if (ecx & cpuid_bit::pclmulqdq) {
  509. host_isa |= instruction_set::PCLMULQDQ;
  510. }
  511. // ECX for EAX=0x7
  512. eax = 0x7;
  513. ecx = 0x0; // Sub-leaf = 0
  514. cpuid(&eax, &ebx, &ecx, &edx);
  515. if (ebx & cpuid_bit::ebx::avx2) {
  516. host_isa |= instruction_set::AVX2;
  517. }
  518. if (ebx & cpuid_bit::ebx::bmi1) {
  519. host_isa |= instruction_set::BMI1;
  520. }
  521. if (ebx & cpuid_bit::ebx::bmi2) {
  522. host_isa |= instruction_set::BMI2;
  523. }
  524. if (ebx & cpuid_bit::ebx::avx512f) {
  525. host_isa |= instruction_set::AVX512F;
  526. }
  527. if (ebx & cpuid_bit::ebx::avx512bw) {
  528. host_isa |= instruction_set::AVX512BW;
  529. }
  530. if (ebx & cpuid_bit::ebx::avx512cd) {
  531. host_isa |= instruction_set::AVX512CD;
  532. }
  533. if (ebx & cpuid_bit::ebx::avx512dq) {
  534. host_isa |= instruction_set::AVX512DQ;
  535. }
  536. if (ebx & cpuid_bit::ebx::avx512vl) {
  537. host_isa |= instruction_set::AVX512VL;
  538. }
  539. if (ecx & cpuid_bit::ecx::avx512vbmi2) {
  540. host_isa |= instruction_set::AVX512VBMI2;
  541. }
  542. return host_isa;
  543. }
  544. #else // fallback
  545. static inline uint32_t detect_supported_architectures() {
  546. return instruction_set::DEFAULT;
  547. }
  548. #endif // end SIMD extension detection code
  549. } // namespace internal
  550. } // namespace is_utf8_internals
  551. #endif // IS_UTF8_INTERNAL_ISADETECTION_H
  552. namespace is_utf8_internals {
  553. /**
  554. * Validate the UTF-8 string. This function may be best when you expect
  555. * the input to be almost always valid. Otherwise, consider using
  556. * validate_utf8_with_errors.
  557. *
  558. * Overridden by each implementation.
  559. *
  560. * @param buf the UTF-8 string to validate.
  561. * @param len the length of the string in bytes.
  562. * @return true if and only if the string is valid UTF-8.
  563. */
  564. bool validate_utf8(const char *buf, size_t len) noexcept;
  565. class implementation {
  566. public:
  567. virtual const std::string &name() const { return _name; }
  568. virtual const std::string &description() const { return _description; }
  569. bool supported_by_runtime_system() const;
  570. virtual uint32_t required_instruction_sets() const {
  571. return _required_instruction_sets;
  572. }
  573. /**
  574. * Validate the UTF-8 string.
  575. *
  576. * Overridden by each implementation.
  577. *
  578. * @param buf the UTF-8 string to validate.
  579. * @param len the length of the string in bytes.
  580. * @return true if and only if the string is valid UTF-8.
  581. */
  582. is_utf8_warn_unused virtual bool validate_utf8(const char *buf,
  583. size_t len) const noexcept = 0;
  584. protected:
  585. /** @private Construct an implementation with the given name and description.
  586. * For subclasses. */
  587. is_utf8_really_inline implementation(std::string name,
  588. std::string description,
  589. uint32_t required_instruction_sets)
  590. : _name(name), _description(description),
  591. _required_instruction_sets(required_instruction_sets) {}
  592. virtual ~implementation() = default;
  593. private:
  594. /**
  595. * The name of this implementation.
  596. */
  597. const std::string _name;
  598. /**
  599. * The description of this implementation.
  600. */
  601. const std::string _description;
  602. /**
  603. * Instruction sets required for this implementation.
  604. */
  605. const uint32_t _required_instruction_sets;
  606. };
  607. /** @private */
  608. namespace internal {
  609. class available_implementation_list {
  610. public:
  611. /** Get the list of available implementations */
  612. is_utf8_really_inline available_implementation_list() {}
  613. /** Number of implementations */
  614. size_t size() const noexcept;
  615. /** STL const begin() iterator */
  616. const implementation *const *begin() const noexcept;
  617. /** STL const end() iterator */
  618. const implementation *const *end() const noexcept;
  619. /**
  620. * Get the implementation with the given name.
  621. *
  622. * Case sensitive.
  623. *
  624. * const implementation *impl =
  625. * is_utf8_internals::available_implementations["westmere"]; if (!impl) {
  626. * exit(1); } if (!imp->supported_by_runtime_system()) { exit(1); }
  627. * is_utf8_internals::active_implementation = impl;
  628. *
  629. * @param name the implementation to find, e.g. "westmere", "haswell", "arm64"
  630. * @return the implementation, or nullptr if the parse failed.
  631. */
  632. const implementation *operator[](const std::string &name) const noexcept {
  633. for (const implementation *impl : *this) {
  634. if (impl->name() == name) {
  635. return impl;
  636. }
  637. }
  638. return nullptr;
  639. }
  640. /**
  641. * Detect the most advanced implementation supported by the current host.
  642. *
  643. * This is used to initialize the implementation on startup.
  644. *
  645. * const implementation *impl =
  646. * is_utf8_internals::available_implementation::detect_best_supported();
  647. * is_utf8_internals::active_implementation = impl;
  648. *
  649. * @return the most advanced supported implementation for the current host, or
  650. * an implementation that returns UNSUPPORTED_ARCHITECTURE if there is no
  651. * supported implementation. Will never return nullptr.
  652. */
  653. const implementation *detect_best_supported() const noexcept;
  654. };
  /**
   * Thin pointer wrapper. When IS_UTF8_NO_THREADS is defined the pointer is
   * stored as a plain T*; otherwise it is stored in a std::atomic<T *> and
   * every read goes through load().
   */
  template <typename T> class atomic_ptr {
  public:
    atomic_ptr(T *_ptr) : ptr{_ptr} {}

    // Read access through a const wrapper.
    operator const T *() const { return current(); }
    const T &operator*() const { return *current(); }
    const T *operator->() const { return current(); }

    // Read access through a mutable wrapper.
    operator T *() { return current(); }
    T &operator*() { return *current(); }
    T *operator->() { return current(); }

    // Replaces the stored pointer.
    atomic_ptr &operator=(T *_ptr) {
      ptr = _ptr;
      return *this;
    }

  private:
    // Single place that knows how the pointer is stored.
    T *current() const {
  #if defined(IS_UTF8_NO_THREADS)
      return ptr;
  #else
      return ptr.load();
  #endif
    }

  #if defined(IS_UTF8_NO_THREADS)
    T *ptr;
  #else
    std::atomic<T *> ptr;
  #endif
  };
  688. } // namespace internal
  /**
   * The list of available implementations compiled into is_utf8.
   */
  692. extern IS_UTF8_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations();
  693. /**
  694. * The active implementation.
  695. *
  696. * Automatically initialized on first use to the most advanced implementation
  697. * supported by this hardware.
  698. */
  699. extern IS_UTF8_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation();
  700. } // namespace is_utf8_internals
  701. #endif // IS_UTF8_IMPLEMENTATION_H
  702. // Implementation-internal files (must be included before the implementations
  703. // themselves, to keep amalgamation working--otherwise, the first time a file is
  704. // included, it might be put inside the #ifdef
  705. // IS_UTF8_IMPLEMENTATION_ARM64/FALLBACK/etc., which means the other
  706. // implementations can't compile unless that implementation is turned on).
  707. IS_UTF8_POP_DISABLE_WARNINGS
  708. #endif // IS_UTF8_H
  709. #include <climits>
  710. #include <initializer_list>
  711. // Useful for debugging purposes
  712. namespace is_utf8_internals {
  713. namespace {
  /**
   * Renders every bit of an integer as '0'/'1', most significant bit first.
   *
   * @param b the value to render.
   * @return a string of exactly sizeof(T) * CHAR_BIT characters.
   *
   * Bits are extracted by shifting the value rather than by walking a mask of
   * type T: for signed T the top-bit mask is not positive, so a
   * `while (mask > 0)` loop never ran and the result was an empty string.
   */
  template <typename T> std::string toBinaryString(T b) {
    constexpr auto bit_count = sizeof(T) * CHAR_BIT;
    std::string binary;
    binary.reserve(bit_count);
    for (auto bit = bit_count; bit-- > 0;) {
      binary += (((b >> bit) & 1) != 0) ? '1' : '0';
    }
    return binary;
  }
  723. } // namespace
  724. } // namespace is_utf8_internals
  725. // Implementations
  726. // The best choice should always come first!
  727. #ifndef IS_UTF8_ARM64_H
  728. #define IS_UTF8_ARM64_H
  // Ordering check: the fallback header must not have been seen yet.
  #if defined(IS_UTF8_FALLBACK_H)
  #error "arm64.h must be included before fallback.h"
  #endif
  // Unless overridden, the ARM64 implementation is enabled exactly when the
  // target is ARM64.
  #if !defined(IS_UTF8_IMPLEMENTATION_ARM64)
  #define IS_UTF8_IMPLEMENTATION_ARM64 (IS_UTF8_IS_ARM64)
  #endif
  // IS_UTF8_CAN_ALWAYS_RUN_ARM64 is 1 only when the implementation is enabled
  // and the target itself is ARM64; otherwise it is 0.
  #if !(IS_UTF8_IMPLEMENTATION_ARM64 && IS_UTF8_IS_ARM64)
  #define IS_UTF8_CAN_ALWAYS_RUN_ARM64 0
  #else
  #define IS_UTF8_CAN_ALWAYS_RUN_ARM64 1
  #endif
  740. #if IS_UTF8_IMPLEMENTATION_ARM64
  741. namespace is_utf8_internals {
  742. /**
  743. * Implementation for NEON (ARMv8).
  744. */
  745. namespace arm64 {} // namespace arm64
  746. } // namespace is_utf8_internals
  747. #ifndef IS_UTF8_ARM64_IMPLEMENTATION_H
  748. #define IS_UTF8_ARM64_IMPLEMENTATION_H
  749. namespace is_utf8_internals {
  750. namespace arm64 {
  751. class implementation final : public is_utf8_internals::implementation {
  752. public:
  753. is_utf8_really_inline implementation()
  754. : is_utf8_internals::implementation("arm64", "ARM NEON",
  755. internal::instruction_set::NEON) {}
  756. is_utf8_warn_unused bool validate_utf8(const char *buf,
  757. size_t len) const noexcept final;
  758. };
  759. } // namespace arm64
  760. } // namespace is_utf8_internals
  761. #endif // IS_UTF8_ARM64_IMPLEMENTATION_H
  762. // redefining IS_UTF8_IMPLEMENTATION to "arm64"
  763. // #define IS_UTF8_IMPLEMENTATION arm64
  764. // Declarations
  765. #ifndef IS_UTF8_ARM64_INTRINSICS_H
  766. #define IS_UTF8_ARM64_INTRINSICS_H
  767. // This should be the correct header whether
  768. // you use visual studio or other compilers.
  769. #include <arm_neon.h>
  770. #endif // IS_UTF8_ARM64_INTRINSICS_H
  771. #ifndef IS_UTF8_ARM64_BITMANIPULATION_H
  772. #define IS_UTF8_ARM64_BITMANIPULATION_H
  773. namespace is_utf8_internals {
  774. namespace arm64 {
  775. namespace {} // unnamed namespace
  776. } // namespace arm64
  777. } // namespace is_utf8_internals
  778. #endif // IS_UTF8_ARM64_BITMANIPULATION_H
  779. #ifndef IS_UTF8_ARM64_SIMD_H
  780. #define IS_UTF8_ARM64_SIMD_H
  781. #include <type_traits>
  782. namespace is_utf8_internals {
  783. namespace arm64 {
  784. namespace {
  785. namespace simd {
  786. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  787. namespace {
  788. // Start of private section with Visual Studio workaround
  789. /**
  790. * make_uint8x16_t initializes a SIMD register (uint8x16_t).
  791. * This is needed because, incredibly, the syntax uint8x16_t x = {1,2,3...}
  792. * is not recognized under Visual Studio! This is a workaround.
  793. * Using a std::initializer_list<uint8_t> as a parameter resulted in
  794. * inefficient code. With the current approach, if the parameters are
  795. * compile-time constants,
  796. * GNU GCC compiles it to ldr, the same as uint8x16_t x = {1,2,3...}.
  797. * You should not use this function except for compile-time constants:
  798. * it is not efficient.
  799. */
  800. is_utf8_really_inline uint8x16_t make_uint8x16_t(
  801. uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6,
  802. uint8_t x7, uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11, uint8_t x12,
  803. uint8_t x13, uint8_t x14, uint8_t x15, uint8_t x16) {
  804. // Doing a load like so end ups generating worse code.
  805. // uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8,
  806. // x9, x10,x11,x12,x13,x14,x15,x16};
  807. // return vld1q_u8(array);
  808. uint8x16_t x{};
  809. // incredibly, Visual Studio does not allow x[0] = x1
  810. x = vsetq_lane_u8(x1, x, 0);
  811. x = vsetq_lane_u8(x2, x, 1);
  812. x = vsetq_lane_u8(x3, x, 2);
  813. x = vsetq_lane_u8(x4, x, 3);
  814. x = vsetq_lane_u8(x5, x, 4);
  815. x = vsetq_lane_u8(x6, x, 5);
  816. x = vsetq_lane_u8(x7, x, 6);
  817. x = vsetq_lane_u8(x8, x, 7);
  818. x = vsetq_lane_u8(x9, x, 8);
  819. x = vsetq_lane_u8(x10, x, 9);
  820. x = vsetq_lane_u8(x11, x, 10);
  821. x = vsetq_lane_u8(x12, x, 11);
  822. x = vsetq_lane_u8(x13, x, 12);
  823. x = vsetq_lane_u8(x14, x, 13);
  824. x = vsetq_lane_u8(x15, x, 14);
  825. x = vsetq_lane_u8(x16, x, 15);
  826. return x;
  827. }
  828. // We have to do the same work for make_int8x16_t
  829. is_utf8_really_inline int8x16_t
  830. make_int8x16_t(int8_t x1, int8_t x2, int8_t x3, int8_t x4, int8_t x5, int8_t x6,
  831. int8_t x7, int8_t x8, int8_t x9, int8_t x10, int8_t x11,
  832. int8_t x12, int8_t x13, int8_t x14, int8_t x15, int8_t x16) {
  833. // Doing a load like so end ups generating worse code.
  834. // int8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8,
  835. // x9, x10,x11,x12,x13,x14,x15,x16};
  836. // return vld1q_s8(array);
  837. int8x16_t x{};
  838. // incredibly, Visual Studio does not allow x[0] = x1
  839. x = vsetq_lane_s8(x1, x, 0);
  840. x = vsetq_lane_s8(x2, x, 1);
  841. x = vsetq_lane_s8(x3, x, 2);
  842. x = vsetq_lane_s8(x4, x, 3);
  843. x = vsetq_lane_s8(x5, x, 4);
  844. x = vsetq_lane_s8(x6, x, 5);
  845. x = vsetq_lane_s8(x7, x, 6);
  846. x = vsetq_lane_s8(x8, x, 7);
  847. x = vsetq_lane_s8(x9, x, 8);
  848. x = vsetq_lane_s8(x10, x, 9);
  849. x = vsetq_lane_s8(x11, x, 10);
  850. x = vsetq_lane_s8(x12, x, 11);
  851. x = vsetq_lane_s8(x13, x, 12);
  852. x = vsetq_lane_s8(x14, x, 13);
  853. x = vsetq_lane_s8(x15, x, 14);
  854. x = vsetq_lane_s8(x16, x, 15);
  855. return x;
  856. }
  857. is_utf8_really_inline uint8x8_t make_uint8x8_t(uint8_t x1, uint8_t x2,
  858. uint8_t x3, uint8_t x4,
  859. uint8_t x5, uint8_t x6,
  860. uint8_t x7, uint8_t x8) {
  861. uint8x8_t x{};
  862. x = vset_lane_u8(x1, x, 0);
  863. x = vset_lane_u8(x2, x, 1);
  864. x = vset_lane_u8(x3, x, 2);
  865. x = vset_lane_u8(x4, x, 3);
  866. x = vset_lane_u8(x5, x, 4);
  867. x = vset_lane_u8(x6, x, 5);
  868. x = vset_lane_u8(x7, x, 6);
  869. x = vset_lane_u8(x8, x, 7);
  870. return x;
  871. }
  872. is_utf8_really_inline uint16x8_t make_uint16x8_t(uint16_t x1, uint16_t x2,
  873. uint16_t x3, uint16_t x4,
  874. uint16_t x5, uint16_t x6,
  875. uint16_t x7, uint16_t x8) {
  876. uint16x8_t x{};
  877. x = vsetq_lane_u16(x1, x, 0);
  878. x = vsetq_lane_u16(x2, x, 1);
  879. x = vsetq_lane_u16(x3, x, 2);
  880. x = vsetq_lane_u16(x4, x, 3);
  881. x = vsetq_lane_u16(x5, x, 4);
  882. x = vsetq_lane_u16(x6, x, 5);
  883. x = vsetq_lane_u16(x7, x, 6);
  884. x = vsetq_lane_u16(x8, x, 7);
  885. ;
  886. return x;
  887. }
  888. is_utf8_really_inline int16x8_t make_int16x8_t(int16_t x1, int16_t x2,
  889. int16_t x3, int16_t x4,
  890. int16_t x5, int16_t x6,
  891. int16_t x7, int16_t x8) {
  892. uint16x8_t x{};
  893. x = vsetq_lane_s16(x1, x, 0);
  894. x = vsetq_lane_s16(x2, x, 1);
  895. x = vsetq_lane_s16(x3, x, 2);
  896. x = vsetq_lane_s16(x4, x, 3);
  897. x = vsetq_lane_s16(x5, x, 4);
  898. x = vsetq_lane_s16(x6, x, 5);
  899. x = vsetq_lane_s16(x7, x, 6);
  900. x = vsetq_lane_s16(x8, x, 7);
  901. ;
  902. return x;
  903. }
  904. // End of private section with Visual Studio workaround
  905. } // namespace
  906. #endif // IS_UTF8_REGULAR_VISUAL_STUDIO
  907. template <typename T> struct simd8;
  908. //
  909. // Base class of simd8<uint8_t> and simd8<bool>, both of which use uint8x16_t
  910. // internally.
  911. //
  912. template <typename T, typename Mask = simd8<bool>> struct base_u8 {
  913. uint8x16_t value;
  914. static const int SIZE = sizeof(value);
  915. // Conversion from/to SIMD register
  916. is_utf8_really_inline base_u8(const uint8x16_t _value) : value(_value) {}
  917. is_utf8_really_inline operator const uint8x16_t &() const {
  918. return this->value;
  919. }
  920. is_utf8_really_inline operator uint8x16_t &() { return this->value; }
  921. is_utf8_really_inline T first() const { return vgetq_lane_u8(*this, 0); }
  922. is_utf8_really_inline T last() const { return vgetq_lane_u8(*this, 15); }
  923. // Bit operations
  924. is_utf8_really_inline simd8<T> operator|(const simd8<T> other) const {
  925. return vorrq_u8(*this, other);
  926. }
  927. is_utf8_really_inline simd8<T> operator&(const simd8<T> other) const {
  928. return vandq_u8(*this, other);
  929. }
  930. is_utf8_really_inline simd8<T> operator^(const simd8<T> other) const {
  931. return veorq_u8(*this, other);
  932. }
  933. is_utf8_really_inline simd8<T> bit_andnot(const simd8<T> other) const {
  934. return vbicq_u8(*this, other);
  935. }
  936. is_utf8_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
  937. is_utf8_really_inline simd8<T> &operator|=(const simd8<T> other) {
  938. auto this_cast = static_cast<simd8<T> *>(this);
  939. *this_cast = *this_cast | other;
  940. return *this_cast;
  941. }
  942. is_utf8_really_inline simd8<T> &operator&=(const simd8<T> other) {
  943. auto this_cast = static_cast<simd8<T> *>(this);
  944. *this_cast = *this_cast & other;
  945. return *this_cast;
  946. }
  947. is_utf8_really_inline simd8<T> &operator^=(const simd8<T> other) {
  948. auto this_cast = static_cast<simd8<T> *>(this);
  949. *this_cast = *this_cast ^ other;
  950. return *this_cast;
  951. }
  952. friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
  953. const simd8<T> rhs) {
  954. return vceqq_u8(lhs, rhs);
  955. }
  956. template <int N = 1>
  957. is_utf8_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
  958. return vextq_u8(prev_chunk, *this, 16 - N);
  959. }
  960. };
  961. // SIMD byte mask type (returned by things like eq and gt)
  962. template <> struct simd8<bool> : base_u8<bool> {
  963. typedef uint16_t bitmask_t;
  964. typedef uint32_t bitmask2_t;
  965. static is_utf8_really_inline simd8<bool> splat(bool _value) {
  966. return vmovq_n_u8(uint8_t(-(!!_value)));
  967. }
  968. is_utf8_really_inline simd8(const uint8x16_t _value)
  969. : base_u8<bool>(_value) {}
  970. // False constructor
  971. is_utf8_really_inline simd8() : simd8(vdupq_n_u8(0)) {}
  972. // Splat constructor
  973. is_utf8_really_inline simd8(bool _value) : simd8(splat(_value)) {}
  974. is_utf8_really_inline void store(uint8_t dst[16]) const {
  975. return vst1q_u8(dst, *this);
  976. }
  977. // We return uint32_t instead of uint16_t because that seems to be more
  978. // efficient for most purposes (cutting it down to uint16_t costs performance
  979. // in some compilers).
  980. is_utf8_really_inline uint32_t to_bitmask() const {
  981. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  982. const uint8x16_t bit_mask =
  983. make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
  984. 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
  985. #else
  986. const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
  987. 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  988. #endif
  989. auto minput = *this & bit_mask;
  990. uint8x16_t tmp = vpaddq_u8(minput, minput);
  991. tmp = vpaddq_u8(tmp, tmp);
  992. tmp = vpaddq_u8(tmp, tmp);
  993. return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
  994. }
  995. // Returns 4-bit out of each byte, alternating between the high 4 bits and low
  996. // bits result it is 64 bit. This method is expected to be faster than none()
  997. // and is equivalent when the vector register is the result of a comparison,
  998. // with byte values 0xff and 0x00.
  999. is_utf8_really_inline uint64_t to_bitmask64() const {
  1000. return vget_lane_u64(
  1001. vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0);
  1002. }
  1003. is_utf8_really_inline bool any() const { return vmaxvq_u8(*this) != 0; }
  1004. is_utf8_really_inline bool none() const { return vmaxvq_u8(*this) == 0; }
  1005. is_utf8_really_inline bool all() const { return vminvq_u8(*this) == 0xFF; }
  1006. };
  1007. // Unsigned bytes
  1008. template <> struct simd8<uint8_t> : base_u8<uint8_t> {
  1009. static is_utf8_really_inline simd8<uint8_t> splat(uint8_t _value) {
  1010. return vmovq_n_u8(_value);
  1011. }
  1012. static is_utf8_really_inline simd8<uint8_t> zero() { return vdupq_n_u8(0); }
  1013. static is_utf8_really_inline simd8<uint8_t> load(const uint8_t *values) {
  1014. return vld1q_u8(values);
  1015. }
  1016. is_utf8_really_inline simd8(const uint8x16_t _value)
  1017. : base_u8<uint8_t>(_value) {}
  1018. // Zero constructor
  1019. is_utf8_really_inline simd8() : simd8(zero()) {}
  1020. // Array constructor
  1021. is_utf8_really_inline simd8(const uint8_t values[16]) : simd8(load(values)) {}
  1022. // Splat constructor
  1023. is_utf8_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
  1024. // Member-by-member initialization
  1025. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1026. is_utf8_really_inline
  1027. simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
  1028. uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
  1029. uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
  1030. : simd8(make_uint8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  1031. v12, v13, v14, v15)) {}
  1032. #else
  1033. is_utf8_really_inline
  1034. simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
  1035. uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
  1036. uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
  1037. : simd8(uint8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  1038. v13, v14, v15}) {}
  1039. #endif
  1040. // Repeat 16 values as many times as necessary (usually for lookup tables)
  1041. is_utf8_really_inline static simd8<uint8_t>
  1042. repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
  1043. uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
  1044. uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
  1045. uint8_t v15) {
  1046. return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  1047. v13, v14, v15);
  1048. }
  1049. // Store to array
  1050. is_utf8_really_inline void store(uint8_t dst[16]) const {
  1051. return vst1q_u8(dst, *this);
  1052. }
  1053. // Saturated math
  1054. is_utf8_really_inline simd8<uint8_t>
  1055. saturating_add(const simd8<uint8_t> other) const {
  1056. return vqaddq_u8(*this, other);
  1057. }
  1058. is_utf8_really_inline simd8<uint8_t>
  1059. saturating_sub(const simd8<uint8_t> other) const {
  1060. return vqsubq_u8(*this, other);
  1061. }
  1062. // Addition/subtraction are the same for signed and unsigned
  1063. is_utf8_really_inline simd8<uint8_t>
  1064. operator+(const simd8<uint8_t> other) const {
  1065. return vaddq_u8(*this, other);
  1066. }
  1067. is_utf8_really_inline simd8<uint8_t>
  1068. operator-(const simd8<uint8_t> other) const {
  1069. return vsubq_u8(*this, other);
  1070. }
  1071. is_utf8_really_inline simd8<uint8_t> &operator+=(const simd8<uint8_t> other) {
  1072. *this = *this + other;
  1073. return *this;
  1074. }
  1075. is_utf8_really_inline simd8<uint8_t> &operator-=(const simd8<uint8_t> other) {
  1076. *this = *this - other;
  1077. return *this;
  1078. }
  1079. // Order-specific operations
  1080. is_utf8_really_inline uint8_t max_val() const { return vmaxvq_u8(*this); }
  1081. is_utf8_really_inline uint8_t min_val() const { return vminvq_u8(*this); }
  1082. is_utf8_really_inline simd8<uint8_t>
  1083. max_val(const simd8<uint8_t> other) const {
  1084. return vmaxq_u8(*this, other);
  1085. }
  1086. is_utf8_really_inline simd8<uint8_t>
  1087. min_val(const simd8<uint8_t> other) const {
  1088. return vminq_u8(*this, other);
  1089. }
  1090. is_utf8_really_inline simd8<bool>
  1091. operator<=(const simd8<uint8_t> other) const {
  1092. return vcleq_u8(*this, other);
  1093. }
  1094. is_utf8_really_inline simd8<bool>
  1095. operator>=(const simd8<uint8_t> other) const {
  1096. return vcgeq_u8(*this, other);
  1097. }
  1098. is_utf8_really_inline simd8<bool>
  1099. operator<(const simd8<uint8_t> other) const {
  1100. return vcltq_u8(*this, other);
  1101. }
  1102. is_utf8_really_inline simd8<bool>
  1103. operator>(const simd8<uint8_t> other) const {
  1104. return vcgtq_u8(*this, other);
  1105. }
  1106. // Same as >, but instead of guaranteeing all 1's == true, false = 0 and true
  1107. // = nonzero. For ARM, returns all 1's.
  1108. is_utf8_really_inline simd8<uint8_t>
  1109. gt_bits(const simd8<uint8_t> other) const {
  1110. return simd8<uint8_t>(*this > other);
  1111. }
  1112. // Same as <, but instead of guaranteeing all 1's == true, false = 0 and true
  1113. // = nonzero. For ARM, returns all 1's.
  1114. is_utf8_really_inline simd8<uint8_t>
  1115. lt_bits(const simd8<uint8_t> other) const {
  1116. return simd8<uint8_t>(*this < other);
  1117. }
  1118. // Bit-specific operations
  1119. is_utf8_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
  1120. return vtstq_u8(*this, bits);
  1121. }
  1122. is_utf8_really_inline bool is_ascii() const {
  1123. return this->max_val() < 0b10000000u;
  1124. }
  1125. is_utf8_really_inline bool any_bits_set_anywhere() const {
  1126. return this->max_val() != 0;
  1127. }
  1128. is_utf8_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
  1129. return (*this & bits).any_bits_set_anywhere();
  1130. }
  1131. template <int N> is_utf8_really_inline simd8<uint8_t> shr() const {
  1132. return vshrq_n_u8(*this, N);
  1133. }
  1134. template <int N> is_utf8_really_inline simd8<uint8_t> shl() const {
  1135. return vshlq_n_u8(*this, N);
  1136. }
  1137. // Perform a lookup assuming the value is between 0 and 16 (undefined behavior
  1138. // for out of range values)
  1139. template <typename L>
  1140. is_utf8_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
  1141. return lookup_table.apply_lookup_16_to(*this);
  1142. }
  1143. template <typename L>
  1144. is_utf8_really_inline simd8<L>
  1145. lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
  1146. L replace5, L replace6, L replace7, L replace8, L replace9,
  1147. L replace10, L replace11, L replace12, L replace13, L replace14,
  1148. L replace15) const {
  1149. return lookup_16(simd8<L>::repeat_16(
  1150. replace0, replace1, replace2, replace3, replace4, replace5, replace6,
  1151. replace7, replace8, replace9, replace10, replace11, replace12,
  1152. replace13, replace14, replace15));
  1153. }
  1154. template <typename T>
  1155. is_utf8_really_inline simd8<uint8_t>
  1156. apply_lookup_16_to(const simd8<T> original) const {
  1157. return vqtbl1q_u8(*this, simd8<uint8_t>(original));
  1158. }
  1159. };
  1160. // Signed bytes
  1161. template <> struct simd8<int8_t> {
  1162. int8x16_t value;
  1163. static is_utf8_really_inline simd8<int8_t> splat(int8_t _value) {
  1164. return vmovq_n_s8(_value);
  1165. }
  1166. static is_utf8_really_inline simd8<int8_t> zero() { return vdupq_n_s8(0); }
  1167. static is_utf8_really_inline simd8<int8_t> load(const int8_t values[16]) {
  1168. return vld1q_s8(values);
  1169. }
  1170. template <endianness big_endian>
  1171. is_utf8_really_inline void store_ascii_as_utf16(char16_t *p) const {
  1172. uint16x8_t first = vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value)));
  1173. uint16x8_t second = vmovl_high_u8(vreinterpretq_u8_s8(this->value));
  1174. if (big_endian) {
  1175. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1176. const uint8x16_t swap =
  1177. make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
  1178. #else
  1179. const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6,
  1180. 9, 8, 11, 10, 13, 12, 15, 14};
  1181. #endif
  1182. first =
  1183. vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(first), swap));
  1184. second =
  1185. vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(second), swap));
  1186. }
  1187. vst1q_u16(reinterpret_cast<uint16_t *>(p), first);
  1188. vst1q_u16(reinterpret_cast<uint16_t *>(p + 8), second);
  1189. }
  1190. is_utf8_really_inline void store_ascii_as_utf32(char32_t *p) const {
  1191. vst1q_u32(reinterpret_cast<uint32_t *>(p),
  1192. vmovl_u16(vget_low_u16(
  1193. vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value))))));
  1194. vst1q_u32(reinterpret_cast<uint32_t *>(p + 4),
  1195. vmovl_high_u16(
  1196. vmovl_u8(vget_low_u8(vreinterpretq_u8_s8(this->value)))));
  1197. vst1q_u32(reinterpret_cast<uint32_t *>(p + 8),
  1198. vmovl_u16(vget_low_u16(
  1199. vmovl_high_u8(vreinterpretq_u8_s8(this->value)))));
  1200. vst1q_u32(reinterpret_cast<uint32_t *>(p + 12),
  1201. vmovl_high_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value))));
  1202. }
  1203. // Conversion from/to SIMD register
  1204. is_utf8_really_inline simd8(const int8x16_t _value) : value{_value} {}
  1205. is_utf8_really_inline operator const int8x16_t &() const {
  1206. return this->value;
  1207. }
  1208. is_utf8_really_inline operator const uint8x16_t() const {
  1209. return vreinterpretq_u8_s8(this->value);
  1210. }
  1211. is_utf8_really_inline operator int8x16_t &() { return this->value; }
  1212. // Zero constructor
  1213. is_utf8_really_inline simd8() : simd8(zero()) {}
  1214. // Splat constructor
  1215. is_utf8_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
  1216. // Array constructor
  1217. is_utf8_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
  1218. // Member-by-member initialization
  1219. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1220. is_utf8_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
  1221. int8_t v4, int8_t v5, int8_t v6, int8_t v7,
  1222. int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  1223. int8_t v12, int8_t v13, int8_t v14, int8_t v15)
  1224. : simd8(make_int8x16_t(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  1225. v12, v13, v14, v15)) {}
  1226. #else
  1227. is_utf8_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
  1228. int8_t v4, int8_t v5, int8_t v6, int8_t v7,
  1229. int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  1230. int8_t v12, int8_t v13, int8_t v14, int8_t v15)
  1231. : simd8(int8x16_t{v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  1232. v13, v14, v15}) {}
  1233. #endif
  1234. // Repeat 16 values as many times as necessary (usually for lookup tables)
  1235. is_utf8_really_inline static simd8<int8_t>
  1236. repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
  1237. int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  1238. int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
  1239. return simd8<int8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  1240. v13, v14, v15);
  1241. }
  1242. // Store to array
  1243. is_utf8_really_inline void store(int8_t dst[16]) const {
  1244. return vst1q_s8(dst, value);
  1245. }
  1246. // Explicit conversion to/from unsigned
  1247. //
  1248. // Under Visual Studio/ARM64 uint8x16_t and int8x16_t are apparently the same
  1249. // type. In theory, we could check this occurrence with std::same_as and
  1250. // std::enabled_if but it is C++14 and relatively ugly and hard to read.
  1251. #ifndef IS_UTF8_REGULAR_VISUAL_STUDIO
  1252. is_utf8_really_inline explicit simd8(const uint8x16_t other)
  1253. : simd8(vreinterpretq_s8_u8(other)) {}
  1254. #endif
  1255. is_utf8_really_inline operator simd8<uint8_t>() const {
  1256. return vreinterpretq_u8_s8(this->value);
  1257. }
  1258. is_utf8_really_inline simd8<int8_t>
  1259. operator|(const simd8<int8_t> other) const {
  1260. return vorrq_s8(value, other.value);
  1261. }
  1262. is_utf8_really_inline simd8<int8_t>
  1263. operator&(const simd8<int8_t> other) const {
  1264. return vandq_s8(value, other.value);
  1265. }
  1266. is_utf8_really_inline simd8<int8_t>
  1267. operator^(const simd8<int8_t> other) const {
  1268. return veorq_s8(value, other.value);
  1269. }
  1270. is_utf8_really_inline simd8<int8_t>
  1271. bit_andnot(const simd8<int8_t> other) const {
  1272. return vbicq_s8(value, other.value);
  1273. }
  1274. // Math
  1275. is_utf8_really_inline simd8<int8_t>
  1276. operator+(const simd8<int8_t> other) const {
  1277. return vaddq_s8(value, other.value);
  1278. }
  1279. is_utf8_really_inline simd8<int8_t>
  1280. operator-(const simd8<int8_t> other) const {
  1281. return vsubq_s8(value, other.value);
  1282. }
  1283. is_utf8_really_inline simd8<int8_t> &operator+=(const simd8<int8_t> other) {
  1284. *this = *this + other;
  1285. return *this;
  1286. }
  1287. is_utf8_really_inline simd8<int8_t> &operator-=(const simd8<int8_t> other) {
  1288. *this = *this - other;
  1289. return *this;
  1290. }
  1291. is_utf8_really_inline int8_t max_val() const { return vmaxvq_s8(value); }
  1292. is_utf8_really_inline int8_t min_val() const { return vminvq_s8(value); }
  1293. is_utf8_really_inline bool is_ascii() const { return this->min_val() >= 0; }
  1294. // Order-sensitive comparisons
  1295. is_utf8_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const {
  1296. return vmaxq_s8(value, other.value);
  1297. }
  1298. is_utf8_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const {
  1299. return vminq_s8(value, other.value);
  1300. }
  1301. is_utf8_really_inline simd8<bool> operator>(const simd8<int8_t> other) const {
  1302. return vcgtq_s8(value, other.value);
  1303. }
  1304. is_utf8_really_inline simd8<bool> operator<(const simd8<int8_t> other) const {
  1305. return vcltq_s8(value, other.value);
  1306. }
  1307. is_utf8_really_inline simd8<bool>
  1308. operator==(const simd8<int8_t> other) const {
  1309. return vceqq_s8(value, other.value);
  1310. }
  1311. template <int N = 1>
  1312. is_utf8_really_inline simd8<int8_t>
  1313. prev(const simd8<int8_t> prev_chunk) const {
  1314. return vextq_s8(prev_chunk, *this, 16 - N);
  1315. }
  1316. // Perform a lookup assuming no value is larger than 16
  1317. template <typename L>
  1318. is_utf8_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
  1319. return lookup_table.apply_lookup_16_to(*this);
  1320. }
  1321. template <typename L>
  1322. is_utf8_really_inline simd8<L>
  1323. lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
  1324. L replace5, L replace6, L replace7, L replace8, L replace9,
  1325. L replace10, L replace11, L replace12, L replace13, L replace14,
  1326. L replace15) const {
  1327. return lookup_16(simd8<L>::repeat_16(
  1328. replace0, replace1, replace2, replace3, replace4, replace5, replace6,
  1329. replace7, replace8, replace9, replace10, replace11, replace12,
  1330. replace13, replace14, replace15));
  1331. }
  1332. template <typename T>
  1333. is_utf8_really_inline simd8<int8_t>
  1334. apply_lookup_16_to(const simd8<T> original) {
  1335. return vqtbl1q_s8(*this, simd8<uint8_t>(original));
  1336. }
  1337. };
  1338. template <typename T> struct simd8x64 {
  1339. static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
  1340. static_assert(NUM_CHUNKS == 4,
  1341. "ARM kernel should use four registers per 64-byte block.");
  1342. simd8<T> chunks[NUM_CHUNKS];
  1343. simd8x64(const simd8x64<T> &o) = delete; // no copy allowed
  1344. simd8x64<T> &
  1345. operator=(const simd8<T> other) = delete; // no assignment allowed
  1346. simd8x64() = delete; // no default constructor allowed
  1347. is_utf8_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1,
  1348. const simd8<T> chunk2, const simd8<T> chunk3)
  1349. : chunks{chunk0, chunk1, chunk2, chunk3} {}
  1350. is_utf8_really_inline simd8x64(const T *ptr)
  1351. : chunks{simd8<T>::load(ptr),
  1352. simd8<T>::load(ptr + sizeof(simd8<T>) / sizeof(T)),
  1353. simd8<T>::load(ptr + 2 * sizeof(simd8<T>) / sizeof(T)),
  1354. simd8<T>::load(ptr + 3 * sizeof(simd8<T>) / sizeof(T))} {}
  1355. is_utf8_really_inline void store(T *ptr) const {
  1356. this->chunks[0].store(ptr + sizeof(simd8<T>) * 0 / sizeof(T));
  1357. this->chunks[1].store(ptr + sizeof(simd8<T>) * 1 / sizeof(T));
  1358. this->chunks[2].store(ptr + sizeof(simd8<T>) * 2 / sizeof(T));
  1359. this->chunks[3].store(ptr + sizeof(simd8<T>) * 3 / sizeof(T));
  1360. }
  1361. is_utf8_really_inline simd8x64<T> &operator|=(const simd8x64<T> &other) {
  1362. this->chunks[0] |= other.chunks[0];
  1363. this->chunks[1] |= other.chunks[1];
  1364. this->chunks[2] |= other.chunks[2];
  1365. this->chunks[3] |= other.chunks[3];
  1366. return *this;
  1367. }
  1368. is_utf8_really_inline simd8<T> reduce_or() const {
  1369. return (this->chunks[0] | this->chunks[1]) |
  1370. (this->chunks[2] | this->chunks[3]);
  1371. }
  1372. is_utf8_really_inline bool is_ascii() const { return reduce_or().is_ascii(); }
  1373. template <endianness endian>
  1374. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  1375. this->chunks[0].template store_ascii_as_utf16<endian>(ptr +
  1376. sizeof(simd8<T>) * 0);
  1377. this->chunks[1].template store_ascii_as_utf16<endian>(ptr +
  1378. sizeof(simd8<T>) * 1);
  1379. this->chunks[2].template store_ascii_as_utf16<endian>(ptr +
  1380. sizeof(simd8<T>) * 2);
  1381. this->chunks[3].template store_ascii_as_utf16<endian>(ptr +
  1382. sizeof(simd8<T>) * 3);
  1383. }
  1384. is_utf8_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
  1385. this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 0);
  1386. this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 1);
  1387. this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 2);
  1388. this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 3);
  1389. }
  1390. is_utf8_really_inline uint64_t to_bitmask() const {
  1391. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1392. const uint8x16_t bit_mask =
  1393. make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
  1394. 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
  1395. #else
  1396. const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
  1397. 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  1398. #endif
  1399. // Add each of the elements next to each other, successively, to stuff each
  1400. // 8 byte mask into one.
  1401. uint8x16_t sum0 =
  1402. vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[0]), bit_mask),
  1403. vandq_u8(uint8x16_t(this->chunks[1]), bit_mask));
  1404. uint8x16_t sum1 =
  1405. vpaddq_u8(vandq_u8(uint8x16_t(this->chunks[2]), bit_mask),
  1406. vandq_u8(uint8x16_t(this->chunks[3]), bit_mask));
  1407. sum0 = vpaddq_u8(sum0, sum1);
  1408. sum0 = vpaddq_u8(sum0, sum0);
  1409. return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
  1410. }
  1411. is_utf8_really_inline uint64_t eq(const T m) const {
  1412. const simd8<T> mask = simd8<T>::splat(m);
  1413. return simd8x64<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
  1414. this->chunks[2] == mask, this->chunks[3] == mask)
  1415. .to_bitmask();
  1416. }
  1417. is_utf8_really_inline uint64_t lteq(const T m) const {
  1418. const simd8<T> mask = simd8<T>::splat(m);
  1419. return simd8x64<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
  1420. this->chunks[2] <= mask, this->chunks[3] <= mask)
  1421. .to_bitmask();
  1422. }
  1423. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  1424. const simd8<T> mask_low = simd8<T>::splat(low);
  1425. const simd8<T> mask_high = simd8<T>::splat(high);
  1426. return simd8x64<bool>(
  1427. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  1428. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  1429. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  1430. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  1431. .to_bitmask();
  1432. }
  1433. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  1434. const simd8<T> mask_low = simd8<T>::splat(low);
  1435. const simd8<T> mask_high = simd8<T>::splat(high);
  1436. return simd8x64<bool>(
  1437. (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
  1438. (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
  1439. (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
  1440. (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low))
  1441. .to_bitmask();
  1442. }
  1443. is_utf8_really_inline uint64_t lt(const T m) const {
  1444. const simd8<T> mask = simd8<T>::splat(m);
  1445. return simd8x64<bool>(this->chunks[0] < mask, this->chunks[1] < mask,
  1446. this->chunks[2] < mask, this->chunks[3] < mask)
  1447. .to_bitmask();
  1448. }
  1449. is_utf8_really_inline uint64_t gt(const T m) const {
  1450. const simd8<T> mask = simd8<T>::splat(m);
  1451. return simd8x64<bool>(this->chunks[0] > mask, this->chunks[1] > mask,
  1452. this->chunks[2] > mask, this->chunks[3] > mask)
  1453. .to_bitmask();
  1454. }
  1455. is_utf8_really_inline uint64_t gteq(const T m) const {
  1456. const simd8<T> mask = simd8<T>::splat(m);
  1457. return simd8x64<bool>(this->chunks[0] >= mask, this->chunks[1] >= mask,
  1458. this->chunks[2] >= mask, this->chunks[3] >= mask)
  1459. .to_bitmask();
  1460. }
  1461. is_utf8_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
  1462. const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
  1463. return simd8x64<bool>(simd8<uint8_t>(uint8x16_t(this->chunks[0])) >= mask,
  1464. simd8<uint8_t>(uint8x16_t(this->chunks[1])) >= mask,
  1465. simd8<uint8_t>(uint8x16_t(this->chunks[2])) >= mask,
  1466. simd8<uint8_t>(uint8x16_t(this->chunks[3])) >= mask)
  1467. .to_bitmask();
  1468. }
  1469. }; // struct simd8x64<T>
  1470. template <typename T> struct simd16;
  1471. template <typename T, typename Mask = simd16<bool>> struct base_u16 {
  1472. uint16x8_t value;
  1473. static const int SIZE = sizeof(value);
  1474. // Conversion from/to SIMD register
  1475. is_utf8_really_inline base_u16() = default;
  1476. is_utf8_really_inline base_u16(const uint16x8_t _value) : value(_value) {}
  1477. is_utf8_really_inline operator const uint16x8_t &() const {
  1478. return this->value;
  1479. }
  1480. is_utf8_really_inline operator uint16x8_t &() { return this->value; }
  1481. // Bit operations
  1482. is_utf8_really_inline simd16<T> operator|(const simd16<T> other) const {
  1483. return vorrq_u16(*this, other);
  1484. }
  1485. is_utf8_really_inline simd16<T> operator&(const simd16<T> other) const {
  1486. return vandq_u16(*this, other);
  1487. }
  1488. is_utf8_really_inline simd16<T> operator^(const simd16<T> other) const {
  1489. return veorq_u16(*this, other);
  1490. }
  1491. is_utf8_really_inline simd16<T> bit_andnot(const simd16<T> other) const {
  1492. return vbicq_u16(*this, other);
  1493. }
  1494. is_utf8_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
  1495. is_utf8_really_inline simd16<T> &operator|=(const simd16<T> other) {
  1496. auto this_cast = static_cast<simd16<T> *>(this);
  1497. *this_cast = *this_cast | other;
  1498. return *this_cast;
  1499. }
  1500. is_utf8_really_inline simd16<T> &operator&=(const simd16<T> other) {
  1501. auto this_cast = static_cast<simd16<T> *>(this);
  1502. *this_cast = *this_cast & other;
  1503. return *this_cast;
  1504. }
  1505. is_utf8_really_inline simd16<T> &operator^=(const simd16<T> other) {
  1506. auto this_cast = static_cast<simd16<T> *>(this);
  1507. *this_cast = *this_cast ^ other;
  1508. return *this_cast;
  1509. }
  1510. is_utf8_really_inline Mask operator==(const simd16<T> other) const {
  1511. return vceqq_u16(*this, other);
  1512. }
  1513. template <int N = 1>
  1514. is_utf8_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
  1515. return vextq_u18(prev_chunk, *this, 8 - N);
  1516. }
  1517. };
  1518. template <typename T, typename Mask = simd16<bool>>
  1519. struct base16 : base_u16<T> {
  1520. typedef uint16_t bitmask_t;
  1521. typedef uint32_t bitmask2_t;
  1522. is_utf8_really_inline base16() : base_u16<T>() {}
  1523. is_utf8_really_inline base16(const uint16x8_t _value) : base_u16<T>(_value) {}
  1524. template <typename Pointer>
  1525. is_utf8_really_inline base16(const Pointer *ptr) : base16(vld1q_u16(ptr)) {}
  1526. is_utf8_really_inline Mask operator==(const simd16<T> other) const {
  1527. return vceqq_u16(*this, other);
  1528. }
  1529. static const int SIZE = sizeof(base_u16<T>::value);
  1530. template <int N = 1>
  1531. is_utf8_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
  1532. return vextq_u18(prev_chunk, *this, 8 - N);
  1533. }
  1534. };
  1535. // SIMD byte mask type (returned by things like eq and gt)
  1536. template <> struct simd16<bool> : base16<bool> {
  1537. static is_utf8_really_inline simd16<bool> splat(bool _value) {
  1538. return vmovq_n_u16(uint16_t(-(!!_value)));
  1539. }
  1540. is_utf8_really_inline simd16<bool>() : base16() {}
  1541. is_utf8_really_inline simd16<bool>(const uint16x8_t _value)
  1542. : base16<bool>(_value) {}
  1543. // Splat constructor
  1544. is_utf8_really_inline simd16<bool>(bool _value)
  1545. : base16<bool>(splat(_value)) {}
  1546. };
  1547. template <typename T> struct base16_numeric : base16<T> {
  1548. static is_utf8_really_inline simd16<T> splat(T _value) {
  1549. return vmovq_n_u16(_value);
  1550. }
  1551. static is_utf8_really_inline simd16<T> zero() { return vdupq_n_u16(0); }
  1552. static is_utf8_really_inline simd16<T> load(const T values[8]) {
  1553. return vld1q_u16(reinterpret_cast<const uint16_t *>(values));
  1554. }
  1555. is_utf8_really_inline base16_numeric() : base16<T>() {}
  1556. is_utf8_really_inline base16_numeric(const uint16x8_t _value)
  1557. : base16<T>(_value) {}
  1558. // Store to array
  1559. is_utf8_really_inline void store(T dst[8]) const {
  1560. return vst1q_u16(dst, *this);
  1561. }
  1562. // Override to distinguish from bool version
  1563. is_utf8_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
  1564. // Addition/subtraction are the same for signed and unsigned
  1565. is_utf8_really_inline simd16<T> operator+(const simd16<T> other) const {
  1566. return vaddq_u8(*this, other);
  1567. }
  1568. is_utf8_really_inline simd16<T> operator-(const simd16<T> other) const {
  1569. return vsubq_u8(*this, other);
  1570. }
  1571. is_utf8_really_inline simd16<T> &operator+=(const simd16<T> other) {
  1572. *this = *this + other;
  1573. return *static_cast<simd16<T> *>(this);
  1574. }
  1575. is_utf8_really_inline simd16<T> &operator-=(const simd16<T> other) {
  1576. *this = *this - other;
  1577. return *static_cast<simd16<T> *>(this);
  1578. }
  1579. };
  1580. // Signed words
  1581. template <> struct simd16<int16_t> : base16_numeric<int16_t> {
  1582. is_utf8_really_inline simd16() : base16_numeric<int16_t>() {}
  1583. #ifndef IS_UTF8_REGULAR_VISUAL_STUDIO
  1584. is_utf8_really_inline simd16(const uint16x8_t _value)
  1585. : base16_numeric<int16_t>(_value) {}
  1586. #endif
  1587. is_utf8_really_inline simd16(const int16x8_t _value)
  1588. : base16_numeric<int16_t>(vreinterpretq_u16_s16(_value)) {}
  1589. // Splat constructor
  1590. is_utf8_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
  1591. // Array constructor
  1592. is_utf8_really_inline simd16(const int16_t *values) : simd16(load(values)) {}
  1593. is_utf8_really_inline simd16(const char16_t *values)
  1594. : simd16(load(reinterpret_cast<const int16_t *>(values))) {}
  1595. is_utf8_really_inline operator simd16<uint16_t>() const;
  1596. is_utf8_really_inline operator const uint16x8_t &() const {
  1597. return this->value;
  1598. }
  1599. is_utf8_really_inline operator const int16x8_t() const {
  1600. return vreinterpretq_s16_u16(this->value);
  1601. }
  1602. is_utf8_really_inline int16_t max_val() const {
  1603. return vmaxvq_s16(vreinterpretq_s16_u16(this->value));
  1604. }
  1605. is_utf8_really_inline int16_t min_val() const {
  1606. return vminvq_s16(vreinterpretq_s16_u16(this->value));
  1607. }
  1608. // Order-sensitive comparisons
  1609. is_utf8_really_inline simd16<int16_t>
  1610. max_val(const simd16<int16_t> other) const {
  1611. return vmaxq_s16(vreinterpretq_s16_u16(this->value),
  1612. vreinterpretq_s16_u16(other.value));
  1613. }
  1614. is_utf8_really_inline simd16<int16_t>
  1615. min_val(const simd16<int16_t> other) const {
  1616. return vmaxq_s16(vreinterpretq_s16_u16(this->value),
  1617. vreinterpretq_s16_u16(other.value));
  1618. }
  1619. is_utf8_really_inline simd16<bool>
  1620. operator>(const simd16<int16_t> other) const {
  1621. return vcgtq_s16(vreinterpretq_s16_u16(this->value),
  1622. vreinterpretq_s16_u16(other.value));
  1623. }
  1624. is_utf8_really_inline simd16<bool>
  1625. operator<(const simd16<int16_t> other) const {
  1626. return vcltq_s16(vreinterpretq_s16_u16(this->value),
  1627. vreinterpretq_s16_u16(other.value));
  1628. }
  1629. };
  1630. // Unsigned words
  1631. template <> struct simd16<uint16_t> : base16_numeric<uint16_t> {
  1632. is_utf8_really_inline simd16() : base16_numeric<uint16_t>() {}
  1633. is_utf8_really_inline simd16(const uint16x8_t _value)
  1634. : base16_numeric<uint16_t>(_value) {}
  1635. // Splat constructor
  1636. is_utf8_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
  1637. // Array constructor
  1638. is_utf8_really_inline simd16(const uint16_t *values) : simd16(load(values)) {}
  1639. is_utf8_really_inline simd16(const char16_t *values)
  1640. : simd16(load(reinterpret_cast<const uint16_t *>(values))) {}
  1641. is_utf8_really_inline int16_t max_val() const { return vmaxvq_u16(*this); }
  1642. is_utf8_really_inline int16_t min_val() const { return vminvq_u16(*this); }
  1643. // Saturated math
  1644. is_utf8_really_inline simd16<uint16_t>
  1645. saturating_add(const simd16<uint16_t> other) const {
  1646. return vqaddq_u16(*this, other);
  1647. }
  1648. is_utf8_really_inline simd16<uint16_t>
  1649. saturating_sub(const simd16<uint16_t> other) const {
  1650. return vqsubq_u16(*this, other);
  1651. }
  1652. // Order-specific operations
  1653. is_utf8_really_inline simd16<uint16_t>
  1654. max_val(const simd16<uint16_t> other) const {
  1655. return vmaxq_u16(*this, other);
  1656. }
  1657. is_utf8_really_inline simd16<uint16_t>
  1658. min_val(const simd16<uint16_t> other) const {
  1659. return vminq_u16(*this, other);
  1660. }
  1661. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  1662. is_utf8_really_inline simd16<uint16_t>
  1663. gt_bits(const simd16<uint16_t> other) const {
  1664. return this->saturating_sub(other);
  1665. }
  1666. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  1667. is_utf8_really_inline simd16<uint16_t>
  1668. lt_bits(const simd16<uint16_t> other) const {
  1669. return other.saturating_sub(*this);
  1670. }
  1671. is_utf8_really_inline simd16<bool>
  1672. operator<=(const simd16<uint16_t> other) const {
  1673. return vcleq_u16(*this, other);
  1674. }
  1675. is_utf8_really_inline simd16<bool>
  1676. operator>=(const simd16<uint16_t> other) const {
  1677. return vcgeq_u16(*this, other);
  1678. }
  1679. is_utf8_really_inline simd16<bool>
  1680. operator>(const simd16<uint16_t> other) const {
  1681. return vcgtq_u16(*this, other);
  1682. }
  1683. is_utf8_really_inline simd16<bool>
  1684. operator<(const simd16<uint16_t> other) const {
  1685. return vcltq_u16(*this, other);
  1686. }
  1687. // Bit-specific operations
  1688. is_utf8_really_inline simd16<bool> bits_not_set() const {
  1689. return *this == uint16_t(0);
  1690. }
  1691. template <int N> is_utf8_really_inline simd16<uint16_t> shr() const {
  1692. return simd16<uint16_t>(vshrq_n_u16(*this, N));
  1693. }
  1694. template <int N> is_utf8_really_inline simd16<uint16_t> shl() const {
  1695. return simd16<uint16_t>(vshlq_n_u16(*this, N));
  1696. }
  1697. // logical operations
  1698. is_utf8_really_inline simd16<uint16_t>
  1699. operator|(const simd16<uint16_t> other) const {
  1700. return vorrq_u16(*this, other);
  1701. }
  1702. is_utf8_really_inline simd16<uint16_t>
  1703. operator&(const simd16<uint16_t> other) const {
  1704. return vandq_u16(*this, other);
  1705. }
  1706. is_utf8_really_inline simd16<uint16_t>
  1707. operator^(const simd16<uint16_t> other) const {
  1708. return veorq_u16(*this, other);
  1709. }
  1710. // Pack with the unsigned saturation two uint16_t words into single uint8_t
  1711. // vector
  1712. static is_utf8_really_inline simd8<uint8_t> pack(const simd16<uint16_t> &v0,
  1713. const simd16<uint16_t> &v1) {
  1714. return vqmovn_high_u16(vqmovn_u16(v0), v1);
  1715. }
  1716. // Change the endianness
  1717. is_utf8_really_inline simd16<uint16_t> swap_bytes() const {
  1718. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1719. const uint8x16_t swap =
  1720. make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
  1721. #else
  1722. const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6,
  1723. 9, 8, 11, 10, 13, 12, 15, 14};
  1724. #endif
  1725. return vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(*this), swap));
  1726. }
  1727. };
  1728. is_utf8_really_inline simd16<int16_t>::operator simd16<uint16_t>() const {
  1729. return this->value;
  1730. }
  1731. template <typename T> struct simd16x32 {
  1732. static constexpr int NUM_CHUNKS = 64 / sizeof(simd16<T>);
  1733. static_assert(NUM_CHUNKS == 4,
  1734. "ARM kernel should use four registers per 64-byte block.");
  1735. simd16<T> chunks[NUM_CHUNKS];
  1736. simd16x32(const simd16x32<T> &o) = delete; // no copy allowed
  1737. simd16x32<T> &
  1738. operator=(const simd16<T> other) = delete; // no assignment allowed
  1739. simd16x32() = delete; // no default constructor allowed
  1740. is_utf8_really_inline
  1741. simd16x32(const simd16<T> chunk0, const simd16<T> chunk1,
  1742. const simd16<T> chunk2, const simd16<T> chunk3)
  1743. : chunks{chunk0, chunk1, chunk2, chunk3} {}
  1744. is_utf8_really_inline simd16x32(const T *ptr)
  1745. : chunks{simd16<T>::load(ptr),
  1746. simd16<T>::load(ptr + sizeof(simd16<T>) / sizeof(T)),
  1747. simd16<T>::load(ptr + 2 * sizeof(simd16<T>) / sizeof(T)),
  1748. simd16<T>::load(ptr + 3 * sizeof(simd16<T>) / sizeof(T))} {}
  1749. is_utf8_really_inline void store(T *ptr) const {
  1750. this->chunks[0].store(ptr + sizeof(simd16<T>) * 0 / sizeof(T));
  1751. this->chunks[1].store(ptr + sizeof(simd16<T>) * 1 / sizeof(T));
  1752. this->chunks[2].store(ptr + sizeof(simd16<T>) * 2 / sizeof(T));
  1753. this->chunks[3].store(ptr + sizeof(simd16<T>) * 3 / sizeof(T));
  1754. }
  1755. is_utf8_really_inline simd16<T> reduce_or() const {
  1756. return (this->chunks[0] | this->chunks[1]) |
  1757. (this->chunks[2] | this->chunks[3]);
  1758. }
  1759. is_utf8_really_inline bool is_ascii() const { return reduce_or().is_ascii(); }
  1760. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  1761. this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 0);
  1762. this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 1);
  1763. this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 2);
  1764. this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 3);
  1765. }
  1766. is_utf8_really_inline uint64_t to_bitmask() const {
  1767. #ifdef IS_UTF8_REGULAR_VISUAL_STUDIO
  1768. const uint8x16_t bit_mask =
  1769. make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01,
  1770. 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
  1771. #else
  1772. const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
  1773. 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  1774. #endif
  1775. // Add each of the elements next to each other, successively, to stuff each
  1776. // 8 byte mask into one.
  1777. uint8x16_t sum0 = vpaddq_u8(
  1778. vreinterpretq_u8_u16(this->chunks[0] & vreinterpretq_u16_u8(bit_mask)),
  1779. vreinterpretq_u8_u16(this->chunks[1] & vreinterpretq_u16_u8(bit_mask)));
  1780. uint8x16_t sum1 = vpaddq_u8(
  1781. vreinterpretq_u8_u16(this->chunks[2] & vreinterpretq_u16_u8(bit_mask)),
  1782. vreinterpretq_u8_u16(this->chunks[3] & vreinterpretq_u16_u8(bit_mask)));
  1783. sum0 = vpaddq_u8(sum0, sum1);
  1784. sum0 = vpaddq_u8(sum0, sum0);
  1785. return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
  1786. }
  1787. is_utf8_really_inline void swap_bytes() {
  1788. this->chunks[0] = this->chunks[0].swap_bytes();
  1789. this->chunks[1] = this->chunks[1].swap_bytes();
  1790. this->chunks[2] = this->chunks[2].swap_bytes();
  1791. this->chunks[3] = this->chunks[3].swap_bytes();
  1792. }
  1793. is_utf8_really_inline uint64_t eq(const T m) const {
  1794. const simd16<T> mask = simd16<T>::splat(m);
  1795. return simd16x32<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
  1796. this->chunks[2] == mask, this->chunks[3] == mask)
  1797. .to_bitmask();
  1798. }
  1799. is_utf8_really_inline uint64_t lteq(const T m) const {
  1800. const simd16<T> mask = simd16<T>::splat(m);
  1801. return simd16x32<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
  1802. this->chunks[2] <= mask, this->chunks[3] <= mask)
  1803. .to_bitmask();
  1804. }
  1805. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  1806. const simd16<T> mask_low = simd16<T>::splat(low);
  1807. const simd16<T> mask_high = simd16<T>::splat(high);
  1808. return simd16x32<bool>(
  1809. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  1810. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  1811. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  1812. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  1813. .to_bitmask();
  1814. }
  1815. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  1816. const simd16<T> mask_low = simd16<T>::splat(low);
  1817. const simd16<T> mask_high = simd16<T>::splat(high);
  1818. return simd16x32<bool>(
  1819. (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
  1820. (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low),
  1821. (this->chunks[2] > mask_high) | (this->chunks[2] < mask_low),
  1822. (this->chunks[3] > mask_high) | (this->chunks[3] < mask_low))
  1823. .to_bitmask();
  1824. }
  1825. is_utf8_really_inline uint64_t lt(const T m) const {
  1826. const simd16<T> mask = simd16<T>::splat(m);
  1827. return simd16x32<bool>(this->chunks[0] < mask, this->chunks[1] < mask,
  1828. this->chunks[2] < mask, this->chunks[3] < mask)
  1829. .to_bitmask();
  1830. }
  1831. }; // struct simd16x32<T>
  1832. template <>
  1833. is_utf8_really_inline uint64_t simd16x32<uint16_t>::not_in_range(
  1834. const uint16_t low, const uint16_t high) const {
  1835. const simd16<uint16_t> mask_low = simd16<uint16_t>::splat(low);
  1836. const simd16<uint16_t> mask_high = simd16<uint16_t>::splat(high);
  1837. simd16x32<uint16_t> x(simd16<uint16_t>((this->chunks[0] > mask_high) |
  1838. (this->chunks[0] < mask_low)),
  1839. simd16<uint16_t>((this->chunks[1] > mask_high) |
  1840. (this->chunks[1] < mask_low)),
  1841. simd16<uint16_t>((this->chunks[2] > mask_high) |
  1842. (this->chunks[2] < mask_low)),
  1843. simd16<uint16_t>((this->chunks[3] > mask_high) |
  1844. (this->chunks[3] < mask_low)));
  1845. return x.to_bitmask();
  1846. }
  1847. } // namespace simd
  1848. } // unnamed namespace
  1849. } // namespace arm64
  1850. } // namespace is_utf8_internals
  1851. #endif // IS_UTF8_ARM64_SIMD_H
  1852. #endif // IS_UTF8_IMPLEMENTATION_ARM64
  1853. #endif // IS_UTF8_ARM64_H
  1854. #ifndef IS_UTF8_ICELAKE_H
  1855. #define IS_UTF8_ICELAKE_H
  // Compiler capability probe for AVX-512 VBMI2. Two independent checks below
  // can each define IS_UTF8_COMPILER_SUPPORTS_VBMI2 to 1; when neither fires
  // the macro stays undefined and evaluates to 0 inside #if expressions.
  1856. #ifdef __has_include
  1857. // How do we detect that a compiler supports vbmi2?
  1858. // For sure if the following header is found, we are ok?
  1859. #if __has_include(<avx512vbmi2intrin.h>)
  1860. #define IS_UTF8_COMPILER_SUPPORTS_VBMI2 1
  1861. #endif
  1862. #endif
  1863. #ifdef _MSC_VER
  1864. #if _MSC_VER >= 1920
  1865. // Visual Studio 2019 and up support VBMI2 under x64 even if the header
  1866. // avx512vbmi2intrin.h is not found.
  1867. #define IS_UTF8_COMPILER_SUPPORTS_VBMI2 1
  1868. #endif
  1869. #endif
  1870. // We allow icelake on x64 as long as the compiler is known to support VBMI2.
  // The #ifndef lets a build pre-define IS_UTF8_IMPLEMENTATION_ICELAKE to
  // override this default.
  1871. #ifndef IS_UTF8_IMPLEMENTATION_ICELAKE
  1872. #define IS_UTF8_IMPLEMENTATION_ICELAKE \
  1873. ((IS_UTF8_IS_X86_64) && (IS_UTF8_COMPILER_SUPPORTS_VBMI2))
  1874. #endif
  // IS_UTF8_CAN_ALWAYS_RUN_ICELAKE additionally requires __AVX2__ and the
  // IS_UTF8_HAS_AVX512F/DQ/VL/VBMI2 macros (defined elsewhere; presumably
  // compile-time feature flags - confirm) and excludes 32-bit builds.
  1875. // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this
  1876. // next line, see https://github.com/simdutf/simdutf/issues/1247
  1877. #define IS_UTF8_CAN_ALWAYS_RUN_ICELAKE \
  1878. ((IS_UTF8_IMPLEMENTATION_ICELAKE) && (IS_UTF8_IS_X86_64) && (__AVX2__) && \
  1879. (IS_UTF8_HAS_AVX512F && IS_UTF8_HAS_AVX512DQ && IS_UTF8_HAS_AVX512VL && \
  1880. IS_UTF8_HAS_AVX512VBMI2) && \
  1881. (!IS_UTF8_IS_32BITS))
  1882. #if IS_UTF8_IMPLEMENTATION_ICELAKE
  // Target-region macros for the icelake kernel. When the kernel can always
  // run, both macros are defined empty.
  1883. #if IS_UTF8_CAN_ALWAYS_RUN_ICELAKE
  1884. #define IS_UTF8_TARGET_ICELAKE
  1885. #define IS_UTF8_UNTARGET_ICELAKE
  1886. #else
  // Otherwise they forward to IS_UTF8_TARGET_REGION / IS_UTF8_UNTARGET_REGION
  // (defined elsewhere) with the feature list below.
  1887. #define IS_UTF8_TARGET_ICELAKE \
  1888. IS_UTF8_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi," \
  1889. "avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt")
  1890. #define IS_UTF8_UNTARGET_ICELAKE IS_UTF8_UNTARGET_REGION
  1891. #endif
  // Empty declaration of the icelake namespace.
  1892. namespace is_utf8_internals {
  1893. namespace icelake {} // namespace icelake
  1894. } // namespace is_utf8_internals
  1895. //
  1896. // These two need to be included outside IS_UTF8_TARGET_REGION
  1897. //
  1898. #ifndef IS_UTF8_ICELAKE_INTRINSICS_H
  1899. #define IS_UTF8_ICELAKE_INTRINSICS_H
  1900. #ifdef IS_UTF8_VISUAL_STUDIO
  1901. // under clang within visual studio, this will include <x86intrin.h>
  1902. #include <immintrin.h>
  1903. #include <intrin.h> // visual studio or clang
  1904. #else
  1905. #if IS_UTF8_GCC11ORMORE
  1906. // We should not get warnings while including <x86intrin.h> yet we do
  1907. // under some versions of GCC.
  1908. // If the x86intrin.h header has uninitialized values that are problematic,
  1909. // it is a GCC issue, we want to ignore these warnings.
  1910. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
  1911. #endif
  1912. #include <x86intrin.h> // elsewhere
  1913. #if IS_UTF8_GCC11ORMORE
  1914. // cancels the suppression of the -Wuninitialized
  1915. IS_UTF8_POP_DISABLE_WARNINGS
  1916. #endif
  // If no _tzcnt_u64 macro exists at this point, map the name onto
  // __tzcnt_u64.
  1917. #ifndef _tzcnt_u64
  1918. #define _tzcnt_u64(x) __tzcnt_u64(x)
  1919. #endif // _tzcnt_u64
  1920. #endif // IS_UTF8_VISUAL_STUDIO
  1921. #ifdef IS_UTF8_CLANG_VISUAL_STUDIO
  1922. #include <avx2intrin.h>
  1923. #include <avxintrin.h>
  1924. #include <bmi2intrin.h> // for _pext_u64, _pdep_u64
  1925. #include <bmiintrin.h> // for _blsr_u64
  1926. #include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
  1927. #include <lzcntintrin.h> // for __lzcnt64
  1928. #include <smmintrin.h>
  1929. #include <tmmintrin.h>
  1930. #include <wmmintrin.h> // for _mm_clmulepi64_si128
  1931. // Important: we need the AVX-512 headers:
  1932. #include <avx512bwintrin.h>
  1933. #include <avx512cdintrin.h>
  1934. #include <avx512dqintrin.h>
  1935. #include <avx512fintrin.h>
  1936. #include <avx512vbmi2intrin.h>
  1937. #include <avx512vbmiintrin.h>
  1938. #include <avx512vlbwintrin.h>
  1939. #include <avx512vlintrin.h>
  // Unfortunately, we may not get _blsr_u64 here, although clang usually has
  // it as a macro. When it is missing we roll our own: clear the lowest set
  // bit. The argument is parenthesised so that expressions such as `a | b`
  // expand correctly; note that it is evaluated twice.
  #ifndef _blsr_u64
  #define _blsr_u64(n) (((n) - 1) & (n))
  #endif // _blsr_u64
  1946. #endif // IS_UTF8_CLANG_VISUAL_STUDIO
  // Flags real GCC (not clang, which also defines __GNUC__) of major version
  // 8 or 9. IS_UTF8_GCC8 gates the _mm512_set_epi8 fallback that follows.
  1947. #if defined(__GNUC__) && !defined(__clang__)
  1948. #if __GNUC__ == 8
  1949. #define IS_UTF8_GCC8 1
  1950. #elif __GNUC__ == 9
  1951. #define IS_UTF8_GCC9 1
  1952. #endif // __GNUC__ == 8 || __GNUC__ == 9
  1953. #endif // defined(__GNUC__) && !defined(__clang__)
  1954. #if IS_UTF8_GCC8
  1955. #pragma GCC push_options
  1956. #pragma GCC target("avx512f")
  1957. /**
  1958. * GCC 8 fails to provide _mm512_set_epi8. We roll our own.
  1959. */
  1960. inline __m512i
  1961. _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4,
  1962. uint8_t a5, uint8_t a6, uint8_t a7, uint8_t a8, uint8_t a9,
  1963. uint8_t a10, uint8_t a11, uint8_t a12, uint8_t a13, uint8_t a14,
  1964. uint8_t a15, uint8_t a16, uint8_t a17, uint8_t a18, uint8_t a19,
  1965. uint8_t a20, uint8_t a21, uint8_t a22, uint8_t a23, uint8_t a24,
  1966. uint8_t a25, uint8_t a26, uint8_t a27, uint8_t a28, uint8_t a29,
  1967. uint8_t a30, uint8_t a31, uint8_t a32, uint8_t a33, uint8_t a34,
  1968. uint8_t a35, uint8_t a36, uint8_t a37, uint8_t a38, uint8_t a39,
  1969. uint8_t a40, uint8_t a41, uint8_t a42, uint8_t a43, uint8_t a44,
  1970. uint8_t a45, uint8_t a46, uint8_t a47, uint8_t a48, uint8_t a49,
  1971. uint8_t a50, uint8_t a51, uint8_t a52, uint8_t a53, uint8_t a54,
  1972. uint8_t a55, uint8_t a56, uint8_t a57, uint8_t a58, uint8_t a59,
  1973. uint8_t a60, uint8_t a61, uint8_t a62, uint8_t a63) {
  1974. return _mm512_set_epi64(
  1975. uint64_t(a7) + (uint64_t(a6) << 8) + (uint64_t(a5) << 16) +
  1976. (uint64_t(a4) << 24) + (uint64_t(a3) << 32) + (uint64_t(a2) << 40) +
  1977. (uint64_t(a1) << 48) + (uint64_t(a0) << 56),
  1978. uint64_t(a15) + (uint64_t(a14) << 8) + (uint64_t(a13) << 16) +
  1979. (uint64_t(a12) << 24) + (uint64_t(a11) << 32) +
  1980. (uint64_t(a10) << 40) + (uint64_t(a9) << 48) + (uint64_t(a8) << 56),
  1981. uint64_t(a23) + (uint64_t(a22) << 8) + (uint64_t(a21) << 16) +
  1982. (uint64_t(a20) << 24) + (uint64_t(a19) << 32) +
  1983. (uint64_t(a18) << 40) + (uint64_t(a17) << 48) + (uint64_t(a16) << 56),
  1984. uint64_t(a31) + (uint64_t(a30) << 8) + (uint64_t(a29) << 16) +
  1985. (uint64_t(a28) << 24) + (uint64_t(a27) << 32) +
  1986. (uint64_t(a26) << 40) + (uint64_t(a25) << 48) + (uint64_t(a24) << 56),
  1987. uint64_t(a39) + (uint64_t(a38) << 8) + (uint64_t(a37) << 16) +
  1988. (uint64_t(a36) << 24) + (uint64_t(a35) << 32) +
  1989. (uint64_t(a34) << 40) + (uint64_t(a33) << 48) + (uint64_t(a32) << 56),
  1990. uint64_t(a47) + (uint64_t(a46) << 8) + (uint64_t(a45) << 16) +
  1991. (uint64_t(a44) << 24) + (uint64_t(a43) << 32) +
  1992. (uint64_t(a42) << 40) + (uint64_t(a41) << 48) + (uint64_t(a40) << 56),
  1993. uint64_t(a55) + (uint64_t(a54) << 8) + (uint64_t(a53) << 16) +
  1994. (uint64_t(a52) << 24) + (uint64_t(a51) << 32) +
  1995. (uint64_t(a50) << 40) + (uint64_t(a49) << 48) + (uint64_t(a48) << 56),
  1996. uint64_t(a63) + (uint64_t(a62) << 8) + (uint64_t(a61) << 16) +
  1997. (uint64_t(a60) << 24) + (uint64_t(a59) << 32) +
  1998. (uint64_t(a58) << 40) + (uint64_t(a57) << 48) +
  1999. (uint64_t(a56) << 56));
  2000. }
  2001. #pragma GCC pop_options
  2002. #endif // IS_UTF8_GCC8
  2003. #endif // IS_UTF8_ICELAKE_INTRINSICS_H
  2004. #ifndef IS_UTF8_ICELAKE_IMPLEMENTATION_H
  2005. #define IS_UTF8_ICELAKE_IMPLEMENTATION_H
  2006. namespace is_utf8_internals {
  2007. namespace icelake {
  2008. class implementation final : public is_utf8_internals::implementation {
  2009. public:
  2010. is_utf8_really_inline implementation()
  2011. : is_utf8_internals::implementation(
  2012. "icelake",
  2013. "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 "
  2014. "extensions)",
  2015. internal::instruction_set::AVX2 |
  2016. internal::instruction_set::PCLMULQDQ |
  2017. internal::instruction_set::BMI1 |
  2018. internal::instruction_set::BMI2 |
  2019. internal::instruction_set::AVX512BW |
  2020. internal::instruction_set::AVX512CD |
  2021. internal::instruction_set::AVX512VL |
  2022. internal::instruction_set::AVX512VBMI2) {}
  2023. is_utf8_warn_unused bool validate_utf8(const char *buf,
  2024. size_t len) const noexcept final;
  2025. };
  2026. } // namespace icelake
  2027. } // namespace is_utf8_internals
  2028. #endif // IS_UTF8_ICELAKE_IMPLEMENTATION_H
  // NOTE(review): no matching IS_UTF8_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
  // is visible in the icelake section above (the only disable there is popped
  // right after <x86intrin.h>), so this pop looks unbalanced - confirm against
  // the macro definitions.
  2029. #if IS_UTF8_GCC11ORMORE // workaround for
  2030. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  2031. IS_UTF8_POP_DISABLE_WARNINGS
  2032. #endif // end of workaround
  2033. #endif // IS_UTF8_IMPLEMENTATION_ICELAKE
  2034. #endif // IS_UTF8_ICELAKE_H
  2035. #ifndef IS_UTF8_HASWELL_H
  2036. #define IS_UTF8_HASWELL_H
  // Include-order guards: fail the build if the westmere or fallback sections
  // were already processed.
  2037. #ifdef IS_UTF8_WESTMERE_H
  2038. #error "haswell.h must be included before westmere.h"
  2039. #endif
  2040. #ifdef IS_UTF8_FALLBACK_H
  2041. #error "haswell.h must be included before fallback.h"
  2042. #endif
  2043. // Default Haswell to on if this is x86-64. Even if we're not compiled for it,
  2044. // it could be selected at runtime.
  // The #ifndef lets a build pre-define IS_UTF8_IMPLEMENTATION_HASWELL.
  2045. #ifndef IS_UTF8_IMPLEMENTATION_HASWELL
  2046. //
  2047. // You do not want to restrict it like so: IS_UTF8_IS_X86_64 && __AVX2__
  2048. // because we want to rely on *runtime dispatch*.
  2049. //
  // The Haswell kernel is switched off when the icelake kernel can always run.
  2050. #if IS_UTF8_CAN_ALWAYS_RUN_ICELAKE
  2051. #define IS_UTF8_IMPLEMENTATION_HASWELL 0
  2052. #else
  2053. #define IS_UTF8_IMPLEMENTATION_HASWELL (IS_UTF8_IS_X86_64)
  2054. #endif
  2055. #endif
  // IS_UTF8_CAN_ALWAYS_RUN_HASWELL is 1 only when the kernel is enabled, the
  // target is x86-64 and the build itself has __AVX2__.
  2056. // To see why (__BMI__) && (__PCLMUL__) && (__LZCNT__) are not part of this
  2057. // next line, see https://github.com/simdutf/simdutf/issues/1247
  2058. #if ((IS_UTF8_IMPLEMENTATION_HASWELL) && (IS_UTF8_IS_X86_64) && (__AVX2__))
  2059. #define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 1
  2060. #else
  2061. #define IS_UTF8_CAN_ALWAYS_RUN_HASWELL 0
  2062. #endif
  2063. #if IS_UTF8_IMPLEMENTATION_HASWELL
  // Target-region macro for the Haswell kernel; forwards the feature list to
  // IS_UTF8_TARGET_REGION (defined elsewhere).
  2064. #define IS_UTF8_TARGET_HASWELL IS_UTF8_TARGET_REGION("avx2,bmi,pclmul,lzcnt")
  2065. namespace is_utf8_internals {
  2066. /**
  2067. * Implementation for Haswell (Intel AVX2).
  2068. */
  // Empty declaration of the haswell namespace.
  2069. namespace haswell {} // namespace haswell
  2070. } // namespace is_utf8_internals
  2071. //
  2072. // These two need to be included outside IS_UTF8_TARGET_REGION
  2073. //
  2074. #ifndef IS_UTF8_HASWELL_IMPLEMENTATION_H
  2075. #define IS_UTF8_HASWELL_IMPLEMENTATION_H
  2076. // The constructor may be executed on any host, so we take care not to use
  2077. // IS_UTF8_TARGET_REGION
  2078. namespace is_utf8_internals {
  2079. namespace haswell {
  2080. class implementation final : public is_utf8_internals::implementation {
  2081. public:
  2082. is_utf8_really_inline implementation()
  2083. : is_utf8_internals::implementation(
  2084. "haswell", "Intel/AMD AVX2",
  2085. internal::instruction_set::AVX2 |
  2086. internal::instruction_set::PCLMULQDQ |
  2087. internal::instruction_set::BMI1 |
  2088. internal::instruction_set::BMI2) {}
  2089. is_utf8_warn_unused bool validate_utf8(const char *buf,
  2090. size_t len) const noexcept final;
  2091. };
  2092. } // namespace haswell
  2093. } // namespace is_utf8_internals
  2094. #endif // IS_UTF8_HASWELL_IMPLEMENTATION_H
  2095. #ifndef IS_UTF8_HASWELL_INTRINSICS_H
  2096. #define IS_UTF8_HASWELL_INTRINSICS_H
  2097. #ifdef IS_UTF8_VISUAL_STUDIO
  2098. // under clang within visual studio, this will include <x86intrin.h>
  2099. #include <intrin.h> // visual studio or clang
  2100. #else
  2101. #if IS_UTF8_GCC11ORMORE
  2102. // We should not get warnings while including <x86intrin.h> yet we do
  2103. // under some versions of GCC.
  2104. // If the x86intrin.h header has uninitialized values that are problematic,
  2105. // it is a GCC issue, we want to ignore these warnings.
  2106. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
  2107. #endif
  2108. #include <x86intrin.h> // elsewhere
  2109. #if IS_UTF8_GCC11ORMORE
  2110. // cancels the suppression of the -Wuninitialized
  2111. IS_UTF8_POP_DISABLE_WARNINGS
  2112. #endif
  2113. #endif // IS_UTF8_VISUAL_STUDIO
  2114. #ifdef IS_UTF8_CLANG_VISUAL_STUDIO
  2115. #include <avx2intrin.h>
  2116. #include <avxintrin.h>
  2117. #include <bmiintrin.h> // for _blsr_u64
  2118. #include <immintrin.h> // for most things (AVX2, AVX512, _popcnt64)
  2119. #include <lzcntintrin.h> // for __lzcnt64
  2120. #include <smmintrin.h>
  2121. #include <tmmintrin.h>
  2122. #include <wmmintrin.h> // for _mm_clmulepi64_si128
  // Unfortunately, we may not get _blsr_u64 here, although clang usually has
  // it as a macro. When it is missing we roll our own: clear the lowest set
  // bit. The argument is parenthesised so that expressions such as `a | b`
  // expand correctly; note that it is evaluated twice.
  #ifndef _blsr_u64
  #define _blsr_u64(n) (((n) - 1) & (n))
  #endif // _blsr_u64
  2129. #endif // IS_UTF8_CLANG_VISUAL_STUDIO
  2130. #endif // IS_UTF8_HASWELL_INTRINSICS_H
  2131. //
  2132. // The rest need to be inside the region
  2133. //
  2134. // redefining IS_UTF8_IMPLEMENTATION to "haswell"
  2135. // #define IS_UTF8_IMPLEMENTATION haswell
  2136. IS_UTF8_TARGET_HASWELL
  2137. #if IS_UTF8_GCC11ORMORE // workaround for
  2138. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  2139. IS_UTF8_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
  2140. #endif // end of workaround
  2141. // Declarations
  2142. #ifndef IS_UTF8_HASWELL_SIMD_H
  2143. #define IS_UTF8_HASWELL_SIMD_H
  2144. namespace is_utf8_internals {
  2145. namespace haswell {
  2146. namespace {
  2147. namespace simd {
  2148. // Forward-declared so they can be used by splat and friends.
  2149. template <typename Child> struct base {
  2150. __m256i value;
  2151. // Zero constructor
  2152. is_utf8_really_inline base() : value{__m256i()} {}
  2153. // Conversion from SIMD register
  2154. is_utf8_really_inline base(const __m256i _value) : value(_value) {}
  2155. // Conversion to SIMD register
  2156. is_utf8_really_inline operator const __m256i &() const { return this->value; }
  2157. is_utf8_really_inline operator __m256i &() { return this->value; }
  2158. template <endianness big_endian>
  2159. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  2160. __m256i first = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(*this));
  2161. __m256i second = _mm256_cvtepu8_epi16(_mm256_extractf128_si256(*this, 1));
  2162. if (big_endian) {
  2163. const __m256i swap = _mm256_setr_epi8(
  2164. 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18,
  2165. 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
  2166. first = _mm256_shuffle_epi8(first, swap);
  2167. second = _mm256_shuffle_epi8(second, swap);
  2168. }
  2169. _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), first);
  2170. _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 16), second);
  2171. }
  2172. is_utf8_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
  2173. _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr),
  2174. _mm256_cvtepu8_epi32(_mm256_castsi256_si128(*this)));
  2175. _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 8),
  2176. _mm256_cvtepu8_epi32(_mm256_castsi256_si128(
  2177. _mm256_srli_si256(*this, 8))));
  2178. _mm256_storeu_si256(
  2179. reinterpret_cast<__m256i *>(ptr + 16),
  2180. _mm256_cvtepu8_epi32(_mm256_extractf128_si256(*this, 1)));
  2181. _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr + 24),
  2182. _mm256_cvtepu8_epi32(_mm_srli_si128(
  2183. _mm256_extractf128_si256(*this, 1), 8)));
  2184. }
  2185. // Bit operations
  2186. is_utf8_really_inline Child operator|(const Child other) const {
  2187. return _mm256_or_si256(*this, other);
  2188. }
  2189. is_utf8_really_inline Child operator&(const Child other) const {
  2190. return _mm256_and_si256(*this, other);
  2191. }
  2192. is_utf8_really_inline Child operator^(const Child other) const {
  2193. return _mm256_xor_si256(*this, other);
  2194. }
  2195. is_utf8_really_inline Child bit_andnot(const Child other) const {
  2196. return _mm256_andnot_si256(other, *this);
  2197. }
  2198. is_utf8_really_inline Child &operator|=(const Child other) {
  2199. auto this_cast = static_cast<Child *>(this);
  2200. *this_cast = *this_cast | other;
  2201. return *this_cast;
  2202. }
  2203. is_utf8_really_inline Child &operator&=(const Child other) {
  2204. auto this_cast = static_cast<Child *>(this);
  2205. *this_cast = *this_cast & other;
  2206. return *this_cast;
  2207. }
  2208. is_utf8_really_inline Child &operator^=(const Child other) {
  2209. auto this_cast = static_cast<Child *>(this);
  2210. *this_cast = *this_cast ^ other;
  2211. return *this_cast;
  2212. }
  2213. };
  2214. // Forward-declared so they can be used by splat and friends.
  2215. template <typename T> struct simd8;
  2216. template <typename T, typename Mask = simd8<bool>>
  2217. struct base8 : base<simd8<T>> {
  2218. typedef uint32_t bitmask_t;
  2219. typedef uint64_t bitmask2_t;
  2220. is_utf8_really_inline base8() : base<simd8<T>>() {}
  2221. is_utf8_really_inline base8(const __m256i _value) : base<simd8<T>>(_value) {}
  2222. is_utf8_really_inline T first() const {
  2223. return _mm256_extract_epi8(*this, 0);
  2224. }
  2225. is_utf8_really_inline T last() const {
  2226. return _mm256_extract_epi8(*this, 31);
  2227. }
  2228. friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
  2229. const simd8<T> rhs) {
  2230. return _mm256_cmpeq_epi8(lhs, rhs);
  2231. }
  2232. static const int SIZE = sizeof(base<T>::value);
  2233. template <int N = 1>
  2234. is_utf8_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
  2235. return _mm256_alignr_epi8(
  2236. *this, _mm256_permute2x128_si256(prev_chunk, *this, 0x21), 16 - N);
  2237. }
  2238. };
  2239. // SIMD byte mask type (returned by things like eq and gt)
  2240. template <> struct simd8<bool> : base8<bool> {
  2241. static is_utf8_really_inline simd8<bool> splat(bool _value) {
  2242. return _mm256_set1_epi8(uint8_t(-(!!_value)));
  2243. }
  2244. is_utf8_really_inline simd8<bool>() : base8() {}
  2245. is_utf8_really_inline simd8<bool>(const __m256i _value)
  2246. : base8<bool>(_value) {}
  2247. // Splat constructor
  2248. is_utf8_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
  2249. is_utf8_really_inline uint32_t to_bitmask() const {
  2250. return uint32_t(_mm256_movemask_epi8(*this));
  2251. }
  2252. is_utf8_really_inline bool any() const {
  2253. return !_mm256_testz_si256(*this, *this);
  2254. }
  2255. is_utf8_really_inline bool none() const {
  2256. return _mm256_testz_si256(*this, *this);
  2257. }
  2258. is_utf8_really_inline bool all() const {
  2259. return static_cast<uint32_t>(_mm256_movemask_epi8(*this)) == 0xFFFFFFFF;
  2260. }
  2261. is_utf8_really_inline simd8<bool> operator~() const { return *this ^ true; }
  2262. };
  2263. template <typename T> struct base8_numeric : base8<T> {
  2264. static is_utf8_really_inline simd8<T> splat(T _value) {
  2265. return _mm256_set1_epi8(_value);
  2266. }
  2267. static is_utf8_really_inline simd8<T> zero() {
  2268. return _mm256_setzero_si256();
  2269. }
  2270. static is_utf8_really_inline simd8<T> load(const T values[32]) {
  2271. return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
  2272. }
  2273. // Repeat 16 values as many times as necessary (usually for lookup tables)
  2274. static is_utf8_really_inline simd8<T> repeat_16(T v0, T v1, T v2, T v3, T v4,
  2275. T v5, T v6, T v7, T v8, T v9,
  2276. T v10, T v11, T v12, T v13,
  2277. T v14, T v15) {
  2278. return simd8<T>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
  2279. v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  2280. v12, v13, v14, v15);
  2281. }
  2282. is_utf8_really_inline base8_numeric() : base8<T>() {}
  2283. is_utf8_really_inline base8_numeric(const __m256i _value)
  2284. : base8<T>(_value) {}
  2285. // Store to array
  2286. is_utf8_really_inline void store(T dst[32]) const {
  2287. return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this);
  2288. }
  2289. // Addition/subtraction are the same for signed and unsigned
  2290. is_utf8_really_inline simd8<T> operator+(const simd8<T> other) const {
  2291. return _mm256_add_epi8(*this, other);
  2292. }
  2293. is_utf8_really_inline simd8<T> operator-(const simd8<T> other) const {
  2294. return _mm256_sub_epi8(*this, other);
  2295. }
  2296. is_utf8_really_inline simd8<T> &operator+=(const simd8<T> other) {
  2297. *this = *this + other;
  2298. return *static_cast<simd8<T> *>(this);
  2299. }
  2300. is_utf8_really_inline simd8<T> &operator-=(const simd8<T> other) {
  2301. *this = *this - other;
  2302. return *static_cast<simd8<T> *>(this);
  2303. }
  2304. // Override to distinguish from bool version
  2305. is_utf8_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
  2306. // Perform a lookup assuming the value is between 0 and 16 (undefined behavior
  2307. // for out of range values)
  2308. template <typename L>
  2309. is_utf8_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
  2310. return _mm256_shuffle_epi8(lookup_table, *this);
  2311. }
  2312. template <typename L>
  2313. is_utf8_really_inline simd8<L>
  2314. lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
  2315. L replace5, L replace6, L replace7, L replace8, L replace9,
  2316. L replace10, L replace11, L replace12, L replace13, L replace14,
  2317. L replace15) const {
  2318. return lookup_16(simd8<L>::repeat_16(
  2319. replace0, replace1, replace2, replace3, replace4, replace5, replace6,
  2320. replace7, replace8, replace9, replace10, replace11, replace12,
  2321. replace13, replace14, replace15));
  2322. }
  2323. };
  2324. // Signed bytes
  2325. template <> struct simd8<int8_t> : base8_numeric<int8_t> {
  2326. is_utf8_really_inline simd8() : base8_numeric<int8_t>() {}
  2327. is_utf8_really_inline simd8(const __m256i _value)
  2328. : base8_numeric<int8_t>(_value) {}
  2329. // Splat constructor
  2330. is_utf8_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
  2331. // Array constructor
  2332. is_utf8_really_inline simd8(const int8_t values[32]) : simd8(load(values)) {}
  2333. is_utf8_really_inline operator simd8<uint8_t>() const;
  2334. // Member-by-member initialization
  2335. is_utf8_really_inline
  2336. simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
  2337. int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  2338. int8_t v12, int8_t v13, int8_t v14, int8_t v15, int8_t v16, int8_t v17,
  2339. int8_t v18, int8_t v19, int8_t v20, int8_t v21, int8_t v22, int8_t v23,
  2340. int8_t v24, int8_t v25, int8_t v26, int8_t v27, int8_t v28, int8_t v29,
  2341. int8_t v30, int8_t v31)
  2342. : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  2343. v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
  2344. v22, v23, v24, v25, v26, v27, v28, v29, v30,
  2345. v31)) {}
  2346. // Repeat 16 values as many times as necessary (usually for lookup tables)
  2347. is_utf8_really_inline static simd8<int8_t>
  2348. repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
  2349. int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  2350. int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
  2351. return simd8<int8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  2352. v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9,
  2353. v10, v11, v12, v13, v14, v15);
  2354. }
  2355. is_utf8_really_inline bool is_ascii() const {
  2356. return _mm256_movemask_epi8(*this) == 0;
  2357. }
  2358. // Order-sensitive comparisons
  2359. is_utf8_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const {
  2360. return _mm256_max_epi8(*this, other);
  2361. }
  2362. is_utf8_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const {
  2363. return _mm256_min_epi8(*this, other);
  2364. }
  2365. is_utf8_really_inline simd8<bool> operator>(const simd8<int8_t> other) const {
  2366. return _mm256_cmpgt_epi8(*this, other);
  2367. }
  2368. is_utf8_really_inline simd8<bool> operator<(const simd8<int8_t> other) const {
  2369. return _mm256_cmpgt_epi8(other, *this);
  2370. }
  2371. };
  2372. // Unsigned bytes
  2373. template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
  2374. is_utf8_really_inline simd8() : base8_numeric<uint8_t>() {}
  2375. is_utf8_really_inline simd8(const __m256i _value)
  2376. : base8_numeric<uint8_t>(_value) {}
  2377. // Splat constructor
  2378. is_utf8_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
  2379. // Array constructor
  2380. is_utf8_really_inline simd8(const uint8_t values[32]) : simd8(load(values)) {}
  2381. // Member-by-member initialization
  2382. is_utf8_really_inline
  2383. simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
  2384. uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
  2385. uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15,
  2386. uint8_t v16, uint8_t v17, uint8_t v18, uint8_t v19, uint8_t v20,
  2387. uint8_t v21, uint8_t v22, uint8_t v23, uint8_t v24, uint8_t v25,
  2388. uint8_t v26, uint8_t v27, uint8_t v28, uint8_t v29, uint8_t v30,
  2389. uint8_t v31)
  2390. : simd8(_mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  2391. v12, v13, v14, v15, v16, v17, v18, v19, v20, v21,
  2392. v22, v23, v24, v25, v26, v27, v28, v29, v30,
  2393. v31)) {}
  2394. // Repeat 16 values as many times as necessary (usually for lookup tables)
  2395. is_utf8_really_inline static simd8<uint8_t>
  2396. repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
  2397. uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
  2398. uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
  2399. uint8_t v15) {
  2400. return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  2401. v13, v14, v15, v0, v1, v2, v3, v4, v5, v6, v7, v8, v9,
  2402. v10, v11, v12, v13, v14, v15);
  2403. }
  2404. // Saturated math
  2405. is_utf8_really_inline simd8<uint8_t>
  2406. saturating_add(const simd8<uint8_t> other) const {
  2407. return _mm256_adds_epu8(*this, other);
  2408. }
  2409. is_utf8_really_inline simd8<uint8_t>
  2410. saturating_sub(const simd8<uint8_t> other) const {
  2411. return _mm256_subs_epu8(*this, other);
  2412. }
  2413. // Order-specific operations
  2414. is_utf8_really_inline simd8<uint8_t>
  2415. max_val(const simd8<uint8_t> other) const {
  2416. return _mm256_max_epu8(*this, other);
  2417. }
  2418. is_utf8_really_inline simd8<uint8_t>
  2419. min_val(const simd8<uint8_t> other) const {
  2420. return _mm256_min_epu8(other, *this);
  2421. }
  2422. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  2423. is_utf8_really_inline simd8<uint8_t>
  2424. gt_bits(const simd8<uint8_t> other) const {
  2425. return this->saturating_sub(other);
  2426. }
  2427. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  2428. is_utf8_really_inline simd8<uint8_t>
  2429. lt_bits(const simd8<uint8_t> other) const {
  2430. return other.saturating_sub(*this);
  2431. }
  2432. is_utf8_really_inline simd8<bool>
  2433. operator<=(const simd8<uint8_t> other) const {
  2434. return other.max_val(*this) == other;
  2435. }
  2436. is_utf8_really_inline simd8<bool>
  2437. operator>=(const simd8<uint8_t> other) const {
  2438. return other.min_val(*this) == other;
  2439. }
  2440. is_utf8_really_inline simd8<bool>
  2441. operator>(const simd8<uint8_t> other) const {
  2442. return this->gt_bits(other).any_bits_set();
  2443. }
  2444. is_utf8_really_inline simd8<bool>
  2445. operator<(const simd8<uint8_t> other) const {
  2446. return this->lt_bits(other).any_bits_set();
  2447. }
  2448. // Bit-specific operations
  2449. is_utf8_really_inline simd8<bool> bits_not_set() const {
  2450. return *this == uint8_t(0);
  2451. }
  2452. is_utf8_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
  2453. return (*this & bits).bits_not_set();
  2454. }
  2455. is_utf8_really_inline simd8<bool> any_bits_set() const {
  2456. return ~this->bits_not_set();
  2457. }
  2458. is_utf8_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
  2459. return ~this->bits_not_set(bits);
  2460. }
  2461. is_utf8_really_inline bool is_ascii() const {
  2462. return _mm256_movemask_epi8(*this) == 0;
  2463. }
  2464. is_utf8_really_inline bool bits_not_set_anywhere() const {
  2465. return _mm256_testz_si256(*this, *this);
  2466. }
  2467. is_utf8_really_inline bool any_bits_set_anywhere() const {
  2468. return !bits_not_set_anywhere();
  2469. }
  2470. is_utf8_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
  2471. return _mm256_testz_si256(*this, bits);
  2472. }
  2473. is_utf8_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
  2474. return !bits_not_set_anywhere(bits);
  2475. }
  2476. template <int N> is_utf8_really_inline simd8<uint8_t> shr() const {
  2477. return simd8<uint8_t>(_mm256_srli_epi16(*this, N)) & uint8_t(0xFFu >> N);
  2478. }
  2479. template <int N> is_utf8_really_inline simd8<uint8_t> shl() const {
  2480. return simd8<uint8_t>(_mm256_slli_epi16(*this, N)) & uint8_t(0xFFu << N);
  2481. }
  2482. // Get one of the bits and make a bitmask out of it.
  2483. // e.g. value.get_bit<7>() gets the high bit
  2484. template <int N> is_utf8_really_inline int get_bit() const {
  2485. return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7 - N));
  2486. }
  2487. };
  2488. is_utf8_really_inline simd8<int8_t>::operator simd8<uint8_t>() const {
  2489. return this->value;
  2490. }
  2491. template <typename T> struct simd8x64 {
  2492. static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
  2493. static_assert(NUM_CHUNKS == 2,
  2494. "Haswell kernel should use two registers per 64-byte block.");
  2495. simd8<T> chunks[NUM_CHUNKS];
  2496. simd8x64(const simd8x64<T> &o) = delete; // no copy allowed
  2497. simd8x64<T> &
  2498. operator=(const simd8<T> other) = delete; // no assignment allowed
  2499. simd8x64() = delete; // no default constructor allowed
  2500. is_utf8_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1)
  2501. : chunks{chunk0, chunk1} {}
  2502. is_utf8_really_inline simd8x64(const T *ptr)
  2503. : chunks{simd8<T>::load(ptr),
  2504. simd8<T>::load(ptr + sizeof(simd8<T>) / sizeof(T))} {}
  2505. is_utf8_really_inline void store(T *ptr) const {
  2506. this->chunks[0].store(ptr + sizeof(simd8<T>) * 0 / sizeof(T));
  2507. this->chunks[1].store(ptr + sizeof(simd8<T>) * 1 / sizeof(T));
  2508. }
  2509. is_utf8_really_inline uint64_t to_bitmask() const {
  2510. uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
  2511. uint64_t r_hi = this->chunks[1].to_bitmask();
  2512. return r_lo | (r_hi << 32);
  2513. }
  2514. is_utf8_really_inline simd8x64<T> &operator|=(const simd8x64<T> &other) {
  2515. this->chunks[0] |= other.chunks[0];
  2516. this->chunks[1] |= other.chunks[1];
  2517. return *this;
  2518. }
  2519. is_utf8_really_inline simd8<T> reduce_or() const {
  2520. return this->chunks[0] | this->chunks[1];
  2521. }
  2522. is_utf8_really_inline bool is_ascii() const {
  2523. return this->reduce_or().is_ascii();
  2524. }
  2525. template <endianness endian>
  2526. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  2527. this->chunks[0].template store_ascii_as_utf16<endian>(ptr +
  2528. sizeof(simd8<T>) * 0);
  2529. this->chunks[1].template store_ascii_as_utf16<endian>(ptr +
  2530. sizeof(simd8<T>) * 1);
  2531. }
  2532. is_utf8_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
  2533. this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 0);
  2534. this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 1);
  2535. }
  2536. is_utf8_really_inline simd8x64<T> bit_or(const T m) const {
  2537. const simd8<T> mask = simd8<T>::splat(m);
  2538. return simd8x64<T>(this->chunks[0] | mask, this->chunks[1] | mask);
  2539. }
  2540. is_utf8_really_inline uint64_t eq(const T m) const {
  2541. const simd8<T> mask = simd8<T>::splat(m);
  2542. return simd8x64<bool>(this->chunks[0] == mask, this->chunks[1] == mask)
  2543. .to_bitmask();
  2544. }
  2545. is_utf8_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
  2546. return simd8x64<bool>(this->chunks[0] == other.chunks[0],
  2547. this->chunks[1] == other.chunks[1])
  2548. .to_bitmask();
  2549. }
  2550. is_utf8_really_inline uint64_t lteq(const T m) const {
  2551. const simd8<T> mask = simd8<T>::splat(m);
  2552. return simd8x64<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask)
  2553. .to_bitmask();
  2554. }
  2555. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  2556. const simd8<T> mask_low = simd8<T>::splat(low);
  2557. const simd8<T> mask_high = simd8<T>::splat(high);
  2558. return simd8x64<bool>(
  2559. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  2560. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  2561. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  2562. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  2563. .to_bitmask();
  2564. }
  2565. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  2566. const simd8<T> mask_low = simd8<T>::splat(low);
  2567. const simd8<T> mask_high = simd8<T>::splat(high);
  2568. return simd8x64<bool>(
  2569. (this->chunks[0] > mask_high) | (this->chunks[0] < mask_low),
  2570. (this->chunks[1] > mask_high) | (this->chunks[1] < mask_low))
  2571. .to_bitmask();
  2572. }
  2573. is_utf8_really_inline uint64_t lt(const T m) const {
  2574. const simd8<T> mask = simd8<T>::splat(m);
  2575. return simd8x64<bool>(this->chunks[0] < mask, this->chunks[1] < mask)
  2576. .to_bitmask();
  2577. }
  2578. is_utf8_really_inline uint64_t gt(const T m) const {
  2579. const simd8<T> mask = simd8<T>::splat(m);
  2580. return simd8x64<bool>(this->chunks[0] > mask, this->chunks[1] > mask)
  2581. .to_bitmask();
  2582. }
  2583. is_utf8_really_inline uint64_t gteq(const T m) const {
  2584. const simd8<T> mask = simd8<T>::splat(m);
  2585. return simd8x64<bool>(this->chunks[0] >= mask, this->chunks[1] >= mask)
  2586. .to_bitmask();
  2587. }
  2588. is_utf8_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
  2589. const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
  2590. return simd8x64<bool>((simd8<uint8_t>(__m256i(this->chunks[0])) >= mask),
  2591. (simd8<uint8_t>(__m256i(this->chunks[1])) >= mask))
  2592. .to_bitmask();
  2593. }
  2594. }; // struct simd8x64<T>
  2595. #ifdef __GNUC__
  2596. #if __GNUC__ < 8
  2597. #define _mm256_set_m128i(xmm1, xmm2) \
  2598. _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \
  2599. _mm256_castsi128_si256(xmm2), 2)
  2600. #define _mm256_setr_m128i(xmm2, xmm1) \
  2601. _mm256_permute2f128_si256(_mm256_castsi128_si256(xmm1), \
  2602. _mm256_castsi128_si256(xmm2), 2)
  2603. #endif
  2604. #endif
  2605. template <typename T> struct simd16;
  2606. template <typename T, typename Mask = simd16<bool>>
  2607. struct base16 : base<simd16<T>> {
  2608. using bitmask_type = uint32_t;
  2609. is_utf8_really_inline base16() : base<simd16<T>>() {}
  2610. is_utf8_really_inline base16(const __m256i _value)
  2611. : base<simd16<T>>(_value) {}
  2612. template <typename Pointer>
  2613. is_utf8_really_inline base16(const Pointer *ptr)
  2614. : base16(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr))) {}
  2615. friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
  2616. const simd16<T> rhs) {
  2617. return _mm256_cmpeq_epi16(lhs, rhs);
  2618. }
  2619. /// the size of vector in bytes
  2620. static const int SIZE = sizeof(base<simd16<T>>::value);
  2621. /// the number of elements of type T a vector can hold
  2622. static const int ELEMENTS = SIZE / sizeof(T);
  2623. template <int N = 1>
  2624. is_utf8_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
  2625. return _mm256_alignr_epi8(*this, prev_chunk, 16 - N);
  2626. }
  2627. };
  2628. // SIMD byte mask type (returned by things like eq and gt)
  2629. template <> struct simd16<bool> : base16<bool> {
  2630. static is_utf8_really_inline simd16<bool> splat(bool _value) {
  2631. return _mm256_set1_epi16(uint16_t(-(!!_value)));
  2632. }
  2633. is_utf8_really_inline simd16<bool>() : base16() {}
  2634. is_utf8_really_inline simd16<bool>(const __m256i _value)
  2635. : base16<bool>(_value) {}
  2636. // Splat constructor
  2637. is_utf8_really_inline simd16<bool>(bool _value)
  2638. : base16<bool>(splat(_value)) {}
  2639. is_utf8_really_inline bitmask_type to_bitmask() const {
  2640. return _mm256_movemask_epi8(*this);
  2641. }
  2642. is_utf8_really_inline bool any() const {
  2643. return !_mm256_testz_si256(*this, *this);
  2644. }
  2645. is_utf8_really_inline simd16<bool> operator~() const { return *this ^ true; }
  2646. };
  2647. template <typename T> struct base16_numeric : base16<T> {
  2648. static is_utf8_really_inline simd16<T> splat(T _value) {
  2649. return _mm256_set1_epi16(_value);
  2650. }
  2651. static is_utf8_really_inline simd16<T> zero() {
  2652. return _mm256_setzero_si256();
  2653. }
  2654. static is_utf8_really_inline simd16<T> load(const T values[8]) {
  2655. return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values));
  2656. }
  2657. is_utf8_really_inline base16_numeric() : base16<T>() {}
  2658. is_utf8_really_inline base16_numeric(const __m256i _value)
  2659. : base16<T>(_value) {}
  2660. // Store to array
  2661. is_utf8_really_inline void store(T dst[8]) const {
  2662. return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this);
  2663. }
  2664. // Override to distinguish from bool version
  2665. is_utf8_really_inline simd16<T> operator~() const { return *this ^ 0xFFFFu; }
  2666. // Addition/subtraction are the same for signed and unsigned
  2667. is_utf8_really_inline simd16<T> operator+(const simd16<T> other) const {
  2668. return _mm256_add_epi16(*this, other);
  2669. }
  2670. is_utf8_really_inline simd16<T> operator-(const simd16<T> other) const {
  2671. return _mm256_sub_epi16(*this, other);
  2672. }
  2673. is_utf8_really_inline simd16<T> &operator+=(const simd16<T> other) {
  2674. *this = *this + other;
  2675. return *static_cast<simd16<T> *>(this);
  2676. }
  2677. is_utf8_really_inline simd16<T> &operator-=(const simd16<T> other) {
  2678. *this = *this - other;
  2679. return *static_cast<simd16<T> *>(this);
  2680. }
  2681. };
  2682. // Signed words
  2683. template <> struct simd16<int16_t> : base16_numeric<int16_t> {
  2684. is_utf8_really_inline simd16() : base16_numeric<int16_t>() {}
  2685. is_utf8_really_inline simd16(const __m256i _value)
  2686. : base16_numeric<int16_t>(_value) {}
  2687. // Splat constructor
  2688. is_utf8_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
  2689. // Array constructor
  2690. is_utf8_really_inline simd16(const int16_t *values) : simd16(load(values)) {}
  2691. is_utf8_really_inline simd16(const char16_t *values)
  2692. : simd16(load(reinterpret_cast<const int16_t *>(values))) {}
  2693. // Order-sensitive comparisons
  2694. is_utf8_really_inline simd16<int16_t>
  2695. max_val(const simd16<int16_t> other) const {
  2696. return _mm256_max_epi16(*this, other);
  2697. }
  2698. is_utf8_really_inline simd16<int16_t>
  2699. min_val(const simd16<int16_t> other) const {
  2700. return _mm256_min_epi16(*this, other);
  2701. }
  2702. is_utf8_really_inline simd16<bool>
  2703. operator>(const simd16<int16_t> other) const {
  2704. return _mm256_cmpgt_epi16(*this, other);
  2705. }
  2706. is_utf8_really_inline simd16<bool>
  2707. operator<(const simd16<int16_t> other) const {
  2708. return _mm256_cmpgt_epi16(other, *this);
  2709. }
  2710. };
  2711. // Unsigned words
  2712. template <> struct simd16<uint16_t> : base16_numeric<uint16_t> {
  2713. is_utf8_really_inline simd16() : base16_numeric<uint16_t>() {}
  2714. is_utf8_really_inline simd16(const __m256i _value)
  2715. : base16_numeric<uint16_t>(_value) {}
  2716. // Splat constructor
  2717. is_utf8_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
  2718. // Array constructor
  2719. is_utf8_really_inline simd16(const uint16_t *values) : simd16(load(values)) {}
  2720. is_utf8_really_inline simd16(const char16_t *values)
  2721. : simd16(load(reinterpret_cast<const uint16_t *>(values))) {}
  2722. // Saturated math
  2723. is_utf8_really_inline simd16<uint16_t>
  2724. saturating_add(const simd16<uint16_t> other) const {
  2725. return _mm256_adds_epu16(*this, other);
  2726. }
  2727. is_utf8_really_inline simd16<uint16_t>
  2728. saturating_sub(const simd16<uint16_t> other) const {
  2729. return _mm256_subs_epu16(*this, other);
  2730. }
  2731. // Order-specific operations
  2732. is_utf8_really_inline simd16<uint16_t>
  2733. max_val(const simd16<uint16_t> other) const {
  2734. return _mm256_max_epu16(*this, other);
  2735. }
  2736. is_utf8_really_inline simd16<uint16_t>
  2737. min_val(const simd16<uint16_t> other) const {
  2738. return _mm256_min_epu16(*this, other);
  2739. }
  2740. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  2741. is_utf8_really_inline simd16<uint16_t>
  2742. gt_bits(const simd16<uint16_t> other) const {
  2743. return this->saturating_sub(other);
  2744. }
  2745. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  2746. is_utf8_really_inline simd16<uint16_t>
  2747. lt_bits(const simd16<uint16_t> other) const {
  2748. return other.saturating_sub(*this);
  2749. }
  2750. is_utf8_really_inline simd16<bool>
  2751. operator<=(const simd16<uint16_t> other) const {
  2752. return other.max_val(*this) == other;
  2753. }
  2754. is_utf8_really_inline simd16<bool>
  2755. operator>=(const simd16<uint16_t> other) const {
  2756. return other.min_val(*this) == other;
  2757. }
  2758. is_utf8_really_inline simd16<bool>
  2759. operator>(const simd16<uint16_t> other) const {
  2760. return this->gt_bits(other).any_bits_set();
  2761. }
  2762. is_utf8_really_inline simd16<bool>
  2763. operator<(const simd16<uint16_t> other) const {
  2764. return this->gt_bits(other).any_bits_set();
  2765. }
  2766. // Bit-specific operations
  2767. is_utf8_really_inline simd16<bool> bits_not_set() const {
  2768. return *this == uint16_t(0);
  2769. }
  2770. is_utf8_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const {
  2771. return (*this & bits).bits_not_set();
  2772. }
  2773. is_utf8_really_inline simd16<bool> any_bits_set() const {
  2774. return ~this->bits_not_set();
  2775. }
  2776. is_utf8_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const {
  2777. return ~this->bits_not_set(bits);
  2778. }
  2779. is_utf8_really_inline bool bits_not_set_anywhere() const {
  2780. return _mm256_testz_si256(*this, *this);
  2781. }
  2782. is_utf8_really_inline bool any_bits_set_anywhere() const {
  2783. return !bits_not_set_anywhere();
  2784. }
  2785. is_utf8_really_inline bool
  2786. bits_not_set_anywhere(simd16<uint16_t> bits) const {
  2787. return _mm256_testz_si256(*this, bits);
  2788. }
  2789. is_utf8_really_inline bool
  2790. any_bits_set_anywhere(simd16<uint16_t> bits) const {
  2791. return !bits_not_set_anywhere(bits);
  2792. }
  2793. template <int N> is_utf8_really_inline simd16<uint16_t> shr() const {
  2794. return simd16<uint16_t>(_mm256_srli_epi16(*this, N));
  2795. }
  2796. template <int N> is_utf8_really_inline simd16<uint16_t> shl() const {
  2797. return simd16<uint16_t>(_mm256_slli_epi16(*this, N));
  2798. }
  2799. // Get one of the bits and make a bitmask out of it.
  2800. // e.g. value.get_bit<7>() gets the high bit
  2801. template <int N> is_utf8_really_inline int get_bit() const {
  2802. return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 15 - N));
  2803. }
  2804. // Change the endianness
  2805. is_utf8_really_inline simd16<uint16_t> swap_bytes() const {
  2806. const __m256i swap = _mm256_setr_epi8(
  2807. 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18,
  2808. 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
  2809. return _mm256_shuffle_epi8(*this, swap);
  2810. }
  2811. // Pack with the unsigned saturation two uint16_t words into single uint8_t
  2812. // vector
  2813. static is_utf8_really_inline simd8<uint8_t> pack(const simd16<uint16_t> &v0,
  2814. const simd16<uint16_t> &v1) {
  2815. // Note: the AVX2 variant of pack operates on 128-bit lanes, thus
  2816. // we have to shuffle lanes in order to produce bytes in the
  2817. // correct order.
  2818. // get the 0th lanes
  2819. const __m128i lo_0 = _mm256_extracti128_si256(v0, 0);
  2820. const __m128i lo_1 = _mm256_extracti128_si256(v1, 0);
  2821. // get the 1st lanes
  2822. const __m128i hi_0 = _mm256_extracti128_si256(v0, 1);
  2823. const __m128i hi_1 = _mm256_extracti128_si256(v1, 1);
  2824. // build new vectors (shuffle lanes)
  2825. const __m256i t0 = _mm256_set_m128i(lo_1, lo_0);
  2826. const __m256i t1 = _mm256_set_m128i(hi_1, hi_0);
  2827. // pack words in linear order from v0 and v1
  2828. return _mm256_packus_epi16(t0, t1);
  2829. }
  2830. };
  2831. template <typename T> struct simd16x32 {
  2832. static constexpr int NUM_CHUNKS = 64 / sizeof(simd16<T>);
  2833. static_assert(NUM_CHUNKS == 2,
  2834. "Haswell kernel should use two registers per 64-byte block.");
  2835. simd16<T> chunks[NUM_CHUNKS];
  2836. simd16x32(const simd16x32<T> &o) = delete; // no copy allowed
  2837. simd16x32<T> &
  2838. operator=(const simd16<T> other) = delete; // no assignment allowed
  2839. simd16x32() = delete; // no default constructor allowed
  2840. is_utf8_really_inline simd16x32(const simd16<T> chunk0,
  2841. const simd16<T> chunk1)
  2842. : chunks{chunk0, chunk1} {}
  2843. is_utf8_really_inline simd16x32(const T *ptr)
  2844. : chunks{simd16<T>::load(ptr),
  2845. simd16<T>::load(ptr + sizeof(simd16<T>) / sizeof(T))} {}
  2846. is_utf8_really_inline void store(T *ptr) const {
  2847. this->chunks[0].store(ptr + sizeof(simd16<T>) * 0 / sizeof(T));
  2848. this->chunks[1].store(ptr + sizeof(simd16<T>) * 1 / sizeof(T));
  2849. }
  2850. is_utf8_really_inline uint64_t to_bitmask() const {
  2851. uint64_t r_lo = uint32_t(this->chunks[0].to_bitmask());
  2852. uint64_t r_hi = this->chunks[1].to_bitmask();
  2853. return r_lo | (r_hi << 32);
  2854. }
  2855. is_utf8_really_inline simd16<T> reduce_or() const {
  2856. return this->chunks[0] | this->chunks[1];
  2857. }
  2858. is_utf8_really_inline bool is_ascii() const {
  2859. return this->reduce_or().is_ascii();
  2860. }
  2861. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  2862. this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 0);
  2863. this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16<T>));
  2864. }
  2865. is_utf8_really_inline simd16x32<T> bit_or(const T m) const {
  2866. const simd16<T> mask = simd16<T>::splat(m);
  2867. return simd16x32<T>(this->chunks[0] | mask, this->chunks[1] | mask);
  2868. }
  2869. is_utf8_really_inline void swap_bytes() {
  2870. this->chunks[0] = this->chunks[0].swap_bytes();
  2871. this->chunks[1] = this->chunks[1].swap_bytes();
  2872. }
  2873. is_utf8_really_inline uint64_t eq(const T m) const {
  2874. const simd16<T> mask = simd16<T>::splat(m);
  2875. return simd16x32<bool>(this->chunks[0] == mask, this->chunks[1] == mask)
  2876. .to_bitmask();
  2877. }
  2878. is_utf8_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
  2879. return simd16x32<bool>(this->chunks[0] == other.chunks[0],
  2880. this->chunks[1] == other.chunks[1])
  2881. .to_bitmask();
  2882. }
  2883. is_utf8_really_inline uint64_t lteq(const T m) const {
  2884. const simd16<T> mask = simd16<T>::splat(m);
  2885. return simd16x32<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask)
  2886. .to_bitmask();
  2887. }
  2888. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  2889. const simd16<T> mask_low = simd16<T>::splat(low);
  2890. const simd16<T> mask_high = simd16<T>::splat(high);
  2891. return simd16x32<bool>(
  2892. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  2893. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  2894. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  2895. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  2896. .to_bitmask();
  2897. }
  2898. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  2899. const simd16<T> mask_low = simd16<T>::splat(static_cast<T>(low - 1));
  2900. const simd16<T> mask_high = simd16<T>::splat(static_cast<T>(high + 1));
  2901. return simd16x32<bool>(
  2902. (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low),
  2903. (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low))
  2904. .to_bitmask();
  2905. }
  2906. is_utf8_really_inline uint64_t lt(const T m) const {
  2907. const simd16<T> mask = simd16<T>::splat(m);
  2908. return simd16x32<bool>(this->chunks[0] < mask, this->chunks[1] < mask)
  2909. .to_bitmask();
  2910. }
  2911. }; // struct simd16x32<T>
  2912. } // namespace simd
  2913. } // unnamed namespace
  2914. } // namespace haswell
  2915. } // namespace is_utf8_internals
  2916. #endif // IS_UTF8_HASWELL_SIMD_H
  2917. IS_UTF8_UNTARGET_REGION
  2918. #if IS_UTF8_GCC11ORMORE // workaround for
  2919. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  2920. #pragma GCC diagnostic pop
  2921. #endif // end of workaround
  2922. #endif // IS_UTF8_IMPLEMENTATION_HASWELL
  2923. #endif // IS_UTF8_HASWELL_COMMON_H
  2924. #ifndef IS_UTF8_WESTMERE_H
  2925. #define IS_UTF8_WESTMERE_H
  2926. #ifdef IS_UTF8_FALLBACK_H
  2927. #error "westmere.h must be included before fallback.h"
  2928. #endif
  2929. // Default Westmere to on if this is x86-64, unless we'll always select Haswell.
  2930. #ifndef IS_UTF8_IMPLEMENTATION_WESTMERE
  2931. //
  2932. // You do not want to set it to (IS_UTF8_IS_X86_64 && !IS_UTF8_REQUIRES_HASWELL)
  2933. // because you want to rely on runtime dispatch!
  2934. //
  2935. #if IS_UTF8_CAN_ALWAYS_RUN_ICELAKE || IS_UTF8_CAN_ALWAYS_RUN_HASWELL
  2936. #define IS_UTF8_IMPLEMENTATION_WESTMERE 0
  2937. #else
  2938. #define IS_UTF8_IMPLEMENTATION_WESTMERE (IS_UTF8_IS_X86_64)
  2939. #endif
  2940. #endif
  2941. #if IS_UTF8_IMPLEMENTATION_WESTMERE && IS_UTF8_IS_X86_64 && __SSE4_2__ && __PCLMUL__
  2942. #define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 1
  2943. #else
  2944. #define IS_UTF8_CAN_ALWAYS_RUN_WESTMERE 0
  2945. #endif
  2946. #if IS_UTF8_IMPLEMENTATION_WESTMERE
  2947. #define IS_UTF8_TARGET_WESTMERE IS_UTF8_TARGET_REGION("sse4.2,pclmul")
  2948. namespace is_utf8_internals {
  2949. /**
  2950. * Implementation for Westmere (Intel SSE4.2).
  2951. */
  2952. namespace westmere {} // namespace westmere
  2953. } // namespace is_utf8_internals
  2954. //
  2955. // These two need to be included outside IS_UTF8_TARGET_REGION
  2956. //
  2957. #ifndef IS_UTF8_WESTMERE_IMPLEMENTATION_H
  2958. #define IS_UTF8_WESTMERE_IMPLEMENTATION_H
  2959. // The constructor may be executed on any host, so we take care not to use
  2960. // IS_UTF8_TARGET_REGION
  2961. namespace is_utf8_internals {
  2962. namespace westmere {
  2963. class implementation final : public is_utf8_internals::implementation {
  2964. public:
  2965. is_utf8_really_inline implementation()
  2966. : is_utf8_internals::implementation(
  2967. "westmere", "Intel/AMD SSE4.2",
  2968. internal::instruction_set::SSE42 |
  2969. internal::instruction_set::PCLMULQDQ) {}
  2970. is_utf8_warn_unused bool validate_utf8(const char *buf,
  2971. size_t len) const noexcept final;
  2972. };
  2973. } // namespace westmere
  2974. } // namespace is_utf8_internals
  2975. #endif // IS_UTF8_WESTMERE_IMPLEMENTATION_H
  2976. #ifndef IS_UTF8_WESTMERE_INTRINSICS_H
  2977. #define IS_UTF8_WESTMERE_INTRINSICS_H
  2978. #ifdef IS_UTF8_VISUAL_STUDIO
  2979. // under clang within visual studio, this will include <x86intrin.h>
  2980. #include <intrin.h> // visual studio or clang
  2981. #else
  2982. #if IS_UTF8_GCC11ORMORE
  2983. // We should not get warnings while including <x86intrin.h> yet we do
  2984. // under some versions of GCC.
  2985. // If the x86intrin.h header has uninitialized values that are problematic,
  2986. // it is a GCC issue, we want to ignore these warnings.
  2987. IS_UTF8_DISABLE_GCC_WARNING(-Wuninitialized)
  2988. #endif
  2989. #include <x86intrin.h> // elsewhere
  2990. #if IS_UTF8_GCC11ORMORE
  2991. // cancels the suppression of the -Wuninitialized
  2992. IS_UTF8_POP_DISABLE_WARNINGS
  2993. #endif
  2994. #endif // IS_UTF8_VISUAL_STUDIO
  2995. #ifdef IS_UTF8_CLANG_VISUAL_STUDIO
  2996. /**
  2997. * You are not supposed, normally, to include these
  2998. * headers directly. Instead you should either include intrin.h
  2999. * or x86intrin.h. However, when compiling with clang
  3000. * under Windows (i.e., when _MSC_VER is set), these headers
  3001. * only get included *if* the corresponding features are detected
  3002. * from macros:
  3003. */
  3004. #include <smmintrin.h> // for _mm_alignr_epi8
  3005. #include <wmmintrin.h> // for _mm_clmulepi64_si128
  3006. #endif
  3007. #endif // IS_UTF8_WESTMERE_INTRINSICS_H
  3008. //
  3009. // The rest need to be inside the region
  3010. //
  3011. // redefining IS_UTF8_IMPLEMENTATION to "westmere"
  3012. // #define IS_UTF8_IMPLEMENTATION westmere
  3013. IS_UTF8_TARGET_WESTMERE
  3014. // Declarations
  3015. #ifndef IS_UTF8_WESTMERE_SIMD_H
  3016. #define IS_UTF8_WESTMERE_SIMD_H
  3017. namespace is_utf8_internals {
  3018. namespace westmere {
  3019. namespace {
  3020. namespace simd {
  3021. template <typename Child> struct base {
  3022. __m128i value;
  3023. // Zero constructor
  3024. is_utf8_really_inline base() : value{__m128i()} {}
  3025. // Conversion from SIMD register
  3026. is_utf8_really_inline base(const __m128i _value) : value(_value) {}
  3027. // Conversion to SIMD register
  3028. is_utf8_really_inline operator const __m128i &() const { return this->value; }
  3029. is_utf8_really_inline operator __m128i &() { return this->value; }
  3030. template <endianness big_endian>
  3031. is_utf8_really_inline void store_ascii_as_utf16(char16_t *p) const {
  3032. __m128i first = _mm_cvtepu8_epi16(*this);
  3033. __m128i second = _mm_cvtepu8_epi16(_mm_srli_si128(*this, 8));
  3034. if (big_endian) {
  3035. const __m128i swap =
  3036. _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
  3037. first = _mm_shuffle_epi8(first, swap);
  3038. second = _mm_shuffle_epi8(second, swap);
  3039. }
  3040. _mm_storeu_si128(reinterpret_cast<__m128i *>(p), first);
  3041. _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8), second);
  3042. }
  3043. is_utf8_really_inline void store_ascii_as_utf32(char32_t *p) const {
  3044. _mm_storeu_si128(reinterpret_cast<__m128i *>(p), _mm_cvtepu8_epi32(*this));
  3045. _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 4),
  3046. _mm_cvtepu8_epi32(_mm_srli_si128(*this, 4)));
  3047. _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 8),
  3048. _mm_cvtepu8_epi32(_mm_srli_si128(*this, 8)));
  3049. _mm_storeu_si128(reinterpret_cast<__m128i *>(p + 12),
  3050. _mm_cvtepu8_epi32(_mm_srli_si128(*this, 12)));
  3051. }
  3052. // Bit operations
  3053. is_utf8_really_inline Child operator|(const Child other) const {
  3054. return _mm_or_si128(*this, other);
  3055. }
  3056. is_utf8_really_inline Child operator&(const Child other) const {
  3057. return _mm_and_si128(*this, other);
  3058. }
  3059. is_utf8_really_inline Child operator^(const Child other) const {
  3060. return _mm_xor_si128(*this, other);
  3061. }
  3062. is_utf8_really_inline Child bit_andnot(const Child other) const {
  3063. return _mm_andnot_si128(other, *this);
  3064. }
  3065. is_utf8_really_inline Child &operator|=(const Child other) {
  3066. auto this_cast = static_cast<Child *>(this);
  3067. *this_cast = *this_cast | other;
  3068. return *this_cast;
  3069. }
  3070. is_utf8_really_inline Child &operator&=(const Child other) {
  3071. auto this_cast = static_cast<Child *>(this);
  3072. *this_cast = *this_cast & other;
  3073. return *this_cast;
  3074. }
  3075. is_utf8_really_inline Child &operator^=(const Child other) {
  3076. auto this_cast = static_cast<Child *>(this);
  3077. *this_cast = *this_cast ^ other;
  3078. return *this_cast;
  3079. }
  3080. };
  3081. // Forward-declared so they can be used by splat and friends.
  3082. template <typename T> struct simd8;
  3083. template <typename T, typename Mask = simd8<bool>>
  3084. struct base8 : base<simd8<T>> {
  3085. typedef uint16_t bitmask_t;
  3086. typedef uint32_t bitmask2_t;
  3087. is_utf8_really_inline T first() const { return _mm_extract_epi8(*this, 0); }
  3088. is_utf8_really_inline T last() const { return _mm_extract_epi8(*this, 15); }
  3089. is_utf8_really_inline base8() : base<simd8<T>>() {}
  3090. is_utf8_really_inline base8(const __m128i _value) : base<simd8<T>>(_value) {}
  3091. friend is_utf8_really_inline Mask operator==(const simd8<T> lhs,
  3092. const simd8<T> rhs) {
  3093. return _mm_cmpeq_epi8(lhs, rhs);
  3094. }
  3095. static const int SIZE = sizeof(base<simd8<T>>::value);
  3096. template <int N = 1>
  3097. is_utf8_really_inline simd8<T> prev(const simd8<T> prev_chunk) const {
  3098. return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
  3099. }
  3100. };
  3101. // SIMD byte mask type (returned by things like eq and gt)
  3102. template <> struct simd8<bool> : base8<bool> {
  3103. static is_utf8_really_inline simd8<bool> splat(bool _value) {
  3104. return _mm_set1_epi8(uint8_t(-(!!_value)));
  3105. }
  3106. is_utf8_really_inline simd8<bool>() : base8() {}
  3107. is_utf8_really_inline simd8<bool>(const __m128i _value)
  3108. : base8<bool>(_value) {}
  3109. // Splat constructor
  3110. is_utf8_really_inline simd8<bool>(bool _value) : base8<bool>(splat(_value)) {}
  3111. is_utf8_really_inline int to_bitmask() const {
  3112. return _mm_movemask_epi8(*this);
  3113. }
  3114. is_utf8_really_inline bool any() const {
  3115. return !_mm_testz_si128(*this, *this);
  3116. }
  3117. is_utf8_really_inline bool none() const {
  3118. return _mm_testz_si128(*this, *this);
  3119. }
  3120. is_utf8_really_inline bool all() const {
  3121. return _mm_movemask_epi8(*this) == 0xFFFF;
  3122. }
  3123. is_utf8_really_inline simd8<bool> operator~() const { return *this ^ true; }
  3124. };
  3125. template <typename T> struct base8_numeric : base8<T> {
  3126. static is_utf8_really_inline simd8<T> splat(T _value) {
  3127. return _mm_set1_epi8(_value);
  3128. }
  3129. static is_utf8_really_inline simd8<T> zero() { return _mm_setzero_si128(); }
  3130. static is_utf8_really_inline simd8<T> load(const T values[16]) {
  3131. return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
  3132. }
  3133. // Repeat 16 values as many times as necessary (usually for lookup tables)
  3134. static is_utf8_really_inline simd8<T> repeat_16(T v0, T v1, T v2, T v3, T v4,
  3135. T v5, T v6, T v7, T v8, T v9,
  3136. T v10, T v11, T v12, T v13,
  3137. T v14, T v15) {
  3138. return simd8<T>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13,
  3139. v14, v15);
  3140. }
  3141. is_utf8_really_inline base8_numeric() : base8<T>() {}
  3142. is_utf8_really_inline base8_numeric(const __m128i _value)
  3143. : base8<T>(_value) {}
  3144. // Store to array
  3145. is_utf8_really_inline void store(T dst[16]) const {
  3146. return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this);
  3147. }
  3148. // Override to distinguish from bool version
  3149. is_utf8_really_inline simd8<T> operator~() const { return *this ^ 0xFFu; }
  3150. // Addition/subtraction are the same for signed and unsigned
  3151. is_utf8_really_inline simd8<T> operator+(const simd8<T> other) const {
  3152. return _mm_add_epi8(*this, other);
  3153. }
  3154. is_utf8_really_inline simd8<T> operator-(const simd8<T> other) const {
  3155. return _mm_sub_epi8(*this, other);
  3156. }
  3157. is_utf8_really_inline simd8<T> &operator+=(const simd8<T> other) {
  3158. *this = *this + other;
  3159. return *static_cast<simd8<T> *>(this);
  3160. }
  3161. is_utf8_really_inline simd8<T> &operator-=(const simd8<T> other) {
  3162. *this = *this - other;
  3163. return *static_cast<simd8<T> *>(this);
  3164. }
  3165. // Perform a lookup assuming the value is between 0 and 16 (undefined behavior
  3166. // for out of range values)
  3167. template <typename L>
  3168. is_utf8_really_inline simd8<L> lookup_16(simd8<L> lookup_table) const {
  3169. return _mm_shuffle_epi8(lookup_table, *this);
  3170. }
  3171. template <typename L>
  3172. is_utf8_really_inline simd8<L>
  3173. lookup_16(L replace0, L replace1, L replace2, L replace3, L replace4,
  3174. L replace5, L replace6, L replace7, L replace8, L replace9,
  3175. L replace10, L replace11, L replace12, L replace13, L replace14,
  3176. L replace15) const {
  3177. return lookup_16(simd8<L>::repeat_16(
  3178. replace0, replace1, replace2, replace3, replace4, replace5, replace6,
  3179. replace7, replace8, replace9, replace10, replace11, replace12,
  3180. replace13, replace14, replace15));
  3181. }
  3182. };
  3183. // Signed bytes
  3184. template <> struct simd8<int8_t> : base8_numeric<int8_t> {
  3185. is_utf8_really_inline simd8() : base8_numeric<int8_t>() {}
  3186. is_utf8_really_inline simd8(const __m128i _value)
  3187. : base8_numeric<int8_t>(_value) {}
  3188. // Splat constructor
  3189. is_utf8_really_inline simd8(int8_t _value) : simd8(splat(_value)) {}
  3190. // Array constructor
  3191. is_utf8_really_inline simd8(const int8_t *values) : simd8(load(values)) {}
  3192. // Member-by-member initialization
  3193. is_utf8_really_inline simd8(int8_t v0, int8_t v1, int8_t v2, int8_t v3,
  3194. int8_t v4, int8_t v5, int8_t v6, int8_t v7,
  3195. int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  3196. int8_t v12, int8_t v13, int8_t v14, int8_t v15)
  3197. : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  3198. v12, v13, v14, v15)) {}
  3199. // Repeat 16 values as many times as necessary (usually for lookup tables)
  3200. is_utf8_really_inline static simd8<int8_t>
  3201. repeat_16(int8_t v0, int8_t v1, int8_t v2, int8_t v3, int8_t v4, int8_t v5,
  3202. int8_t v6, int8_t v7, int8_t v8, int8_t v9, int8_t v10, int8_t v11,
  3203. int8_t v12, int8_t v13, int8_t v14, int8_t v15) {
  3204. return simd8<int8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  3205. v13, v14, v15);
  3206. }
  3207. is_utf8_really_inline operator simd8<uint8_t>() const;
  3208. is_utf8_really_inline bool is_ascii() const {
  3209. return _mm_movemask_epi8(*this) == 0;
  3210. }
  3211. // Order-sensitive comparisons
  3212. is_utf8_really_inline simd8<int8_t> max_val(const simd8<int8_t> other) const {
  3213. return _mm_max_epi8(*this, other);
  3214. }
  3215. is_utf8_really_inline simd8<int8_t> min_val(const simd8<int8_t> other) const {
  3216. return _mm_min_epi8(*this, other);
  3217. }
  3218. is_utf8_really_inline simd8<bool> operator>(const simd8<int8_t> other) const {
  3219. return _mm_cmpgt_epi8(*this, other);
  3220. }
  3221. is_utf8_really_inline simd8<bool> operator<(const simd8<int8_t> other) const {
  3222. return _mm_cmpgt_epi8(other, *this);
  3223. }
  3224. };
  3225. // Unsigned bytes
  3226. template <> struct simd8<uint8_t> : base8_numeric<uint8_t> {
  3227. is_utf8_really_inline simd8() : base8_numeric<uint8_t>() {}
  3228. is_utf8_really_inline simd8(const __m128i _value)
  3229. : base8_numeric<uint8_t>(_value) {}
  3230. // Splat constructor
  3231. is_utf8_really_inline simd8(uint8_t _value) : simd8(splat(_value)) {}
  3232. // Array constructor
  3233. is_utf8_really_inline simd8(const uint8_t *values) : simd8(load(values)) {}
  3234. // Member-by-member initialization
  3235. is_utf8_really_inline
  3236. simd8(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4, uint8_t v5,
  3237. uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9, uint8_t v10,
  3238. uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14, uint8_t v15)
  3239. : simd8(_mm_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,
  3240. v12, v13, v14, v15)) {}
  3241. // Repeat 16 values as many times as necessary (usually for lookup tables)
  3242. is_utf8_really_inline static simd8<uint8_t>
  3243. repeat_16(uint8_t v0, uint8_t v1, uint8_t v2, uint8_t v3, uint8_t v4,
  3244. uint8_t v5, uint8_t v6, uint8_t v7, uint8_t v8, uint8_t v9,
  3245. uint8_t v10, uint8_t v11, uint8_t v12, uint8_t v13, uint8_t v14,
  3246. uint8_t v15) {
  3247. return simd8<uint8_t>(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12,
  3248. v13, v14, v15);
  3249. }
  3250. // Saturated math
  3251. is_utf8_really_inline simd8<uint8_t>
  3252. saturating_add(const simd8<uint8_t> other) const {
  3253. return _mm_adds_epu8(*this, other);
  3254. }
  3255. is_utf8_really_inline simd8<uint8_t>
  3256. saturating_sub(const simd8<uint8_t> other) const {
  3257. return _mm_subs_epu8(*this, other);
  3258. }
  3259. // Order-specific operations
  3260. is_utf8_really_inline simd8<uint8_t>
  3261. max_val(const simd8<uint8_t> other) const {
  3262. return _mm_max_epu8(*this, other);
  3263. }
  3264. is_utf8_really_inline simd8<uint8_t>
  3265. min_val(const simd8<uint8_t> other) const {
  3266. return _mm_min_epu8(*this, other);
  3267. }
  3268. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  3269. is_utf8_really_inline simd8<uint8_t>
  3270. gt_bits(const simd8<uint8_t> other) const {
  3271. return this->saturating_sub(other);
  3272. }
  3273. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  3274. is_utf8_really_inline simd8<uint8_t>
  3275. lt_bits(const simd8<uint8_t> other) const {
  3276. return other.saturating_sub(*this);
  3277. }
  3278. is_utf8_really_inline simd8<bool>
  3279. operator<=(const simd8<uint8_t> other) const {
  3280. return other.max_val(*this) == other;
  3281. }
  3282. is_utf8_really_inline simd8<bool>
  3283. operator>=(const simd8<uint8_t> other) const {
  3284. return other.min_val(*this) == other;
  3285. }
  3286. is_utf8_really_inline simd8<bool>
  3287. operator>(const simd8<uint8_t> other) const {
  3288. return this->gt_bits(other).any_bits_set();
  3289. }
  3290. is_utf8_really_inline simd8<bool>
  3291. operator<(const simd8<uint8_t> other) const {
  3292. return this->gt_bits(other).any_bits_set();
  3293. }
  3294. // Bit-specific operations
  3295. is_utf8_really_inline simd8<bool> bits_not_set() const {
  3296. return *this == uint8_t(0);
  3297. }
  3298. is_utf8_really_inline simd8<bool> bits_not_set(simd8<uint8_t> bits) const {
  3299. return (*this & bits).bits_not_set();
  3300. }
  3301. is_utf8_really_inline simd8<bool> any_bits_set() const {
  3302. return ~this->bits_not_set();
  3303. }
  3304. is_utf8_really_inline simd8<bool> any_bits_set(simd8<uint8_t> bits) const {
  3305. return ~this->bits_not_set(bits);
  3306. }
  3307. is_utf8_really_inline bool is_ascii() const {
  3308. return _mm_movemask_epi8(*this) == 0;
  3309. }
  3310. is_utf8_really_inline bool bits_not_set_anywhere() const {
  3311. return _mm_testz_si128(*this, *this);
  3312. }
  3313. is_utf8_really_inline bool any_bits_set_anywhere() const {
  3314. return !bits_not_set_anywhere();
  3315. }
  3316. is_utf8_really_inline bool bits_not_set_anywhere(simd8<uint8_t> bits) const {
  3317. return _mm_testz_si128(*this, bits);
  3318. }
  3319. is_utf8_really_inline bool any_bits_set_anywhere(simd8<uint8_t> bits) const {
  3320. return !bits_not_set_anywhere(bits);
  3321. }
  3322. template <int N> is_utf8_really_inline simd8<uint8_t> shr() const {
  3323. return simd8<uint8_t>(_mm_srli_epi16(*this, N)) & uint8_t(0xFFu >> N);
  3324. }
  3325. template <int N> is_utf8_really_inline simd8<uint8_t> shl() const {
  3326. return simd8<uint8_t>(_mm_slli_epi16(*this, N)) & uint8_t(0xFFu << N);
  3327. }
  3328. // Get one of the bits and make a bitmask out of it.
  3329. // e.g. value.get_bit<7>() gets the high bit
  3330. template <int N> is_utf8_really_inline int get_bit() const {
  3331. return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N));
  3332. }
  3333. };
  3334. is_utf8_really_inline simd8<int8_t>::operator simd8<uint8_t>() const {
  3335. return this->value;
  3336. }
  3337. // Unsigned bytes
  3338. template <> struct simd8<uint16_t> : base<uint16_t> {
  3339. static is_utf8_really_inline simd8<uint16_t> splat(uint16_t _value) {
  3340. return _mm_set1_epi16(_value);
  3341. }
  3342. static is_utf8_really_inline simd8<uint16_t> load(const uint16_t values[8]) {
  3343. return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
  3344. }
  3345. is_utf8_really_inline simd8() : base<uint16_t>() {}
  3346. is_utf8_really_inline simd8(const __m128i _value) : base<uint16_t>(_value) {}
  3347. // Splat constructor
  3348. is_utf8_really_inline simd8(uint16_t _value) : simd8(splat(_value)) {}
  3349. // Array constructor
  3350. is_utf8_really_inline simd8(const uint16_t *values) : simd8(load(values)) {}
  3351. // Member-by-member initialization
  3352. is_utf8_really_inline simd8(uint16_t v0, uint16_t v1, uint16_t v2,
  3353. uint16_t v3, uint16_t v4, uint16_t v5,
  3354. uint16_t v6, uint16_t v7)
  3355. : simd8(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {}
  3356. // Saturated math
  3357. is_utf8_really_inline simd8<uint16_t>
  3358. saturating_add(const simd8<uint16_t> other) const {
  3359. return _mm_adds_epu16(*this, other);
  3360. }
  3361. is_utf8_really_inline simd8<uint16_t>
  3362. saturating_sub(const simd8<uint16_t> other) const {
  3363. return _mm_subs_epu16(*this, other);
  3364. }
  3365. // Order-specific operations
  3366. is_utf8_really_inline simd8<uint16_t>
  3367. max_val(const simd8<uint16_t> other) const {
  3368. return _mm_max_epu16(*this, other);
  3369. }
  3370. is_utf8_really_inline simd8<uint16_t>
  3371. min_val(const simd8<uint16_t> other) const {
  3372. return _mm_min_epu16(*this, other);
  3373. }
  3374. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  3375. is_utf8_really_inline simd8<uint16_t>
  3376. gt_bits(const simd8<uint16_t> other) const {
  3377. return this->saturating_sub(other);
  3378. }
  3379. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  3380. is_utf8_really_inline simd8<uint16_t>
  3381. lt_bits(const simd8<uint16_t> other) const {
  3382. return other.saturating_sub(*this);
  3383. }
  3384. is_utf8_really_inline simd8<bool>
  3385. operator<=(const simd8<uint16_t> other) const {
  3386. return other.max_val(*this) == other;
  3387. }
  3388. is_utf8_really_inline simd8<bool>
  3389. operator>=(const simd8<uint16_t> other) const {
  3390. return other.min_val(*this) == other;
  3391. }
  3392. is_utf8_really_inline simd8<bool>
  3393. operator==(const simd8<uint16_t> other) const {
  3394. return _mm_cmpeq_epi16(*this, other);
  3395. }
  3396. is_utf8_really_inline simd8<bool>
  3397. operator&(const simd8<uint16_t> other) const {
  3398. return _mm_and_si128(*this, other);
  3399. }
  3400. is_utf8_really_inline simd8<bool>
  3401. operator|(const simd8<uint16_t> other) const {
  3402. return _mm_or_si128(*this, other);
  3403. }
  3404. // Bit-specific operations
  3405. is_utf8_really_inline simd8<bool> bits_not_set() const {
  3406. return *this == uint16_t(0);
  3407. }
  3408. is_utf8_really_inline simd8<bool> any_bits_set() const {
  3409. return ~this->bits_not_set();
  3410. }
  3411. is_utf8_really_inline bool bits_not_set_anywhere() const {
  3412. return _mm_testz_si128(*this, *this);
  3413. }
  3414. is_utf8_really_inline bool any_bits_set_anywhere() const {
  3415. return !bits_not_set_anywhere();
  3416. }
  3417. is_utf8_really_inline bool bits_not_set_anywhere(simd8<uint16_t> bits) const {
  3418. return _mm_testz_si128(*this, bits);
  3419. }
  3420. is_utf8_really_inline bool any_bits_set_anywhere(simd8<uint16_t> bits) const {
  3421. return !bits_not_set_anywhere(bits);
  3422. }
  3423. };
  3424. template <typename T> struct simd8x64 {
  3425. static constexpr int NUM_CHUNKS = 64 / sizeof(simd8<T>);
  3426. static_assert(NUM_CHUNKS == 4,
  3427. "Westmere kernel should use four registers per 64-byte block.");
  3428. simd8<T> chunks[NUM_CHUNKS];
  3429. simd8x64(const simd8x64<T> &o) = delete; // no copy allowed
  3430. simd8x64<T> &
  3431. operator=(const simd8<T> other) = delete; // no assignment allowed
  3432. simd8x64() = delete; // no default constructor allowed
  3433. is_utf8_really_inline simd8x64(const simd8<T> chunk0, const simd8<T> chunk1,
  3434. const simd8<T> chunk2, const simd8<T> chunk3)
  3435. : chunks{chunk0, chunk1, chunk2, chunk3} {}
  3436. is_utf8_really_inline simd8x64(const T *ptr)
  3437. : chunks{simd8<T>::load(ptr),
  3438. simd8<T>::load(ptr + sizeof(simd8<T>) / sizeof(T)),
  3439. simd8<T>::load(ptr + 2 * sizeof(simd8<T>) / sizeof(T)),
  3440. simd8<T>::load(ptr + 3 * sizeof(simd8<T>) / sizeof(T))} {}
  3441. is_utf8_really_inline void store(T *ptr) const {
  3442. this->chunks[0].store(ptr + sizeof(simd8<T>) * 0 / sizeof(T));
  3443. this->chunks[1].store(ptr + sizeof(simd8<T>) * 1 / sizeof(T));
  3444. this->chunks[2].store(ptr + sizeof(simd8<T>) * 2 / sizeof(T));
  3445. this->chunks[3].store(ptr + sizeof(simd8<T>) * 3 / sizeof(T));
  3446. }
  3447. is_utf8_really_inline simd8x64<T> &operator|=(const simd8x64<T> &other) {
  3448. this->chunks[0] |= other.chunks[0];
  3449. this->chunks[1] |= other.chunks[1];
  3450. this->chunks[2] |= other.chunks[2];
  3451. this->chunks[3] |= other.chunks[3];
  3452. return *this;
  3453. }
  3454. is_utf8_really_inline simd8<T> reduce_or() const {
  3455. return (this->chunks[0] | this->chunks[1]) |
  3456. (this->chunks[2] | this->chunks[3]);
  3457. }
  3458. is_utf8_really_inline bool is_ascii() const {
  3459. return this->reduce_or().is_ascii();
  3460. }
  3461. template <endianness endian>
  3462. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  3463. this->chunks[0].template store_ascii_as_utf16<endian>(ptr +
  3464. sizeof(simd8<T>) * 0);
  3465. this->chunks[1].template store_ascii_as_utf16<endian>(ptr +
  3466. sizeof(simd8<T>) * 1);
  3467. this->chunks[2].template store_ascii_as_utf16<endian>(ptr +
  3468. sizeof(simd8<T>) * 2);
  3469. this->chunks[3].template store_ascii_as_utf16<endian>(ptr +
  3470. sizeof(simd8<T>) * 3);
  3471. }
  3472. is_utf8_really_inline void store_ascii_as_utf32(char32_t *ptr) const {
  3473. this->chunks[0].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 0);
  3474. this->chunks[1].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 1);
  3475. this->chunks[2].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 2);
  3476. this->chunks[3].store_ascii_as_utf32(ptr + sizeof(simd8<T>) * 3);
  3477. }
  3478. is_utf8_really_inline uint64_t to_bitmask() const {
  3479. uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
  3480. uint64_t r1 = this->chunks[1].to_bitmask();
  3481. uint64_t r2 = this->chunks[2].to_bitmask();
  3482. uint64_t r3 = this->chunks[3].to_bitmask();
  3483. return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
  3484. }
  3485. is_utf8_really_inline uint64_t eq(const T m) const {
  3486. const simd8<T> mask = simd8<T>::splat(m);
  3487. return simd8x64<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
  3488. this->chunks[2] == mask, this->chunks[3] == mask)
  3489. .to_bitmask();
  3490. }
  3491. is_utf8_really_inline uint64_t eq(const simd8x64<uint8_t> &other) const {
  3492. return simd8x64<bool>(this->chunks[0] == other.chunks[0],
  3493. this->chunks[1] == other.chunks[1],
  3494. this->chunks[2] == other.chunks[2],
  3495. this->chunks[3] == other.chunks[3])
  3496. .to_bitmask();
  3497. }
  3498. is_utf8_really_inline uint64_t lteq(const T m) const {
  3499. const simd8<T> mask = simd8<T>::splat(m);
  3500. return simd8x64<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
  3501. this->chunks[2] <= mask, this->chunks[3] <= mask)
  3502. .to_bitmask();
  3503. }
  3504. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  3505. const simd8<T> mask_low = simd8<T>::splat(low);
  3506. const simd8<T> mask_high = simd8<T>::splat(high);
  3507. return simd8x64<bool>(
  3508. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  3509. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  3510. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  3511. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  3512. .to_bitmask();
  3513. }
  3514. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  3515. const simd8<T> mask_low = simd8<T>::splat(low - 1);
  3516. const simd8<T> mask_high = simd8<T>::splat(high + 1);
  3517. return simd8x64<bool>(
  3518. (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low),
  3519. (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low),
  3520. (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low),
  3521. (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low))
  3522. .to_bitmask();
  3523. }
  3524. is_utf8_really_inline uint64_t lt(const T m) const {
  3525. const simd8<T> mask = simd8<T>::splat(m);
  3526. return simd8x64<bool>(this->chunks[0] < mask, this->chunks[1] < mask,
  3527. this->chunks[2] < mask, this->chunks[3] < mask)
  3528. .to_bitmask();
  3529. }
  3530. is_utf8_really_inline uint64_t gt(const T m) const {
  3531. const simd8<T> mask = simd8<T>::splat(m);
  3532. return simd8x64<bool>(this->chunks[0] > mask, this->chunks[1] > mask,
  3533. this->chunks[2] > mask, this->chunks[3] > mask)
  3534. .to_bitmask();
  3535. }
  3536. is_utf8_really_inline uint64_t gteq(const T m) const {
  3537. const simd8<T> mask = simd8<T>::splat(m);
  3538. return simd8x64<bool>(this->chunks[0] >= mask, this->chunks[1] >= mask,
  3539. this->chunks[2] >= mask, this->chunks[3] >= mask)
  3540. .to_bitmask();
  3541. }
  3542. is_utf8_really_inline uint64_t gteq_unsigned(const uint8_t m) const {
  3543. const simd8<uint8_t> mask = simd8<uint8_t>::splat(m);
  3544. return simd8x64<bool>(simd8<uint8_t>(__m128i(this->chunks[0])) >= mask,
  3545. simd8<uint8_t>(__m128i(this->chunks[1])) >= mask,
  3546. simd8<uint8_t>(__m128i(this->chunks[2])) >= mask,
  3547. simd8<uint8_t>(__m128i(this->chunks[3])) >= mask)
  3548. .to_bitmask();
  3549. }
  3550. }; // struct simd8x64<T>
  3551. template <typename T> struct simd16;
  3552. template <typename T, typename Mask = simd16<bool>>
  3553. struct base16 : base<simd16<T>> {
  3554. typedef uint16_t bitmask_t;
  3555. typedef uint32_t bitmask2_t;
  3556. is_utf8_really_inline base16() : base<simd16<T>>() {}
  3557. is_utf8_really_inline base16(const __m128i _value)
  3558. : base<simd16<T>>(_value) {}
  3559. template <typename Pointer>
  3560. is_utf8_really_inline base16(const Pointer *ptr)
  3561. : base16(_mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr))) {}
  3562. friend is_utf8_really_inline Mask operator==(const simd16<T> lhs,
  3563. const simd16<T> rhs) {
  3564. return _mm_cmpeq_epi16(lhs, rhs);
  3565. }
  3566. static const int SIZE = sizeof(base<simd16<T>>::value);
  3567. template <int N = 1>
  3568. is_utf8_really_inline simd16<T> prev(const simd16<T> prev_chunk) const {
  3569. return _mm_alignr_epi8(*this, prev_chunk, 16 - N);
  3570. }
  3571. };
  3572. // SIMD byte mask type (returned by things like eq and gt)
  3573. template <> struct simd16<bool> : base16<bool> {
  3574. static is_utf8_really_inline simd16<bool> splat(bool _value) {
  3575. return _mm_set1_epi16(uint16_t(-(!!_value)));
  3576. }
  3577. is_utf8_really_inline simd16<bool>() : base16() {}
  3578. is_utf8_really_inline simd16<bool>(const __m128i _value)
  3579. : base16<bool>(_value) {}
  3580. // Splat constructor
  3581. is_utf8_really_inline simd16<bool>(bool _value)
  3582. : base16<bool>(splat(_value)) {}
  3583. is_utf8_really_inline int to_bitmask() const {
  3584. return _mm_movemask_epi8(*this);
  3585. }
  3586. is_utf8_really_inline bool any() const {
  3587. return !_mm_testz_si128(*this, *this);
  3588. }
  3589. is_utf8_really_inline simd16<bool> operator~() const { return *this ^ true; }
  3590. };
  3591. template <typename T> struct base16_numeric : base16<T> {
  3592. static is_utf8_really_inline simd16<T> splat(T _value) {
  3593. return _mm_set1_epi16(_value);
  3594. }
  3595. static is_utf8_really_inline simd16<T> zero() { return _mm_setzero_si128(); }
  3596. static is_utf8_really_inline simd16<T> load(const T values[8]) {
  3597. return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values));
  3598. }
  3599. is_utf8_really_inline base16_numeric() : base16<T>() {}
  3600. is_utf8_really_inline base16_numeric(const __m128i _value)
  3601. : base16<T>(_value) {}
  3602. // Store to array
  3603. is_utf8_really_inline void store(T dst[8]) const {
  3604. return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this);
  3605. }
  3606. // Override to distinguish from bool version
  3607. is_utf8_really_inline simd16<T> operator~() const { return *this ^ 0xFFu; }
  3608. // Addition/subtraction are the same for signed and unsigned
  3609. is_utf8_really_inline simd16<T> operator+(const simd16<T> other) const {
  3610. return _mm_add_epi16(*this, other);
  3611. }
  3612. is_utf8_really_inline simd16<T> operator-(const simd16<T> other) const {
  3613. return _mm_sub_epi16(*this, other);
  3614. }
  3615. is_utf8_really_inline simd16<T> &operator+=(const simd16<T> other) {
  3616. *this = *this + other;
  3617. return *static_cast<simd16<T> *>(this);
  3618. }
  3619. is_utf8_really_inline simd16<T> &operator-=(const simd16<T> other) {
  3620. *this = *this - other;
  3621. return *static_cast<simd16<T> *>(this);
  3622. }
  3623. };
  3624. // Signed words
  3625. template <> struct simd16<int16_t> : base16_numeric<int16_t> {
  3626. is_utf8_really_inline simd16() : base16_numeric<int16_t>() {}
  3627. is_utf8_really_inline simd16(const __m128i _value)
  3628. : base16_numeric<int16_t>(_value) {}
  3629. // Splat constructor
  3630. is_utf8_really_inline simd16(int16_t _value) : simd16(splat(_value)) {}
  3631. // Array constructor
  3632. is_utf8_really_inline simd16(const int16_t *values) : simd16(load(values)) {}
  3633. is_utf8_really_inline simd16(const char16_t *values)
  3634. : simd16(load(reinterpret_cast<const int16_t *>(values))) {}
  3635. // Member-by-member initialization
  3636. is_utf8_really_inline simd16(int16_t v0, int16_t v1, int16_t v2, int16_t v3,
  3637. int16_t v4, int16_t v5, int16_t v6, int16_t v7)
  3638. : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {}
  3639. is_utf8_really_inline operator simd16<uint16_t>() const;
  3640. // Order-sensitive comparisons
  3641. is_utf8_really_inline simd16<int16_t>
  3642. max_val(const simd16<int16_t> other) const {
  3643. return _mm_max_epi16(*this, other);
  3644. }
  3645. is_utf8_really_inline simd16<int16_t>
  3646. min_val(const simd16<int16_t> other) const {
  3647. return _mm_min_epi16(*this, other);
  3648. }
  3649. is_utf8_really_inline simd16<bool>
  3650. operator>(const simd16<int16_t> other) const {
  3651. return _mm_cmpgt_epi16(*this, other);
  3652. }
  3653. is_utf8_really_inline simd16<bool>
  3654. operator<(const simd16<int16_t> other) const {
  3655. return _mm_cmpgt_epi16(other, *this);
  3656. }
  3657. };
  3658. // Unsigned words
  3659. template <> struct simd16<uint16_t> : base16_numeric<uint16_t> {
  3660. is_utf8_really_inline simd16() : base16_numeric<uint16_t>() {}
  3661. is_utf8_really_inline simd16(const __m128i _value)
  3662. : base16_numeric<uint16_t>(_value) {}
  3663. // Splat constructor
  3664. is_utf8_really_inline simd16(uint16_t _value) : simd16(splat(_value)) {}
  3665. // Array constructor
  3666. is_utf8_really_inline simd16(const uint16_t *values) : simd16(load(values)) {}
  3667. is_utf8_really_inline simd16(const char16_t *values)
  3668. : simd16(load(reinterpret_cast<const uint16_t *>(values))) {}
  3669. // Member-by-member initialization
  3670. is_utf8_really_inline simd16(uint16_t v0, uint16_t v1, uint16_t v2,
  3671. uint16_t v3, uint16_t v4, uint16_t v5,
  3672. uint16_t v6, uint16_t v7)
  3673. : simd16(_mm_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7)) {}
  3674. // Repeat 16 values as many times as necessary (usually for lookup tables)
  3675. is_utf8_really_inline static simd16<uint16_t>
  3676. repeat_16(uint16_t v0, uint16_t v1, uint16_t v2, uint16_t v3, uint16_t v4,
  3677. uint16_t v5, uint16_t v6, uint16_t v7) {
  3678. return simd16<uint16_t>(v0, v1, v2, v3, v4, v5, v6, v7);
  3679. }
  3680. // Saturated math
  3681. is_utf8_really_inline simd16<uint16_t>
  3682. saturating_add(const simd16<uint16_t> other) const {
  3683. return _mm_adds_epu16(*this, other);
  3684. }
  3685. is_utf8_really_inline simd16<uint16_t>
  3686. saturating_sub(const simd16<uint16_t> other) const {
  3687. return _mm_subs_epu16(*this, other);
  3688. }
  3689. // Order-specific operations
  3690. is_utf8_really_inline simd16<uint16_t>
  3691. max_val(const simd16<uint16_t> other) const {
  3692. return _mm_max_epu16(*this, other);
  3693. }
  3694. is_utf8_really_inline simd16<uint16_t>
  3695. min_val(const simd16<uint16_t> other) const {
  3696. return _mm_min_epu16(*this, other);
  3697. }
  3698. // Same as >, but only guarantees true is nonzero (< guarantees true = -1)
  3699. is_utf8_really_inline simd16<uint16_t>
  3700. gt_bits(const simd16<uint16_t> other) const {
  3701. return this->saturating_sub(other);
  3702. }
  3703. // Same as <, but only guarantees true is nonzero (< guarantees true = -1)
  3704. is_utf8_really_inline simd16<uint16_t>
  3705. lt_bits(const simd16<uint16_t> other) const {
  3706. return other.saturating_sub(*this);
  3707. }
  3708. is_utf8_really_inline simd16<bool>
  3709. operator<=(const simd16<uint16_t> other) const {
  3710. return other.max_val(*this) == other;
  3711. }
  3712. is_utf8_really_inline simd16<bool>
  3713. operator>=(const simd16<uint16_t> other) const {
  3714. return other.min_val(*this) == other;
  3715. }
  3716. is_utf8_really_inline simd16<bool>
  3717. operator>(const simd16<uint16_t> other) const {
  3718. return this->gt_bits(other).any_bits_set();
  3719. }
  3720. is_utf8_really_inline simd16<bool>
  3721. operator<(const simd16<uint16_t> other) const {
  3722. return this->gt_bits(other).any_bits_set();
  3723. }
  3724. // Bit-specific operations
  3725. is_utf8_really_inline simd16<bool> bits_not_set() const {
  3726. return *this == uint16_t(0);
  3727. }
  3728. is_utf8_really_inline simd16<bool> bits_not_set(simd16<uint16_t> bits) const {
  3729. return (*this & bits).bits_not_set();
  3730. }
  3731. is_utf8_really_inline simd16<bool> any_bits_set() const {
  3732. return ~this->bits_not_set();
  3733. }
  3734. is_utf8_really_inline simd16<bool> any_bits_set(simd16<uint16_t> bits) const {
  3735. return ~this->bits_not_set(bits);
  3736. }
  3737. is_utf8_really_inline bool bits_not_set_anywhere() const {
  3738. return _mm_testz_si128(*this, *this);
  3739. }
  3740. is_utf8_really_inline bool any_bits_set_anywhere() const {
  3741. return !bits_not_set_anywhere();
  3742. }
  3743. is_utf8_really_inline bool
  3744. bits_not_set_anywhere(simd16<uint16_t> bits) const {
  3745. return _mm_testz_si128(*this, bits);
  3746. }
  3747. is_utf8_really_inline bool
  3748. any_bits_set_anywhere(simd16<uint16_t> bits) const {
  3749. return !bits_not_set_anywhere(bits);
  3750. }
  3751. template <int N> is_utf8_really_inline simd16<uint16_t> shr() const {
  3752. return simd16<uint16_t>(_mm_srli_epi16(*this, N));
  3753. }
  3754. template <int N> is_utf8_really_inline simd16<uint16_t> shl() const {
  3755. return simd16<uint16_t>(_mm_slli_epi16(*this, N));
  3756. }
  3757. // Get one of the bits and make a bitmask out of it.
  3758. // e.g. value.get_bit<7>() gets the high bit
  3759. template <int N> is_utf8_really_inline int get_bit() const {
  3760. return _mm_movemask_epi8(_mm_slli_epi16(*this, 7 - N));
  3761. }
  3762. // Change the endianness
  3763. is_utf8_really_inline simd16<uint16_t> swap_bytes() const {
  3764. const __m128i swap =
  3765. _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
  3766. return _mm_shuffle_epi8(*this, swap);
  3767. }
  3768. // Pack with the unsigned saturation two uint16_t words into single uint8_t
  3769. // vector
  3770. static is_utf8_really_inline simd8<uint8_t> pack(const simd16<uint16_t> &v0,
  3771. const simd16<uint16_t> &v1) {
  3772. return _mm_packus_epi16(v0, v1);
  3773. }
  3774. };
  3775. is_utf8_really_inline simd16<int16_t>::operator simd16<uint16_t>() const {
  3776. return this->value;
  3777. }
  3778. template <typename T> struct simd16x32 {
  3779. static constexpr int NUM_CHUNKS = 64 / sizeof(simd16<T>);
  3780. static_assert(NUM_CHUNKS == 4,
  3781. "Westmere kernel should use four registers per 64-byte block.");
  3782. simd16<T> chunks[NUM_CHUNKS];
  3783. simd16x32(const simd16x32<T> &o) = delete; // no copy allowed
  3784. simd16x32<T> &
  3785. operator=(const simd16<T> other) = delete; // no assignment allowed
  3786. simd16x32() = delete; // no default constructor allowed
  3787. is_utf8_really_inline
  3788. simd16x32(const simd16<T> chunk0, const simd16<T> chunk1,
  3789. const simd16<T> chunk2, const simd16<T> chunk3)
  3790. : chunks{chunk0, chunk1, chunk2, chunk3} {}
  3791. is_utf8_really_inline simd16x32(const T *ptr)
  3792. : chunks{simd16<T>::load(ptr),
  3793. simd16<T>::load(ptr + sizeof(simd16<T>) / sizeof(T)),
  3794. simd16<T>::load(ptr + 2 * sizeof(simd16<T>) / sizeof(T)),
  3795. simd16<T>::load(ptr + 3 * sizeof(simd16<T>) / sizeof(T))} {}
  3796. is_utf8_really_inline void store(T *ptr) const {
  3797. this->chunks[0].store(ptr + sizeof(simd16<T>) * 0 / sizeof(T));
  3798. this->chunks[1].store(ptr + sizeof(simd16<T>) * 1 / sizeof(T));
  3799. this->chunks[2].store(ptr + sizeof(simd16<T>) * 2 / sizeof(T));
  3800. this->chunks[3].store(ptr + sizeof(simd16<T>) * 3 / sizeof(T));
  3801. }
  3802. is_utf8_really_inline simd16<T> reduce_or() const {
  3803. return (this->chunks[0] | this->chunks[1]) |
  3804. (this->chunks[2] | this->chunks[3]);
  3805. }
  3806. is_utf8_really_inline bool is_ascii() const {
  3807. return this->reduce_or().is_ascii();
  3808. }
  3809. is_utf8_really_inline void store_ascii_as_utf16(char16_t *ptr) const {
  3810. this->chunks[0].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 0);
  3811. this->chunks[1].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 1);
  3812. this->chunks[2].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 2);
  3813. this->chunks[3].store_ascii_as_utf16(ptr + sizeof(simd16<T>) * 3);
  3814. }
  3815. is_utf8_really_inline uint64_t to_bitmask() const {
  3816. uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
  3817. uint64_t r1 = this->chunks[1].to_bitmask();
  3818. uint64_t r2 = this->chunks[2].to_bitmask();
  3819. uint64_t r3 = this->chunks[3].to_bitmask();
  3820. return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
  3821. }
  3822. is_utf8_really_inline void swap_bytes() {
  3823. this->chunks[0] = this->chunks[0].swap_bytes();
  3824. this->chunks[1] = this->chunks[1].swap_bytes();
  3825. this->chunks[2] = this->chunks[2].swap_bytes();
  3826. this->chunks[3] = this->chunks[3].swap_bytes();
  3827. }
  3828. is_utf8_really_inline uint64_t eq(const T m) const {
  3829. const simd16<T> mask = simd16<T>::splat(m);
  3830. return simd16x32<bool>(this->chunks[0] == mask, this->chunks[1] == mask,
  3831. this->chunks[2] == mask, this->chunks[3] == mask)
  3832. .to_bitmask();
  3833. }
  3834. is_utf8_really_inline uint64_t eq(const simd16x32<uint16_t> &other) const {
  3835. return simd16x32<bool>(this->chunks[0] == other.chunks[0],
  3836. this->chunks[1] == other.chunks[1],
  3837. this->chunks[2] == other.chunks[2],
  3838. this->chunks[3] == other.chunks[3])
  3839. .to_bitmask();
  3840. }
  3841. is_utf8_really_inline uint64_t lteq(const T m) const {
  3842. const simd16<T> mask = simd16<T>::splat(m);
  3843. return simd16x32<bool>(this->chunks[0] <= mask, this->chunks[1] <= mask,
  3844. this->chunks[2] <= mask, this->chunks[3] <= mask)
  3845. .to_bitmask();
  3846. }
  3847. is_utf8_really_inline uint64_t in_range(const T low, const T high) const {
  3848. const simd16<T> mask_low = simd16<T>::splat(low);
  3849. const simd16<T> mask_high = simd16<T>::splat(high);
  3850. return simd16x32<bool>(
  3851. (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
  3852. (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
  3853. (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
  3854. (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low))
  3855. .to_bitmask();
  3856. }
  3857. is_utf8_really_inline uint64_t not_in_range(const T low, const T high) const {
  3858. const simd16<T> mask_low = simd16<T>::splat(static_cast<T>(low - 1));
  3859. const simd16<T> mask_high = simd16<T>::splat(static_cast<T>(high + 1));
  3860. return simd16x32<bool>(
  3861. (this->chunks[0] >= mask_high) | (this->chunks[0] <= mask_low),
  3862. (this->chunks[1] >= mask_high) | (this->chunks[1] <= mask_low),
  3863. (this->chunks[2] >= mask_high) | (this->chunks[2] <= mask_low),
  3864. (this->chunks[3] >= mask_high) | (this->chunks[3] <= mask_low))
  3865. .to_bitmask();
  3866. }
  3867. is_utf8_really_inline uint64_t lt(const T m) const {
  3868. const simd16<T> mask = simd16<T>::splat(m);
  3869. return simd16x32<bool>(this->chunks[0] < mask, this->chunks[1] < mask,
  3870. this->chunks[2] < mask, this->chunks[3] < mask)
  3871. .to_bitmask();
  3872. }
  3873. }; // struct simd16x32<T>
  3874. } // namespace simd
  3875. } // unnamed namespace
  3876. } // namespace westmere
  3877. } // namespace is_utf8_internals
  3878. #endif // IS_UTF8_WESTMERE_SIMD_INPUT_H
  3879. IS_UTF8_UNTARGET_REGION
  3880. #endif // IS_UTF8_IMPLEMENTATION_WESTMERE
  3881. #endif // IS_UTF8_WESTMERE_COMMON_H
  3882. #ifndef IS_UTF8_FALLBACK_H
  3883. #define IS_UTF8_FALLBACK_H
  3884. // Note that fallback.h is always imported last.
  3885. // Enable the fallback implementation by default, unless an implementation
  3886. // that can always run on the target has already been selected.
  3887. #ifndef IS_UTF8_IMPLEMENTATION_FALLBACK
  3888. #if IS_UTF8_CAN_ALWAYS_RUN_ARM64 || IS_UTF8_CAN_ALWAYS_RUN_ICELAKE || \
  3889. IS_UTF8_CAN_ALWAYS_RUN_HASWELL || IS_UTF8_CAN_ALWAYS_RUN_WESTMERE || \
  3890. IS_UTF8_CAN_ALWAYS_RUN_PPC64
  3891. #define IS_UTF8_IMPLEMENTATION_FALLBACK 0
  3892. #else
  3893. #define IS_UTF8_IMPLEMENTATION_FALLBACK 1
  3894. #endif
  3895. #endif
  3896. #if IS_UTF8_IMPLEMENTATION_FALLBACK
  3897. #define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 1
  3898. #else
  3899. #define IS_UTF8_CAN_ALWAYS_RUN_FALLBACK 0
  3900. #endif
  3901. #if IS_UTF8_IMPLEMENTATION_FALLBACK
  3902. namespace is_utf8_internals {
  3903. /**
  3904. * Fallback implementation (runs on any machine).
  3905. */
  3906. namespace fallback {} // namespace fallback
  3907. } // namespace is_utf8_internals
  3908. #ifndef IS_UTF8_FALLBACK_IMPLEMENTATION_H
  3909. #define IS_UTF8_FALLBACK_IMPLEMENTATION_H
  3910. namespace is_utf8_internals {
  3911. namespace fallback {
  3912. class implementation final : public is_utf8_internals::implementation {
  3913. public:
  3914. is_utf8_really_inline implementation()
  3915. : is_utf8_internals::implementation(
  3916. "fallback", "Generic fallback implementation", 0) {}
  3917. is_utf8_warn_unused bool validate_utf8(const char *buf,
  3918. size_t len) const noexcept final;
  3919. };
  3920. } // namespace fallback
  3921. } // namespace is_utf8_internals
  3922. #endif // IS_UTF8_FALLBACK_IMPLEMENTATION_H
  3923. // redefining IS_UTF8_IMPLEMENTATION to "fallback"
  3924. // #define IS_UTF8_IMPLEMENTATION fallback
  3925. // Declarations
  3926. #ifndef IS_UTF8_FALLBACK_BITMANIPULATION_H
  3927. #define IS_UTF8_FALLBACK_BITMANIPULATION_H
  3928. #include <limits>
  3929. namespace is_utf8_internals {
  3930. namespace fallback {
  3931. namespace {
  3932. #if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
  3933. static inline unsigned char _BitScanForward64(unsigned long *ret, uint64_t x) {
  3934. unsigned long x0 = (unsigned long)x, top, bottom;
  3935. _BitScanForward(&top, (unsigned long)(x >> 32));
  3936. _BitScanForward(&bottom, x0);
  3937. *ret = x0 ? bottom : 32 + top;
  3938. return x != 0;
  3939. }
  3940. static unsigned char _BitScanReverse64(unsigned long *ret, uint64_t x) {
  3941. unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
  3942. _BitScanReverse(&top, x1);
  3943. _BitScanReverse(&bottom, (unsigned long)x);
  3944. *ret = x1 ? top + 32 : bottom;
  3945. return x != 0;
  3946. }
  3947. #endif
  3948. } // unnamed namespace
  3949. } // namespace fallback
  3950. } // namespace is_utf8_internals
  3951. #endif // IS_UTF8_FALLBACK_BITMANIPULATION_H
  3952. #endif // IS_UTF8_IMPLEMENTATION_FALLBACK
  3953. #endif // IS_UTF8_FALLBACK_H
  3954. namespace is_utf8_internals {
  3955. bool implementation::supported_by_runtime_system() const {
  3956. uint32_t required_instruction_sets = this->required_instruction_sets();
  3957. uint32_t supported_instruction_sets =
  3958. internal::detect_supported_architectures();
  3959. return ((supported_instruction_sets & required_instruction_sets) ==
  3960. required_instruction_sets);
  3961. }
  3962. namespace internal {
  3963. // One statically allocated instance per implementation compiled into this
  3964. // build; each is guarded by its IS_UTF8_IMPLEMENTATION_* switch. We're hoping these get baked into the executable without requiring a static initializer.
  3965. #if IS_UTF8_IMPLEMENTATION_ICELAKE
  3966. const icelake::implementation icelake_singleton{};
  3967. #endif
  3968. #if IS_UTF8_IMPLEMENTATION_HASWELL
  3969. const haswell::implementation haswell_singleton{};
  3970. #endif
  3971. #if IS_UTF8_IMPLEMENTATION_WESTMERE
  3972. const westmere::implementation westmere_singleton{};
  3973. #endif
  3974. #if IS_UTF8_IMPLEMENTATION_ARM64
  3975. const arm64::implementation arm64_singleton{};
  3976. #endif
  3977. #if IS_UTF8_IMPLEMENTATION_PPC64
  3978. const ppc64::implementation ppc64_singleton{};
  3979. #endif
  3980. #if IS_UTF8_IMPLEMENTATION_FALLBACK
  3981. const fallback::implementation fallback_singleton{};
  3982. #endif
  3983. /**
  3984. * @private Detects best supported implementation on first use, and sets it
  3985. */
  3986. class detect_best_supported_implementation_on_first_use final
  3987. : public implementation {
  3988. public:
  3989. const std::string &name() const noexcept final { return set_best()->name(); }
  3990. const std::string &description() const noexcept final {
  3991. return set_best()->description();
  3992. }
  3993. uint32_t required_instruction_sets() const noexcept final {
  3994. return set_best()->required_instruction_sets();
  3995. }
  3996. is_utf8_warn_unused bool
  3997. validate_utf8(const char *buf, size_t len) const noexcept final override {
  3998. return set_best()->validate_utf8(buf, len);
  3999. }
  4000. is_utf8_really_inline
  4001. detect_best_supported_implementation_on_first_use() noexcept
  4002. : implementation("best_supported_detector",
  4003. "Detects the best supported implementation and sets it",
  4004. 0) {}
  4005. private:
  4006. const implementation *set_best() const noexcept;
  4007. };
  // Namespace-scope detector instance.
  // NOTE(review): get_active_implementation() defines its own function-local
  // static with the same name; confirm whether this instance is still used.
  4008. const detect_best_supported_implementation_on_first_use
  4009. detect_best_supported_implementation_on_first_use_singleton;
  // Pointers to every implementation compiled into this build. The order
  // matters: detect_best_supported() walks the list from the front and returns
  // the first entry whose required instruction sets are all supported.
  4010. const std::initializer_list<const implementation *>
  4011. available_implementation_pointers {
  4012. #if IS_UTF8_IMPLEMENTATION_ICELAKE
  4013. &icelake_singleton,
  4014. #endif
  4015. #if IS_UTF8_IMPLEMENTATION_HASWELL
  4016. &haswell_singleton,
  4017. #endif
  4018. #if IS_UTF8_IMPLEMENTATION_WESTMERE
  4019. &westmere_singleton,
  4020. #endif
  4021. #if IS_UTF8_IMPLEMENTATION_ARM64
  4022. &arm64_singleton,
  4023. #endif
  4024. #if IS_UTF8_IMPLEMENTATION_PPC64
  4025. &ppc64_singleton,
  4026. #endif
  4027. #if IS_UTF8_IMPLEMENTATION_FALLBACK
  4028. &fallback_singleton,
  4029. #endif
  4030. }; // available_implementation_pointers
  4031. // So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no
  4032. // support
  4033. class unsupported_implementation final : public implementation {
  4034. public:
  4035. is_utf8_warn_unused bool validate_utf8(const char *,
  4036. size_t) const noexcept final override {
  4037. return false; // Just refuse to validate. Given that we have a fallback
  4038. // implementation
  4039. // it seems unlikely that unsupported_implementation will ever be used. If
  4040. // it is used, then it will flag all strings as invalid. The alternative is
  4041. // to return an error_code from which the user has to figure out whether the
  4042. // string is valid UTF-8... which seems like a lot of work just to handle
  4043. // the very unlikely case that we have an unsupported implementation. And,
  4044. // when it does happen (that we have an unsupported implementation), what
  4045. // are the chances that the programmer has a fallback? Given that *we*
  4046. // provide the fallback, it implies that the programmer would need a
  4047. // fallback for our fallback.
  4048. }
  4049. unsupported_implementation()
  4050. : implementation("unsupported",
  4051. "Unsupported CPU (no detected SIMD instructions)", 0) {}
  4052. };
  4053. const unsupported_implementation unsupported_singleton{};
  4054. size_t available_implementation_list::size() const noexcept {
  4055. return internal::available_implementation_pointers.size();
  4056. }
  4057. const implementation *const *
  4058. available_implementation_list::begin() const noexcept {
  4059. return internal::available_implementation_pointers.begin();
  4060. }
  4061. const implementation *const *
  4062. available_implementation_list::end() const noexcept {
  4063. return internal::available_implementation_pointers.end();
  4064. }
  4065. const implementation *
  4066. available_implementation_list::detect_best_supported() const noexcept {
  4067. // They are prelisted in priority order, so we just go down the list
  4068. uint32_t supported_instruction_sets =
  4069. internal::detect_supported_architectures();
  4070. for (const implementation *impl :
  4071. internal::available_implementation_pointers) {
  4072. uint32_t required_instruction_sets = impl->required_instruction_sets();
  4073. if ((supported_instruction_sets & required_instruction_sets) ==
  4074. required_instruction_sets) {
  4075. return impl;
  4076. }
  4077. }
  4078. return &unsupported_singleton; // this should never happen?
  4079. }
  4080. const implementation *
  4081. detect_best_supported_implementation_on_first_use::set_best() const noexcept {
  4082. IS_UTF8_PUSH_DISABLE_WARNINGS
  4083. IS_UTF8_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC:
  4084. // manually verified this is safe
  4085. char *force_implementation_name = getenv("IS_UTF8_FORCE_IMPLEMENTATION");
  4086. IS_UTF8_POP_DISABLE_WARNINGS
  4087. if (force_implementation_name) {
  4088. auto force_implementation =
  4089. get_available_implementations()[force_implementation_name];
  4090. if (force_implementation) {
  4091. return get_active_implementation() = force_implementation;
  4092. } else {
  4093. // Note: abort() and stderr usage within the library is forbidden.
  4094. return get_active_implementation() = &unsupported_singleton;
  4095. }
  4096. }
  4097. return get_active_implementation() =
  4098. get_available_implementations().detect_best_supported();
  4099. }
  4100. } // namespace internal
  4101. IS_UTF8_DLLIMPORTEXPORT const internal::available_implementation_list& get_available_implementations() {
  4102. static const internal::available_implementation_list available_implementations{};
  4103. return available_implementations;
  4104. }
  4105. IS_UTF8_DLLIMPORTEXPORT internal::atomic_ptr<const implementation>& get_active_implementation() {
  4106. static const internal::detect_best_supported_implementation_on_first_use detect_best_supported_implementation_on_first_use_singleton;
  4107. static internal::atomic_ptr<const implementation> active_implementation{&detect_best_supported_implementation_on_first_use_singleton};
  4108. return active_implementation;
  4109. }
  4110. is_utf8_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept {
  4111. return get_active_implementation()->validate_utf8(buf, len);
  4112. }
  4113. const implementation *builtin_implementation() {
  4114. static const implementation *builtin_impl =
  4115. get_available_implementations()[IS_UTF8_STRINGIFY(
  4116. IS_UTF8_BUILTIN_IMPLEMENTATION)];
  4117. return builtin_impl;
  4118. }
  4119. } // namespace is_utf8_internals
  4120. // The scalar routines should be included once.
  4121. #ifndef IS_UTF8_UTF8_H
  4122. #define IS_UTF8_UTF8_H
  4123. namespace is_utf8_internals {
  4124. namespace scalar {
  4125. namespace {
  4126. namespace utf8 {
  4127. #if IS_UTF8_IS_ARM64
  4128. // not needed
  4129. #else
  4130. // credit: based on code from Google Fuchsia (Apache Licensed)
  // Scalar UTF-8 validation.
  //
  // @param buf  start of the byte sequence to check
  // @param len  number of bytes to check
  // @return true when buf[0..len) is well-formed UTF-8: no truncated or
  //         overlong sequences, no surrogates (U+D800..U+DFFF) and no code
  //         point above U+10FFFF. An empty input is valid.
  inline is_utf8_warn_unused bool validate(const char *buf, size_t len) noexcept {
    const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
    // True when data[i] has the continuation-byte form 10xxxxxx.
    const auto is_continuation = [data](uint64_t i) {
      return (data[i] & 0b11000000) == 0b10000000;
    };

    uint64_t pos = 0;
    while (pos < len) {
      // Fast path: if 16 more bytes are available and all of them are ASCII,
      // skip them at once.
      uint64_t next_pos = pos + 16;
      if (next_pos <= len) {
        uint64_t v1;
        std::memcpy(&v1, data + pos, sizeof(uint64_t));
        uint64_t v2;
        std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
        const uint64_t v{v1 | v2};
        if ((v & 0x8080808080808080) == 0) {
          pos = next_pos;
          continue;
        }
      }

      // Skip ASCII bytes one at a time until a non-ASCII byte is found.
      unsigned char byte = data[pos];
      while (byte < 0b10000000) {
        if (++pos == len) {
          return true;
        }
        byte = data[pos];
      }

      uint32_t code_point = 0;
      if ((byte & 0b11100000) == 0b11000000) {
        // Two-byte sequence: 110xxxxx 10xxxxxx
        next_pos = pos + 2;
        if (next_pos > len) {
          return false;
        }
        if (!is_continuation(pos + 1)) {
          return false;
        }
        code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
        // Overlong check; 11 payload bits can never exceed 0x7ff, so no upper
        // bound is needed.
        if (code_point < 0x80) {
          return false;
        }
      } else if ((byte & 0b11110000) == 0b11100000) {
        // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        next_pos = pos + 3;
        if (next_pos > len) {
          return false;
        }
        if (!is_continuation(pos + 1) || !is_continuation(pos + 2)) {
          return false;
        }
        code_point = (byte & 0b00001111) << 12 |
                     (data[pos + 1] & 0b00111111) << 6 |
                     (data[pos + 2] & 0b00111111);
        // Overlong and surrogate checks; 16 payload bits can never exceed
        // 0xffff, so no upper bound is needed.
        if ((code_point < 0x800) ||
            (0xd7ff < code_point && code_point < 0xe000)) {
          return false;
        }
      } else if ((byte & 0b11111000) == 0b11110000) {
        // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        next_pos = pos + 4;
        if (next_pos > len) {
          return false;
        }
        if (!is_continuation(pos + 1) || !is_continuation(pos + 2) ||
            !is_continuation(pos + 3)) {
          return false;
        }
        code_point =
            (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
            (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
        // Overlong check and Unicode upper limit.
        if (code_point <= 0xffff || 0x10ffff < code_point) {
          return false;
        }
      } else {
        // A stray continuation byte or an invalid lead byte (11111xxx).
        return false;
      }
      pos = next_pos;
    }
    return true;
  }
  4218. #endif
  4219. } // namespace utf8
  4220. } // unnamed namespace
  4221. } // namespace scalar
  4222. } // namespace is_utf8_internals
  4223. #endif
  4224. IS_UTF8_PUSH_DISABLE_WARNINGS
  4225. IS_UTF8_DISABLE_UNDESIRED_WARNINGS
  4226. #if IS_UTF8_IMPLEMENTATION_ARM64
  4227. // redefining IS_UTF8_IMPLEMENTATION to "arm64"
  4228. // #define IS_UTF8_IMPLEMENTATION arm64
  4229. namespace is_utf8_internals {
  4230. namespace arm64 {
  4231. namespace {
  4232. #ifndef IS_UTF8_ARM64_H
  4233. #error "arm64.h must be included"
  4234. #endif
  4235. using namespace simd;
  4236. is_utf8_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
  4237. simd8<uint8_t> bits = input.reduce_or();
  4238. return bits.max_val() < 0b10000000u;
  4239. }
  4240. is_utf8_unused is_utf8_really_inline simd8<bool>
  4241. must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2,
  4242. const simd8<uint8_t> prev3) {
  4243. simd8<bool> is_second_byte = prev1 >= uint8_t(0b11000000u);
  4244. simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
  4245. simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
  4246. // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller
  4247. // is using ^ as well. This will work fine because we only have to report
  4248. // errors for cases with 0-1 lead bytes. Multiple lead bytes implies 2
  4249. // overlapping multibyte characters, and if that happens, there is guaranteed
  4250. // to be at least *one* lead byte that is part of only 1 other multibyte
  4251. // character. The error will be detected there.
  4252. return is_second_byte ^ is_third_byte ^ is_fourth_byte;
  4253. }
  4254. is_utf8_really_inline simd8<bool>
  4255. must_be_2_3_continuation(const simd8<uint8_t> prev2,
  4256. const simd8<uint8_t> prev3) {
  4257. simd8<bool> is_third_byte = prev2 >= uint8_t(0b11100000u);
  4258. simd8<bool> is_fourth_byte = prev3 >= uint8_t(0b11110000u);
  4259. return is_third_byte ^ is_fourth_byte;
  4260. }
  4261. } // unnamed namespace
  4262. } // namespace arm64
  4263. } // namespace is_utf8_internals
  4264. namespace is_utf8_internals {
  4265. namespace arm64 {
  4266. namespace {
  4267. // Walks through a buffer in block-sized increments, loading the last part with
  4268. // spaces
  4269. template <size_t STEP_SIZE> struct buf_block_reader {
  4270. public:
  4271. is_utf8_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
  4272. is_utf8_really_inline size_t block_index();
  4273. is_utf8_really_inline bool has_full_block() const;
  4274. is_utf8_really_inline const uint8_t *full_block() const;
  4275. /**
  4276. * Get the last block, padded with spaces.
  4277. *
  4278. * There will always be a last block, with at least 1 byte, unless len == 0
  4279. * (in which case this function fills the buffer with spaces and returns 0. In
  4280. * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder
  4281. * block with STEP_SIZE bytes and no spaces for padding.
  4282. *
  4283. * @return the number of effective characters in the last block.
  4284. */
  4285. is_utf8_really_inline size_t get_remainder(uint8_t *dst) const;
  4286. is_utf8_really_inline void advance();
  4287. private:
  4288. const uint8_t *buf;
  4289. const size_t len;
  4290. const size_t lenminusstep;
  4291. size_t idx;
  4292. };
  4293. // Routines to print masks and text for debugging bitmask operations
  4294. is_utf8_unused static char *format_input_text_64(const uint8_t *text) {
  4295. static char *buf =
  4296. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  4297. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  4298. buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
  4299. }
  4300. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  4301. return buf;
  4302. }
  4303. // Routines to print masks and text for debugging bitmask operations
  4304. is_utf8_unused static char *format_input_text(const simd8x64<uint8_t> &in) {
  4305. static char *buf =
  4306. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  4307. in.store(reinterpret_cast<uint8_t *>(buf));
  4308. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  4309. if (buf[i] < ' ') {
  4310. buf[i] = '_';
  4311. }
  4312. }
  4313. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  4314. return buf;
  4315. }
  // Debugging helper: renders a 64-bit mask as 64 characters, 'X' for a set
  // bit and ' ' for a clear bit; character i corresponds to bit i.
  //
  // Returns a pointer to a static, NUL-terminated buffer that is overwritten
  // by the next call. The buffer is a static array rather than a malloc'ed
  // block, so there is no allocation that can fail or leak.
  is_utf8_unused static char *format_mask(uint64_t mask) {
    static char buf[64 + 1];
    for (size_t i = 0; i < 64; i++) {
      // Shift a 64-bit one: size_t may be only 32 bits wide, which would make
      // shifts by 32 or more undefined.
      buf[i] = (mask & (uint64_t(1) << i)) ? 'X' : ' ';
    }
    buf[64] = '\0';
    return buf;
  }
  4324. template <size_t STEP_SIZE>
  4325. is_utf8_really_inline
  4326. buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len)
  4327. : buf{_buf}, len{_len},
  4328. lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  4329. template <size_t STEP_SIZE>
  4330. is_utf8_really_inline size_t buf_block_reader<STEP_SIZE>::block_index() {
  4331. return idx;
  4332. }
  4333. template <size_t STEP_SIZE>
  4334. is_utf8_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
  4335. return idx < lenminusstep;
  4336. }
  4337. template <size_t STEP_SIZE>
  4338. is_utf8_really_inline const uint8_t *
  4339. buf_block_reader<STEP_SIZE>::full_block() const {
  4340. return &buf[idx];
  4341. }
  4342. template <size_t STEP_SIZE>
  4343. is_utf8_really_inline size_t
  4344. buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
  4345. if (len == idx) {
  4346. return 0;
  4347. } // memcpy(dst, null, 0) will trigger an error with some sanitizers
  4348. std::memset(dst, 0x20,
  4349. STEP_SIZE); // std::memset STEP_SIZE because it's more efficient
  4350. // to write out 8 or 16 bytes at once.
  4351. std::memcpy(dst, buf + idx, len - idx);
  4352. return len - idx;
  4353. }
  4354. template <size_t STEP_SIZE>
  4355. is_utf8_really_inline void buf_block_reader<STEP_SIZE>::advance() {
  4356. idx += STEP_SIZE;
  4357. }
  4358. } // unnamed namespace
  4359. } // namespace arm64
  4360. } // namespace is_utf8_internals
  4361. namespace is_utf8_internals {
  4362. namespace arm64 {
  4363. namespace {
  4364. namespace utf8_validation {
  4365. using namespace simd;
  4366. is_utf8_really_inline simd8<uint8_t>
  4367. check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  4368. // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
  4369. // Bit 1 = Too Long (ASCII followed by continuation)
  4370. // Bit 2 = Overlong 3-byte
  4371. // Bit 4 = Surrogate
  4372. // Bit 5 = Overlong 2-byte
  4373. // Bit 7 = Two Continuations
  4374. constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______
  4375. // 11______ 11______
  4376. constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______
  4377. constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
  4378. constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____
  4379. constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
  4380. constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______
  4381. constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____
  4382. // 11110100 101_____
  4383. // 11110101 1001____
  4384. // 11110101 101_____
  4385. // 1111011_ 1001____
  4386. // 1111011_ 101_____
  4387. // 11111___ 1001____
  4388. // 11111___ 101_____
  4389. constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
  4390. // 11110101 1000____
  4391. // 1111011_ 1000____
  4392. // 11111___ 1000____
  4393. constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
  4394. const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
  4395. // 0_______ ________ <ASCII in byte 1>
  4396. TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
  4397. TOO_LONG,
  4398. // 10______ ________ <continuation in byte 1>
  4399. TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
  4400. // 1100____ ________ <two byte lead in byte 1>
  4401. TOO_SHORT | OVERLONG_2,
  4402. // 1101____ ________ <two byte lead in byte 1>
  4403. TOO_SHORT,
  4404. // 1110____ ________ <three byte lead in byte 1>
  4405. TOO_SHORT | OVERLONG_3 | SURROGATE,
  4406. // 1111____ ________ <four+ byte lead in byte 1>
  4407. TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);
  4408. constexpr const uint8_t CARRY =
  4409. TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
  4410. const simd8<uint8_t> byte_1_low =
  4411. (prev1 & 0x0F)
  4412. .lookup_16<uint8_t>(
  4413. // ____0000 ________
  4414. CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
  4415. // ____0001 ________
  4416. CARRY | OVERLONG_2,
  4417. // ____001_ ________
  4418. CARRY, CARRY,
  4419. // ____0100 ________
  4420. CARRY | TOO_LARGE,
  4421. // ____0101 ________
  4422. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4423. // ____011_ ________
  4424. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4425. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4426. // ____1___ ________
  4427. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4428. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4429. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4430. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4431. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4432. // ____1101 ________
  4433. CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
  4434. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4435. CARRY | TOO_LARGE | TOO_LARGE_1000);
  4436. const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
  4437. // ________ 0_______ <ASCII in byte 2>
  4438. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
  4439. TOO_SHORT, TOO_SHORT,
  4440. // ________ 1000____
  4441. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 |
  4442. OVERLONG_4,
  4443. // ________ 1001____
  4444. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
  4445. // ________ 101_____
  4446. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  4447. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  4448. // ________ 11______
  4449. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);
  4450. return (byte_1_high & byte_1_low & byte_2_high);
  4451. }
  4452. is_utf8_really_inline simd8<uint8_t>
  4453. check_multibyte_lengths(const simd8<uint8_t> input,
  4454. const simd8<uint8_t> prev_input,
  4455. const simd8<uint8_t> sc) {
  4456. simd8<uint8_t> prev2 = input.prev<2>(prev_input);
  4457. simd8<uint8_t> prev3 = input.prev<3>(prev_input);
  4458. simd8<uint8_t> must23 =
  4459. simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
  4460. simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
  4461. return must23_80 ^ sc;
  4462. }
  4463. //
  4464. // Return nonzero if there are incomplete multibyte characters at the end of the
  4465. // block: e.g. if there is a 4-byte character, but it's 3 bytes from the end.
  4466. //
  4467. is_utf8_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
  4468. // If the previous input's last 3 bytes match this, they're too short (they
  4469. // ended at EOF):
  4470. // ... 1111____ 111_____ 11______
  4471. static const uint8_t max_array[32] = {255,
  4472. 255,
  4473. 255,
  4474. 255,
  4475. 255,
  4476. 255,
  4477. 255,
  4478. 255,
  4479. 255,
  4480. 255,
  4481. 255,
  4482. 255,
  4483. 255,
  4484. 255,
  4485. 255,
  4486. 255,
  4487. 255,
  4488. 255,
  4489. 255,
  4490. 255,
  4491. 255,
  4492. 255,
  4493. 255,
  4494. 255,
  4495. 255,
  4496. 255,
  4497. 255,
  4498. 255,
  4499. 255,
  4500. 0b11110000u - 1,
  4501. 0b11100000u - 1,
  4502. 0b11000000u - 1};
  4503. const simd8<uint8_t> max_value(
  4504. &max_array[sizeof(max_array) - sizeof(simd8<uint8_t>)]);
  4505. return input.gt_bits(max_value);
  4506. }
  4507. struct utf8_checker {
  4508. // If this is nonzero, there has been a UTF-8 error.
  4509. simd8<uint8_t> error;
  4510. // The last input we received
  4511. simd8<uint8_t> prev_input_block;
  4512. // Whether the last input we received was incomplete (used for ASCII fast
  4513. // path)
  4514. simd8<uint8_t> prev_incomplete;
  4515. //
  4516. // Check whether the current bytes are valid UTF-8.
  4517. //
  4518. is_utf8_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
  4519. const simd8<uint8_t> prev_input) {
  4520. // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
  4521. // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
  4522. // small negative numbers)
  4523. simd8<uint8_t> prev1 = input.prev<1>(prev_input);
  4524. simd8<uint8_t> sc = check_special_cases(input, prev1);
  4525. this->error |= check_multibyte_lengths(input, prev_input, sc);
  4526. }
  4527. // The only problem that can happen at EOF is that a multibyte character is
  4528. // too short or a byte value too large in the last bytes: check_special_cases
  4529. // only checks for bytes too large in the first of two bytes.
  4530. is_utf8_really_inline void check_eof() {
  4531. // If the previous block had incomplete UTF-8 characters at the end, an
  4532. // ASCII block can't possibly finish them.
  4533. this->error |= this->prev_incomplete;
  4534. }
  4535. is_utf8_really_inline void check_next_input(const simd8x64<uint8_t> &input) {
  4536. if (is_utf8_likely(is_ascii(input))) {
  4537. this->error |= this->prev_incomplete;
  4538. } else {
  4539. // you might think that a for-loop would work, but under Visual Studio, it
  4540. // is not good enough.
  4541. static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
  4542. (simd8x64<uint8_t>::NUM_CHUNKS == 4),
  4543. "We support either two or four chunks per 64-byte block.");
  4544. if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
  4545. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  4546. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  4547. } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
  4548. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  4549. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  4550. this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
  4551. this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
  4552. }
  4553. this->prev_incomplete =
  4554. is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1]);
  4555. this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1];
  4556. }
  4557. }
  4558. // do not forget to call check_eof!
  4559. is_utf8_really_inline bool errors() const {
  4560. return this->error.any_bits_set_anywhere();
  4561. }
  4562. }; // struct utf8_checker
  4563. } // namespace utf8_validation
  4564. using utf8_validation::utf8_checker;
  4565. } // unnamed namespace
  4566. } // namespace arm64
  4567. } // namespace is_utf8_internals
  4568. namespace is_utf8_internals {
  4569. namespace arm64 {
  4570. namespace {
  4571. namespace utf8_validation {
  4572. /**
  4573. * Validates that the string is actual UTF-8.
  4574. */
  4575. template <class checker>
  4576. bool generic_validate_utf8(const uint8_t *input, size_t length) {
  4577. checker c{};
  4578. buf_block_reader<64> reader(input, length);
  4579. while (reader.has_full_block()) {
  4580. simd::simd8x64<uint8_t> in(reader.full_block());
  4581. c.check_next_input(in);
  4582. reader.advance();
  4583. }
  4584. uint8_t block[64]{};
  4585. reader.get_remainder(block);
  4586. simd::simd8x64<uint8_t> in(block);
  4587. c.check_next_input(in);
  4588. reader.advance();
  4589. c.check_eof();
  4590. return !c.errors();
  4591. }
  4592. bool generic_validate_utf8(const char *input, size_t length) {
  4593. return generic_validate_utf8<utf8_checker>(
  4594. reinterpret_cast<const uint8_t *>(input), length);
  4595. }
  4596. } // namespace utf8_validation
  4597. } // unnamed namespace
  4598. } // namespace arm64
  4599. } // namespace is_utf8_internals
  4600. //
  4601. // Implementation-specific overrides
  4602. //
  4603. namespace is_utf8_internals {
  4604. namespace arm64 {
  4605. is_utf8_warn_unused bool
  4606. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  4607. return arm64::utf8_validation::generic_validate_utf8(buf, len);
  4608. }
  4609. } // namespace arm64
  4610. } // namespace is_utf8_internals
  4611. #endif
  4612. #if IS_UTF8_IMPLEMENTATION_FALLBACK
  4613. // redefining IS_UTF8_IMPLEMENTATION to "fallback"
  4614. // #define IS_UTF8_IMPLEMENTATION fallback
  4615. namespace is_utf8_internals {
  4616. namespace fallback {
  4617. is_utf8_warn_unused bool
  4618. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  4619. return scalar::utf8::validate(buf, len);
  4620. }
  4621. } // namespace fallback
  4622. } // namespace is_utf8_internals
  4623. #endif
  4624. #if IS_UTF8_IMPLEMENTATION_ICELAKE
  4625. // redefining IS_UTF8_IMPLEMENTATION to "icelake"
  4626. // #define IS_UTF8_IMPLEMENTATION icelake
  4627. IS_UTF8_TARGET_ICELAKE
  4628. #if IS_UTF8_GCC11ORMORE // workaround for
  4629. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  4630. IS_UTF8_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
  4631. #endif // end of workaround
  4632. namespace is_utf8_internals {
  4633. namespace icelake {
  4634. namespace {
  4635. #ifndef IS_UTF8_ICELAKE_H
  4636. #error "icelake.h must be included"
  4637. #endif
  /**
   * Returns the last N bytes of `previous` followed by the first 64-N bytes of
   * `input`, i.e. the byte stream shifted back by N positions.
   *
   * `rotated` holds the last 128-bit lane of `previous` followed by the first
   * three lanes of `input`, so each lane of `rotated` is the lane that precedes
   * the matching lane of `input`. The per-lane alignr then pulls N bytes across
   * each lane boundary.
   *
   * N must lie in [0, 16]: the alignr shift is 16 - N bytes within a 16-byte
   * lane. A larger N would yield a negative immediate and a wrong result, so it
   * is rejected at compile time.
   */
  template <int N> __m512i prev(__m512i input, __m512i previous) {
    static_assert(N >= 0 && N <= 16, "N must be between 0 and 16");
    const __m512i movemask =
        _mm512_setr_epi32(28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
    const __m512i rotated = _mm512_permutex2var_epi32(input, movemask, previous);
    // A named constexpr immediate is what the GCC 8/9 workaround used; it is
    // valid on every compiler, so a single code path suffices.
    constexpr int shift = 16 - N;
    return _mm512_alignr_epi8(input, rotated, shift);
  }
  4653. is_utf8_really_inline __m512i check_special_cases(__m512i input,
  4654. const __m512i prev1) {
  4655. __m512i mask1 = _mm512_setr_epi64(0x0202020202020202, 0x4915012180808080,
  4656. 0x0202020202020202, 0x4915012180808080,
  4657. 0x0202020202020202, 0x4915012180808080,
  4658. 0x0202020202020202, 0x4915012180808080);
  4659. const __m512i v_0f = _mm512_set1_epi8(0x0f);
  4660. __m512i index1 = _mm512_and_si512(_mm512_srli_epi16(prev1, 4), v_0f);
  4661. __m512i byte_1_high = _mm512_shuffle_epi8(mask1, index1);
  4662. __m512i mask2 = _mm512_setr_epi64(0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
  4663. 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
  4664. 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb,
  4665. 0xcbcbcb8b8383a3e7, 0xcbcbdbcbcbcbcbcb);
  4666. __m512i index2 = _mm512_and_si512(prev1, v_0f);
  4667. __m512i byte_1_low = _mm512_shuffle_epi8(mask2, index2);
  4668. __m512i mask3 =
  4669. _mm512_setr_epi64(0x101010101010101, 0x1010101babaaee6, 0x101010101010101,
  4670. 0x1010101babaaee6, 0x101010101010101, 0x1010101babaaee6,
  4671. 0x101010101010101, 0x1010101babaaee6);
  4672. __m512i index3 = _mm512_and_si512(_mm512_srli_epi16(input, 4), v_0f);
  4673. __m512i byte_2_high = _mm512_shuffle_epi8(mask3, index3);
  4674. return _mm512_ternarylogic_epi64(byte_1_high, byte_1_low, byte_2_high, 128);
  4675. }
  4676. is_utf8_really_inline __m512i check_multibyte_lengths(const __m512i input,
  4677. const __m512i prev_input,
  4678. const __m512i sc) {
  4679. __m512i prev2 = prev<2>(input, prev_input);
  4680. __m512i prev3 = prev<3>(input, prev_input);
  4681. __m512i is_third_byte = _mm512_subs_epu8(
  4682. prev2, _mm512_set1_epi8(char(0b11011111))); // Only 111_____ will be > 0
  4683. __m512i is_fourth_byte = _mm512_subs_epu8(
  4684. prev3, _mm512_set1_epi8(char(0b11101111))); // Only 1111____ will be > 0
  4685. __m512i is_third_or_fourth_byte =
  4686. _mm512_or_si512(is_third_byte, is_fourth_byte);
  4687. const __m512i v_7f = _mm512_set1_epi8(char(0x7f));
  4688. is_third_or_fourth_byte = _mm512_adds_epu8(v_7f, is_third_or_fourth_byte);
  4689. // We want to compute (is_third_or_fourth_byte AND v80) XOR sc.
  4690. const __m512i v_80 = _mm512_set1_epi8(char(0x80));
  4691. return _mm512_ternarylogic_epi32(is_third_or_fourth_byte, v_80, sc,
  4692. 0b1101010);
  4693. //__m512i is_third_or_fourth_byte_mask =
  4694. //_mm512_and_si512(is_third_or_fourth_byte, v_80); return
  4695. // _mm512_xor_si512(is_third_or_fourth_byte_mask, sc);
  4696. }
  4697. //
  4698. // Return nonzero if there are incomplete multibyte characters at the end of the
  4699. // block: e.g. if there is a 4-byte character, but it's 3 bytes from the end.
  4700. //
  4701. is_utf8_really_inline __m512i is_incomplete(const __m512i input) {
  4702. // If the previous input's last 3 bytes match this, they're too short (they
  4703. // ended at EOF):
  4704. // ... 1111____ 111_____ 11______
  4705. __m512i max_value = _mm512_setr_epi64(0xffffffffffffffff, 0xffffffffffffffff,
  4706. 0xffffffffffffffff, 0xffffffffffffffff,
  4707. 0xffffffffffffffff, 0xffffffffffffffff,
  4708. 0xffffffffffffffff, 0xbfdfefffffffffff);
  4709. return _mm512_subs_epu8(input, max_value);
  4710. }
  4711. struct avx512_utf8_checker {
  4712. // If this is nonzero, there has been a UTF-8 error.
  4713. __m512i error{};
  4714. // The last input we received
  4715. __m512i prev_input_block{};
  4716. // Whether the last input we received was incomplete (used for ASCII fast
  4717. // path)
  4718. __m512i prev_incomplete{};
  4719. //
  4720. // Check whether the current bytes are valid UTF-8.
  4721. //
  4722. is_utf8_really_inline void check_utf8_bytes(const __m512i input,
  4723. const __m512i prev_input) {
  4724. // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
  4725. // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
  4726. // small negative numbers)
  4727. __m512i prev1 = prev<1>(input, prev_input);
  4728. __m512i sc = check_special_cases(input, prev1);
  4729. this->error = _mm512_or_si512(
  4730. check_multibyte_lengths(input, prev_input, sc), this->error);
  4731. }
  4732. // The only problem that can happen at EOF is that a multibyte character is
  4733. // too short or a byte value too large in the last bytes: check_special_cases
  4734. // only checks for bytes too large in the first of two bytes.
  4735. is_utf8_really_inline void check_eof() {
  4736. // If the previous block had incomplete UTF-8 characters at the end, an
  4737. // ASCII block can't possibly finish them.
  4738. this->error = _mm512_or_si512(this->error, this->prev_incomplete);
  4739. }
  4740. // returns true if ASCII.
  4741. is_utf8_really_inline bool check_next_input(const __m512i input) {
  4742. const __m512i v_80 = _mm512_set1_epi8(char(0x80));
  4743. const __mmask64 ascii = _mm512_test_epi8_mask(input, v_80);
  4744. if (ascii == 0) {
  4745. this->error = _mm512_or_si512(this->error, this->prev_incomplete);
  4746. return true;
  4747. } else {
  4748. this->check_utf8_bytes(input, this->prev_input_block);
  4749. this->prev_incomplete = is_incomplete(input);
  4750. this->prev_input_block = input;
  4751. return false;
  4752. }
  4753. }
  4754. // do not forget to call check_eof!
  4755. is_utf8_really_inline bool errors() const {
  4756. return _mm512_test_epi8_mask(this->error, this->error) != 0;
  4757. }
  4758. }; // struct avx512_utf8_checker
  4759. } // namespace
  4760. } // namespace icelake
  4761. } // namespace is_utf8_internals
  4762. namespace is_utf8_internals {
  4763. namespace icelake {
  4764. is_utf8_warn_unused bool
  4765. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  4766. avx512_utf8_checker checker{};
  4767. const char *ptr = buf;
  4768. const char *end = ptr + len;
  4769. for (; ptr + 64 <= end; ptr += 64) {
  4770. const __m512i utf8 = _mm512_loadu_si512((const __m512i *)ptr);
  4771. checker.check_next_input(utf8);
  4772. }
  4773. {
  4774. const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL << (end - ptr)) - 1,
  4775. (const __m512i *)ptr);
  4776. checker.check_next_input(utf8);
  4777. }
  4778. checker.check_eof();
  4779. return !checker.errors();
  4780. }
  4781. } // namespace icelake
  4782. } // namespace is_utf8_internals
  4783. IS_UTF8_UNTARGET_REGION
  4784. #if IS_UTF8_GCC11ORMORE // workaround for
  4785. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  4786. IS_UTF8_POP_DISABLE_WARNINGS
  4787. #endif // end of workaround
  4788. #endif
  4789. #if IS_UTF8_IMPLEMENTATION_HASWELL
  4790. // redefining IS_UTF8_IMPLEMENTATION to "haswell"
  4791. // #define IS_UTF8_IMPLEMENTATION haswell
  4792. IS_UTF8_TARGET_HASWELL
  4793. #if IS_UTF8_GCC11ORMORE // workaround for
  4794. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  4795. IS_UTF8_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
  4796. #endif // end of workaround
  4797. namespace is_utf8_internals {
  4798. namespace haswell {
  4799. namespace {
  4800. #ifndef IS_UTF8_HASWELL_H
  4801. #error "haswell.h must be included"
  4802. #endif
  4803. using namespace simd;
  4804. is_utf8_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
  4805. return input.reduce_or().is_ascii();
  4806. }
  4807. is_utf8_unused is_utf8_really_inline simd8<bool>
  4808. must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2,
  4809. const simd8<uint8_t> prev3) {
  4810. simd8<uint8_t> is_second_byte =
  4811. prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0
  4812. simd8<uint8_t> is_third_byte =
  4813. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  4814. simd8<uint8_t> is_fourth_byte =
  4815. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  4816. // Caller requires a bool (all 1's). All values resulting from the subtraction
  4817. // will be <= 64, so signed comparison is fine.
  4818. return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) >
  4819. int8_t(0);
  4820. }
  4821. is_utf8_really_inline simd8<bool>
  4822. must_be_2_3_continuation(const simd8<uint8_t> prev2,
  4823. const simd8<uint8_t> prev3) {
  4824. simd8<uint8_t> is_third_byte =
  4825. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  4826. simd8<uint8_t> is_fourth_byte =
  4827. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  4828. // Caller requires a bool (all 1's). All values resulting from the subtraction
  4829. // will be <= 64, so signed comparison is fine.
  4830. return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
  4831. }
  4832. } // unnamed namespace
  4833. } // namespace haswell
  4834. } // namespace is_utf8_internals
  4835. namespace is_utf8_internals {
  4836. namespace haswell {
  4837. namespace {
  4838. // Walks through a buffer in block-sized increments, loading the last part with
  4839. // spaces
  4840. template <size_t STEP_SIZE> struct buf_block_reader {
  4841. public:
  4842. is_utf8_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
  4843. is_utf8_really_inline size_t block_index();
  4844. is_utf8_really_inline bool has_full_block() const;
  4845. is_utf8_really_inline const uint8_t *full_block() const;
  4846. /**
  4847. * Get the last block, padded with spaces.
  4848. *
  4849. * There will always be a last block, with at least 1 byte, unless len == 0
  4850. * (in which case this function fills the buffer with spaces and returns 0. In
  4851. * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder
  4852. * block with STEP_SIZE bytes and no spaces for padding.
  4853. *
  4854. * @return the number of effective characters in the last block.
  4855. */
  4856. is_utf8_really_inline size_t get_remainder(uint8_t *dst) const;
  4857. is_utf8_really_inline void advance();
  4858. private:
  4859. const uint8_t *buf;
  4860. const size_t len;
  4861. const size_t lenminusstep;
  4862. size_t idx;
  4863. };
  4864. // Routines to print masks and text for debugging bitmask operations
  4865. is_utf8_unused static char *format_input_text_64(const uint8_t *text) {
  4866. static char *buf =
  4867. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  4868. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  4869. buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
  4870. }
  4871. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  4872. return buf;
  4873. }
  4874. // Routines to print masks and text for debugging bitmask operations
  4875. is_utf8_unused static char *format_input_text(const simd8x64<uint8_t> &in) {
  4876. static char *buf =
  4877. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  4878. in.store(reinterpret_cast<uint8_t *>(buf));
  4879. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  4880. if (buf[i] < ' ') {
  4881. buf[i] = '_';
  4882. }
  4883. }
  4884. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  4885. return buf;
  4886. }
  4887. is_utf8_unused static char *format_mask(uint64_t mask) {
  4888. static char *buf = reinterpret_cast<char *>(malloc(64 + 1));
  4889. for (size_t i = 0; i < 64; i++) {
  4890. buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
  4891. }
  4892. buf[64] = '\0';
  4893. return buf;
  4894. }
  4895. template <size_t STEP_SIZE>
  4896. is_utf8_really_inline
  4897. buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len)
  4898. : buf{_buf}, len{_len},
  4899. lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  4900. template <size_t STEP_SIZE>
  4901. is_utf8_really_inline size_t buf_block_reader<STEP_SIZE>::block_index() {
  4902. return idx;
  4903. }
  4904. template <size_t STEP_SIZE>
  4905. is_utf8_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
  4906. return idx < lenminusstep;
  4907. }
  4908. template <size_t STEP_SIZE>
  4909. is_utf8_really_inline const uint8_t *
  4910. buf_block_reader<STEP_SIZE>::full_block() const {
  4911. return &buf[idx];
  4912. }
  4913. template <size_t STEP_SIZE>
  4914. is_utf8_really_inline size_t
  4915. buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
  4916. if (len == idx) {
  4917. return 0;
  4918. } // memcpy(dst, null, 0) will trigger an error with some sanitizers
  4919. std::memset(dst, 0x20,
  4920. STEP_SIZE); // std::memset STEP_SIZE because it's more efficient
  4921. // to write out 8 or 16 bytes at once.
  4922. std::memcpy(dst, buf + idx, len - idx);
  4923. return len - idx;
  4924. }
  4925. template <size_t STEP_SIZE>
  4926. is_utf8_really_inline void buf_block_reader<STEP_SIZE>::advance() {
  4927. idx += STEP_SIZE;
  4928. }
  4929. } // unnamed namespace
  4930. } // namespace haswell
  4931. } // namespace is_utf8_internals
  4932. namespace is_utf8_internals {
  4933. namespace haswell {
  4934. namespace {
  4935. namespace utf8_validation {
  4936. using namespace simd;
  4937. is_utf8_really_inline simd8<uint8_t>
  4938. check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  4939. // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
  4940. // Bit 1 = Too Long (ASCII followed by continuation)
  4941. // Bit 2 = Overlong 3-byte
  4942. // Bit 4 = Surrogate
  4943. // Bit 5 = Overlong 2-byte
  4944. // Bit 7 = Two Continuations
  4945. constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______
  4946. // 11______ 11______
  4947. constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______
  4948. constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
  4949. constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____
  4950. constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
  4951. constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______
  4952. constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____
  4953. // 11110100 101_____
  4954. // 11110101 1001____
  4955. // 11110101 101_____
  4956. // 1111011_ 1001____
  4957. // 1111011_ 101_____
  4958. // 11111___ 1001____
  4959. // 11111___ 101_____
  4960. constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
  4961. // 11110101 1000____
  4962. // 1111011_ 1000____
  4963. // 11111___ 1000____
  4964. constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
  4965. const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
  4966. // 0_______ ________ <ASCII in byte 1>
  4967. TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
  4968. TOO_LONG,
  4969. // 10______ ________ <continuation in byte 1>
  4970. TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
  4971. // 1100____ ________ <two byte lead in byte 1>
  4972. TOO_SHORT | OVERLONG_2,
  4973. // 1101____ ________ <two byte lead in byte 1>
  4974. TOO_SHORT,
  4975. // 1110____ ________ <three byte lead in byte 1>
  4976. TOO_SHORT | OVERLONG_3 | SURROGATE,
  4977. // 1111____ ________ <four+ byte lead in byte 1>
  4978. TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);
  4979. constexpr const uint8_t CARRY =
  4980. TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
  4981. const simd8<uint8_t> byte_1_low =
  4982. (prev1 & 0x0F)
  4983. .lookup_16<uint8_t>(
  4984. // ____0000 ________
  4985. CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
  4986. // ____0001 ________
  4987. CARRY | OVERLONG_2,
  4988. // ____001_ ________
  4989. CARRY, CARRY,
  4990. // ____0100 ________
  4991. CARRY | TOO_LARGE,
  4992. // ____0101 ________
  4993. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4994. // ____011_ ________
  4995. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4996. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4997. // ____1___ ________
  4998. CARRY | TOO_LARGE | TOO_LARGE_1000,
  4999. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5000. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5001. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5002. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5003. // ____1101 ________
  5004. CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
  5005. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5006. CARRY | TOO_LARGE | TOO_LARGE_1000);
  5007. const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
  5008. // ________ 0_______ <ASCII in byte 2>
  5009. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
  5010. TOO_SHORT, TOO_SHORT,
  5011. // ________ 1000____
  5012. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 |
  5013. OVERLONG_4,
  5014. // ________ 1001____
  5015. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
  5016. // ________ 101_____
  5017. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5018. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5019. // ________ 11______
  5020. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);
  5021. return (byte_1_high & byte_1_low & byte_2_high);
  5022. }
  5023. is_utf8_really_inline simd8<uint8_t>
  5024. check_multibyte_lengths(const simd8<uint8_t> input,
  5025. const simd8<uint8_t> prev_input,
  5026. const simd8<uint8_t> sc) {
  5027. simd8<uint8_t> prev2 = input.prev<2>(prev_input);
  5028. simd8<uint8_t> prev3 = input.prev<3>(prev_input);
  5029. simd8<uint8_t> must23 =
  5030. simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
  5031. simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
  5032. return must23_80 ^ sc;
  5033. }
  5034. //
  5035. // Return nonzero if there are incomplete multibyte characters at the end of the
  5036. // block: e.g. if there is a 4-byte character, but it's 3 bytes from the end.
  5037. //
  5038. is_utf8_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
  5039. // If the previous input's last 3 bytes match this, they're too short (they
  5040. // ended at EOF):
  5041. // ... 1111____ 111_____ 11______
  5042. static const uint8_t max_array[32] = {255,
  5043. 255,
  5044. 255,
  5045. 255,
  5046. 255,
  5047. 255,
  5048. 255,
  5049. 255,
  5050. 255,
  5051. 255,
  5052. 255,
  5053. 255,
  5054. 255,
  5055. 255,
  5056. 255,
  5057. 255,
  5058. 255,
  5059. 255,
  5060. 255,
  5061. 255,
  5062. 255,
  5063. 255,
  5064. 255,
  5065. 255,
  5066. 255,
  5067. 255,
  5068. 255,
  5069. 255,
  5070. 255,
  5071. 0b11110000u - 1,
  5072. 0b11100000u - 1,
  5073. 0b11000000u - 1};
  5074. const simd8<uint8_t> max_value(
  5075. &max_array[sizeof(max_array) - sizeof(simd8<uint8_t>)]);
  5076. return input.gt_bits(max_value);
  5077. }
  5078. struct utf8_checker {
  5079. // If this is nonzero, there has been a UTF-8 error.
  5080. simd8<uint8_t> error;
  5081. // The last input we received
  5082. simd8<uint8_t> prev_input_block;
  5083. // Whether the last input we received was incomplete (used for ASCII fast
  5084. // path)
  5085. simd8<uint8_t> prev_incomplete;
  5086. //
  5087. // Check whether the current bytes are valid UTF-8.
  5088. //
  5089. is_utf8_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
  5090. const simd8<uint8_t> prev_input) {
  5091. // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
  5092. // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
  5093. // small negative numbers)
  5094. simd8<uint8_t> prev1 = input.prev<1>(prev_input);
  5095. simd8<uint8_t> sc = check_special_cases(input, prev1);
  5096. this->error |= check_multibyte_lengths(input, prev_input, sc);
  5097. }
  5098. // The only problem that can happen at EOF is that a multibyte character is
  5099. // too short or a byte value too large in the last bytes: check_special_cases
  5100. // only checks for bytes too large in the first of two bytes.
  5101. is_utf8_really_inline void check_eof() {
  5102. // If the previous block had incomplete UTF-8 characters at the end, an
  5103. // ASCII block can't possibly finish them.
  5104. this->error |= this->prev_incomplete;
  5105. }
  5106. is_utf8_really_inline void check_next_input(const simd8x64<uint8_t> &input) {
  5107. if (is_utf8_likely(is_ascii(input))) {
  5108. this->error |= this->prev_incomplete;
  5109. } else {
  5110. // you might think that a for-loop would work, but under Visual Studio, it
  5111. // is not good enough.
  5112. static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
  5113. (simd8x64<uint8_t>::NUM_CHUNKS == 4),
  5114. "We support either two or four chunks per 64-byte block.");
  5115. if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
  5116. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5117. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5118. } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
  5119. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5120. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5121. this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
  5122. this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
  5123. }
  5124. this->prev_incomplete =
  5125. is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1]);
  5126. this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1];
  5127. }
  5128. }
  5129. // do not forget to call check_eof!
  5130. is_utf8_really_inline bool errors() const {
  5131. return this->error.any_bits_set_anywhere();
  5132. }
  5133. }; // struct utf8_checker
  5134. } // namespace utf8_validation
  5135. using utf8_validation::utf8_checker;
  5136. } // unnamed namespace
  5137. } // namespace haswell
  5138. } // namespace is_utf8_internals
  5139. namespace is_utf8_internals {
  5140. namespace haswell {
  5141. namespace {
  5142. namespace utf8_validation {
  5143. /**
  5144. * Validates that the string is actual UTF-8.
  5145. */
  5146. template <class checker>
  5147. bool generic_validate_utf8(const uint8_t *input, size_t length) {
  5148. checker c{};
  5149. buf_block_reader<64> reader(input, length);
  5150. while (reader.has_full_block()) {
  5151. simd::simd8x64<uint8_t> in(reader.full_block());
  5152. c.check_next_input(in);
  5153. reader.advance();
  5154. }
  5155. uint8_t block[64]{};
  5156. reader.get_remainder(block);
  5157. simd::simd8x64<uint8_t> in(block);
  5158. c.check_next_input(in);
  5159. reader.advance();
  5160. c.check_eof();
  5161. return !c.errors();
  5162. }
  5163. bool generic_validate_utf8(const char *input, size_t length) {
  5164. return generic_validate_utf8<utf8_checker>(
  5165. reinterpret_cast<const uint8_t *>(input), length);
  5166. }
  5167. } // namespace utf8_validation
  5168. } // unnamed namespace
  5169. } // namespace haswell
  5170. } // namespace is_utf8_internals
  5171. namespace is_utf8_internals {
  5172. namespace haswell {
  5173. is_utf8_warn_unused bool
  5174. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  5175. return haswell::utf8_validation::generic_validate_utf8(buf, len);
  5176. }
  5177. } // namespace haswell
  5178. } // namespace is_utf8_internals
  5179. IS_UTF8_UNTARGET_REGION
  5180. #if IS_UTF8_GCC11ORMORE // workaround for
  5181. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
  5182. #pragma GCC diagnostic pop
  5183. #endif // end of workaround
  5184. #endif
  5185. #if IS_UTF8_IMPLEMENTATION_PPC64
  5186. // redefining IS_UTF8_IMPLEMENTATION to "ppc64"
  5187. // #define IS_UTF8_IMPLEMENTATION ppc64
  5188. namespace is_utf8_internals {
  5189. namespace ppc64 {
  5190. namespace {
  5191. #ifndef IS_UTF8_PPC64_H
  5192. #error "ppc64.h must be included"
  5193. #endif
  5194. using namespace simd;
  5195. is_utf8_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
  5196. // careful: 0x80 is not ascii.
  5197. return input.reduce_or().saturating_sub(0b01111111u).bits_not_set_anywhere();
  5198. }
  5199. is_utf8_unused is_utf8_really_inline simd8<bool>
  5200. must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2,
  5201. const simd8<uint8_t> prev3) {
  5202. simd8<uint8_t> is_second_byte =
  5203. prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0
  5204. simd8<uint8_t> is_third_byte =
  5205. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  5206. simd8<uint8_t> is_fourth_byte =
  5207. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  5208. // Caller requires a bool (all 1's). All values resulting from the subtraction
  5209. // will be <= 64, so signed comparison is fine.
  5210. return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) >
  5211. int8_t(0);
  5212. }
  5213. is_utf8_really_inline simd8<bool>
  5214. must_be_2_3_continuation(const simd8<uint8_t> prev2,
  5215. const simd8<uint8_t> prev3) {
  5216. simd8<uint8_t> is_third_byte =
  5217. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  5218. simd8<uint8_t> is_fourth_byte =
  5219. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  5220. // Caller requires a bool (all 1's). All values resulting from the subtraction
  5221. // will be <= 64, so signed comparison is fine.
  5222. return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
  5223. }
  5224. } // unnamed namespace
  5225. } // namespace ppc64
  5226. } // namespace is_utf8_internals
  5227. namespace is_utf8_internals {
  5228. namespace ppc64 {
  5229. namespace {
  5230. // Walks through a buffer in block-sized increments, loading the last part with
  5231. // spaces
  5232. template <size_t STEP_SIZE> struct buf_block_reader {
  5233. public:
  5234. is_utf8_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
  5235. is_utf8_really_inline size_t block_index();
  5236. is_utf8_really_inline bool has_full_block() const;
  5237. is_utf8_really_inline const uint8_t *full_block() const;
  5238. /**
  5239. * Get the last block, padded with spaces.
  5240. *
  5241. * There will always be a last block, with at least 1 byte, unless len == 0
  5242. * (in which case this function fills the buffer with spaces and returns 0. In
  5243. * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder
  5244. * block with STEP_SIZE bytes and no spaces for padding.
  5245. *
  5246. * @return the number of effective characters in the last block.
  5247. */
  5248. is_utf8_really_inline size_t get_remainder(uint8_t *dst) const;
  5249. is_utf8_really_inline void advance();
  5250. private:
  5251. const uint8_t *buf;
  5252. const size_t len;
  5253. const size_t lenminusstep;
  5254. size_t idx;
  5255. };
  5256. // Routines to print masks and text for debugging bitmask operations
  5257. is_utf8_unused static char *format_input_text_64(const uint8_t *text) {
  5258. static char *buf =
  5259. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  5260. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  5261. buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
  5262. }
  5263. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  5264. return buf;
  5265. }
  5266. // Routines to print masks and text for debugging bitmask operations
  5267. is_utf8_unused static char *format_input_text(const simd8x64<uint8_t> &in) {
  5268. static char *buf =
  5269. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  5270. in.store(reinterpret_cast<uint8_t *>(buf));
  5271. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  5272. if (buf[i] < ' ') {
  5273. buf[i] = '_';
  5274. }
  5275. }
  5276. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  5277. return buf;
  5278. }
  5279. is_utf8_unused static char *format_mask(uint64_t mask) {
  5280. static char *buf = reinterpret_cast<char *>(malloc(64 + 1));
  5281. for (size_t i = 0; i < 64; i++) {
  5282. buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
  5283. }
  5284. buf[64] = '\0';
  5285. return buf;
  5286. }
  5287. template <size_t STEP_SIZE>
  5288. is_utf8_really_inline
  5289. buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len)
  5290. : buf{_buf}, len{_len},
  5291. lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  5292. template <size_t STEP_SIZE>
  5293. is_utf8_really_inline size_t buf_block_reader<STEP_SIZE>::block_index() {
  5294. return idx;
  5295. }
  5296. template <size_t STEP_SIZE>
  5297. is_utf8_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
  5298. return idx < lenminusstep;
  5299. }
  5300. template <size_t STEP_SIZE>
  5301. is_utf8_really_inline const uint8_t *
  5302. buf_block_reader<STEP_SIZE>::full_block() const {
  5303. return &buf[idx];
  5304. }
  5305. template <size_t STEP_SIZE>
  5306. is_utf8_really_inline size_t
  5307. buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
  5308. if (len == idx) {
  5309. return 0;
  5310. } // memcpy(dst, null, 0) will trigger an error with some sanitizers
  5311. std::memset(dst, 0x20,
  5312. STEP_SIZE); // std::memset STEP_SIZE because it's more efficient
  5313. // to write out 8 or 16 bytes at once.
  5314. std::memcpy(dst, buf + idx, len - idx);
  5315. return len - idx;
  5316. }
  5317. template <size_t STEP_SIZE>
  5318. is_utf8_really_inline void buf_block_reader<STEP_SIZE>::advance() {
  5319. idx += STEP_SIZE;
  5320. }
  5321. } // unnamed namespace
  5322. } // namespace ppc64
  5323. } // namespace is_utf8_internals
  5324. namespace is_utf8_internals {
  5325. namespace ppc64 {
  5326. namespace {
  5327. namespace utf8_validation {
  5328. using namespace simd;
  5329. is_utf8_really_inline simd8<uint8_t>
  5330. check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  5331. // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
  5332. // Bit 1 = Too Long (ASCII followed by continuation)
  5333. // Bit 2 = Overlong 3-byte
  5334. // Bit 4 = Surrogate
  5335. // Bit 5 = Overlong 2-byte
  5336. // Bit 7 = Two Continuations
  5337. constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______
  5338. // 11______ 11______
  5339. constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______
  5340. constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
  5341. constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____
  5342. constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
  5343. constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______
  5344. constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____
  5345. // 11110100 101_____
  5346. // 11110101 1001____
  5347. // 11110101 101_____
  5348. // 1111011_ 1001____
  5349. // 1111011_ 101_____
  5350. // 11111___ 1001____
  5351. // 11111___ 101_____
  5352. constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
  5353. // 11110101 1000____
  5354. // 1111011_ 1000____
  5355. // 11111___ 1000____
  5356. constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
  5357. const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
  5358. // 0_______ ________ <ASCII in byte 1>
  5359. TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
  5360. TOO_LONG,
  5361. // 10______ ________ <continuation in byte 1>
  5362. TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
  5363. // 1100____ ________ <two byte lead in byte 1>
  5364. TOO_SHORT | OVERLONG_2,
  5365. // 1101____ ________ <two byte lead in byte 1>
  5366. TOO_SHORT,
  5367. // 1110____ ________ <three byte lead in byte 1>
  5368. TOO_SHORT | OVERLONG_3 | SURROGATE,
  5369. // 1111____ ________ <four+ byte lead in byte 1>
  5370. TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);
  5371. constexpr const uint8_t CARRY =
  5372. TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
  5373. const simd8<uint8_t> byte_1_low =
  5374. (prev1 & 0x0F)
  5375. .lookup_16<uint8_t>(
  5376. // ____0000 ________
  5377. CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
  5378. // ____0001 ________
  5379. CARRY | OVERLONG_2,
  5380. // ____001_ ________
  5381. CARRY, CARRY,
  5382. // ____0100 ________
  5383. CARRY | TOO_LARGE,
  5384. // ____0101 ________
  5385. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5386. // ____011_ ________
  5387. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5388. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5389. // ____1___ ________
  5390. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5391. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5392. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5393. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5394. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5395. // ____1101 ________
  5396. CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
  5397. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5398. CARRY | TOO_LARGE | TOO_LARGE_1000);
  5399. const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
  5400. // ________ 0_______ <ASCII in byte 2>
  5401. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
  5402. TOO_SHORT, TOO_SHORT,
  5403. // ________ 1000____
  5404. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 |
  5405. OVERLONG_4,
  5406. // ________ 1001____
  5407. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
  5408. // ________ 101_____
  5409. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5410. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5411. // ________ 11______
  5412. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);
  5413. return (byte_1_high & byte_1_low & byte_2_high);
  5414. }
  5415. is_utf8_really_inline simd8<uint8_t>
  5416. check_multibyte_lengths(const simd8<uint8_t> input,
  5417. const simd8<uint8_t> prev_input,
  5418. const simd8<uint8_t> sc) {
  5419. simd8<uint8_t> prev2 = input.prev<2>(prev_input);
  5420. simd8<uint8_t> prev3 = input.prev<3>(prev_input);
  5421. simd8<uint8_t> must23 =
  5422. simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
  5423. simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
  5424. return must23_80 ^ sc;
  5425. }
  5426. //
  5427. // Return nonzero if there are incomplete multibyte characters at the end of the
  5428. // block: e.g. if there is a 4-byte character, but it's 3 bytes from the end.
  5429. //
  5430. is_utf8_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
  5431. // If the previous input's last 3 bytes match this, they're too short (they
  5432. // ended at EOF):
  5433. // ... 1111____ 111_____ 11______
  5434. static const uint8_t max_array[32] = {255,
  5435. 255,
  5436. 255,
  5437. 255,
  5438. 255,
  5439. 255,
  5440. 255,
  5441. 255,
  5442. 255,
  5443. 255,
  5444. 255,
  5445. 255,
  5446. 255,
  5447. 255,
  5448. 255,
  5449. 255,
  5450. 255,
  5451. 255,
  5452. 255,
  5453. 255,
  5454. 255,
  5455. 255,
  5456. 255,
  5457. 255,
  5458. 255,
  5459. 255,
  5460. 255,
  5461. 255,
  5462. 255,
  5463. 0b11110000u - 1,
  5464. 0b11100000u - 1,
  5465. 0b11000000u - 1};
  5466. const simd8<uint8_t> max_value(
  5467. &max_array[sizeof(max_array) - sizeof(simd8<uint8_t>)]);
  5468. return input.gt_bits(max_value);
  5469. }
  5470. struct utf8_checker {
  5471. // If this is nonzero, there has been a UTF-8 error.
  5472. simd8<uint8_t> error;
  5473. // The last input we received
  5474. simd8<uint8_t> prev_input_block;
  5475. // Whether the last input we received was incomplete (used for ASCII fast
  5476. // path)
  5477. simd8<uint8_t> prev_incomplete;
  5478. //
  5479. // Check whether the current bytes are valid UTF-8.
  5480. //
  5481. is_utf8_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
  5482. const simd8<uint8_t> prev_input) {
  5483. // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
  5484. // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
  5485. // small negative numbers)
  5486. simd8<uint8_t> prev1 = input.prev<1>(prev_input);
  5487. simd8<uint8_t> sc = check_special_cases(input, prev1);
  5488. this->error |= check_multibyte_lengths(input, prev_input, sc);
  5489. }
  5490. // The only problem that can happen at EOF is that a multibyte character is
  5491. // too short or a byte value too large in the last bytes: check_special_cases
  5492. // only checks for bytes too large in the first of two bytes.
  5493. is_utf8_really_inline void check_eof() {
  5494. // If the previous block had incomplete UTF-8 characters at the end, an
  5495. // ASCII block can't possibly finish them.
  5496. this->error |= this->prev_incomplete;
  5497. }
  5498. is_utf8_really_inline void check_next_input(const simd8x64<uint8_t> &input) {
  5499. if (is_utf8_likely(is_ascii(input))) {
  5500. this->error |= this->prev_incomplete;
  5501. } else {
  5502. // you might think that a for-loop would work, but under Visual Studio, it
  5503. // is not good enough.
  5504. static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
  5505. (simd8x64<uint8_t>::NUM_CHUNKS == 4),
  5506. "We support either two or four chunks per 64-byte block.");
  5507. if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
  5508. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5509. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5510. } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
  5511. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5512. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5513. this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
  5514. this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
  5515. }
  5516. this->prev_incomplete =
  5517. is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1]);
  5518. this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1];
  5519. }
  5520. }
  5521. // do not forget to call check_eof!
  5522. is_utf8_really_inline bool errors() const {
  5523. return this->error.any_bits_set_anywhere();
  5524. }
  5525. }; // struct utf8_checker
  5526. } // namespace utf8_validation
  5527. using utf8_validation::utf8_checker;
  5528. } // unnamed namespace
  5529. } // namespace ppc64
  5530. } // namespace is_utf8_internals
  5531. namespace is_utf8_internals {
  5532. namespace ppc64 {
  5533. namespace {
  5534. namespace utf8_validation {
  5535. /**
  5536. * Validates that the string is actual UTF-8.
  5537. */
  5538. template <class checker>
  5539. bool generic_validate_utf8(const uint8_t *input, size_t length) {
  5540. checker c{};
  5541. buf_block_reader<64> reader(input, length);
  5542. while (reader.has_full_block()) {
  5543. simd::simd8x64<uint8_t> in(reader.full_block());
  5544. c.check_next_input(in);
  5545. reader.advance();
  5546. }
  5547. uint8_t block[64]{};
  5548. reader.get_remainder(block);
  5549. simd::simd8x64<uint8_t> in(block);
  5550. c.check_next_input(in);
  5551. reader.advance();
  5552. c.check_eof();
  5553. return !c.errors();
  5554. }
  5555. bool generic_validate_utf8(const char *input, size_t length) {
  5556. return generic_validate_utf8<utf8_checker>(
  5557. reinterpret_cast<const uint8_t *>(input), length);
  5558. }
  5559. } // namespace utf8_validation
  5560. } // unnamed namespace
  5561. } // namespace ppc64
  5562. } // namespace is_utf8_internals
  5563. //
  5564. // Implementation-specific overrides
  5565. //
  5566. namespace is_utf8_internals {
  5567. namespace ppc64 {
  5568. is_utf8_warn_unused bool
  5569. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  5570. return ppc64::utf8_validation::generic_validate_utf8(buf, len);
  5571. }
  5572. } // namespace ppc64
  5573. } // namespace is_utf8_internals
  5574. #endif
  5575. #if IS_UTF8_IMPLEMENTATION_WESTMERE
  5576. // redefining IS_UTF8_IMPLEMENTATION to "westmere"
  5577. // #define IS_UTF8_IMPLEMENTATION westmere
  5578. IS_UTF8_TARGET_WESTMERE
  5579. namespace is_utf8_internals {
  5580. namespace westmere {
  5581. namespace {
  5582. #ifndef IS_UTF8_WESTMERE_H
  5583. #error "westmere.h must be included"
  5584. #endif
  5585. using namespace simd;
  5586. is_utf8_really_inline bool is_ascii(const simd8x64<uint8_t> &input) {
  5587. return input.reduce_or().is_ascii();
  5588. }
  5589. is_utf8_unused is_utf8_really_inline simd8<bool>
  5590. must_be_continuation(const simd8<uint8_t> prev1, const simd8<uint8_t> prev2,
  5591. const simd8<uint8_t> prev3) {
  5592. simd8<uint8_t> is_second_byte =
  5593. prev1.saturating_sub(0b11000000u - 1); // Only 11______ will be > 0
  5594. simd8<uint8_t> is_third_byte =
  5595. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  5596. simd8<uint8_t> is_fourth_byte =
  5597. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  5598. // Caller requires a bool (all 1's). All values resulting from the subtraction
  5599. // will be <= 64, so signed comparison is fine.
  5600. return simd8<int8_t>(is_second_byte | is_third_byte | is_fourth_byte) >
  5601. int8_t(0);
  5602. }
  5603. is_utf8_really_inline simd8<bool>
  5604. must_be_2_3_continuation(const simd8<uint8_t> prev2,
  5605. const simd8<uint8_t> prev3) {
  5606. simd8<uint8_t> is_third_byte =
  5607. prev2.saturating_sub(0b11100000u - 1); // Only 111_____ will be > 0
  5608. simd8<uint8_t> is_fourth_byte =
  5609. prev3.saturating_sub(0b11110000u - 1); // Only 1111____ will be > 0
  5610. // Caller requires a bool (all 1's). All values resulting from the subtraction
  5611. // will be <= 64, so signed comparison is fine.
  5612. return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
  5613. }
  5614. } // unnamed namespace
  5615. } // namespace westmere
  5616. } // namespace is_utf8_internals
  5617. namespace is_utf8_internals {
  5618. namespace westmere {
  5619. namespace {
  5620. // Walks through a buffer in block-sized increments, loading the last part with
  5621. // spaces
  5622. template <size_t STEP_SIZE> struct buf_block_reader {
  5623. public:
  5624. is_utf8_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
  5625. is_utf8_really_inline size_t block_index();
  5626. is_utf8_really_inline bool has_full_block() const;
  5627. is_utf8_really_inline const uint8_t *full_block() const;
  5628. /**
  5629. * Get the last block, padded with spaces.
  5630. *
  5631. * There will always be a last block, with at least 1 byte, unless len == 0
  5632. * (in which case this function fills the buffer with spaces and returns 0. In
  5633. * particular, if len == STEP_SIZE there will be 0 full_blocks and 1 remainder
  5634. * block with STEP_SIZE bytes and no spaces for padding.
  5635. *
  5636. * @return the number of effective characters in the last block.
  5637. */
  5638. is_utf8_really_inline size_t get_remainder(uint8_t *dst) const;
  5639. is_utf8_really_inline void advance();
  5640. private:
  5641. const uint8_t *buf;
  5642. const size_t len;
  5643. const size_t lenminusstep;
  5644. size_t idx;
  5645. };
  5646. // Routines to print masks and text for debugging bitmask operations
  5647. is_utf8_unused static char *format_input_text_64(const uint8_t *text) {
  5648. static char *buf =
  5649. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  5650. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  5651. buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
  5652. }
  5653. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  5654. return buf;
  5655. }
  5656. // Routines to print masks and text for debugging bitmask operations
  5657. is_utf8_unused static char *format_input_text(const simd8x64<uint8_t> &in) {
  5658. static char *buf =
  5659. reinterpret_cast<char *>(malloc(sizeof(simd8x64<uint8_t>) + 1));
  5660. in.store(reinterpret_cast<uint8_t *>(buf));
  5661. for (size_t i = 0; i < sizeof(simd8x64<uint8_t>); i++) {
  5662. if (buf[i] < ' ') {
  5663. buf[i] = '_';
  5664. }
  5665. }
  5666. buf[sizeof(simd8x64<uint8_t>)] = '\0';
  5667. return buf;
  5668. }
  5669. is_utf8_unused static char *format_mask(uint64_t mask) {
  5670. static char *buf = reinterpret_cast<char *>(malloc(64 + 1));
  5671. for (size_t i = 0; i < 64; i++) {
  5672. buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
  5673. }
  5674. buf[64] = '\0';
  5675. return buf;
  5676. }
  5677. template <size_t STEP_SIZE>
  5678. is_utf8_really_inline
  5679. buf_block_reader<STEP_SIZE>::buf_block_reader(const uint8_t *_buf, size_t _len)
  5680. : buf{_buf}, len{_len},
  5681. lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
  5682. template <size_t STEP_SIZE>
  5683. is_utf8_really_inline size_t buf_block_reader<STEP_SIZE>::block_index() {
  5684. return idx;
  5685. }
  5686. template <size_t STEP_SIZE>
  5687. is_utf8_really_inline bool buf_block_reader<STEP_SIZE>::has_full_block() const {
  5688. return idx < lenminusstep;
  5689. }
  5690. template <size_t STEP_SIZE>
  5691. is_utf8_really_inline const uint8_t *
  5692. buf_block_reader<STEP_SIZE>::full_block() const {
  5693. return &buf[idx];
  5694. }
  5695. template <size_t STEP_SIZE>
  5696. is_utf8_really_inline size_t
  5697. buf_block_reader<STEP_SIZE>::get_remainder(uint8_t *dst) const {
  5698. if (len == idx) {
  5699. return 0;
  5700. } // memcpy(dst, null, 0) will trigger an error with some sanitizers
  5701. std::memset(dst, 0x20,
  5702. STEP_SIZE); // std::memset STEP_SIZE because it's more efficient
  5703. // to write out 8 or 16 bytes at once.
  5704. std::memcpy(dst, buf + idx, len - idx);
  5705. return len - idx;
  5706. }
  5707. template <size_t STEP_SIZE>
  5708. is_utf8_really_inline void buf_block_reader<STEP_SIZE>::advance() {
  5709. idx += STEP_SIZE;
  5710. }
  5711. } // unnamed namespace
  5712. } // namespace westmere
  5713. } // namespace is_utf8_internals
  5714. namespace is_utf8_internals {
  5715. namespace westmere {
  5716. namespace {
  5717. namespace utf8_validation {
  5718. using namespace simd;
  5719. is_utf8_really_inline simd8<uint8_t>
  5720. check_special_cases(const simd8<uint8_t> input, const simd8<uint8_t> prev1) {
  5721. // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
  5722. // Bit 1 = Too Long (ASCII followed by continuation)
  5723. // Bit 2 = Overlong 3-byte
  5724. // Bit 4 = Surrogate
  5725. // Bit 5 = Overlong 2-byte
  5726. // Bit 7 = Two Continuations
  5727. constexpr const uint8_t TOO_SHORT = 1 << 0; // 11______ 0_______
  5728. // 11______ 11______
  5729. constexpr const uint8_t TOO_LONG = 1 << 1; // 0_______ 10______
  5730. constexpr const uint8_t OVERLONG_3 = 1 << 2; // 11100000 100_____
  5731. constexpr const uint8_t SURROGATE = 1 << 4; // 11101101 101_____
  5732. constexpr const uint8_t OVERLONG_2 = 1 << 5; // 1100000_ 10______
  5733. constexpr const uint8_t TWO_CONTS = 1 << 7; // 10______ 10______
  5734. constexpr const uint8_t TOO_LARGE = 1 << 3; // 11110100 1001____
  5735. // 11110100 101_____
  5736. // 11110101 1001____
  5737. // 11110101 101_____
  5738. // 1111011_ 1001____
  5739. // 1111011_ 101_____
  5740. // 11111___ 1001____
  5741. // 11111___ 101_____
  5742. constexpr const uint8_t TOO_LARGE_1000 = 1 << 6;
  5743. // 11110101 1000____
  5744. // 1111011_ 1000____
  5745. // 11111___ 1000____
  5746. constexpr const uint8_t OVERLONG_4 = 1 << 6; // 11110000 1000____
  5747. const simd8<uint8_t> byte_1_high = prev1.shr<4>().lookup_16<uint8_t>(
  5748. // 0_______ ________ <ASCII in byte 1>
  5749. TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
  5750. TOO_LONG,
  5751. // 10______ ________ <continuation in byte 1>
  5752. TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
  5753. // 1100____ ________ <two byte lead in byte 1>
  5754. TOO_SHORT | OVERLONG_2,
  5755. // 1101____ ________ <two byte lead in byte 1>
  5756. TOO_SHORT,
  5757. // 1110____ ________ <three byte lead in byte 1>
  5758. TOO_SHORT | OVERLONG_3 | SURROGATE,
  5759. // 1111____ ________ <four+ byte lead in byte 1>
  5760. TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4);
  5761. constexpr const uint8_t CARRY =
  5762. TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
  5763. const simd8<uint8_t> byte_1_low =
  5764. (prev1 & 0x0F)
  5765. .lookup_16<uint8_t>(
  5766. // ____0000 ________
  5767. CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
  5768. // ____0001 ________
  5769. CARRY | OVERLONG_2,
  5770. // ____001_ ________
  5771. CARRY, CARRY,
  5772. // ____0100 ________
  5773. CARRY | TOO_LARGE,
  5774. // ____0101 ________
  5775. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5776. // ____011_ ________
  5777. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5778. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5779. // ____1___ ________
  5780. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5781. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5782. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5783. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5784. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5785. // ____1101 ________
  5786. CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
  5787. CARRY | TOO_LARGE | TOO_LARGE_1000,
  5788. CARRY | TOO_LARGE | TOO_LARGE_1000);
  5789. const simd8<uint8_t> byte_2_high = input.shr<4>().lookup_16<uint8_t>(
  5790. // ________ 0_______ <ASCII in byte 2>
  5791. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
  5792. TOO_SHORT, TOO_SHORT,
  5793. // ________ 1000____
  5794. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 |
  5795. OVERLONG_4,
  5796. // ________ 1001____
  5797. TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
  5798. // ________ 101_____
  5799. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5800. TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE,
  5801. // ________ 11______
  5802. TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT);
  5803. return (byte_1_high & byte_1_low & byte_2_high);
  5804. }
  5805. is_utf8_really_inline simd8<uint8_t>
  5806. check_multibyte_lengths(const simd8<uint8_t> input,
  5807. const simd8<uint8_t> prev_input,
  5808. const simd8<uint8_t> sc) {
  5809. simd8<uint8_t> prev2 = input.prev<2>(prev_input);
  5810. simd8<uint8_t> prev3 = input.prev<3>(prev_input);
  5811. simd8<uint8_t> must23 =
  5812. simd8<uint8_t>(must_be_2_3_continuation(prev2, prev3));
  5813. simd8<uint8_t> must23_80 = must23 & uint8_t(0x80);
  5814. return must23_80 ^ sc;
  5815. }
  5816. //
  5817. // Return nonzero if there are incomplete multibyte characters at the end of the
  5818. // block: e.g. if there is a 4-byte character, but it's 3 bytes from the end.
  5819. //
  5820. is_utf8_really_inline simd8<uint8_t> is_incomplete(const simd8<uint8_t> input) {
  5821. // If the previous input's last 3 bytes match this, they're too short (they
  5822. // ended at EOF):
  5823. // ... 1111____ 111_____ 11______
  5824. static const uint8_t max_array[32] = {255,
  5825. 255,
  5826. 255,
  5827. 255,
  5828. 255,
  5829. 255,
  5830. 255,
  5831. 255,
  5832. 255,
  5833. 255,
  5834. 255,
  5835. 255,
  5836. 255,
  5837. 255,
  5838. 255,
  5839. 255,
  5840. 255,
  5841. 255,
  5842. 255,
  5843. 255,
  5844. 255,
  5845. 255,
  5846. 255,
  5847. 255,
  5848. 255,
  5849. 255,
  5850. 255,
  5851. 255,
  5852. 255,
  5853. 0b11110000u - 1,
  5854. 0b11100000u - 1,
  5855. 0b11000000u - 1};
  5856. const simd8<uint8_t> max_value(
  5857. &max_array[sizeof(max_array) - sizeof(simd8<uint8_t>)]);
  5858. return input.gt_bits(max_value);
  5859. }
  5860. struct utf8_checker {
  5861. // If this is nonzero, there has been a UTF-8 error.
  5862. simd8<uint8_t> error;
  5863. // The last input we received
  5864. simd8<uint8_t> prev_input_block;
  5865. // Whether the last input we received was incomplete (used for ASCII fast
  5866. // path)
  5867. simd8<uint8_t> prev_incomplete;
  5868. //
  5869. // Check whether the current bytes are valid UTF-8.
  5870. //
  5871. is_utf8_really_inline void check_utf8_bytes(const simd8<uint8_t> input,
  5872. const simd8<uint8_t> prev_input) {
  5873. // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+
  5874. // lead bytes (2, 3, 4-byte leads become large positive numbers instead of
  5875. // small negative numbers)
  5876. simd8<uint8_t> prev1 = input.prev<1>(prev_input);
  5877. simd8<uint8_t> sc = check_special_cases(input, prev1);
  5878. this->error |= check_multibyte_lengths(input, prev_input, sc);
  5879. }
  5880. // The only problem that can happen at EOF is that a multibyte character is
  5881. // too short or a byte value too large in the last bytes: check_special_cases
  5882. // only checks for bytes too large in the first of two bytes.
  5883. is_utf8_really_inline void check_eof() {
  5884. // If the previous block had incomplete UTF-8 characters at the end, an
  5885. // ASCII block can't possibly finish them.
  5886. this->error |= this->prev_incomplete;
  5887. }
  5888. is_utf8_really_inline void check_next_input(const simd8x64<uint8_t> &input) {
  5889. if (is_utf8_likely(is_ascii(input))) {
  5890. this->error |= this->prev_incomplete;
  5891. } else {
  5892. // you might think that a for-loop would work, but under Visual Studio, it
  5893. // is not good enough.
  5894. static_assert((simd8x64<uint8_t>::NUM_CHUNKS == 2) ||
  5895. (simd8x64<uint8_t>::NUM_CHUNKS == 4),
  5896. "We support either two or four chunks per 64-byte block.");
  5897. if (simd8x64<uint8_t>::NUM_CHUNKS == 2) {
  5898. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5899. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5900. } else if (simd8x64<uint8_t>::NUM_CHUNKS == 4) {
  5901. this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
  5902. this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
  5903. this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
  5904. this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
  5905. }
  5906. this->prev_incomplete =
  5907. is_incomplete(input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1]);
  5908. this->prev_input_block = input.chunks[simd8x64<uint8_t>::NUM_CHUNKS - 1];
  5909. }
  5910. }
  5911. // do not forget to call check_eof!
  5912. is_utf8_really_inline bool errors() const {
  5913. return this->error.any_bits_set_anywhere();
  5914. }
  5915. }; // struct utf8_checker
  5916. } // namespace utf8_validation
  5917. using utf8_validation::utf8_checker;
  5918. } // unnamed namespace
  5919. } // namespace westmere
  5920. } // namespace is_utf8_internals
  5921. namespace is_utf8_internals {
  5922. namespace westmere {
  5923. namespace {
  5924. namespace utf8_validation {
  5925. /**
  5926. * Validates that the string is actual UTF-8.
  5927. */
  5928. template <class checker>
  5929. bool generic_validate_utf8(const uint8_t *input, size_t length) {
  5930. checker c{};
  5931. buf_block_reader<64> reader(input, length);
  5932. while (reader.has_full_block()) {
  5933. simd::simd8x64<uint8_t> in(reader.full_block());
  5934. c.check_next_input(in);
  5935. reader.advance();
  5936. }
  5937. uint8_t block[64]{};
  5938. reader.get_remainder(block);
  5939. simd::simd8x64<uint8_t> in(block);
  5940. c.check_next_input(in);
  5941. reader.advance();
  5942. c.check_eof();
  5943. return !c.errors();
  5944. }
  5945. bool generic_validate_utf8(const char *input, size_t length) {
  5946. return generic_validate_utf8<utf8_checker>(
  5947. reinterpret_cast<const uint8_t *>(input), length);
  5948. }
  5949. } // namespace utf8_validation
  5950. } // unnamed namespace
  5951. } // namespace westmere
  5952. } // namespace is_utf8_internals
  5953. //
  5954. // Implementation-specific overrides
  5955. //
  5956. namespace is_utf8_internals {
  5957. namespace westmere {
  5958. is_utf8_warn_unused bool
  5959. implementation::validate_utf8(const char *buf, size_t len) const noexcept {
  5960. return westmere::utf8_validation::generic_validate_utf8(buf, len);
  5961. }
  5962. } // namespace westmere
  5963. } // namespace is_utf8_internals
  5964. IS_UTF8_UNTARGET_REGION
  5965. #endif
  5966. IS_UTF8_POP_DISABLE_WARNINGS
  5967. bool is_utf8(const char *src, size_t len) {
  5968. return is_utf8_internals::validate_utf8(src, len);
  5969. }