- 论坛徽章:
- 7
|
本帖最后由 rubyish 于 2017-11-07 19:12 编辑
shishi hash ~~
#!/usr/bin/perl
# version 26, subversion 1 (v5.26.1)
use 5.010;
use strict;
use warnings;

# Vocabulary of known words used to segment the run-together test strings.
my @list = qw[
    Book Both Camel Cite Every Extraction Language Larry Originally Page Perl
    Permanent Practical Programming Related Repeat Reporting Since Special TIP
    Then This Though Unix Upload VALID WHAT'S Wall What Wikidata YOUR acronym
    again and another any are backronyms became because began book borrow
    bumped but can cannot change changes constructed continue cover decompose
    compose developed development different documentation documented easier
    eventually evolved facto file find first for from general has here ideas
    identify including independently information into item its known language
    languages length lengthy liberally link links list major make man many mark
    match matching maximum means mixture multiple not number officially one
    only originally page pages please process processing programmers published
    purpose redesign reference remaining report revisions robot same scripting
    search segments separate single still teams that the then there this time
    undergone until use various version was well which with word words you
    your sword
];

# Character trie built once from the vocabulary. decompose() and modify()
# read this file-scoped lexical directly, so it must be declared before them.
my $lexicon = from( \@list );

# Segment every line of the DATA section and print a small report for it.
while ( my $line = <DATA> ) {

    # $line still ends with its newline here, so say() leaves an empty line
    # after the echoed input; the chomp only happens afterwards.
    say "OOV:\n$line";
    say 'decompose:';
    chomp $line;
    decompose($line);
    say '-' x 64;
}
- # ____________________SUB____________________
# Build a character trie from a list of words.
#
# Takes an array reference of words and returns a hash reference in which
# every level is keyed by a single character. A node that completes a word
# additionally carries the key '@' with the value 1.
sub from {
    my ($words) = @_;
    my %trie;

    foreach my $entry ( @{$words} ) {
        my $node = \%trie;

        # Descend one level per character, creating missing levels on the way.
        foreach my $char ( split //, $entry ) {
            $node->{$char} //= {};
            $node = $node->{$char};
        }

        $node->{'@'} = 1;    # end-of-word marker
    }

    return \%trie;
}
# Segment one string into known words and unknown remainders, and print it.
#
# For every start offset the $lexicon trie is walked to find the longest
# word beginning there. Those lengths are handed to D_(), and the list it
# returns is printed on a single line, separated by single spaces.
sub decompose {
    my ($unknown) = @_;
    my @chars = split //, $unknown;
    my $last  = $#chars;

    # $sigil[$start] is the length of the longest lexicon word that begins
    # at $start, or 0 when no word begins there.
    my @sigil = map {
        my $start   = $_;
        my $node    = $lexicon;
        my $longest = 0;
        for my $end ( $start .. $last ) {
            $node = $node->{ $chars[$end] } or last;    # left the trie
            $longest = $end - $start + 1 if $node->{'@'};
        }
        $longest;
    } 0 .. $last;

    say join ' ', D_( $unknown, \@sigil, 0, [ 0, $#sigil ] );
}
# Recursively split the slice [$r1, $r2] of $unknown around its longest word.
#
# Arguments:
#   $unknown - the complete string being segmented
#   $sigil   - array ref; $sigil->[$i] is the length of the word recorded as
#              starting at offset $i (0 when there is none)
#   $need    - when true, modify() is run on the slice first
#   $range   - array ref [ $r1, $r2 ] of inclusive offsets
#
# Returns, in list context, the pieces of the slice in left-to-right order.
# A slice with no recorded word comes back as one piece.
sub D_ {
    my ( $unknown, $sigil, $need, $range ) = @_;
    my ( $r1, $r2 ) = @{$range};

    if ($need) {
        modify( $unknown, $sigil, $r1, $r2 );
    }

    # Find the longest recorded word in the slice. The strict comparison
    # keeps the earliest offset when several words share the top length.
    my ( $best_len, $best_at ) = ( 0, 0 );
    for my $pos ( $r1 .. $r2 ) {
        if ( $sigil->[$pos] > $best_len ) {
            ( $best_len, $best_at ) = ( $sigil->[$pos], $pos );
        }
    }

    # No word anywhere in the slice: return it unsplit.
    return substr( $unknown, $r1, $r2 - $r1 + 1 ) unless $best_len;

    my $after = $best_at + $best_len;    # first offset behind the word
    my @pieces;

    # Text left of the word is re-split with $need set to 1.
    if ( $best_at > $r1 ) {
        push @pieces, D_( $unknown, $sigil, 1, [ $r1, $best_at - 1 ] );
    }

    push @pieces, substr( $unknown, $best_at, $best_len );

    # Text right of the word keeps the same right edge, so $need is 0.
    if ( $after <= $r2 ) {
        push @pieces, D_( $unknown, $sigil, 0, [ $after, $r2 ] );
    }

    return @pieces;
}
# Clip recorded word lengths so that no word starting inside [$r1, $r2]
# reaches past $r2.
#
# Arguments:
#   $unknown - the complete string being segmented
#   $sigil   - array ref of word lengths per start offset; changed in place
#   $r1, $r2 - inclusive offsets of the slice to clip
#
# Entries that are 0 or already end at or before $r2 are left alone. Every
# other entry is reset to the length of the longest word in the $lexicon
# trie that starts at the same offset and ends at or before $r2, or to 0
# when there is no such word.
sub modify {
    my ( $unknown, $sigil, $r1, $r2 ) = @_;

    for my $i ( $r1 .. $r2 ) {
        next unless $sigil->[$i];
        next if $i + $sigil->[$i] - 1 <= $r2;

        my $dit = $lexicon;
        $sigil->[$i] = 0;
        for my $j ( $i .. $r2 ) {
            $dit = $dit->{ substr( $unknown, $j, 1 ) };

            # Stop as soon as the trie has no continuation, exactly like the
            # walk in decompose(). Without this the next lookup would
            # autovivify a throwaway hash in $dit and keep scanning to $r2.
            last unless $dit;

            if ( $dit->{'@'} ) {
                $sigil->[$i] = $j - $i + 1;
            }
        }
    }

    return;
}
- __DATA__
- WHAT'SATDSYOURATIP
- themixtureofandwhichmeansthiswordcan
- ThoughPerlisnotofficiallyanacronymtherearevariousbackronymsinuseincludingPracticalExtractionandReportingLanguagePerlwasoriginallydevelopedbyLarryWallin1987asageneralpurposeUnixscriptinglanguagetomakereportprocessingeasierSincethenithasundergonemanychangesandrevisionsPerl6whichbeganasaredesignofPerl5in2000eventuallyevolvedintoaseparatelanguageBothlanguagescontinuetobedevelopedindependentlybydifferentdevelopmentteamsandliberallyborrowideasfromoneanother
复制代码
|
|