- 论坛徽章:
- 6
|
回复 6# Windows19
egrep不行?试试perl的,不知道行不行
- #!perl
- use strict;
- use warnings;
- use IO::File;
- use Data::Dumper;
# Forward declarations of the subs defined further down in this script.
# The names here must match the actual definitions below.
sub main;
sub get_lines;
sub find_these_in_big_file;
sub is_in;

# Run the program.
main();
# Program entry point: fetch the list returned by get_lines() and hand it
# to find_these_in_big_file().
sub main {
    my $patterns = get_lines();
    return find_these_in_big_file($patterns);
}
# Read all lines of 'b.txt' (looked up in the current directory).
#
# Returns: a reference to an array holding every line of the file, in file
#          order, with the trailing newline removed from each. Blank lines
#          in the file are kept as empty strings.
# Dies:    with the OS error text if 'b.txt' cannot be opened.
sub get_lines {
    my $pattern_file = 'b.txt';

    # IO::File->new returns undef on failure; without this check the next
    # method call would die with an unhelpful "undefined value" error.
    my $fh = IO::File->new($pattern_file, 'r')
        or die "Cannot open '$pattern_file' for reading: $!\n";

    my @lines = $fh->getlines;
    $fh->close;
    chomp @lines;

    return \@lines;
}
# Copy every line of 'a.txt' for which is_in() reports a match against the
# given list into 'out.txt'.
#
# Parameters:
#   $lines_aref - array reference passed through unchanged to is_in()
#                 together with each input line.
#
# Side effects:
#   - opens 'out.txt' in write mode and writes the matching lines to it;
#   - prints "Processing line N" to STDOUT every 1,000,000 input lines;
#   - prints the seconds elapsed since the script started ($^T) when done.
#
# Dies if either file cannot be opened, or if writing to or closing the
# output file fails.
sub find_these_in_big_file {
    my ($lines_aref) = @_;

    my $in_file  = 'a.txt';
    my $out_file = 'out.txt';

    my $read_fh = IO::File->new($in_file, 'r')
        or die "Cannot open '$in_file' for reading: $!\n";
    my $write_fh = IO::File->new($out_file, 'w')
        or die "Cannot open '$out_file' for writing: $!\n";

    my $million  = 1_000_000;
    my $line_num = 0;

    # The explicit defined() matters: the implicit one Perl adds for
    # `while (my $x = <$fh>)` does not apply to a method call, so a final
    # line of "0" without a newline would otherwise end the loop early.
    while (defined(my $line = $read_fh->getline)) {
        ++$line_num;
        print "Processing line $line_num\n" if not $line_num % $million;

        if (is_in($lines_aref, $line)) {
            $write_fh->print($line)
                or die "Cannot write to '$out_file': $!\n";
        }
    }

    $read_fh->close;

    # Buffered write errors may only surface at close time.
    $write_fh->close
        or die "Cannot close '$out_file': $!\n";

    my $duration = time - $^T;
    print "Done, cost $duration seconds\n";

    return;
}
# Report whether any string in a list occurs as a literal substring of a
# target string.
#
# Parameters:
#   $lines_aref  - array reference of strings to search for.
#   $target_line - the string to search in.
#
# Returns 1 as soon as one list element is found inside $target_line
# (via index(), so no regex interpretation), otherwise 0. An empty-string
# element matches any target, because index() returns 0 for it.
sub is_in {
    my ($lines_aref, $target_line) = @_;

    foreach my $needle (@{$lines_aref}) {
        return 1 if index($target_line, $needle) >= 0;
    }

    return 0;
}
- __END__
复制代码
|
|