#!/usr2/local/bin/perl -w
# gdbmbld.pl
#
# This script is designed to work with a tab-delimited file on stdin.
# The first field of each line is the key; the remaining fields are its
# values.  If a key appears a second time in the input (or is already
# present in the database file), the new fields are appended to the ones
# already stored for that key.

use strict;
use warnings;

use GDBM_File;

my $filename = "whatever.gdbm";    # This should undoubtedly be dynamic

my %database;    # declaration: the database.
my $dbref;       # declaration: the database-descriptor

# The database-descriptor is useful for manipulating the guts of the gdbm
# engine as well as a few choice direct-approach routines that we could futz
# with if we were so inclined.  I save it in $dbref, but never use it in this
# application (it is released again just before the untie() at the bottom).

# The database by contrast, is in %database, and it behaves just like an
# associative array.  The only difference is that it's using the file
# "$filename" for storage instead of relying on an in-memory representation.

# I've added ()'s to the tie() call for clarity--Perl is comfortable either
# with or without them.  tie() returns false on failure, so we bail out
# rather than silently filling an ordinary in-memory hash that would be
# thrown away at exit.
$dbref = tie(%database, 'GDBM_File', $filename, GDBM_WRCREAT, 0640)
    or die "Cannot tie $filename: $!";

# At this point, %database lives on disk as a GDBM file
#
# The main body of the routine reads from standard input until nothing's left
# and runs over the principal loop.  The expression while (<STDIN>) is a
# special case of while and the <> operator: it's actually reading a line from
# standard input (that's what <STDIN> does) and placing the result into $_ so
# it can test it.  As a side-effect, we get the line in $_ without explicit
# assignment.  A fairly standard (if opaque) Perl idiom.
while (<STDIN>) {

    # let's get rid of those pesky newlines
    chomp();

    # now inside main loop.  First split our input line into discrete
    # tokens.  Since Perl provides split(), it's easier to just tokenize
    # the entire line than it is to try plucking a single token off and
    # leaving the rest intact.
    my @elements = split(/\t/, $_);

    # our key value -- the key into the hash, or our 'word' -- will be the
    # first element in our list.  We shift it off the array and into the
    # variable $key
    my $key = shift(@elements);

    # A blank line (or one holding nothing but tabs) splits into an empty
    # list, which leaves us without a key.  There is nothing to store for
    # such a line, so move on to the next one.
    next unless defined $key;

    # Check the DB for existing data.  If it exists, we'll process the
    # contents of the if clause.  If we don't find an entry for
    # $database{$key}, we'll skip the clause.  The clause will concatenate
    # new data onto old data.  We test with exists() rather than for truth
    # so that a stored value such as "0" still counts as existing data
    # instead of being overwritten.
    if (exists $database{$key}) {

        # We're only executing this if we have data.  If we do have
        # data, we split it on the tab character and put that into our
        # @elembase array.
        my @elembase = split(/\t/, $database{$key});

        # Now a simple concatenation to replace @elements with all the
        # elements instead of just the new ones.  If we didn't find old
        # data, @elements will still have the new data in it
        @elements = (@elembase, @elements);
    }

    # Replace the tabs so that everything's still tab-delimited.
    my $value = join("\t", @elements);

    # Put the value back into the hash.
    $database{$key} = $value;

    # Now repeat loop for next line on STDIN
}

# at this point, we should have done all the lines present on STDIN, and we
# can close everything down.  The untie() is just good housekeeping and ensures
# that we write back any in-memory cache the database may be using.
#
# The descriptor has to be let go first: while $dbref still refers to the
# tied object, untie() cannot destroy it (and warns about the leftover
# reference under -w), so the file would stay open until the program exits.
undef $dbref;
untie(%database);