Global Temperature Readings in MySQL form

I enjoy reading about data visualization and processing on blrpnt.com. Today there's a post about a big data release of global temperature readings, in response to the current (and totally absurd) hockey-stick debate. Anyway, I whipped up a perl script to turn the very weird text files into somewhat more useful SQL data. Here's the script, and the data is linked below. #! /usr/bin/perl -w eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; #$running_under_some_shell use strict; use File::Find (); use File::Basename; # Set the variable $File::Find::dont_use_nlink if you're using AFS, # since AFS cheats. # for the convenience of &wanted calls, including -eval statements: use vars qw/*name *dir *prune/; *name = *File::Find::name; *dir = *File::Find::dir; *prune = *File::Find::prune; my %columns = ( "Number" => "id", "Name" => "name", "Country" => "country", "Lat" => "lat", "Long" => "lon", "Height" => "height", "Start year" => "start_year", "End year" => "end_year", "First Good year" => "first_good_year", "Source ID" => "source_id", "Source file" => "source_file", "Jones data to" => "jones_data_to", "Normals source" => "normals_source", "Normals source start year" =>"normals_start", "Normals source end year" => "normals_end", "Normals source variable code" => "normals_src_variable", "Normals source percent availability" => "normals_src_pct_available", "Normals" => "normals", "Standard deviations source" => "std_dev_source", "Standard deviations source start year" => "std_dev_start", "Standard deviations source end year" => "std_dev_end", "Standard deviations" => "std_dev" ); sub wanted; # Traverse desired filesystems File::Find::find({wanted => \&wanted}, '/opt/home/colin/Downloads/yipes/'); exit; sub wanted { my ($dev,$ino,$mode,$nlink,$uid,$gid); (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) && -f _ && /\d\d\d\d\d\d/ && parse_file($name); } sub parse_file { my ( $name ) = @_; my $id = basename($name); my %attr; open(DATA, "< $name") || die("could not open $name -- $!"); while(<DATA>) { chomp; $attr{'id'} = $id; if (/=/) { my ($key, $value) = split("="); $key = trim($key); if ( exists $columns{$key} ) { $attr{$columns{$key}} = trim($value); #print $columns{$key} . " -> " . $attr{$columns{$key}} . "\n"; } else { #print "$key\n"; } } else { #1943 -2.9 -3.8 -0.1 1.8 3.7 8.4 10.5 6.7 6.9 2.7 0.8 1.6 my @temps = split; my $year = shift @temps; my $month = 1; foreach my $tmp (@temps) { #print "$year - $month - $tmp\n"; my $tmpdate = "$year-$month-01"; if ( $month < 10 ) { $tmpdate = "$year-0$month-01"; } print "INSERT INTO readings SET station_id = '$id', theyear = $year, themonth = $month, thedate = '$tmpdate', val = $tmp;\n"; $month++; } } } my @tmpsql; foreach my $key (keys %attr) { my $val = quotemeta($attr{$key}); push @tmpsql, "$key='$val'"; } print "INSERT INTO stations SET " . join(", ", @tmpsql) . ";\n"; close DATA; } sub trim($) { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; }
AttachmentSize
temp-readings-120909.sql_.gz108.26 KB

Tags: 

Add new comment