Difference between revisions of "Talk:Data Warehouse"
(→Start for summary tables) |
(→Updating the warehouse) |
||
(23 intermediate revisions by 3 users not shown) | |||
Line 50: | Line 50: | ||
== Start for summary tables == | == Start for summary tables == | ||
− | Copied HIVE_DATA to S_HIVE_DATA | + | Copied HIVE_DATA to S_HIVE_DATA ( S from Summary) |
+ | |||
+ | ---- | ||
'''Date fields''' | '''Date fields''' | ||
Line 60: | Line 62: | ||
Maybe season is a usefull one as well? | Maybe season is a usefull one as well? | ||
+ | hour_of_day int(2) | ||
+ | day_of_month int(2) | ||
day_of_year int(3) | day_of_year int(3) | ||
− | month | + | month int(2) |
month_name varchar(10) | month_name varchar(10) | ||
− | quarter | + | quarter int(1) |
− | year | + | year int(4) |
− | month_year | + | month_year varchar(15) |
to fill them: | to fill them: | ||
− | # mysql --verbose -p < | + | # mysql --verbose -p < FillDateFields_local.sql |
− | # cat | + | |
+ | # cat FillDateFields_local.sql | ||
use centesb7_hivetool_converted; | use centesb7_hivetool_converted; | ||
− | + | ||
update S_HIVE_DATA set | update S_HIVE_DATA set | ||
− | day_of_year=DAYOFYEAR( | + | hour_of_day=DATE_FORMAT(hive_observation_time_local,'%H'), |
− | month=MONTH( | + | day_of_month=DAYOFMONTH(hive_observation_time_local), |
− | month_name=lower(MONTHNAME( | + | day_of_year=DAYOFYEAR(hive_observation_time_local), |
− | quarter=QUARTER( | + | month=MONTH(hive_observation_time_local), |
− | year=YEAR( | + | month_name=lower(MONTHNAME(hive_observation_time_local)), |
− | month_year=lower(DATE_FORMAT( | + | quarter=QUARTER(hive_observation_time_local), |
− | where | + | year=YEAR(hive_observation_time_local), |
+ | month_year=lower(DATE_FORMAT(hive_observation_time_local,'%M-%Y')) | ||
+ | where hive_observation_time_local != 0; | ||
+ | If you want to base the "timestamp" instead of "hive_observation_time_local" | ||
+ | ''# cat FillDateFields.sql'' | ||
+ | ''use centesb7_hivetool_converted;'' | ||
+ | |||
+ | ''update S_HIVE_DATA set'' | ||
+ | ''hour_of_day=DATE_FORMAT(timestamp,'%H'),'' | ||
+ | ''day_of_month=DAYOFMONTH(timestamp),'' | ||
+ | ''day_of_year=DAYOFYEAR(timestamp),'' | ||
+ | ''month=MONTH(timestamp),'' | ||
+ | ''month_name=lower(MONTHNAME(timestamp)),'' | ||
+ | ''quarter=QUARTER(timestamp),'' | ||
+ | ''year=YEAR(timestamp),'' | ||
+ | ''month_year=lower(DATE_FORMAT(timestamp,'%M-%Y'))'' | ||
+ | ''where timestamp != 0;'' | ||
Some easy example queries: | Some easy example queries: | ||
Line 86: | Line 107: | ||
- select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month; | - select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month; | ||
- select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c),max(hive_weight_kgs) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month; | - select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c),max(hive_weight_kgs) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month; | ||
+ | |||
+ | ---- | ||
+ | |||
+ | '''UTC Observation time''' | ||
+ | For this to work I've copied the HIVE_PARAMETERS table to E_HIVE_PARAMETERS ( E from Enriched). Added a 'timezone' field which was filled in manually based on the location of the hive. Eventually 'timezone' will be present by default. | ||
+ | |||
+ | The Query below will fill the "hive_observation_time_utc" based on "hive_observation_time_local": | ||
+ | |||
+ | update S_HIVE_DATA a set hive_observation_time_utc=(select CONVERT_TZ(hive_observation_time_local,timezone,'UTC') from E_HIVE_PARAMETERS b where a.hive_id = b.hive_id); | ||
+ | |||
+ | ---- | ||
'''rownums per hive''' | '''rownums per hive''' | ||
''We will need those to easily calculate delta's'' | ''We will need those to easily calculate delta's'' | ||
− | Added field "hive_row_id" bigint(20) | + | Added field "hive_row_id" bigint(20) |
+ | Put an index on it! | ||
The query below for fills in the rowid for hive_id 10 | The query below for fills in the rowid for hive_id 10 | ||
Line 105: | Line 138: | ||
# mysql -p < FillHiveRownums.sql | # mysql -p < FillHiveRownums.sql | ||
+ | query to fill in delta's for kgs | ||
+ | update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 | ||
+ | set a.hive_weight_kgs_delta = a.hive_weight_kgs - b.hive_weight_kgs; | ||
+ | |||
+ | query to fill in delta's for lbs | ||
+ | update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 | ||
+ | set a.hive_weight_lbs_delta = a.hive_weight_lbs - b.hive_weight_lbs; | ||
+ | |||
+ | query to fill in the manipulation fields and set the delta to 0 for that record in kgs. | ||
+ | update S_HIVE_DATA a inner join S_HIVE_DATA b on a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_weight_kgs_delta != 0 and | ||
+ | abs((a.hive_weight_kgs - b.hive_weight_kgs)/(timestampdiff(SECOND,b.hive_observation_time_utc,a.hive_observation_time_utc)/3600)) > (60 * 0.45359237) | ||
+ | set a.hive_manipulation_change_kgs = a.hive_weight_kgs_delta, a.hive_weight_kgs_delta=0; | ||
+ | |||
+ | query to fill in the manipulation fields and set the delta to 0 for that record in lbs. | ||
+ | update S_HIVE_DATA a inner join S_HIVE_DATA b on a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_weight_lbs_delta != 0 and | ||
+ | abs((a.hive_weight_lbs - b.hive_weight_lbs)/(timestampdiff(SECOND,b.hive_observation_time_utc,a.hive_observation_time_utc)/3600)) > 60 | ||
+ | set a.hive_manipulation_change_lbs = a.hive_weight_lbs_delta, a.hive_weight_lbs_delta=0; | ||
+ | |||
+ | query to set the filtered weight values based on the new deltas | ||
+ | First set the initial value of the filtered field: | ||
+ | update S_HIVE_DATA set hive_weight_kgs_filtered = hive_weight_kgs where hive_row_id=1 and hive_weight_kgs is not null; | ||
+ | |||
+ | Then update the rest ( temp (not finished): need to fix the fact that only one row get's updated. probably a subquery is better): | ||
+ | update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_row_id > 1 and a.hive_id=10 set a.hive_weight_kgs_filtered = b.hive_weight_kgs_filtered + a.hive_weight_kgs_delta; | ||
+ | |||
+ | update S_HIVE_DATA a set a.hive_weight_kgs_filtered = (select b.hive_weight_kgs_filtered from S_HIVE_DATA b where a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 ) + a.hive_weight_kgs_delta where a.hive_row_id > 1 and a.hive_id=10; | ||
+ | |||
+ | update S_HIVE_DATA a set a.hive_weight_kgs_filtered = (select fil from (select hive_weight_kgs_filtered AS fil,hive_id,hive_row_id from S_HIVE_DATA) AS b where a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 ) + a.hive_weight_kgs_delta where a.hive_row_id > 1 and a.hive_id=10; | ||
+ | |||
+ | Stored procedure for filling the filtered weight ( alpha, needs more work) | ||
+ | |||
+ | BEGIN | ||
+ | DECLARE done INT DEFAULT FALSE; | ||
+ | DECLARE p_hive_id INT(11); | ||
+ | DECLARE p_hive_row_id INT(11); | ||
+ | DECLARE p_previous_weight DECIMAL(5,2); | ||
+ | DECLARE p_delta_weight DECIMAL(5,2); | ||
+ | DECLARE curr CURSOR FOR SELECT hive_id,hive_row_id,hive_weight_kgs_delta from S_HIVE_DATA where hive_id=pi_hiveid order by hive_row_id; | ||
+ | DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; | ||
+ | OPEN curr; | ||
+ | read_loop: LOOP | ||
+ | FETCH curr INTO p_hive_id, p_hive_row_id,p_delta_weight; | ||
+ | IF done THEN | ||
+ | LEAVE read_loop; | ||
+ | END IF; | ||
+ | IF p_hive_row_id = 1 THEN | ||
+ | update S_HIVE_DATA set hive_weight_kgs_filtered=hive_weight_kgs where hive_id=p_hive_id and hive_row_id=p_hive_row_id; | ||
+ | ELSE | ||
+ | SELECT hive_weight_kgs_filtered INTO p_previous_weight from S_HIVE_DATA where hive_id=p_hive_id and hive_row_id=(p_hive_row_id-1) order by hive_row_id; | ||
+ | update S_HIVE_DATA set hive_weight_kgs_filtered=(p_previous_weight + p_delta_weight) where hive_id=p_hive_id and hive_row_id=p_hive_row_id; | ||
+ | END IF; | ||
+ | END LOOP; | ||
+ | CLOSE curr; | ||
+ | END | ||
+ | |||
+ | ---- | ||
TODO: | TODO: | ||
− | - | + | - fill in weight filter field |
− | - | + | - do the weight queries for lbs as well |
+ | |||
+ | ==NASA Weight Filter== | ||
+ | |||
+ | I call this the NASA weight Filter because Dr. Wayne Esaias suggested it to remove manipulation changes. This is the perl code that is used to graph the hives. | ||
+ | |||
+ | if ( $last_weight ) { | ||
+ | $delta_weight = $weight - $last_weight; | ||
+ | $delta_time = ($time - $last_time)/3600; | ||
+ | |||
+ | if ($delta_time) { $dwdt = $delta_weight/$delta_time; } | ||
+ | |||
+ | # Begin NASA manipulation change filter | ||
+ | |||
+ | if ( ($weight_filter eq "NASA") | ||
+ | && (abs $dwdt > $max_dwdt_lbs_per_hour) # if the change in weight exceeds the threshold | ||
+ | && ($quality != 6) ) # and this record is not flagged as a swarm (Quality 6) | ||
+ | { # then don't count the change as daily change, | ||
+ | $manipulation_change += $delta_weight; # count it as manipulation change | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | $daily_change += $delta_weight; # otherwise, count it as part of the daily change | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { #first time through | ||
+ | $daily_change = $weight; | ||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | ==Updating the warehouse== | ||
+ | |||
+ | At some point in 2016 we've updated the warehouse with new data. This is a short description of the process to not have to think it out twice. | ||
+ | |||
+ | Paul delivered me a mysql dump which contained all tables, but the HIVE_DATA table contained only the new data. | ||
+ | |||
+ | I first renamed the original table HIVE_DATA to HIVE_DATA_BACK | ||
+ | |||
+ | then imported the dump: | ||
+ | # mysql -u centesb7 -p centesb7_hivetool_converted < hivetool_raw_20160617.sql | ||
+ | |||
+ | Then I've imported the new lines into our backed up table | ||
+ | |||
+ | mysql> insert ignore into HIVE_DATA_back select * from HIVE_DATA; | ||
+ | |||
+ | Then removed the HIVE_DATA table and renamed HIVE_DATA_back, back to HIVE_DATA | ||
+ | |||
+ | The I've executed the beginnig of the procedure above until the time came to create the S_HIVE_DATA table | ||
+ | |||
+ | I had two options, create a new one, or insert the data in the existing one. | ||
+ | |||
+ | to insert the new data in the new one: | ||
+ | mysql> insert ignore into S_HIVE_DATA(row_id,hive_id,timestamp,hive_observation_time_local,hive_observation_time_utc,hive_weight_lbs,hive_weight_lbs_delta,hive_weight_lbs_delta_daily,hive_weight_lbs_filtered,hive_manipulation_change_lbs,hive_weight_kgs,hive_weight_kgs_delta,hive_weight_kgs_delta_daily,hive_weight_kgs_filtered,hive_manipulation_change_kgs,hive_temp_f,hive_temp_c,hive_humidity,ambient_temp_f,ambient_temp_c,ambient_humidity,ambient_luminance,ambient_precip_in,wx_station_id,wx_observation_time_rfc822,wx_temp_f,wx_temp_c,wx_relative_humidity,wx_wind_dir,wx_wind_degrees,wx_wind_mph,wx_wind_gust_mph,wx_pressure_mb,wx_pressure_in,wx_dewpoint_f,wx_dewpoint_c,wx_solar_radiation,wx_precip_1hr_in,wx_precip_1hr_metric,wx_precip_today_in,wx_precip_today_metric,quality) select row_id,hive_id,timestamp,hive_observation_time_local,hive_observation_time_utc,hive_weight_lbs,hive_weight_lbs_delta,hive_weight_lbs_delta_daily,hive_weight_lbs_filtered,hive_manipulation_change_lbs,hive_weight_kgs,hive_weight_kgs_delta,hive_weight_kgs_delta_daily,hive_weight_kgs_filtered,hive_manipulation_change_kgs,hive_temp_f,hive_temp_c,hive_humidity,ambient_temp_f,ambient_temp_c,ambient_humidity,ambient_luminance,ambient_precip_in,wx_station_id,wx_observation_time_rfc822,wx_temp_f,wx_temp_c,wx_relative_humidity,wx_wind_dir,wx_wind_degrees,wx_wind_mph,wx_wind_gust_mph,wx_pressure_mb,wx_pressure_in,wx_dewpoint_f,wx_dewpoint_c,wx_solar_radiation,wx_precip_1hr_in,wx_precip_1hr_metric,wx_precip_today_in,wx_precip_today_metric,quality from HIVE_DATA; | ||
+ | |||
+ | but first create a copy as backup |
Latest revision as of 11:26, 20 June 2016
Contents
[hide]Conversions
I've written 2 queries to convert the temperatures. They will only process rows in which ??_temp_c is not NULL and ??_temp_f is still NULL. to make sure we don't do the same thing twice. I does seem that the ambient temp values are sometimes a bit of the scale ("999.9"), Do I need to include some kind of lower and upper limit to skip those? Or do we do some kind of basic sanity checking at some other time.
I've directly updated the HIVE_DATA table, I hope that's fine?
mysql> update HIVE_DATA set hive_temp_f = (hive_temp_c * 1.8) + 32 where hive_temp_c is not NULL and hive_temp_f is NULL; Query OK, 0 rows affected, 65535 warnings (11.07 sec) Rows matched: 4270276 Changed: 0 Warnings: 3279801
mysql> update HIVE_DATA set ambient_temp_f = (ambient_temp_c * 1.8) + 32 where ambient_temp_c is not NULL and ambient_temp_f is NULL; Query OK, 3682033 rows affected, 65535 warnings (16.86 sec) Rows matched: 3682033 Changed: 3682033 Warnings: 2445369
No sure where the warnings come from though.
for easy execution:
# mysql --verbose -p < CelciusToFarenheit.sql Enter password:
update HIVE_DATA set hive_temp_f = (hive_temp_c * 1.8) + 32 where hive_temp_c is not NULL and hive_temp_f is NULL
update HIVE_DATA set ambient_temp_f = (ambient_temp_c * 1.8) + 32 where ambient_temp_c is not NULL and ambient_temp_f is NULL
- cat CelciusToFarenheit.sql
use _hivetool_converted; update HIVE_DATA set hive_temp_f = (hive_temp_c * 1.8) + 32 where hive_temp_c is not NULL and hive_temp_f is NULL; update HIVE_DATA set ambient_temp_f = (ambient_temp_c * 1.8) + 32 where ambient_temp_c is not NULL and ambient_temp_f is NULL;
Start for summary tables
Copied HIVE_DATA to S_HIVE_DATA ( S from Summary)
Date fields To do some easy grouping per date unit
Added fields some fields that will be useful for aggregation: day_of_year,month,month_name,quarter,year,month_year
Maybe season is a usefull one as well?
hour_of_day int(2) day_of_month int(2) day_of_year int(3) month int(2) month_name varchar(10) quarter int(1) year int(4) month_year varchar(15)
to fill them:
# mysql --verbose -p < FillDateFields_local.sql
# cat FillDateFields_local.sql use centesb7_hivetool_converted;
update S_HIVE_DATA set hour_of_day=DATE_FORMAT(hive_observation_time_local,'%H'), day_of_month=DAYOFMONTH(hive_observation_time_local), day_of_year=DAYOFYEAR(hive_observation_time_local), month=MONTH(hive_observation_time_local), month_name=lower(MONTHNAME(hive_observation_time_local)), quarter=QUARTER(hive_observation_time_local), year=YEAR(hive_observation_time_local), month_year=lower(DATE_FORMAT(hive_observation_time_local,'%M-%Y')) where hive_observation_time_local != 0;
If you want to base the "timestamp" instead of "hive_observation_time_local" # cat FillDateFields.sql use centesb7_hivetool_converted; update S_HIVE_DATA set hour_of_day=DATE_FORMAT(timestamp,'%H'), day_of_month=DAYOFMONTH(timestamp), day_of_year=DAYOFYEAR(timestamp), month=MONTH(timestamp), month_name=lower(MONTHNAME(timestamp)), quarter=QUARTER(timestamp), year=YEAR(timestamp), month_year=lower(DATE_FORMAT(timestamp,'%M-%Y')) where timestamp != 0;
Some easy example queries:
- select year,month_name,hive_id,avg(hive_temp_c) from S_HIVE_DATA where timestamp != 0 group by year,month_name,hive_id; - select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month; - select hive_id,year,month_name,avg(hive_temp_c),avg(ambient_temp_c),max(hive_weight_kgs) from S_HIVE_DATA where timestamp != 0 and hive_id=10 group by hive_id,year,month_name order by year,month;
UTC Observation time For this to work I've copied the HIVE_PARAMETERS table to E_HIVE_PARAMETERS ( E from Enriched). Added a 'timezone' field which was filled in manually based on the location of the hive. Eventually 'timezone' will be present by default.
The Query below will fill the "hive_observation_time_utc" based on "hive_observation_time_local":
update S_HIVE_DATA a set hive_observation_time_utc=(select CONVERT_TZ(hive_observation_time_local,timezone,'UTC') from E_HIVE_PARAMETERS b where a.hive_id = b.hive_id);
rownums per hive We will need those to easily calculate delta's
Added field "hive_row_id" bigint(20) Put an index on it!
The query below for fills in the rowid for hive_id 10 set @rownum:=0; update S_HIVE_DATA set hive_row_id = ( select @rownum := @rownum + 1) where hive_id=10 order by row_id asc;
To do it for all hives:
# cat WriteFillHiveRownums.sql use centesb7_hivetool_converted; select CONCAT('set @rownum:=0; update S_HIVE_DATA set hive_row_id = ( select @rownum := @rownum + 1) where hive_id=',hive_id,' order by row_id asc;') from HIVE_PARAMETERS; # echo "use centesb7_hivetool_converted;" > FillHiveRownums.sql # mysql -N -p < WriteFillHiveRownums.sql >> FillHiveRownums.sql # mysql -p < FillHiveRownums.sql
query to fill in delta's for kgs
update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 set a.hive_weight_kgs_delta = a.hive_weight_kgs - b.hive_weight_kgs;
query to fill in delta's for lbs
update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 set a.hive_weight_lbs_delta = a.hive_weight_lbs - b.hive_weight_lbs;
query to fill in the manipulation fields and set the delta to 0 for that record in kgs.
update S_HIVE_DATA a inner join S_HIVE_DATA b on a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_weight_kgs_delta != 0 and abs((a.hive_weight_kgs - b.hive_weight_kgs)/(timestampdiff(SECOND,b.hive_observation_time_utc,a.hive_observation_time_utc)/3600)) > (60 * 0.45359237) set a.hive_manipulation_change_kgs = a.hive_weight_kgs_delta, a.hive_weight_kgs_delta=0;
query to fill in the manipulation fields and set the delta to 0 for that record in lbs.
update S_HIVE_DATA a inner join S_HIVE_DATA b on a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_weight_lbs_delta != 0 and abs((a.hive_weight_lbs - b.hive_weight_lbs)/(timestampdiff(SECOND,b.hive_observation_time_utc,a.hive_observation_time_utc)/3600)) > 60 set a.hive_manipulation_change_lbs = a.hive_weight_lbs_delta, a.hive_weight_lbs_delta=0;
query to set the filtered weight values based on the new deltas First set the initial value of the filtered field:
update S_HIVE_DATA set hive_weight_kgs_filtered = hive_weight_kgs where hive_row_id=1 and hive_weight_kgs is not null;
Then update the rest ( temp (not finished): need to fix the fact that only one row get's updated. probably a subquery is better):
update S_HIVE_DATA a inner join S_HIVE_DATA b ON a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 and a.hive_row_id > 1 and a.hive_id=10 set a.hive_weight_kgs_filtered = b.hive_weight_kgs_filtered + a.hive_weight_kgs_delta;
update S_HIVE_DATA a set a.hive_weight_kgs_filtered = (select b.hive_weight_kgs_filtered from S_HIVE_DATA b where a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 ) + a.hive_weight_kgs_delta where a.hive_row_id > 1 and a.hive_id=10;
update S_HIVE_DATA a set a.hive_weight_kgs_filtered = (select fil from (select hive_weight_kgs_filtered AS fil,hive_id,hive_row_id from S_HIVE_DATA) AS b where a.hive_id = b.hive_id and b.hive_row_id = a.hive_row_id -1 ) + a.hive_weight_kgs_delta where a.hive_row_id > 1 and a.hive_id=10;
Stored procedure for filling the filtered weight ( alpha, needs more work)
BEGIN DECLARE done INT DEFAULT FALSE; DECLARE p_hive_id INT(11); DECLARE p_hive_row_id INT(11); DECLARE p_previous_weight DECIMAL(5,2); DECLARE p_delta_weight DECIMAL(5,2); DECLARE curr CURSOR FOR SELECT hive_id,hive_row_id,hive_weight_kgs_delta from S_HIVE_DATA where hive_id=pi_hiveid order by hive_row_id; DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE; OPEN curr; read_loop: LOOP FETCH curr INTO p_hive_id, p_hive_row_id,p_delta_weight; IF done THEN LEAVE read_loop; END IF; IF p_hive_row_id = 1 THEN update S_HIVE_DATA set hive_weight_kgs_filtered=hive_weight_kgs where hive_id=p_hive_id and hive_row_id=p_hive_row_id; ELSE SELECT hive_weight_kgs_filtered INTO p_previous_weight from S_HIVE_DATA where hive_id=p_hive_id and hive_row_id=(p_hive_row_id-1) order by hive_row_id; update S_HIVE_DATA set hive_weight_kgs_filtered=(p_previous_weight + p_delta_weight) where hive_id=p_hive_id and hive_row_id=p_hive_row_id; END IF; END LOOP; CLOSE curr; END
TODO:
- fill in weight filter field - do the weight queries for lbs as well
NASA Weight Filter
I call this the NASA weight Filter because Dr. Wayne Esaias suggested it to remove manipulation changes. This is the perl code that is used to graph the hives.
if ( $last_weight ) { $delta_weight = $weight - $last_weight; $delta_time = ($time - $last_time)/3600; if ($delta_time) { $dwdt = $delta_weight/$delta_time; } # Begin NASA manipulation change filter if ( ($weight_filter eq "NASA") && (abs $dwdt > $max_dwdt_lbs_per_hour) # if the change in weight exceeds the threshold && ($quality != 6) ) # and this record is not flagged as a swarm (Quality 6) { # then don't count the change as daily change, $manipulation_change += $delta_weight; # count it as manipulation change } else { $daily_change += $delta_weight; # otherwise, count it as part of the daily change } } else { #first time through $daily_change = $weight; }
Updating the warehouse
At some point in 2016 we've updated the warehouse with new data. This is a short description of the process to not have to think it out twice.
Paul delivered me a mysql dump which contained all tables, but the HIVE_DATA table contained only the new data.
I first renamed the original table HIVE_DATA to HIVE_DATA_BACK
then imported the dump:
- mysql -u centesb7 -p centesb7_hivetool_converted < hivetool_raw_20160617.sql
Then I've imported the new lines into our backed up table
mysql> insert ignore into HIVE_DATA_back select * from HIVE_DATA;
Then removed the HIVE_DATA table and renamed HIVE_DATA_back, back to HIVE_DATA
The I've executed the beginnig of the procedure above until the time came to create the S_HIVE_DATA table
I had two options, create a new one, or insert the data in the existing one.
to insert the new data in the new one: mysql> insert ignore into S_HIVE_DATA(row_id,hive_id,timestamp,hive_observation_time_local,hive_observation_time_utc,hive_weight_lbs,hive_weight_lbs_delta,hive_weight_lbs_delta_daily,hive_weight_lbs_filtered,hive_manipulation_change_lbs,hive_weight_kgs,hive_weight_kgs_delta,hive_weight_kgs_delta_daily,hive_weight_kgs_filtered,hive_manipulation_change_kgs,hive_temp_f,hive_temp_c,hive_humidity,ambient_temp_f,ambient_temp_c,ambient_humidity,ambient_luminance,ambient_precip_in,wx_station_id,wx_observation_time_rfc822,wx_temp_f,wx_temp_c,wx_relative_humidity,wx_wind_dir,wx_wind_degrees,wx_wind_mph,wx_wind_gust_mph,wx_pressure_mb,wx_pressure_in,wx_dewpoint_f,wx_dewpoint_c,wx_solar_radiation,wx_precip_1hr_in,wx_precip_1hr_metric,wx_precip_today_in,wx_precip_today_metric,quality) select row_id,hive_id,timestamp,hive_observation_time_local,hive_observation_time_utc,hive_weight_lbs,hive_weight_lbs_delta,hive_weight_lbs_delta_daily,hive_weight_lbs_filtered,hive_manipulation_change_lbs,hive_weight_kgs,hive_weight_kgs_delta,hive_weight_kgs_delta_daily,hive_weight_kgs_filtered,hive_manipulation_change_kgs,hive_temp_f,hive_temp_c,hive_humidity,ambient_temp_f,ambient_temp_c,ambient_humidity,ambient_luminance,ambient_precip_in,wx_station_id,wx_observation_time_rfc822,wx_temp_f,wx_temp_c,wx_relative_humidity,wx_wind_dir,wx_wind_degrees,wx_wind_mph,wx_wind_gust_mph,wx_pressure_mb,wx_pressure_in,wx_dewpoint_f,wx_dewpoint_c,wx_solar_radiation,wx_precip_1hr_in,wx_precip_1hr_metric,wx_precip_today_in,wx_precip_today_metric,quality from HIVE_DATA;
but first create a copy as backup