Linear regression analysis scatter-plot on synthetic data
This page documents a preview version.
v2.23 Preview
Preview includes features under active development and is for development and testing only.
For production, use the latest stable version (v2024.1).
For production, use the latest stable version (v2024.1).
Save this script as "synthetic-data.sql"
drop procedure if exists populate_t(
int, double precision, double precision, double precision, double precision)
cascade;
drop table if exists t cascade;
create table t(
k int primary key,
x double precision,
y double precision,
delta double precision);
create procedure populate_t(
no_of_rows in int,
slope in double precision,
intercept in double precision,
mean in double precision,
stddev in double precision)
language plpgsql
as $body$
begin
delete from t;
with
a1 as (
select
s.v as k,
s.v as x,
(s.v * slope) + intercept as y
from generate_series(1, no_of_rows) as s(v)),
a2 as (
select (
row_number() over()) as k,
r.v as delta
from normal_rand(no_of_rows, mean, stddev) as r(v))
insert into t(k, x, y, delta)
select
k, x, a1.y, a2.delta
from a1 inner join a2 using(k);
insert into t(k, x, y, delta) values
(no_of_rows + 1, 0, null, null),
(no_of_rows + 2, null, 0, null);
end;
$body$;
call populate_t(
no_of_rows => 100,
mean => 0.0,
stddev => 5.0,
slope => -1.2,
intercept => 131.4);
\o analysis-results/synthetic-data.txt
with a as (
select
regr_r2 ((y + delta), x) as r2,
regr_slope ((y + delta), x) as s,
regr_intercept((y + delta), x) as i
from t)
select
to_char(r2, '0.99') as r2,
to_char(s, '90.9') as s,
to_char(i, '990.9') as i
from a;
\o
\t on
\o analysis-results/synthetic-data.csv
select
round(x)::text||','||round(y + delta)::text
from t
where
x > 60 and
x < 95 and
x is not null and
y is not null
order by x;
\o
\t off