Skip to content

Commit d8dd691

Browse files
committed
add precision feature
1 parent 10577c1 commit d8dd691

File tree

2 files changed

+31
-13
lines changed

2 files changed

+31
-13
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ you'll get the correlation coefficient of two columns.
2121
# check the column is valid
2222
-c,--check
2323
24+
# set the output precision (number of decimal places), default is 6
25+
-p,--precision <number>
26+
2427
# show frequency table and histogram
2528
-b,--binsize <number>
2629
@@ -53,7 +56,7 @@ you can download a binary release
5356
```sh
5457
# Install with wget or curl
5558
## set this to the latest version listed on the releases page.
56-
VERSION=v1.0.15
59+
VERSION=v1.0.20
5760
## in case you use wget
5861
wget https://github.com/solaoi/colc/releases/download/$VERSION/colc_linux_amd64.tar.gz
5962
## in case you use curl
@@ -87,7 +90,6 @@ colc 2 some.csv
8790

8891
<img width="379" alt="スクリーンショット 2022-03-30 14 50 39" src="https://user-images.githubusercontent.com/46414076/160760624-58a15682-0f64-45b0-8b99-2b3732952971.png">
8992

90-
9193
Of course, `-b,--binsize` works well too :)
9294

9395
```

colc.ts

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import {
77
import { runner } from "./lib/common.ts";
88
import { parse } from "https://deno.land/std@0.66.0/flags/mod.ts";
99

10-
const { _, binsize, b, filter, f, check, c } = parse(Deno.args);
10+
const { _, binsize, b, filter, f, check, c, precision, p } = parse(Deno.args);
1111
const [column, filename] = _;
1212
const hasTwoColumn = (() => {
1313
if (typeof column !== "string") return false;
@@ -24,6 +24,16 @@ if (
2424
console.log("Usage:\n colc [column] [file.csv|tsv|txt]");
2525
Deno.exit(1);
2626
}
27+
const awkPrecision: number = (() => {
28+
if (
29+
typeof precision === "number" && precision > 0 &&
30+
Number.isInteger(precision)
31+
) {
32+
return precision;
33+
}
34+
if (typeof p === "number" && p > 0 && Number.isInteger(p)) return p;
35+
return 6;
36+
})();
2737
const binSize: number | null = (() => {
2838
if (typeof binsize === "number" && binsize > 0) return binsize;
2939
if (typeof b === "number" && b > 0) return b;
@@ -60,7 +70,7 @@ if (hasTwoColumn) {
6070
}
6171
bash.push("| awk");
6272
bash.push(
63-
`'BEGIN{OFMT="%.6f"}{split($1,col,",");asum+=col[1];a[NR]=col[1];bsum+=col[2];b[NR]=col[2]}END{amean=asum/NR;bmean=bsum/NR;for(i in a){as+=(a[i]-amean)^2;bs+=(b[i]-bmean)^2;sum+=(a[i]-amean)*(b[i]-bmean)};astddev=sqrt(as/NR);bstddev=sqrt(bs/NR);print sum/NR/astddev/bstddev}'`,
73+
`'BEGIN{OFMT="%.${awkPrecision}f"}{split($1,col,",");asum+=col[1];a[NR]=col[1];bsum+=col[2];b[NR]=col[2]}END{amean=asum/NR;bmean=bsum/NR;for(i in a){as+=(a[i]-amean)^2;bs+=(b[i]-bmean)^2;sum+=(a[i]-amean)*(b[i]-bmean)};astddev=sqrt(as/NR);bstddev=sqrt(bs/NR);print sum/NR/astddev/bstddev}'`,
6474
);
6575
return bash.join(" ");
6676
})();
@@ -118,7 +128,7 @@ if (binSize === null) {
118128
}
119129
bash.push("| sort -n | awk");
120130
bash.push(
121-
`'BEGIN{OFMT="%.6f"}NR==1{min=$1}{if(0==$1)zeros++;if($1<0)neg++;sum+=$1;d[NR]=$1}END{avg=sum/NR;for(i in d)s+=(d[i]-avg)^2;stddev=sqrt(s/(NR-1));q1=(3*d[int((NR-1)/4)+1]+d[int((NR-1)/4)+2])/4;q3=(d[int(3*(NR-1)/4)+1]+3*d[int(3*(NR-1)/4)+2])/4;iqr=q3-q1;stur=1+log(NR)/log(2);sturi=int(stur);sturges=stur>sturi?sturi+1:sturi;max=d[NR];range=max-min;sqrtnr=sqrt(NR);threerootnr=exp(log(NR)/3);print stddev,avg,sum,NR,max,min,sqrt(s/(NR-1))/sqrtnr,s/(NR-1),(NR%2)?d[(NR+1)/2]:(d[NR/2]+d[NR/2+1])/2,avg+stddev,avg-stddev,avg+2*stddev,avg-2*stddev,avg+3*stddev,avg-3*stddev,range/sturges,(3.5*stddev)/threerootnr,q1,q3,iqr,q1-1.5*iqr,q3+1.5*iqr,range/sqrtnr,2*iqr/threerootnr,range,zeros,zeros*100/NR,neg,neg*100/NR,stddev/avg}'`,
131+
`'BEGIN{OFMT="%.${awkPrecision}f"}NR==1{min=$1}{if(0==$1)zeros++;if($1<0)neg++;sum+=$1;d[NR]=$1}END{avg=sum/NR;for(i in d)s+=(d[i]-avg)^2;stddev=sqrt(s/(NR-1));q1=(3*d[int((NR-1)/4)+1]+d[int((NR-1)/4)+2])/4;q3=(d[int(3*(NR-1)/4)+1]+3*d[int(3*(NR-1)/4)+2])/4;iqr=q3-q1;stur=1+log(NR)/log(2);sturi=int(stur);sturges=stur>sturi?sturi+1:sturi;max=d[NR];range=max-min;sqrtnr=sqrt(NR);threerootnr=exp(log(NR)/3);print stddev,avg,sum,NR,max,min,sqrt(s/(NR-1))/sqrtnr,s/(NR-1),(NR%2)?d[(NR+1)/2]:(d[NR/2]+d[NR/2+1])/2,avg+stddev,avg-stddev,avg+2*stddev,avg-2*stddev,avg+3*stddev,avg-3*stddev,range/sturges,(3.5*stddev)/threerootnr,q1,q3,iqr,q1-1.5*iqr,q3+1.5*iqr,range/sqrtnr,2*iqr/threerootnr,range,zeros,zeros*100/NR,neg,neg*100/NR,stddev/avg}'`,
122132
);
123133
return bash.join(" ");
124134
})();
@@ -153,7 +163,7 @@ if (binSize === null) {
153163
zeroRate,
154164
negatives,
155165
negativeRate,
156-
cv
166+
cv,
157167
] = await runner
158168
.run(statsCommand).then((s) => s.split(" "));
159169
const sturgesFormulaIsInvalid = count.split(".")[0].length <= 2 &&
@@ -165,8 +175,8 @@ if (binSize === null) {
165175
"zeros": comma(zeros || "0"),
166176
"zeros(%)": comma(zeroRate),
167177
"negatives": comma(negatives || "0"),
168-
"negatives(%)": comma(negativeRate)
169-
}
178+
"negatives(%)": comma(negativeRate),
179+
};
170180
const stats = {
171181
"min": comma(min),
172182
"25%(Q1)": comma(q1),
@@ -179,7 +189,7 @@ if (binSize === null) {
179189
"IQR(Q3-Q1)": comma(iqr),
180190
"Q1–(1.5*IQR)": comma(lf),
181191
"Q3+(1.5*IQR)": comma(uf),
182-
}
192+
};
183193
const stds = {
184194
"stddev(σ)": comma(stddev),
185195
"stderr": comma(stderr),
@@ -188,16 +198,22 @@ if (binSize === null) {
188198
"mean±σ(≒68%)": `${comma(sigmaMinus1)}, ${comma(sigmaPlus1)}`,
189199
"mean±2σ(≒95%)": `${comma(sigmaMinus2)}, ${comma(sigmaPlus2)}`,
190200
"mean±3σ(≒99%)": `${comma(sigmaMinus3)}, ${comma(sigmaPlus3)}`,
191-
}
201+
};
192202
const recommendedBinsizes = {
193203
"binsize(Square-root)": sqrtBinsize,
194204
...(!sturgesFormulaIsInvalid && { "binsize(Sturges')": sturgesBinsize }),
195205
"binsize(Scott's)": scottBinsize,
196206
"binsize(FD)": fdBinsize,
197-
}
207+
};
198208
const { println, showHeader, hr } = formatter(
199209
sturgesFormulaIsInvalid ? 20 : 21,
200-
getMaxLength({...total,...stats,...iqrs,...stds,...recommendedBinsizes}),
210+
getMaxLength({
211+
...total,
212+
...stats,
213+
...iqrs,
214+
...stds,
215+
...recommendedBinsizes,
216+
}),
201217
);
202218
hasHeader ? showHeader(headerName) : hr();
203219
Object.entries(total).forEach(([key, value]) => {
@@ -236,7 +252,7 @@ const freqCommand = (() => {
236252
}
237253
bash.push("| awk");
238254
bash.push(
239-
`'BEGIN{OFMT="%.6f"}{b=int($1/${binSize});a[b]++;bmax=b>bmax?b:bmax;bmin=b<bmin?b:bmin}END{freq="";for(i in a)freq=freq "|" i "_" a[i];print NR, freq, bmin, bmax}'`,
255+
`'BEGIN{OFMT="%.${awkPrecision}f"}{b=int($1/${binSize});a[b]++;bmax=b>bmax?b:bmax;bmin=b<bmin?b:bmin}END{freq="";for(i in a)freq=freq "|" i "_" a[i];print NR, freq, bmin, bmax}'`,
240256
);
241257
return bash.join(" ");
242258
})();

0 commit comments

Comments
 (0)