Skip to content

Commit aa8c884

Browse files
committed
Update AssertError
1 parent ed27e73 commit aa8c884

File tree

4 files changed

+69
-35
lines changed

4 files changed

+69
-35
lines changed

BCTC3_VATI_GEN_CSV_CDKT_A.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,31 @@
1414

1515
# PARAMS 01 - Table Type
1616
# Name of table type
17-
TABLE_TYPE = 'BCTC3_CDKT_A_1'
17+
TABLE_TYPE = 'BCTC2_4C_1'
1818

1919
# PARAMS 02 - Number of Header Rows
2020
# Does NOT include the first row of the header
2121
NRH = 2
2222

2323
# PARAMS 03
2424
# NUMS_ROWS_BOLD_START
25-
NRBS = 3
25+
NRBS = 2
2626

2727
# PARAMS 04
2828
# NUMS_ROWS_BOLD_END
2929
NRBE = 1
3030

31-
# PARAMS 05 - Range Nums of Row in Bold
31+
# PARAMS 05 - Range Nums of Bold Rows Rest
3232
# Does NOT include the START bold rows (NRBS) or the END bold rows (NRBE)
33-
# RANGE_NRB = [min, max]
34-
RANGE_NRB = [8, 12]
33+
# RANGE_NBRR = [min, max]
34+
RANGE_NBRR = [2, 3]
3535

3636
# PARAMS 06 - Range Nums of Rows in Normal
3737
# RANGE_NRN = [min, max]
38-
RANGE_NRN = [20, 30]
38+
RANGE_NRN = [4, 8]
3939

4040
# PARAMS 07 - Nums of CSV Samples Output
41-
NCSO = 500
41+
NCSO = 5
4242

4343

4444
## FIXED PARAMS ##
@@ -51,9 +51,6 @@
5151
BASE_ADDR = os.getcwd()
5252
FOLDER_OUTPUT_DIR = join_path(BASE_ADDR, OUTPUT_FOLDER)
5353

54-
# NRB - Nums of Row in Bold
55-
NRB = random.randint(RANGE_NRB[0], RANGE_NRB[1])
56-
5754
# NFR - Nums Fixed Rows
5855
NFR = NRH + NRBS + NRBE
5956

@@ -67,23 +64,23 @@
6764
INDEX_RANGE = [START_IDX, STOP_IDX]
6865

6966
# RANGE_ROWS
70-
RANGE_ROWS = [RANGE_NRB, RANGE_NRN, NFR]
67+
RANGE_ROWS = [RANGE_NBRR, RANGE_NRN, NFR]
7168

7269
# Number of parallel processes
73-
NUMS_PROCESSES = 4
70+
NUMS_PROCESSES = 1
7471

7572
# Number of epochs
76-
NUMS_EPOCHS = 300
73+
NUMS_EPOCHS = 3
7774

7875
# SUB_CONSTANT
7976
TIME = '1'
8077

8178

82-
def gen_csv(range_nrb, range_nrn, nums_fixed_rows, ctgan, index, folder_csv_dir, bd_data, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
79+
def gen_csv(range_nbrr, range_nrn, nums_fixed_rows, ctgan, index, folder_csv_dir, bd_data, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
8380
# Random nums of rows
84-
nums_rows_bold = random.randint(range_nrb[0], range_nrb[1])
81+
nums_rows_bold_rest = random.randint(range_nbrr[0], range_nbrr[1])
8582
nums_rows_normal = random.randint(range_nrn[0], range_nrn[1])
86-
nums_of_rows = nums_rows_bold + nums_rows_normal + nums_fixed_rows
83+
nums_of_rows = nums_rows_bold_rest + nums_rows_normal + nums_fixed_rows
8784

8885
# Create synthetic data
8986
synthetic_data = ctgan.sample(nums_of_rows - 1)
@@ -96,20 +93,20 @@ def gen_csv(range_nrb, range_nrn, nums_fixed_rows, ctgan, index, folder_csv_dir,
9693
lines = list(read_csv)
9794
nums_of_columns = len(lines[0])
9895

99-
changed_function(NRH, nums_of_columns, lines, bd_data, nums_rows_bold, nums_of_rows, file_csv_dir, NRBS, NRBE, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc)
96+
changed_function(NRH, nums_of_columns, lines, bd_data, nums_rows_bold_rest, nums_of_rows, file_csv_dir, NRBS, NRBE, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc)
10097

10198
print('Successfully generated csv file and txt file {:03}'.format(index + 1))
10299

103100
def make_csv(ctgan, range_rows, index_range, bd_data, merged_cell_data_idx):
    """Generate one CSV sample per index in ``index_range`` using a thread pool.

    Args:
        ctgan: Sampler object forwarded unchanged to ``gen_csv``.
        range_rows: Triple ``(range_nbrr, range_nrn, nums_fixed_rows)``; each
            element is forwarded unchanged to ``gen_csv``.
        index_range: Pair ``(start_index, stop_index)``; one task is submitted
            for every index in ``range(start_index, stop_index)``.
        bd_data: Bold-row data forwarded unchanged to ``gen_csv``.
        merged_cell_data_idx: 5-item sequence unpacked into ``prh_idx``,
            ``spc_idx``, ``cell_spc_idx``, ``content_spc`` and
            ``alignment_spc``, all forwarded to ``gen_csv``.

    Returns:
        None. Output is produced by ``gen_csv`` as a side effect.
    """
    prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc = merged_cell_data_idx
    range_nbrr, range_nrn, nums_fixed_rows = range_rows
    start_index, stop_index = index_range
    folder_csv_dir = './' + OUTPUT_FOLDER
    # NOTE(review): presumably creates or resets the output folder - confirm
    # against the helper's definition.
    make_dirs_or_format_dir(folder_csv_dir)

    with ThreadPool(processes=NUMS_PROCESSES) as pool:
        for index in range(start_index, stop_index):
            pool.apply_async(
                gen_csv,
                args=(range_nbrr, range_nrn, nums_fixed_rows, ctgan, index,
                      folder_csv_dir, bd_data, prh_idx, spc_idx, cell_spc_idx,
                      content_spc, alignment_spc),
                # Without an error callback (or a later .get()) an exception
                # raised inside gen_csv is dropped silently and the sample is
                # just missing. Report it, but keep generating the others.
                # ``failed_index=index`` binds the current index at submit time.
                error_callback=lambda error, failed_index=index: print(
                    'Failed to generate csv file {:03}: {!r}'.format(failed_index + 1, error)
                ),
            )
        # Stop accepting work and wait for every submitted task to finish
        # before the context manager terminates the pool.
        pool.close()
        pool.join()
115112

changed_function_per_table_type.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def clear_header(nrh, nums_of_columns, lines):
77
for index in range(nums_of_columns):
88
lines[element][index] = ''
99

10-
def overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_rows_bold, nums_of_rows, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
10+
def overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_rows_bold_rest, nums_of_rows, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
1111
# List all bold rows index in GEN CSV
1212
all_bd_row_arr = []
1313
# List all space rows index in GEN CSV
@@ -39,8 +39,8 @@ def overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_r
3939
for cell in range(nums_of_columns):
4040
lines[-(idx + 1)][cell] = bd_data[-(idx + 1)][cell]
4141

42-
# Calculate nums of bold rows rest
43-
nums_rows_bold_rest = nums_rows_bold - nrbs - nrbe
42+
# # Calculate nums of bold rows rest
43+
# nums_rows_bold_rest = nums_rows_bold_rest - nrbs - nrbe
4444

4545
if nums_rows_bold_rest > 0:
4646
# RANDOM_RANGE nums of bold rows
@@ -65,15 +65,14 @@ def overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_r
6565
for bd_cell_idx in range(nums_of_columns):
6666
lines[sub_row_arr[bd_row_idx]][bd_cell_idx] = bd_data[sub_bd_arr[bd_row_idx]][bd_cell_idx]
6767

68-
if len(all_bd_row_arr) > 0:
69-
all_bd_row_arr.sort()
70-
68+
if len(all_bd_row_arr) > 0:
7169
for pos in all_bd_row_arr:
7270
space_row_arr.append(pos - 1)
7371
if nrh not in space_row_arr:
7472
space_row_arr.append(nrh)
75-
if (nums_of_rows - 1) not in space_row_arr:
76-
space_row_arr.append(nums_of_rows - 1)
73+
if (nums_of_rows - 2) not in space_row_arr:
74+
space_row_arr.append(nums_of_rows - 2)
75+
space_row_arr.append(nums_of_rows - 1)
7776
space_row_arr.sort()
7877

7978
new_prh_idx = np.where(np.in1d(original_bd_idx, prh_idx))[0]
@@ -102,7 +101,9 @@ def overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_r
102101
alig_spc = np.array(alignment_spc)[new_cont_spc_idx]
103102
for loop_count, element in enumerate(alig_spc):
104103
sub_alig_spc_arr = element.tolist()
105-
aligment_spc_arr.append(sub_alig_spc_arr)
104+
aligment_spc_arr.append(sub_alig_spc_arr)
105+
106+
all_bd_row_arr.sort()
106107

107108
return [all_bd_row_arr, space_row_arr, prh_row_arr, spc_row_arr, content_spc_arr, aligment_spc_arr]
108109

@@ -111,7 +112,7 @@ def rand_number_columns(list_index_of_columns_rand_number, nrh, nums_of_rows, li
111112
for col_idx in list_index_of_columns_rand_number:
112113
lines[row_idx][col_idx] = rand_string_number(lines[row_idx][col_idx])
113114

114-
def changed_function(nrh, nums_of_columns, lines, bd_data, nums_rows_bold, nums_of_rows, file_csv_dir, nrbs, nrbe, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
115+
def changed_function(nrh, nums_of_columns, lines, bd_data, nums_rows_bold_rest, nums_of_rows, file_csv_dir, nrbs, nrbe, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc):
115116
# Clear header
116117
clear_header(nrh, nums_of_columns, lines)
117118

@@ -124,7 +125,7 @@ def changed_function(nrh, nums_of_columns, lines, bd_data, nums_rows_bold, nums_
124125
lines[2][4] = "Triệu VND"
125126

126127
# Overwrite bold row data
127-
pos_info_arr = overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_rows_bold, nums_of_rows, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc)
128+
pos_info_arr = overwrite_bold_data(nums_of_columns, nrbs, lines, nrh, bd_data, nrbe, nums_rows_bold_rest, nums_of_rows, prh_idx, spc_idx, cell_spc_idx, content_spc, alignment_spc)
128129

129130
# Index of columns need random number
130131
# FIXME: NEED CHANGE index of columns random number per table type

dict_discrete_columns_type.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,53 @@
1414
# PROPERTIES 02
1515
# Spanning cell
1616
# Row index in bold data
17-
[0],
17+
[],
1818

1919
# PROPERTIES 03
2020
# Cell index in row have spanning cell
21-
[[0, 1]],
21+
[],
2222

2323
# PROPERTIES 04
2424
# Content of spc
25-
[['PROJECT HOON', 'TEST']],
25+
[],
2626

2727
# PROPERTIES 05
2828
# Alignment of text in spanning cell
29-
[['Center', 'Middle']]
29+
[]
30+
]
31+
32+
BCTC2_4C_1 = [
33+
# PROPERTIES 00
34+
# Discrete Columns
35+
['A',
36+
'Tổng giá trị của hợp đồng (theo tỉ giá tại ngày hiệu lực)',
37+
'Tổng giá trị ghi sổ kế toán (theo tỷ giá tại ngày lập báo cáo tài chính)',
38+
'B',
39+
'C'],
40+
41+
# PROPERTIES 01
42+
# Projected row header (index)
43+
[0, 1, 3, 4],
44+
45+
# PROPERTIES 02
46+
# Spanning cell
47+
# Row index in bold data (not include header)
48+
[],
49+
50+
# PROPERTIES 03
51+
# Cell index in row have spanning cell (not include header)
52+
[],
53+
54+
# PROPERTIES 04
55+
# Content of spc (not include header)
56+
[],
57+
58+
# PROPERTIES 05
59+
# Alignment of text in spanning cell (not include header)
60+
[] # Must have 2 arguments
3061
]
3162

3263
DICT_DISCRETE_COLUMNS_TYPE = {
33-
'BCTC3_CDKT_A_1': BCTC3_CDKT_A_1
64+
'BCTC3_CDKT_A_1': BCTC3_CDKT_A_1,
65+
'BCTC2_4C_1': BCTC2_4C_1
3466
}

utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ def rand_date():
4141
return random_date.strftime("%d.%m.%Y")
4242

4343
def rand_sub_arr(input_arr, nums_elements_sub_arr):
    """Return a sorted random sample of ``nums_elements_sub_arr`` items.

    Args:
        input_arr: Sequence to sample from (without replacement).
        nums_elements_sub_arr: Number of elements to pick.

    Returns:
        A new list holding the sampled elements in ascending order.

    Raises:
        ValueError: If more elements are requested than ``input_arr`` holds
            (or, via ``random.sample``, if the count is negative).
    """
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # merely printing would still fall through to a less helpful error raised
    # by ``random.sample``.
    if nums_elements_sub_arr > len(input_arr):
        raise ValueError(
            'The NBRR (nums bold rows rest) constant is set to a value greater than the NBRR in the input CSV_BD file ({} > {}) !!!'.format(nums_elements_sub_arr, len(input_arr))
        )
    sub_arr = random.sample(input_arr, nums_elements_sub_arr)
    sub_arr.sort()
    return sub_arr

0 commit comments

Comments
 (0)