latest
This commit is contained in:
parent
70e3b66c95
commit
2265649669
3
.gitignore
vendored
3
.gitignore
vendored
@ -1 +1,2 @@
|
||||
.history
|
||||
.history
|
||||
*.png
|
||||
13
.vscode/launch.json
vendored
Normal file
13
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{"name":"Python Debugger: Current File","type":"debugpy","request":"launch","program":"${file}","console":"integratedTerminal"},
|
||||
{
|
||||
"name": "Python: Current File",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
||||
433
clear
Normal file
433
clear
Normal file
@ -0,0 +1,433 @@
|
||||
{
|
||||
"pdf_file_processed": "test2.pdf",
|
||||
"pdf_full_path": "/mnt/c/Users/admin/Downloads/test2.pdf",
|
||||
"pages_processed_spec": "5",
|
||||
"extraction_timestamp": "2025-06-03 08:55:13 EDT",
|
||||
"total_highlights_extracted": 20,
|
||||
"settings_used": {
|
||||
"clean_edges": true,
|
||||
"show_diff_percentage": true
|
||||
},
|
||||
"highlights_data": [
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 1,
|
||||
"text": "or prejudice in",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 53.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
53.75,
|
||||
116.0,
|
||||
63.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 2,
|
||||
"text": "unin",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 77.75,
|
||||
"x_position": 164.0,
|
||||
"rect_details": [
|
||||
164.0,
|
||||
77.75,
|
||||
169.0,
|
||||
87.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 3,
|
||||
"text": "uninformed about how ‘language can stand as a barrier to jus-",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 77.75,
|
||||
"x_position": 164.0,
|
||||
"rect_details": [
|
||||
164.0,
|
||||
77.75,
|
||||
405.0,
|
||||
87.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 4,
|
||||
"text": "tice or equal opportunity’.",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 89.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
89.75,
|
||||
158.0,
|
||||
99.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 5,
|
||||
"text": "linguistics,",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 137.75,
|
||||
"x_position": 188.0,
|
||||
"rect_details": [
|
||||
188.0,
|
||||
137.75,
|
||||
226.0,
|
||||
147.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 6,
|
||||
"text": "needs to make applied contributions to the understanding and solution of racial discrimination, criminal injustice, and other social problems.",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 149.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
149.75,
|
||||
408.0,
|
||||
171.75
|
||||
],
|
||||
"num_segments": 2
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 7,
|
||||
"text": "first",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 173.75,
|
||||
"x_position": 182.0,
|
||||
"rect_details": [
|
||||
182.0,
|
||||
173.75,
|
||||
198.0,
|
||||
183.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 8,
|
||||
"text": "at interpreters are not generally provided for ‘dialects’ of a language, only for foreign ‘languages’",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 197.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
197.75,
|
||||
408.0,
|
||||
219.75
|
||||
],
|
||||
"num_segments": 2
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 9,
|
||||
"text": "(§2),",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 197.75,
|
||||
"x_position": 182.0,
|
||||
"rect_details": [
|
||||
182.0,
|
||||
197.75,
|
||||
201.0,
|
||||
207.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 10,
|
||||
"text": "§3",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 209.75,
|
||||
"x_position": 398.0,
|
||||
"rect_details": [
|
||||
398.0,
|
||||
209.75,
|
||||
408.0,
|
||||
219.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 11,
|
||||
"text": "specific case of Rachel Jeantel’s dialect, a",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 221.75,
|
||||
"x_position": 84.0,
|
||||
"rect_details": [
|
||||
84.0,
|
||||
221.75,
|
||||
241.0,
|
||||
231.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 12,
|
||||
"text": "whether the credibility and intelligibility problems that led jurors to disregard Jeantel’s testimony were due",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 269.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
269.75,
|
||||
408.0,
|
||||
291.75
|
||||
],
|
||||
"num_segments": 2
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 13,
|
||||
"text": "§4 we",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 269.75,
|
||||
"x_position": 237.0,
|
||||
"rect_details": [
|
||||
237.0,
|
||||
269.75,
|
||||
257.0,
|
||||
279.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 14,
|
||||
"text": "dialect and insti-",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 317.75,
|
||||
"x_position": 342.0,
|
||||
"rect_details": [
|
||||
342.0,
|
||||
317.75,
|
||||
402.0,
|
||||
327.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 15,
|
||||
"text": "tutionalized racism negatively impact AAVE and other vernacular speakers i",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 329.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
329.75,
|
||||
367.0,
|
||||
339.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 16,
|
||||
"text": "(§5).",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 341.75,
|
||||
"x_position": 342.0,
|
||||
"rect_details": [
|
||||
342.0,
|
||||
341.75,
|
||||
355.0,
|
||||
351.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 17,
|
||||
"text": "summarize our conclusions a",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 353.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
353.75,
|
||||
170.0,
|
||||
363.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 18,
|
||||
"text": "(§6).",
|
||||
"color": "blue",
|
||||
"raw_rgb_values": [
|
||||
0.5607839822769165,
|
||||
0.8705880045890808,
|
||||
0.9764710068702698
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 365.75,
|
||||
"x_position": 220.0,
|
||||
"rect_details": [
|
||||
220.0,
|
||||
365.75,
|
||||
236.0,
|
||||
375.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 19,
|
||||
"text": "at nonstandard or vernacular dialects",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 407.75,
|
||||
"x_position": 206.0,
|
||||
"rect_details": [
|
||||
206.0,
|
||||
407.75,
|
||||
340.0,
|
||||
417.75
|
||||
],
|
||||
"num_segments": 1
|
||||
},
|
||||
{
|
||||
"page": 5,
|
||||
"highlight_id_on_page": 20,
|
||||
"text": "spoken most frequently and fluently by ethnic minorities and/or by less educated, working-class,orpoorpeopleworldwide.1",
|
||||
"color": "yellow",
|
||||
"raw_rgb_values": [
|
||||
1.0,
|
||||
0.9411770105361938,
|
||||
0.4000000059604645
|
||||
],
|
||||
"type": "highlight",
|
||||
"y_position": 431.75,
|
||||
"x_position": 60.0,
|
||||
"rect_details": [
|
||||
60.0,
|
||||
431.75,
|
||||
408.0,
|
||||
453.75
|
||||
],
|
||||
"num_segments": 2
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,4 +1,2 @@
|
||||
pdfplumber==0.10.3
|
||||
colorama==0.4.6
|
||||
pandas==2.0.3
|
||||
PyMuPDF==1.23.1
|
||||
|
||||
BIN
test/test2.pdf
Normal file
BIN
test/test2.pdf
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user