feat(pdf-extract): 新增PDF尺寸区域提取功能
- 在PdfExtractionService中实现从PDF URL中提取尺寸数据
- 通过URL读取PDF字节流并加载文档进行文本位置解析
- 利用文本分组和维度识别器提取尺寸信息列表
- 在QmsInspectionStandardController添加GET接口，支持通过URL参数提取尺寸数据
- 在QmsInspectionStandardControllerService中添加对应服务方法，调用pdfExtractionService接口
This commit is contained in:
parent
f2901111b3
commit
9eccd96b00
|
|
@ -13,6 +13,7 @@ import jakarta.annotation.Resource;
|
||||||
import jakarta.validation.Valid;
|
import jakarta.validation.Valid;
|
||||||
import jakarta.validation.constraints.NotNull;
|
import jakarta.validation.constraints.NotNull;
|
||||||
import model.DimensionResult;
|
import model.DimensionResult;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
import org.springframework.validation.annotation.Validated;
|
import org.springframework.validation.annotation.Validated;
|
||||||
import org.springframework.web.bind.annotation.*;
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
|
||||||
|
|
@ -106,4 +107,12 @@ public class QmsInspectionStandardController extends BaseController {
|
||||||
public ApiResult<List<DimensionResult>> pdfExtractRegion(@Valid @RequestBody QmsPdfExtractRegionQO request) throws IOException {
|
public ApiResult<List<DimensionResult>> pdfExtractRegion(@Valid @RequestBody QmsPdfExtractRegionQO request) throws IOException {
|
||||||
return ApiResult.success(inspectionStandardControllerService.pdfExtractRegion(request));
|
return ApiResult.success(inspectionStandardControllerService.pdfExtractRegion(request));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 提取PDF文件的所有区域数据
|
||||||
|
*/
|
||||||
|
@GetMapping("/pdf/dimensions")
|
||||||
|
public ApiResult<List<DimensionResult>> extractDimensions(@RequestParam String url) throws IOException {
|
||||||
|
return ApiResult.success(inspectionStandardControllerService.extractDimensions(url));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -713,4 +713,8 @@ public class QmsInspectionStandardControllerService {
|
||||||
);
|
);
|
||||||
return regionFilterService.mergeRegionResults(filtered);
|
return regionFilterService.mergeRegionResults(filtered);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<DimensionResult> extractDimensions(String url) throws IOException {
|
||||||
|
return pdfExtractionService.extractDimensions(url);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -80,6 +81,30 @@ public class PdfExtractionService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<DimensionResult> extractDimensions(String pdfUrl) throws IOException {
|
||||||
|
byte[] pdfBytes;
|
||||||
|
try (InputStream in = new URL(pdfUrl).openStream();
|
||||||
|
ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
|
||||||
|
byte[] chunk = new byte[8192];
|
||||||
|
int len;
|
||||||
|
while ((len = in.read(chunk)) != -1) {
|
||||||
|
buffer.write(chunk, 0, len);
|
||||||
|
}
|
||||||
|
pdfBytes = buffer.toByteArray();
|
||||||
|
}
|
||||||
|
try (PDDocument document = Loader.loadPDF(pdfBytes)) {
|
||||||
|
int totalPages = document.getNumberOfPages();
|
||||||
|
PositionedTextStripper stripper = new PositionedTextStripper();
|
||||||
|
stripper.setSortByPosition(true);
|
||||||
|
StringWriter writer = new StringWriter();
|
||||||
|
stripper.writeText(document, writer);
|
||||||
|
List<TextElement> allElements = stripper.getElements();
|
||||||
|
|
||||||
|
List<TextGroup> groups = textGrouper.groupTextElements(allElements);
|
||||||
|
return dimensionIdentifier.identifyDimensions(groups);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class ExtractionResult {
|
public static class ExtractionResult {
|
||||||
private final List<DimensionResult> dimensions;
|
private final List<DimensionResult> dimensions;
|
||||||
private final int totalPages;
|
private final int totalPages;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue