feat(pdf-extract): 新增PDF尺寸区域提取功能

- 在PdfExtractionService中实现从PDF URL中提取尺寸数据
- 通过URL读取PDF字节流并加载文档进行文本位置解析
- 利用文本分组和维度识别器提取尺寸信息列表
- 在QmsInspectionStandardController中添加GET接口,支持通过URL参数提取尺寸数据
- 在QmsInspectionStandardControllerService中添加对应的服务方法,委托调用PdfExtractionService.extractDimensions
This commit is contained in:
曹鹏飞 2026-05-08 09:03:02 +08:00
parent f2901111b3
commit 9eccd96b00
3 changed files with 38 additions and 0 deletions

View File

@ -13,6 +13,7 @@ import jakarta.annotation.Resource;
import jakarta.validation.Valid;
import jakarta.validation.constraints.NotNull;
import model.DimensionResult;
import org.springframework.http.ResponseEntity;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
@ -106,4 +107,12 @@ public class QmsInspectionStandardController extends BaseController {
public ApiResult<List<DimensionResult>> pdfExtractRegion(@Valid @RequestBody QmsPdfExtractRegionQO request) throws IOException {
    // Hand the validated request body to the service layer, then wrap its result
    // in the success envelope returned to the client.
    List<DimensionResult> regionDimensions = inspectionStandardControllerService.pdfExtractRegion(request);
    return ApiResult.success(regionDimensions);
}
/**
 * Extracts the dimension data of the PDF referenced by the {@code url} request parameter.
 *
 * @param url location of the PDF, passed through unchanged to the service layer
 * @return a success envelope wrapping the list of dimensions returned by the service
 * @throws IOException propagated from the service call
 */
@GetMapping("/pdf/dimensions")
public ApiResult<List<DimensionResult>> extractDimensions(@RequestParam String url) throws IOException {
    List<DimensionResult> dimensions = inspectionStandardControllerService.extractDimensions(url);
    return ApiResult.success(dimensions);
}
}

View File

@ -713,4 +713,8 @@ public class QmsInspectionStandardControllerService {
);
return regionFilterService.mergeRegionResults(filtered);
}
/**
 * Extracts dimension data from the PDF at the given URL by delegating to
 * {@code pdfExtractionService.extractDimensions}.
 *
 * @param url location of the PDF, forwarded unchanged
 * @return the dimension list produced by the extraction service
 * @throws IOException propagated from the extraction service
 */
public List<DimensionResult> extractDimensions(String url) throws IOException {
    List<DimensionResult> dimensions = pdfExtractionService.extractDimensions(url);
    return dimensions;
}
}

View File

@ -15,6 +15,7 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Path;
import java.util.List;
@ -80,6 +81,30 @@ public class PdfExtractionService {
}
}
/**
 * Downloads the PDF at {@code pdfUrl} and extracts the dimensions found in it.
 *
 * <p>The document is read fully into memory, run through a {@code PositionedTextStripper}
 * with position sorting enabled, and the collected text elements are grouped by
 * {@code textGrouper} and handed to {@code dimensionIdentifier}.
 *
 * @param pdfUrl absolute {@code http} or {@code https} URL of the PDF to analyse
 * @return the dimensions returned by {@code dimensionIdentifier.identifyDimensions}
 * @throws MalformedURLException if {@code pdfUrl} cannot be parsed or uses a protocol
 *         other than http/https
 * @throws IOException if the download fails or times out, or the PDF cannot be loaded
 */
public List<DimensionResult> extractDimensions(String pdfUrl) throws IOException {
    // Bounded waits so an unresponsive host cannot hold the calling thread forever.
    // NOTE(review): the values are a conservative default; tune to the deployment.
    final int connectTimeoutMillis = 10_000;
    final int readTimeoutMillis = 30_000;

    URL source = new URL(pdfUrl);

    // The URL originates from the caller. Without this check, schemes such as file: or
    // jar: would allow reading resources local to the server.
    String protocol = source.getProtocol();
    if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
        throw new MalformedURLException("Unsupported PDF URL protocol: " + protocol);
    }
    // NOTE(review): internal hosts are still reachable over http(s) and the download size
    // is unbounded; consider a host allow-list and a size cap if this endpoint is exposed
    // to untrusted clients.

    // Fully qualified to avoid adding an import for a single use.
    java.net.URLConnection connection = source.openConnection();
    connection.setConnectTimeout(connectTimeoutMillis);
    connection.setReadTimeout(readTimeoutMillis);

    byte[] pdfBytes;
    try (InputStream in = connection.getInputStream()) {
        pdfBytes = in.readAllBytes();
    }

    try (PDDocument document = Loader.loadPDF(pdfBytes)) {
        PositionedTextStripper stripper = new PositionedTextStripper();
        stripper.setSortByPosition(true);
        // The written text itself is not used; the stripper is run over the document so
        // that its elements can be read back through getElements() afterwards.
        stripper.writeText(document, new StringWriter());
        List<TextElement> allElements = stripper.getElements();
        List<TextGroup> groups = textGrouper.groupTextElements(allElements);
        return dimensionIdentifier.identifyDimensions(groups);
    }
}
public static class ExtractionResult {
private final List<DimensionResult> dimensions;
private final int totalPages;