feat(pdf-extract): 实现PDF尺寸和公差的提取识别功能
- 新增DimensionIdentifier用于识别PDF中的尺寸和多种公差格式 - 支持对称公差、非对称公差、配合公差和螺纹标注的正则匹配 - 实现基于TextGroup的文本预处理和位置判断,提高识别准确度 - 创建DimensionResult作为尺寸识别结果的封装实体 - 增加PdfExtractionService服务实现PDF解析、文本分组和尺寸提取流程 - 新增配置类PdfExtractConfig,提供文本分组和尺寸识别组件的Spring Bean - 增加nflg-qms-pdf-extract模块及相关依赖管理,完成PDF尺寸提取的整体集成
This commit is contained in:
parent
fe9db7ec86
commit
e8142d0480
|
|
@ -23,6 +23,10 @@
|
|||
<groupId>com.nflg</groupId>
|
||||
<artifactId>nflg-wms-starter</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.nflg</groupId>
|
||||
<artifactId>nflg-qms-pdf-extract</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
package com.nflg.qms.admin.config;
|
||||
|
||||
import extraction.DimensionIdentifier;
|
||||
import extraction.TextGrouper;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import service.PdfExtractionService;
|
||||
import service.RegionFilterService;
|
||||
|
||||
@Configuration
|
||||
public class PdfExtractConfig {
|
||||
|
||||
@Bean
|
||||
public PdfExtractionService pdfExtractionService(TextGrouper textGrouper, DimensionIdentifier dimensionIdentifier) {
|
||||
return new PdfExtractionService(textGrouper,dimensionIdentifier);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public TextGrouper textGrouper() {
|
||||
return new TextGrouper();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public DimensionIdentifier dimensionIdentifier() {
|
||||
return new DimensionIdentifier();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public RegionFilterService regionFilterService() {
|
||||
return new RegionFilterService();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,18 +1,24 @@
|
|||
package com.nflg.qms.admin.controller;
|
||||
|
||||
import com.nflg.qms.admin.pojo.qo.QmsInspectionStandardSaveQO;
|
||||
import com.nflg.qms.admin.pojo.qo.QmsPdfExtractRegionQO;
|
||||
import com.nflg.qms.admin.service.QmsInspectionStandardControllerService;
|
||||
import com.nflg.wms.common.pojo.ApiResult;
|
||||
import com.nflg.wms.common.pojo.PageData;
|
||||
import com.nflg.wms.common.pojo.qo.*;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardDetailVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardDetailVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardVO;
|
||||
import com.nflg.wms.starter.BaseController;
|
||||
import jakarta.annotation.Resource;
|
||||
import jakarta.validation.Valid;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import model.DimensionResult;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
import org.springframework.web.bind.annotation.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 检验标准
|
||||
*/
|
||||
|
|
@ -92,4 +98,12 @@ public class QmsInspectionStandardController extends BaseController {
|
|||
inspectionStandardControllerService.delete(request.getIds());
|
||||
return ApiResult.success();
|
||||
}
|
||||
|
||||
/**
|
||||
* PDF文件提取区域数据
|
||||
*/
|
||||
@PostMapping("/pdf/extract-region")
|
||||
public ApiResult<List<DimensionResult>> pdfExtractRegion(@Valid @RequestBody QmsPdfExtractRegionQO request) throws IOException {
|
||||
return ApiResult.success(inspectionStandardControllerService.pdfExtractRegion(request));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ package com.nflg.qms.admin.controller;
|
|||
|
||||
import com.nflg.qms.admin.service.QmsInspectionStandardItemControllerService;
|
||||
import com.nflg.wms.common.pojo.ApiResult;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.wms.starter.BaseController;
|
||||
import jakarta.annotation.Resource;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package com.nflg.wms.common.pojo.qo;
|
||||
package com.nflg.qms.admin.pojo.qo;
|
||||
|
||||
import jakarta.validation.Valid;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
import model.TextPosition;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -167,7 +168,7 @@ public class QmsInspectionStandardSaveQO {
|
|||
/**
|
||||
* PDF信息
|
||||
*/
|
||||
private String pdfInfo;
|
||||
private TextPosition pdfInfo;
|
||||
|
||||
/**
|
||||
* 判定类型,0:直接判定;1:测量值
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
package com.nflg.qms.admin.pojo.qo;
|
||||
|
||||
import jakarta.validation.constraints.Min;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class QmsPdfExtractRegionQO {
|
||||
|
||||
/**
|
||||
* 文件地址
|
||||
*/
|
||||
@NotBlank
|
||||
private String url;
|
||||
|
||||
/**
|
||||
* 页码
|
||||
*/
|
||||
@Min(1)
|
||||
@NotNull
|
||||
private int pageNum;
|
||||
|
||||
/**
|
||||
* x轴起始位置
|
||||
*/
|
||||
@Min(0)
|
||||
@NotNull
|
||||
private float x;
|
||||
|
||||
/**
|
||||
* y轴起始位置
|
||||
*/
|
||||
@Min(0)
|
||||
@NotNull
|
||||
private float y;
|
||||
|
||||
/**
|
||||
* 宽度
|
||||
*/
|
||||
@Min(0)
|
||||
@NotNull
|
||||
private float width;
|
||||
|
||||
/**
|
||||
* 高度
|
||||
*/
|
||||
@Min(0)
|
||||
@NotNull
|
||||
private float height;
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.nflg.wms.common.pojo.vo;
|
||||
package com.nflg.qms.admin.pojo.vo;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package com.nflg.wms.common.pojo.vo;
|
||||
package com.nflg.qms.admin.pojo.vo;
|
||||
|
||||
import lombok.Data;
|
||||
import model.TextPosition;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
|
|
@ -38,7 +39,7 @@ public class QmsInspectionStandardItemContentVO {
|
|||
/**
|
||||
* PDF信息
|
||||
*/
|
||||
private String pdfInfo;
|
||||
private TextPosition pdfInfo;
|
||||
|
||||
/**
|
||||
* 判定类型,0:直接判定;1:测量值
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package com.nflg.wms.common.pojo.vo;
|
||||
package com.nflg.qms.admin.pojo.vo;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
|
|
@ -1,18 +1,22 @@
|
|||
package com.nflg.qms.admin.service;
|
||||
|
||||
import cn.hutool.core.collection.CollectionUtil;
|
||||
import cn.hutool.core.convert.Convert;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import com.nflg.qms.admin.pojo.qo.QmsPdfExtractRegionQO;
|
||||
import com.nflg.wms.common.exception.NflgException;
|
||||
import com.nflg.wms.common.pojo.PageData;
|
||||
import com.nflg.wms.common.pojo.dto.QmsInspectionStandardDetailDTO;
|
||||
import com.nflg.wms.common.pojo.qo.QmsInspectionStandardAddQO;
|
||||
import com.nflg.wms.common.pojo.qo.QmsInspectionStandardEditQO;
|
||||
import com.nflg.wms.common.pojo.qo.QmsInspectionStandardSaveQO;
|
||||
import com.nflg.qms.admin.pojo.qo.QmsInspectionStandardSaveQO;
|
||||
import com.nflg.wms.common.pojo.qo.QmsInspectionStandardSearchQO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardDetailVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardItemVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardDetailVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardItemVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardVO;
|
||||
import com.nflg.wms.common.util.UserUtil;
|
||||
import com.nflg.wms.common.util.VUtil;
|
||||
|
|
@ -21,9 +25,14 @@ import com.nflg.wms.repository.mapper.QmsInspectionStandardMapper;
|
|||
import com.nflg.wms.repository.service.*;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import model.DimensionResult;
|
||||
import model.TextPosition;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import service.PdfExtractionService;
|
||||
import service.RegionFilterService;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
|
@ -59,6 +68,12 @@ public class QmsInspectionStandardControllerService {
|
|||
@Resource
|
||||
private IQmsAqlPriorityValueService aqlPriorityValueService;
|
||||
|
||||
@Resource
|
||||
private PdfExtractionService pdfExtractionService;
|
||||
|
||||
@Resource
|
||||
private RegionFilterService regionFilterService;
|
||||
|
||||
/**
|
||||
* 分页查询检验标准
|
||||
*/
|
||||
|
|
@ -153,10 +168,10 @@ public class QmsInspectionStandardControllerService {
|
|||
}
|
||||
|
||||
// 2. 查询检验标准详情(关联物料等信息)
|
||||
QmsInspectionStandardDetailVO detail = inspectionStandardMapper.getDetailById(id);
|
||||
if (detail == null) {
|
||||
detail = new QmsInspectionStandardDetailVO();
|
||||
}
|
||||
QmsInspectionStandardDetailDTO detailDTO = inspectionStandardMapper.getDetailById(id);
|
||||
QmsInspectionStandardDetailVO detail = Objects.isNull(detailDTO)
|
||||
? new QmsInspectionStandardDetailVO()
|
||||
: Convert.convert(QmsInspectionStandardDetailVO.class, detailDTO);
|
||||
|
||||
// 填充基础字段
|
||||
detail.setId(standard.getId());
|
||||
|
|
@ -289,7 +304,9 @@ public class QmsInspectionStandardControllerService {
|
|||
vo.setName(content.getName());
|
||||
vo.setTestStandard(content.getTestStandard());
|
||||
vo.setLegend(content.getLegend());
|
||||
vo.setPdfInfo(content.getPdfInfo());
|
||||
if (StrUtil.isNotBlank(content.getPdfInfo())) {
|
||||
vo.setPdfInfo(JSONUtil.toBean(content.getPdfInfo(), TextPosition.class));
|
||||
}
|
||||
vo.setJudgmentType(content.getJudgmentType());
|
||||
vo.setCreateUserName(content.getCreateUserName());
|
||||
vo.setCreateTime(content.getCreateTime());
|
||||
|
|
@ -665,7 +682,7 @@ public class QmsInspectionStandardControllerService {
|
|||
content.setLegend(qo.getLegend());
|
||||
}
|
||||
if (qo.getPdfInfo() != null) {
|
||||
content.setPdfInfo(qo.getPdfInfo());
|
||||
content.setPdfInfo(JSONUtil.toJsonStr(qo.getPdfInfo()));
|
||||
}
|
||||
content.setJudgmentType(qo.getJudgmentType());
|
||||
|
||||
|
|
@ -680,4 +697,17 @@ public class QmsInspectionStandardControllerService {
|
|||
content.setCreateTime(now);
|
||||
}
|
||||
}
|
||||
|
||||
public List<DimensionResult> pdfExtractRegion(QmsPdfExtractRegionQO request) throws IOException {
|
||||
List<DimensionResult> allResults = pdfExtractionService.extractAllDimensionsForRegion(request.getUrl());
|
||||
List<DimensionResult> filtered = regionFilterService.filterByRegion(
|
||||
allResults,
|
||||
request.getPageNum(),
|
||||
request.getX(),
|
||||
request.getY(),
|
||||
request.getWidth(),
|
||||
request.getHeight()
|
||||
);
|
||||
return regionFilterService.mergeRegionResults(filtered);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,16 @@
|
|||
package com.nflg.qms.admin.service;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import cn.hutool.json.JSONUtil;
|
||||
import com.nflg.wms.common.exception.NflgException;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.qms.admin.pojo.vo.QmsInspectionStandardItemContentVO;
|
||||
import com.nflg.wms.repository.entity.QmsInspectionStandardItem;
|
||||
import com.nflg.wms.repository.entity.QmsInspectionStandardItemContent;
|
||||
import com.nflg.wms.repository.service.IQmsInspectionStandardItemContentService;
|
||||
import com.nflg.wms.repository.service.IQmsInspectionStandardItemService;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import model.TextPosition;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -59,7 +62,9 @@ public class QmsInspectionStandardItemControllerService {
|
|||
vo.setName(content.getName());
|
||||
vo.setTestStandard(content.getTestStandard());
|
||||
vo.setLegend(content.getLegend());
|
||||
vo.setPdfInfo(content.getPdfInfo());
|
||||
if (StrUtil.isNotBlank(content.getPdfInfo())) {
|
||||
vo.setPdfInfo(JSONUtil.toBean(content.getPdfInfo(), TextPosition.class));
|
||||
}
|
||||
vo.setJudgmentType(content.getJudgmentType());
|
||||
vo.setCreateUserName(content.getCreateUserName());
|
||||
vo.setCreateTime(content.getCreateTime());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,35 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<!-- Inherits from the nflg-wms parent POM; groupId and version of this module come from there. -->
<parent>
|
||||
<groupId>com.nflg</groupId>
|
||||
<artifactId>nflg-wms</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<!-- Coordinates and description of this module, packaged as a plain jar. -->
<artifactId>nflg-qms-pdf-extract</artifactId>
|
||||
<name>模块-qms pdf数据提取</name>
|
||||
<description>从pdf中提取数据</description>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<!-- Lombok: provided scope and optional, so it is not passed on to modules that depend on this one. -->
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
<scope>provided</scope>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
<!-- Apache PDFBox; the version is pinned in this module. -->
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>3.0.4</version>
|
||||
</dependency>
|
||||
<!-- SLF4J API; the version is pinned in this module. NOTE(review): confirm it agrees with any version managed by the parent POM. -->
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>2.0.12</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,307 @@
|
|||
package extraction;
|
||||
|
||||
|
||||
import model.DimensionResult;
|
||||
import model.TextGroup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class DimensionIdentifier {
|
||||
|
||||
// 尺寸 + 对称公差
|
||||
private static final Pattern PAT_DIM_SYM_TOL = Pattern.compile(
|
||||
"([ΦφØ∅]?\\s*\\d+\\.?\\d*)\\s*[±]\\s*(\\d+\\.?\\d*)");
|
||||
|
||||
// 尺寸 + 非对称公差(斜线分隔)
|
||||
private static final Pattern PAT_DIM_ASYM_TOL = Pattern.compile(
|
||||
"([ΦφØ∅]?\\s*\\d+\\.?\\d*)\\s*([+-]\\d+\\.?\\d*)\\s*/\\s*([+-]\\d+\\.?\\d*)");
|
||||
|
||||
// 尺寸 + 非对称公差(空格分隔)
|
||||
private static final Pattern PAT_DIM_LIMIT_TOL = Pattern.compile(
|
||||
"([ΦφØ∅]?\\s*\\d+\\.?\\d*)\\s+([+-]\\d+\\.?\\d*)\\s+([+-]\\d+\\.?\\d*)");
|
||||
|
||||
// 尺寸 + 配合公差代号
|
||||
private static final Pattern PAT_DIM_FIT = Pattern.compile(
|
||||
"([ΦφØ∅]?\\s*\\d+\\.?\\d*)\\s*([A-HJ-Zj-z]\\d{1,2})\\b");
|
||||
|
||||
// 螺纹标注(M型公制螺纹 + G型管螺纹 + Rc/NPT等)
|
||||
private static final Pattern PAT_THREAD = Pattern.compile(
|
||||
"(M\\d+\\.?\\d*(?:\\s*[xX×]\\s*\\d+\\.?\\d*)?|(?:G|Rc|Rp|NPT)\\s*\\d+(?:/\\d+)?)");
|
||||
|
||||
// 独立公差文本
|
||||
private static final Pattern PAT_TOLERANCE = Pattern.compile(
|
||||
"[±]\\s*\\d+\\.?\\d*|[+-]\\s*\\d+\\.?\\d*");
|
||||
|
||||
// 复合公差文本
|
||||
private static final Pattern PAT_COMPOUND_TOL = Pattern.compile(
|
||||
"^([+-]\\d+\\.?\\d*)\\s+(0|[+-]?\\d+\\.?\\d*)$");
|
||||
|
||||
// 纯尺寸数值
|
||||
private static final Pattern PAT_PLAIN_DIM = Pattern.compile(
|
||||
"([ΦφØ∅]\\s*\\d+\\.?\\d*|[Rr]\\d+\\.?\\d*|[Cc]\\d+\\.?\\d*|\\d+\\.\\d+|\\d+)");
|
||||
|
||||
public List<DimensionResult> identifyDimensions(List<TextGroup> groups) {
|
||||
return identifyDimensions(groups, true);
|
||||
}
|
||||
|
||||
public List<DimensionResult> identifyDimensions(List<TextGroup> groups, boolean toleranceOnly) {
|
||||
List<DimensionResult> results = new ArrayList<>();
|
||||
Set<Integer> processed = new HashSet<>();
|
||||
Set<Integer> usedAsTolerance = new HashSet<>();
|
||||
|
||||
for (int i = 0; i < groups.size(); i++) {
|
||||
if (processed.contains(i)) continue;
|
||||
TextGroup g = groups.get(i);
|
||||
|
||||
String text = TextNormalizer.normalizeText(g.getText().trim());
|
||||
|
||||
// --- 基本过滤(两种模式都适用)---
|
||||
if (text.length() > 40) continue;
|
||||
if (text.isEmpty()) continue;
|
||||
if (text.matches("^[A-Za-z\\s]+$")) continue;
|
||||
if (TitleBlockFilter.isInTitleBlockRegion(g)) continue;
|
||||
|
||||
if (toleranceOnly) {
|
||||
// === toleranceOnly 模式:严格过滤 ===
|
||||
Matcher threadEarly = PAT_THREAD.matcher(text);
|
||||
boolean isThreadText = threadEarly.find() && threadEarly.start() == 0;
|
||||
if (!isThreadText && TitleBlockFilter.shouldSkipText(text)) continue;
|
||||
if (TitleBlockFilter.isToleranceOnly(text)) continue;
|
||||
if (TitleBlockFilter.isSurfaceRoughness(text)) continue;
|
||||
if (TitleBlockFilter.isGdtTolerance(text)) continue;
|
||||
if (text.matches(".*\\d+\\.\\d+\\d+\\.\\d+.*") && !text.contains("±") && !text.contains("/")) continue;
|
||||
}
|
||||
// === 区域模式(!toleranceOnly):不做内容过滤,直接进入模式匹配 ===
|
||||
|
||||
Matcher m;
|
||||
|
||||
// 0) 复合公差文本
|
||||
m = PAT_COMPOUND_TOL.matcher(text);
|
||||
if (m.matches()) {
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 1) 对称公差
|
||||
m = PAT_DIM_SYM_TOL.matcher(text);
|
||||
if (m.find()) {
|
||||
results.add(new DimensionResult(
|
||||
m.group(1).trim(), "±" + m.group(2), "dimension", g));
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2) 非对称公差(斜线)
|
||||
m = PAT_DIM_ASYM_TOL.matcher(text);
|
||||
if (m.find()) {
|
||||
results.add(new DimensionResult(
|
||||
m.group(1).trim(), m.group(2) + "/" + m.group(3), "dimension", g));
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3) 非对称公差(空格)
|
||||
m = PAT_DIM_LIMIT_TOL.matcher(text);
|
||||
if (m.find()) {
|
||||
results.add(new DimensionResult(
|
||||
m.group(1).trim(), m.group(2) + " " + m.group(3), "dimension", g));
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 4) 配合公差
|
||||
m = PAT_DIM_FIT.matcher(text);
|
||||
if (m.find() && !text.contains("-") && !text.contains("/")) {
|
||||
results.add(new DimensionResult(
|
||||
m.group(1).trim(), m.group(2), "dimension", g));
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 5) 螺纹标注 - toleranceOnly 模式下跳过
|
||||
m = PAT_THREAD.matcher(text);
|
||||
if (m.find() && m.start() == 0) {
|
||||
if (!toleranceOnly) {
|
||||
results.add(new DimensionResult(text, null, "dimension", g));
|
||||
processed.add(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 6) 纯尺寸数值
|
||||
m = PAT_PLAIN_DIM.matcher(text);
|
||||
if (m.find()) {
|
||||
String dim = m.group(1).trim();
|
||||
int maxExtra = toleranceOnly ? 3 : 15;
|
||||
if (text.length() > dim.length() + maxExtra) {
|
||||
// 区域模式下超长文本兜底输出全文
|
||||
if (!toleranceOnly) {
|
||||
results.add(new DimensionResult(text, null, "dimension", g));
|
||||
processed.add(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
String numPart = dim.replaceAll("[^\\d.]", "");
|
||||
if (numPart.isEmpty()) continue;
|
||||
double val;
|
||||
try {
|
||||
val = Double.parseDouble(numPart);
|
||||
double minVal = toleranceOnly ? 0.5 : 0.001;
|
||||
if (val < minVal || val > 9999) continue;
|
||||
} catch (NumberFormatException e) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String nearbyTol = findNearbyTolerance(g, groups, i, usedAsTolerance);
|
||||
|
||||
// toleranceOnly 模式下仅输出带公差的尺寸
|
||||
if (toleranceOnly && nearbyTol == null) continue;
|
||||
// 非 toleranceOnly 模式下,单字符无公差无Φ符号 → 输出完整文本(而非跳过)
|
||||
if (!toleranceOnly && nearbyTol == null && dim.length() == 1 && !hasNearbyPhiSymbol(g, groups)) {
|
||||
if (text.length() > 1) {
|
||||
results.add(new DimensionResult(text, null, "dimension", g));
|
||||
processed.add(i);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 区域模式下使用完整文本(含描述),toleranceOnly 模式只用尺寸值
|
||||
String dimText = (!toleranceOnly && text.length() > dim.length() + 3) ? text : dim;
|
||||
results.add(new DimensionResult(dimText, nearbyTol, "dimension", g));
|
||||
processed.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 7) 区域模式兜底:未被任何模式匹配的文本,直接作为原始内容输出
|
||||
if (!toleranceOnly) {
|
||||
results.add(new DimensionResult(text, null, "dimension", g));
|
||||
processed.add(i);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private boolean hasNearbyPhiSymbol(TextGroup dimGroup, List<TextGroup> allGroups) {
|
||||
float searchDist = dimGroup.getFontSize() * 3.0f;
|
||||
for (TextGroup other : allGroups) {
|
||||
if (other.getPageNum() != dimGroup.getPageNum()) continue;
|
||||
String t = other.getText().trim();
|
||||
if (!t.equals("¡¤") && !t.equals("Φ") && !t.equals("φ") && !t.equals("Ø") && !t.equals("∅"))
|
||||
continue;
|
||||
float dx = Math.abs(other.getX() - dimGroup.getX());
|
||||
float dy = Math.abs(other.getY() - dimGroup.getY());
|
||||
if (dx < searchDist && dy < searchDist) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private String findNearbyTolerance(TextGroup dimGroup, List<TextGroup> allGroups,
|
||||
int dimIndex, Set<Integer> usedAsTolerance) {
|
||||
float effWidth = dimGroup.getWidth() > 0 ? dimGroup.getWidth()
|
||||
: dimGroup.getFontSize() * dimGroup.getText().trim().length() * 0.5f;
|
||||
|
||||
float searchXRight = dimGroup.getFontSize() * 2.5f;
|
||||
float searchYRight = dimGroup.getFontSize() * 1.5f;
|
||||
float searchXVert = dimGroup.getFontSize() * 1.0f;
|
||||
float searchYVert = dimGroup.getFontSize() * 5.0f;
|
||||
|
||||
List<String> tolParts = new ArrayList<>();
|
||||
List<Integer> tolIndices = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < allGroups.size(); i++) {
|
||||
if (i == dimIndex) continue;
|
||||
if (usedAsTolerance.contains(i)) continue;
|
||||
TextGroup other = allGroups.get(i);
|
||||
if (other.getPageNum() != dimGroup.getPageNum()) continue;
|
||||
|
||||
float dxFromRight = other.getX() - (dimGroup.getX() + effWidth);
|
||||
float absDxFromStart = Math.abs(other.getX() - dimGroup.getX());
|
||||
float dy = Math.abs(other.getY() - dimGroup.getY());
|
||||
|
||||
boolean rightZone = dxFromRight > -effWidth * 0.3f
|
||||
&& dxFromRight < searchXRight && dy < searchYRight;
|
||||
boolean vertZone = absDxFromStart < searchXVert
|
||||
&& dy > searchYRight && dy < searchYVert;
|
||||
|
||||
if (!rightZone && !vertZone) continue;
|
||||
|
||||
if (rightZone && !vertZone) {
|
||||
boolean hasBetterCandidate = false;
|
||||
for (TextGroup cand : allGroups) {
|
||||
if (cand == dimGroup || cand.getPageNum() != dimGroup.getPageNum()) continue;
|
||||
if (Math.abs(cand.getFontSize() - dimGroup.getFontSize()) > 1.0f) continue;
|
||||
String candText = TextNormalizer.normalizeText(cand.getText().trim());
|
||||
if (TitleBlockFilter.isSurfaceRoughness(candText) || TitleBlockFilter.isToleranceOnly(candText)) continue;
|
||||
float candDxFromTol = Math.abs(other.getX() - cand.getX());
|
||||
float candDyFromTol = Math.abs(other.getY() - cand.getY());
|
||||
if (candDxFromTol < searchXVert
|
||||
&& candDyFromTol > searchYRight && candDyFromTol < searchYVert) {
|
||||
hasBetterCandidate = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (hasBetterCandidate) continue;
|
||||
}
|
||||
|
||||
String otherText = TextNormalizer.normalizeText(other.getText().trim());
|
||||
|
||||
if (TitleBlockFilter.isSurfaceRoughness(otherText)) continue;
|
||||
|
||||
boolean bareSmallDecimal = (rightZone || vertZone) && otherText.matches("^0\\.\\d{1,3}$");
|
||||
|
||||
if (TitleBlockFilter.isGdtTolerance(otherText) && !bareSmallDecimal) continue;
|
||||
|
||||
if (other.getFontSize() <= dimGroup.getFontSize() * 0.9 || otherText.contains("±") || bareSmallDecimal) {
|
||||
|
||||
Matcher cm = PAT_COMPOUND_TOL.matcher(otherText);
|
||||
if (cm.matches()) {
|
||||
tolParts.clear();
|
||||
tolIndices.clear();
|
||||
tolParts.add(cm.group(1) + " / " + cm.group(2));
|
||||
tolIndices.add(i);
|
||||
break;
|
||||
}
|
||||
|
||||
Matcher tm = PAT_TOLERANCE.matcher(otherText);
|
||||
if (tm.find()) {
|
||||
if (tm.start() > 0 && Character.isDigit(otherText.charAt(tm.start() - 1))) {
|
||||
continue;
|
||||
}
|
||||
tolParts.add(otherText);
|
||||
tolIndices.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (otherText.equals("0") && other.getFontSize() < dimGroup.getFontSize() * 0.85) {
|
||||
tolParts.add("0");
|
||||
tolIndices.add(i);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bareSmallDecimal) {
|
||||
tolParts.add("±" + otherText);
|
||||
tolIndices.add(i);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!tolParts.isEmpty()) {
|
||||
usedAsTolerance.addAll(tolIndices);
|
||||
tolParts.sort((a, b) -> {
|
||||
boolean aPos = a.startsWith("+") || a.startsWith("±");
|
||||
boolean bPos = b.startsWith("+") || b.startsWith("±");
|
||||
return Boolean.compare(bPos, aPos);
|
||||
});
|
||||
return String.join(" / ", tolParts);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
package extraction;
|
||||
|
||||
import model.TextElement;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.apache.pdfbox.text.TextPosition;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class PositionedTextStripper extends PDFTextStripper {
|
||||
private final List<TextElement> elements = new ArrayList<>();
|
||||
private int currentPage = 0;
|
||||
private float currentPageWidth = 0;
|
||||
private float currentPageHeight = 0;
|
||||
|
||||
@Override
|
||||
protected void startPage(PDPage page) throws IOException {
|
||||
currentPage++;
|
||||
PDRectangle mediaBox = page.getMediaBox();
|
||||
currentPageWidth = mediaBox.getWidth();
|
||||
currentPageHeight = mediaBox.getHeight();
|
||||
super.startPage(page);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void writeString(String text, List<TextPosition> textPositions) throws IOException {
|
||||
if (textPositions == null || textPositions.isEmpty()) {
|
||||
super.writeString(text, textPositions);
|
||||
return;
|
||||
}
|
||||
|
||||
TextPosition first = textPositions.get(0);
|
||||
TextPosition last = textPositions.get(textPositions.size() - 1);
|
||||
|
||||
TextElement elem = new TextElement();
|
||||
elem.setText(text.trim());
|
||||
elem.setPageNum(currentPage);
|
||||
elem.setX(first.getX());
|
||||
elem.setY(first.getY());
|
||||
elem.setWidth(Math.abs((last.getX() + last.getWidth()) - first.getX()));
|
||||
elem.setHeight(Math.max(1, Math.abs(first.getHeight())));
|
||||
elem.setFontSize(first.getFontSizeInPt());
|
||||
elem.setPageWidth(currentPageWidth);
|
||||
elem.setPageHeight(currentPageHeight);
|
||||
|
||||
if (!elem.getText().isEmpty()) {
|
||||
elements.add(elem);
|
||||
}
|
||||
|
||||
super.writeString(text, textPositions);
|
||||
}
|
||||
|
||||
public List<TextElement> getElements() {
|
||||
return elements;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
package extraction;
|
||||
|
||||
import model.TextElement;
|
||||
import model.TextGroup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
public class TextGrouper {
|
||||
|
||||
public List<TextGroup> groupTextElements(List<TextElement> elements) {
|
||||
if (elements.isEmpty()) return Collections.emptyList();
|
||||
|
||||
List<TextElement> sorted = new ArrayList<>(elements);
|
||||
sorted.sort(Comparator
|
||||
.comparingInt(TextElement::getPageNum)
|
||||
.thenComparingDouble(TextElement::getY)
|
||||
.thenComparingDouble(TextElement::getX));
|
||||
|
||||
List<TextGroup> groups = new ArrayList<>();
|
||||
TextGroup current = null;
|
||||
|
||||
for (TextElement elem : sorted) {
|
||||
boolean merge = false;
|
||||
if (current != null
|
||||
&& current.getPageNum() == elem.getPageNum()
|
||||
&& Math.abs(current.getY() - elem.getY()) < Math.max(elem.getHeight(), current.getHeight()) * 0.5
|
||||
&& (elem.getX() - (current.getX() + current.getWidth())) < Math.max(elem.getFontSize(), current.getFontSize()) * 0.8
|
||||
&& (elem.getX() - (current.getX() + current.getWidth())) > -Math.max(elem.getFontSize(), current.getFontSize()) * 2) {
|
||||
|
||||
float maxFs = Math.max(current.getFontSize(), elem.getFontSize());
|
||||
float minFs = Math.min(current.getFontSize(), elem.getFontSize());
|
||||
if (maxFs > 0 && minFs / maxFs < 0.7f) {
|
||||
merge = false;
|
||||
} else {
|
||||
boolean curEndsWithDigit = !current.getText().isEmpty()
|
||||
&& Character.isDigit(current.getText().charAt(current.getText().length() - 1));
|
||||
boolean elemStartsWithDigit = !elem.getText().isEmpty()
|
||||
&& Character.isDigit(elem.getText().charAt(0));
|
||||
if (curEndsWithDigit && elemStartsWithDigit) {
|
||||
if (current.getText().contains(".") && elem.getText().contains(".")) {
|
||||
merge = false;
|
||||
} else if (current.getText().matches("\\d+\\.?\\d*")) {
|
||||
merge = false;
|
||||
} else {
|
||||
merge = true;
|
||||
}
|
||||
} else {
|
||||
merge = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (merge) {
|
||||
float gap = elem.getX() - (current.getX() + current.getWidth());
|
||||
if (gap > elem.getFontSize() * 0.3) {
|
||||
current.setText(current.getText() + " ");
|
||||
}
|
||||
current.setText(current.getText() + elem.getText());
|
||||
float newRight = Math.max(current.getX() + current.getWidth(), elem.getX() + elem.getWidth());
|
||||
float newLeft = Math.min(current.getX(), elem.getX());
|
||||
current.setX(newLeft);
|
||||
current.setWidth(newRight - newLeft);
|
||||
current.setHeight(Math.max(current.getHeight(), elem.getHeight()));
|
||||
current.getElements().add(elem);
|
||||
} else {
|
||||
current = new TextGroup();
|
||||
current.setText(elem.getText());
|
||||
current.setPageNum(elem.getPageNum());
|
||||
current.setX(elem.getX());
|
||||
current.setY(elem.getY());
|
||||
current.setWidth(elem.getWidth());
|
||||
current.setHeight(elem.getHeight());
|
||||
current.setFontSize(elem.getFontSize());
|
||||
current.setPageWidth(elem.getPageWidth());
|
||||
current.setPageHeight(elem.getPageHeight());
|
||||
current.getElements().add(elem);
|
||||
groups.add(current);
|
||||
}
|
||||
}
|
||||
|
||||
return groups;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package extraction;
|
||||
|
||||
/**
 * Repairs mis-decoded character sequences in text extracted from PDFs.
 */
public class TextNormalizer {

    /**
     * Pairs of (garbled sequence, replacement), applied from top to bottom. The order matters:
     * the last entry deletes every remaining U+FFFD, so the three-character sequence that
     * contains U+FFFD has to be handled before it.
     */
    private static final String[][] REPLACEMENTS = {
            {"\u00a1\u00a4", "\u03a6"},
            {"\u00a1\u00e3", "\u00b0"},
            {"\u00a1\u00c0", "\u00b1"},
            {"\u00a6\u00b5", "\u03a6"},
            {"\uffc3n\uffc3", "\u03a6"},
            {"\uffc3$\uffc3", "\u03a6"},
            {"\ufffdn\ufffd", "\u00d8"},
            {"\ufffd", ""},
    };

    /**
     * Replaces each known garbled sequence by the character it stands for (diameter sign,
     * degree sign, plus-minus sign) and removes leftover replacement characters.
     *
     * @param text the text to clean up; must not be {@code null}
     * @return the cleaned text
     */
    public static String normalizeText(String text) {
        String cleaned = text;
        for (String[] pair : REPLACEMENTS) {
            cleaned = cleaned.replace(pair[0], pair[1]);
        }
        return cleaned;
    }
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
package extraction;
|
||||
|
||||
import model.TextGroup;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class TitleBlockFilter {
|
||||
|
||||
private static final Pattern PAT_SKIP_TEXT = Pattern.compile(
|
||||
".*(" +
|
||||
"GB/T|QT\\d{3}|CR\\d{3}|NF|" +
|
||||
"比例|截面|技术要求|锪平|螺纹深度|通孔|配作|" +
|
||||
"设\\s*计|校\\s*对|审|批\\s*准|工\\s*艺|" +
|
||||
"\\d{2}-\\d{2}-\\d{4}|" +
|
||||
"\\d{3}-[A-Z]\\d{2}|" +
|
||||
"版本|序号|修\\s*订|编\\s*码|代\\s*号|" +
|
||||
"重\\s*量|名\\s*称|材\\s*料|备\\s*注|数量|" +
|
||||
"页|共|阶\\s*段|标\\s*记|分\\s*区|更改|" +
|
||||
"级|涂层|膜厚|颜色|RAL|" +
|
||||
"铸[件造]|拔模|未注|倒角|去毛刺|热处理|" +
|
||||
"凸起|文字|Work in|JINRONG|" +
|
||||
"螺栓|螺纹|圆柱销|轴承座|上盖|底座|分体|" +
|
||||
"福建|南方|机械|有限公司|" +
|
||||
"其余|首版|赖金荣|10\\.9级|" +
|
||||
"单件|总计|腐蚀|丙烯酸|石墨烯|防腐|配套加工|" +
|
||||
"检验|标准化|设计变更" +
|
||||
").*", Pattern.CASE_INSENSITIVE);
|
||||
|
||||
public static boolean shouldSkipText(String text) {
|
||||
return PAT_SKIP_TEXT.matcher(text).matches();
|
||||
}
|
||||
|
||||
public static boolean isInTitleBlockRegion(TextGroup g) {
|
||||
float relY = g.getY() / g.getPageHeight();
|
||||
float relX = g.getX() / g.getPageWidth();
|
||||
if (relX > 0.55 && relY > 0.60) return true;
|
||||
return relY > 0.77;
|
||||
}
|
||||
|
||||
public static boolean isToleranceOnly(String text) {
|
||||
String t = text.trim();
|
||||
return t.matches("^[+-]\\s*\\d+\\.?\\d*$");
|
||||
}
|
||||
|
||||
public static boolean isSurfaceRoughness(String text) {
|
||||
String t = text.trim();
|
||||
return t.matches("^(0\\.4|0\\.8|1\\.6|3\\.2|6\\.3|12\\.5|25|50)$");
|
||||
}
|
||||
|
||||
public static boolean isGdtTolerance(String text) {
|
||||
String t = text.trim();
|
||||
return t.matches("^0\\.\\d{1,3}(\\s+[A-Z]\\d?)?$");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
package model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class DimensionResult {
|
||||
private String dimension;
|
||||
private String tolerance;
|
||||
private String type;
|
||||
private double x, y;
|
||||
private double width, height;
|
||||
private int page;
|
||||
private int sortOrder;
|
||||
|
||||
public DimensionResult() {}
|
||||
|
||||
public DimensionResult(String dimension, String tolerance, String type, TextGroup g) {
|
||||
this.dimension = dimension;
|
||||
this.tolerance = tolerance;
|
||||
this.type = type;
|
||||
this.x = round(g.getX());
|
||||
this.y = round(g.getY() - g.getHeight());
|
||||
this.width = round(g.getWidth());
|
||||
this.height = round(g.getHeight());
|
||||
this.page = g.getPageNum();
|
||||
}
|
||||
|
||||
private static double round(float v) {
|
||||
return Math.round(v * 100.0) / 100.0;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
package model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class TextElement {
|
||||
private String text;
|
||||
private int pageNum;
|
||||
private float x, y;
|
||||
private float width, height;
|
||||
private float fontSize;
|
||||
private float pageWidth, pageHeight;
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class TextGroup {
|
||||
private String text;
|
||||
private int pageNum;
|
||||
private float x, y, width, height;
|
||||
private float fontSize;
|
||||
private float pageWidth, pageHeight;
|
||||
private List<TextElement> elements = new ArrayList<>();
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
package model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class TextPosition {
|
||||
|
||||
/**
|
||||
* 页码
|
||||
*/
|
||||
private int pageNum;
|
||||
|
||||
/**
|
||||
* x轴起始位置
|
||||
*/
|
||||
private float x;
|
||||
|
||||
/**
|
||||
* y轴起始位置
|
||||
*/
|
||||
private float y;
|
||||
|
||||
/**
|
||||
* 宽度
|
||||
*/
|
||||
private float width;
|
||||
|
||||
/**
|
||||
* 高度
|
||||
*/
|
||||
private float height;
|
||||
|
||||
/**
|
||||
* 页宽
|
||||
*/
|
||||
private float pageWidth;
|
||||
|
||||
/**
|
||||
* 页高
|
||||
*/
|
||||
private float pageHeight;
|
||||
|
||||
/**
|
||||
* 序号
|
||||
*/
|
||||
private int sortOrder;
|
||||
}
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
package service;
|
||||
|
||||
import extraction.DimensionIdentifier;
|
||||
import extraction.PositionedTextStripper;
|
||||
import extraction.TextGrouper;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import model.DimensionResult;
|
||||
import model.TextElement;
|
||||
import model.TextGroup;
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class PdfExtractionService {
|
||||
|
||||
private final TextGrouper textGrouper;
|
||||
private final DimensionIdentifier dimensionIdentifier;
|
||||
|
||||
public PdfExtractionService(TextGrouper textGrouper,
|
||||
DimensionIdentifier dimensionIdentifier) {
|
||||
this.textGrouper = textGrouper;
|
||||
this.dimensionIdentifier = dimensionIdentifier;
|
||||
}
|
||||
|
||||
public ExtractionResult extractDimensions(Path pdfPath, String fileId) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
File file = pdfPath.toFile();
|
||||
|
||||
try (PDDocument document = Loader.loadPDF(file)) {
|
||||
int totalPages = document.getNumberOfPages();
|
||||
|
||||
PositionedTextStripper stripper = new PositionedTextStripper();
|
||||
stripper.setSortByPosition(true);
|
||||
StringWriter writer = new StringWriter();
|
||||
stripper.writeText(document, writer);
|
||||
List<TextElement> allElements = stripper.getElements();
|
||||
|
||||
List<TextGroup> groups = textGrouper.groupTextElements(allElements);
|
||||
List<DimensionResult> dimensions = dimensionIdentifier.identifyDimensions(groups);
|
||||
|
||||
long elapsed = System.currentTimeMillis() - start;
|
||||
log.info("Extracted {} dimensions from {} in {}ms", dimensions.size(), pdfPath.getFileName(), elapsed);
|
||||
|
||||
return new ExtractionResult(dimensions, totalPages);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 区域提取:不限公差过滤,包含所有尺寸,不保存到数据库
|
||||
*/
|
||||
public List<DimensionResult> extractAllDimensionsForRegion(String pdfUrl) throws IOException {
|
||||
byte[] pdfBytes;
|
||||
try (InputStream in = new URL(pdfUrl).openStream();
|
||||
ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
|
||||
byte[] chunk = new byte[8192];
|
||||
int len;
|
||||
while ((len = in.read(chunk)) != -1) {
|
||||
buffer.write(chunk, 0, len);
|
||||
}
|
||||
pdfBytes = buffer.toByteArray();
|
||||
}
|
||||
try (PDDocument document = Loader.loadPDF(pdfBytes)) {
|
||||
PositionedTextStripper stripper = new PositionedTextStripper();
|
||||
stripper.setSortByPosition(true);
|
||||
StringWriter writer = new StringWriter();
|
||||
stripper.writeText(document, writer);
|
||||
List<TextElement> allElements = stripper.getElements();
|
||||
List<TextGroup> groups = textGrouper.groupTextElements(allElements);
|
||||
return dimensionIdentifier.identifyDimensions(groups, false);
|
||||
}
|
||||
}
|
||||
|
||||
public static class ExtractionResult {
|
||||
private final List<DimensionResult> dimensions;
|
||||
private final int totalPages;
|
||||
|
||||
public ExtractionResult(List<DimensionResult> dimensions, int totalPages) {
|
||||
this.dimensions = dimensions;
|
||||
this.totalPages = totalPages;
|
||||
}
|
||||
|
||||
public List<DimensionResult> getDimensions() { return dimensions; }
|
||||
public int getTotalPages() { return totalPages; }
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
package service;
|
||||
|
||||
|
||||
import model.DimensionResult;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class RegionFilterService {
|
||||
|
||||
public List<DimensionResult> filterByRegion(List<DimensionResult> all,
|
||||
int page, double rx, double ry, double rw, double rh) {
|
||||
return all.stream()
|
||||
.filter(d -> d.getPage() == page)
|
||||
.filter(d -> intersects(d, rx, ry, rw, rh))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private boolean intersects(DimensionResult d, double rx, double ry, double rw, double rh) {
|
||||
return d.getX() + d.getWidth() >= rx
|
||||
&& d.getX() <= rx + rw
|
||||
&& d.getY() + d.getHeight() >= ry
|
||||
&& d.getY() <= ry + rh;
|
||||
}
|
||||
|
||||
private static final Set<String> STANDALONE_SYMBOLS = Set.of("Φ", "φ", "Ø", "∅", "¡¤");
|
||||
|
||||
/**
|
||||
* 将区域筛选后的多条结果合并为一条,按阅读顺序拼接文本、取并集边界框。
|
||||
*/
|
||||
public List<DimensionResult> mergeRegionResults(List<DimensionResult> filtered) {
|
||||
if (filtered == null || filtered.size() <= 1) {
|
||||
return filtered;
|
||||
}
|
||||
|
||||
// 过滤掉独立装饰符号(单字符 Φ/Ø 等)
|
||||
List<DimensionResult> meaningful = filtered.stream()
|
||||
.filter(d -> {
|
||||
String dim = d.getDimension() == null ? "" : d.getDimension().trim();
|
||||
return !STANDALONE_SYMBOLS.contains(dim);
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (meaningful.isEmpty()) {
|
||||
return filtered;
|
||||
}
|
||||
if (meaningful.size() == 1) {
|
||||
return meaningful;
|
||||
}
|
||||
|
||||
// 按阅读顺序排序:Y 升序,X 升序
|
||||
meaningful.sort(Comparator.comparingDouble(DimensionResult::getY)
|
||||
.thenComparingDouble(DimensionResult::getX));
|
||||
|
||||
// 拼接文本
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (DimensionResult d : meaningful) {
|
||||
String dim = d.getDimension() == null ? "" : d.getDimension().trim();
|
||||
sb.append(dim);
|
||||
}
|
||||
|
||||
// 合并 tolerance
|
||||
List<String> tols = meaningful.stream()
|
||||
.map(DimensionResult::getTolerance)
|
||||
.filter(t -> t != null && !t.isEmpty())
|
||||
.collect(Collectors.toList());
|
||||
String mergedTol = tols.isEmpty() ? null : String.join(" / ", tols);
|
||||
|
||||
// 计算并集边界框
|
||||
double minX = meaningful.stream().mapToDouble(DimensionResult::getX).min().orElse(0);
|
||||
double minY = meaningful.stream().mapToDouble(DimensionResult::getY).min().orElse(0);
|
||||
double maxX = meaningful.stream().mapToDouble(d -> d.getX() + d.getWidth()).max().orElse(0);
|
||||
double maxY = meaningful.stream().mapToDouble(d -> d.getY() + d.getHeight()).max().orElse(0);
|
||||
|
||||
DimensionResult merged = new DimensionResult();
|
||||
merged.setDimension(sb.toString());
|
||||
merged.setTolerance(mergedTol);
|
||||
merged.setType("dimension");
|
||||
merged.setX(minX);
|
||||
merged.setY(minY);
|
||||
merged.setWidth(maxX - minX);
|
||||
merged.setHeight(maxY - minY);
|
||||
merged.setPage(meaningful.get(0).getPage());
|
||||
|
||||
return Collections.singletonList(merged);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,170 @@
|
|||
package com.nflg.wms.common.pojo.dto;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
@Data
|
||||
public class QmsInspectionStandardDetailDTO {
|
||||
|
||||
/**
|
||||
* 检验标准ID
|
||||
*/
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 物料ID
|
||||
*/
|
||||
private Long materialId;
|
||||
|
||||
/**
|
||||
* 物料编号
|
||||
*/
|
||||
private String materialNo;
|
||||
|
||||
/**
|
||||
* 物料类别路径名称
|
||||
*/
|
||||
private String materialCategoryCodePathName;
|
||||
|
||||
/**
|
||||
* 物料描述
|
||||
*/
|
||||
private String materialDesc;
|
||||
|
||||
/**
|
||||
* 图号版本号
|
||||
*/
|
||||
private String drawingNoVer;
|
||||
|
||||
/**
|
||||
* 图纸URL
|
||||
*/
|
||||
private String drawingUrl;
|
||||
|
||||
/**
|
||||
* 版本号
|
||||
*/
|
||||
private String version;
|
||||
|
||||
/**
|
||||
* 是否启用
|
||||
*/
|
||||
private Boolean isEnabled;
|
||||
|
||||
/**
|
||||
* 包装方式ID
|
||||
*/
|
||||
private Long packagingMethodId;
|
||||
|
||||
/**
|
||||
* 检验周期
|
||||
*/
|
||||
private Integer inspectionCycle;
|
||||
|
||||
/**
|
||||
* 检测方式字典项ID
|
||||
*/
|
||||
private Long testingMethodDictItemId;
|
||||
|
||||
/**
|
||||
* 检测方式字典项名称
|
||||
*/
|
||||
private String testingMethodDictItemName;
|
||||
|
||||
/**
|
||||
* 抽样方案ID
|
||||
*/
|
||||
private Long samplingPlanId;
|
||||
|
||||
/**
|
||||
* 抽样方案名称
|
||||
*/
|
||||
private String samplingPlanName;
|
||||
|
||||
/**
|
||||
* 检验水平字典项ID
|
||||
*/
|
||||
private Long inspectionLevelDictItemId;
|
||||
|
||||
/**
|
||||
* 检验水平字典项名称
|
||||
*/
|
||||
private String inspectionLevelDictItemName;
|
||||
|
||||
/**
|
||||
* AQL值字典项ID
|
||||
*/
|
||||
private Long aqlPriorityValueId;
|
||||
|
||||
/**
|
||||
* AQL值
|
||||
*/
|
||||
private BigDecimal aqlPriorityValue;
|
||||
|
||||
/**
|
||||
* AQL类型字典项ID
|
||||
*/
|
||||
private Long aqlTypeDictItemId;
|
||||
|
||||
/**
|
||||
* AQL类型字典项名称
|
||||
*/
|
||||
private String aqlTypeDictItemName;
|
||||
|
||||
/**
|
||||
* 发布状态:0-未发布,1-已发布
|
||||
*/
|
||||
private Short publishStatus;
|
||||
|
||||
/**
|
||||
* 发布人ID
|
||||
*/
|
||||
private Long publishUserId;
|
||||
|
||||
/**
|
||||
* 发布人姓名
|
||||
*/
|
||||
private String publishUserName;
|
||||
|
||||
/**
|
||||
* 发布时间
|
||||
*/
|
||||
private LocalDateTime publishTime;
|
||||
|
||||
/**
|
||||
* 所属IQE姓名
|
||||
*/
|
||||
private String iqeName;
|
||||
|
||||
/**
|
||||
* 创建人ID
|
||||
*/
|
||||
private Long createUserId;
|
||||
|
||||
/**
|
||||
* 创建人姓名
|
||||
*/
|
||||
private String createUserName;
|
||||
|
||||
/**
|
||||
* 创建时间
|
||||
*/
|
||||
private LocalDateTime createTime;
|
||||
|
||||
/**
|
||||
* 更新人ID
|
||||
*/
|
||||
private Long updateUserId;
|
||||
|
||||
/**
|
||||
* 更新人姓名
|
||||
*/
|
||||
private String updateUserName;
|
||||
|
||||
/**
|
||||
* 更新时间
|
||||
*/
|
||||
private LocalDateTime updateTime;
|
||||
}
|
||||
|
|
@ -3,9 +3,9 @@ package com.nflg.wms.repository.mapper;
|
|||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.baomidou.mybatisplus.core.metadata.IPage;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import com.nflg.wms.common.pojo.dto.QmsInspectionStandardDetailDTO;
|
||||
import com.nflg.wms.common.pojo.qo.QmsInspectionStandardSearchQO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsIncomingInspectionTaskCheckItemVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardDetailVO;
|
||||
import com.nflg.wms.common.pojo.vo.QmsInspectionStandardVO;
|
||||
import com.nflg.wms.repository.entity.QmsInspectionStandard;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
|
|
@ -25,7 +25,7 @@ public interface QmsInspectionStandardMapper extends BaseMapper<QmsInspectionSta
|
|||
/**
|
||||
* 根据ID查询检验标准详情(关联物料等信息)
|
||||
*/
|
||||
QmsInspectionStandardDetailVO getDetailById(@Param("id") Long id);
|
||||
QmsInspectionStandardDetailDTO getDetailById(@Param("id") Long id);
|
||||
|
||||
List<QmsIncomingInspectionTaskCheckItemVO> getItemsForCheck(Long id);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@
|
|||
<!--
|
||||
根据ID查询检验标准详情(关联物料等信息)
|
||||
-->
|
||||
<select id="getDetailById" resultType="com.nflg.wms.common.pojo.vo.QmsInspectionStandardDetailVO">
|
||||
<select id="getDetailById" resultType="com.nflg.wms.common.pojo.dto.QmsInspectionStandardDetailDTO">
|
||||
SELECT
|
||||
s.id,
|
||||
s.material_id AS materialId,
|
||||
|
|
|
|||
6
pom.xml
6
pom.xml
|
|
@ -25,6 +25,7 @@
|
|||
<module>nflg-wms-srm-receive</module>
|
||||
<module>nflg-wms-shipment</module>
|
||||
<module>nflg-qms-admin</module>
|
||||
<module>nflg-qms-pdf-extract</module>
|
||||
</modules>
|
||||
<properties>
|
||||
<java.version>17</java.version>
|
||||
|
|
@ -125,6 +126,11 @@
|
|||
<artifactId>nflg-wms-repository</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.nflg</groupId>
|
||||
<artifactId>nflg-qms-pdf-extract</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>com.mysql</groupId>-->
|
||||
<!-- <artifactId>mysql-connector-j</artifactId>-->
|
||||
|
|
|
|||
Loading…
Reference in New Issue