Skip to content

Commit

Permalink
Refactor: counter를 객체로 캡슐화
Browse files Browse the repository at this point in the history
  • Loading branch information
yoonseon12 committed Jan 29, 2024
1 parent 69060c7 commit ba8fa0a
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import static kernel.jdon.util.StringUtil.*;

import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

Expand All @@ -28,6 +27,8 @@
import kernel.jdon.crawler.wanted.search.JobSearchJobPosition;
import kernel.jdon.crawler.wanted.search.JobSearchLocation;
import kernel.jdon.crawler.wanted.search.JobSearchSort;
import kernel.jdon.crawler.wanted.service.infrastructure.JobDetailProcessingCounter;
import kernel.jdon.crawler.wanted.service.infrastructure.JobListProcessingCounter;
import kernel.jdon.crawler.wanted.skill.BackendSkillType;
import kernel.jdon.crawler.wanted.skill.FrontendSkillType;
import kernel.jdon.crawler.wanted.skill.SkillType;
Expand Down Expand Up @@ -66,35 +67,31 @@ private JobCategory findByJobPosition(final JobSearchJobPosition jobPosition) {

private void processJobDetails(final JobSearchJobPosition jobPosition, final JobCategory jobCategory,
final Set<Long> fetchJobIds) throws InterruptedException {
final int thresholdCount = scrapingWantedConfig.getSleep().getThresholdCount();
final int sleepTimeMillis = scrapingWantedConfig.getSleep().getTimeMillis();
final int failLimitCount = scrapingWantedConfig.getLimit().getFailCount();
int sleepCounter = 0;
int consecutiveFailCount = 0;
JobDetailProcessingCounter jobProcessingCounter = new JobDetailProcessingCounter(scrapingWantedConfig);

for (Long detailId : fetchJobIds) {
if (consecutiveFailCount == failLimitCount) {
for (Long jobDetailId : fetchJobIds) {
if (jobProcessingCounter.isBreakRequired()) {
break;
}
if (isJobDetailExist(jobCategory, detailId)) {
consecutiveFailCount++;
if (isJobDetailExist(jobCategory, jobDetailId)) {
jobProcessingCounter.incrementFailCount();
continue;
}
if (sleepCounter == thresholdCount) {
Thread.sleep(sleepTimeMillis);
sleepCounter = 0;
if (jobProcessingCounter.isSleepRequired()) {
performSleep();
jobProcessingCounter.resetSleepCounter();
}

consecutiveFailCount = 0; // 연속으로 JD가 추출되지 않았다면 변수 초기화
jobProcessingCounter.resetFailCount(); // 연속으로 JD가 추출되지 않았다면 초기화

createJobDetail(jobPosition, jobCategory, detailId);
createJobDetail(jobPosition, jobCategory, jobDetailId);

sleepCounter++;
jobProcessingCounter.incrementSleepCounter();
}
}

private void createJobDetail(final JobSearchJobPosition jobPosition, final JobCategory jobCategory, final Long detailId) {
WantedJobDetailResponse jobDetailResponse = getJobDetail(jobCategory, detailId);
private void createJobDetail(final JobSearchJobPosition jobPosition, final JobCategory jobCategory, final Long jobDetailId) {
WantedJobDetailResponse jobDetailResponse = getJobDetail(jobCategory, jobDetailId);
WantedJd savedWantedJd = createWantedJd(jobDetailResponse);

List<WantedJobDetailResponse.WantedSkill> wantedDetailSkillList =
Expand All @@ -104,8 +101,8 @@ private void createJobDetail(final JobSearchJobPosition jobPosition, final JobCa
createWantedJdSkill(jobPosition, jobCategory, savedWantedJd, wantedDetailSkillList);
}

private boolean isJobDetailExist(final JobCategory jobCategory, final Long detailId) {
return wantedJdRepository.existsByJobCategoryAndDetailId(jobCategory, detailId);
private boolean isJobDetailExist(final JobCategory jobCategory, final Long jobDetailId) {
return wantedJdRepository.existsByJobCategoryAndDetailId(jobCategory, jobDetailId);
}

private void createSkillHistory(final JobCategory jobCategory, final WantedJd wantedJd,
Expand Down Expand Up @@ -149,17 +146,17 @@ private Skill findByJobCategoryIdAndKeyword(final JobCategory jobCategory, final
.orElseThrow(() -> new IllegalArgumentException("해당하는 기술스택이 없음 -> 데이터베이스와 동기화되지 않은 키워드"));
}

private WantedJobDetailResponse getJobDetail(final JobCategory jobCategory, final Long detailId) {
WantedJobDetailResponse wantedJobDetailResponse = createFetchJobDetail(detailId);
addWantedJobDetailResponse(wantedJobDetailResponse, jobCategory, detailId);
private WantedJobDetailResponse getJobDetail(final JobCategory jobCategory, final Long jobDetailId) {
WantedJobDetailResponse wantedJobDetailResponse = createFetchJobDetail(jobDetailId);
addWantedJobDetailResponse(wantedJobDetailResponse, jobCategory, jobDetailId);

return wantedJobDetailResponse;
}

private void addWantedJobDetailResponse(final WantedJobDetailResponse jobDetailResponse, final JobCategory jobCategory,
final Long detailId) {
final Long jobDetailId) {
final String jobUrlDetail = scrapingWantedConfig.getUrl().getDetail();
jobDetailResponse.addDetailInfo(joinToString(jobUrlDetail, detailId), jobCategory);
jobDetailResponse.addDetailInfo(joinToString(jobUrlDetail, jobDetailId), jobCategory);
}

private WantedJd createWantedJd(final WantedJobDetailResponse jobDetailResponse) {
Expand All @@ -174,28 +171,25 @@ private WantedJobDetailResponse createFetchJobDetail(final Long jobId) {
}

private Set<Long> fetchJobIdList(final JobSearchJobPosition jobPosition) {
final int maxFetchJDListSize = scrapingWantedConfig.getMaxFetchJdList().getSize();
final int maxFetchJDListOffset = scrapingWantedConfig.getMaxFetchJdList().getOffset();
int offset = 0;
Set<Long> fetchJobIds = new LinkedHashSet<>();
JobListProcessingCounter jobListCounter = new JobListProcessingCounter(scrapingWantedConfig);

while (fetchJobIds.size() < maxFetchJDListSize) {
WantedJobListResponse jobListResponse = fetchJobList(jobPosition, offset);
while (jobListCounter.isBelowSizeLimit()) {
WantedJobListResponse jobListResponse = fetchJobList(jobPosition, jobListCounter.getOffset());

List<Long> jobIdList = jobListResponse.getData().stream()
.map(WantedJobListResponse.Data::getId)
.toList();

fetchJobIds.addAll(jobIdList);
jobListCounter.addFetchedJobIds(jobIdList);

if (jobIdList.size() < maxFetchJDListOffset) {
if (jobListCounter.isBelowOffsetLimit(jobIdList.size())) {
break;
}

offset += maxFetchJDListOffset;
jobListCounter.incrementOffset();
}

return fetchJobIds;
return jobListCounter.getFetchedJobIds();
}

private WantedJobListResponse fetchJobList(final JobSearchJobPosition jobPosition, final int offset) {
Expand All @@ -220,4 +214,8 @@ private String createJobListUrl(final JobSearchJobPosition jobPosition, final in
);
}

/**
 * Pauses the crawling thread for the configured throttle interval.
 *
 * @throws InterruptedException if the current thread is interrupted while sleeping
 */
private void performSleep() throws InterruptedException {
final int sleepTimeMillis = scrapingWantedConfig.getSleep().getTimeMillis();
Thread.sleep(sleepTimeMillis);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package kernel.jdon.crawler.wanted.service.infrastructure;

import kernel.jdon.crawler.config.ScrapingWantedConfig;

/**
 * Mutable counter state for the job-detail crawling loop.
 *
 * <p>Tracks two independent counts: how many detail requests have been sent
 * since the last throttle pause, and how many consecutive already-crawled
 * details have been seen (used to detect that the remaining list is stale).
 * Limits are read once from {@code ScrapingWantedConfig} at construction.
 *
 * <p>Not thread-safe; intended for use by a single crawling thread.
 */
public class JobDetailProcessingCounter {
	/** Requests to send between throttle pauses. */
	private final int thresholdCount;
	/** Consecutive already-crawled details that trigger an early stop. */
	private final int failLimitCount;
	private int sleepCount = 0;
	private int consecutiveFailCount = 0;

	public JobDetailProcessingCounter(ScrapingWantedConfig scrapingWantedConfig) {
		this.thresholdCount = scrapingWantedConfig.getSleep().getThresholdCount();
		this.failLimitCount = scrapingWantedConfig.getLimit().getFailCount();
	}

	/** Records one detail request sent since the last pause. */
	public void incrementSleepCounter() {
		sleepCount++;
	}

	/** Resets the request count after a throttle pause has been taken. */
	public void resetSleepCounter() {
		sleepCount = 0;
	}

	/** Records one consecutive already-crawled (skipped) detail. */
	public void incrementFailCount() {
		consecutiveFailCount++;
	}

	/** Clears the failure streak once a fresh detail is extracted. */
	public void resetFailCount() {
		consecutiveFailCount = 0;
	}

	/**
	 * Whether the caller should pause before the next request.
	 * Uses {@code >=} rather than {@code ==} so the condition still fires
	 * if the counter ever overshoots or the configured threshold is 0.
	 */
	public boolean isSleepRequired() {
		return sleepCount >= thresholdCount;
	}

	/**
	 * Whether the crawl should stop because the consecutive-failure limit
	 * was reached. Defensive {@code >=} for the same reason as above.
	 */
	public boolean isBreakRequired() {
		return consecutiveFailCount >= failLimitCount;
	}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package kernel.jdon.crawler.wanted.service.infrastructure;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import kernel.jdon.crawler.config.ScrapingWantedConfig;

/**
 * Paging state for the job-list crawling loop: the current request offset and
 * the set of job ids accumulated so far (insertion-ordered, de-duplicated).
 * Limits are read once from {@code ScrapingWantedConfig} at construction.
 *
 * <p>Not thread-safe; intended for use by a single crawling thread.
 */
public class JobListProcessingCounter {
	/** Maximum number of job ids to accumulate in total. */
	private final int maxFetchJDListSize;
	/** Page size; also the amount the offset advances per request. */
	private final int maxFetchJDListOffset;
	private int offset = 0;
	// final: the set is mutated in place and never reassigned.
	private final Set<Long> fetchedJobIds = new LinkedHashSet<>();

	public JobListProcessingCounter(ScrapingWantedConfig scrapingWantedConfig) {
		this.maxFetchJDListSize = scrapingWantedConfig.getMaxFetchJdList().getSize();
		this.maxFetchJDListOffset = scrapingWantedConfig.getMaxFetchJdList().getOffset();
	}

	/** Whether more ids may be fetched without exceeding the total size limit. */
	public boolean isBelowSizeLimit() {
		return fetchedJobIds.size() < maxFetchJDListSize;
	}

	/**
	 * Whether the last page was short (fewer ids than the page size),
	 * meaning no further pages exist and the caller should stop.
	 */
	public boolean isBelowOffsetLimit(int jobIdListSize) {
		return jobIdListSize < maxFetchJDListOffset;
	}

	/** Advances the offset to the next page. */
	public void incrementOffset() {
		offset += maxFetchJDListOffset;
	}

	public int getOffset() {
		return this.offset;
	}

	/** Returns the live accumulated id set (insertion order preserved). */
	public Set<Long> getFetchedJobIds() {
		return fetchedJobIds;
	}

	/** Adds a page of ids; duplicates are silently ignored by the set. */
	public void addFetchedJobIds(List<Long> jobIdList) {
		fetchedJobIds.addAll(jobIdList);
	}
}

0 comments on commit ba8fa0a

Please sign in to comment.