## HR Article classification Schema without ML
company_names = ['Google', 'Microsoft', 'Apple']
# NOTE(review): 'CFO' is listed twice (probably a typo for another title).
# The scorer below de-duplicates, so one mention is not counted twice.
job_titles = ['CFO', 'CEO', 'CFO']
text = 'Microsoft recently hired such and such person AS CFO'
SOME_THRESHOLD = 20
# Substrings whose presence suggests a hiring/departure story.
HR_KEYWORDS = ('hire', 'hiring', 'join', 'joining', 'laying off', 'resign')


def hr_confidence(article):
    """Return a rule-based score for how much *article* looks like HR news.

    The score is built from three case-sensitive substring checks:

    * +10 once if any entry of ``HR_KEYWORDS`` occurs in the text,
    * +10 for each distinct entry of ``job_titles`` found in the text,
    * +10 for each distinct entry of ``company_names`` found in the text.

    Args:
        article: The article text to score.

    Returns:
        The accumulated integer score (0 when nothing matches).
    """
    score = 0

    ### HR article keywords
    # Each keyword is tested against the text explicitly; a bare string
    # literal inside an `or` chain is always truthy and would match anything.
    if any(keyword in article for keyword in HR_KEYWORDS):
        score += 10

    # Job titles are a good signal: HR articles generally state the position
    # of the new hire. set() keeps a duplicated list entry from counting twice.
    score += 10 * sum(title in article for title in set(job_titles))

    # A company name is a good sign that the article is in the business domain.
    score += 10 * sum(name in article for name in set(company_names))

    return score


confidence = hr_confidence(text)
label = 1 if confidence >= SOME_THRESHOLD else 0  # 1 means "HR article"

if label == 1:
    print('-- {} -- is an HR article'.format(text))
else:
    print('-- {} -- is not an HR article'.format(text))

# Given a trained classifier, vectorizer and feature-selection step, this is
# how one may classify an article with scikit-learn. `vectorizer`,
# `feat_selector` and `clf` are not defined in this snippet; they must already
# exist and be fitted on labeled data before these lines run.
## Convert into a vector
# transform() expects an iterable of documents, so the single article is
# wrapped in a list. .toarray() assumes the vectorizer returns a SciPy sparse
# matrix (as CountVectorizer does) -- TODO confirm for the vectorizer in use.
count = vectorizer.transform([text]).toarray()
## Do feature selection
selected_feats = feat_selector.transform(count)
## Algorithm to classify
pred_class = clf.predict(selected_feats)
# predict() returns one label per input row and there is exactly one row here.
# Report the model's prediction, not the rule-based `label`.
# Assumes the classifier was trained with 1 meaning "HR article" -- verify.
if pred_class[0] == 1:
    print('-- {} -- is an HR article'.format(text))
else:
    print('-- {} -- is not an HR article'.format(text))